In [1]:
import numpy as np
# Load the three embedding matrices from whitespace-delimited text files,
# parsing every field as a float. Files are read in left, right, center order.
left_vectors, right_vectors, center_vectors = (
    np.loadtxt(path, dtype=float)
    for path in ("left_vectors.tsv", "right_vectors.tsv", "center_vectors.tsv")
)

In [2]:
def _read_metadata_lines(path):
    """Return the lines of the text file at ``path`` without their newline.

    Only a trailing newline is stripped. Slicing off the last character
    unconditionally would truncate the final entry whenever the file does
    not end with a newline, so the strip is done by character, not by
    position.

    Args:
        path: Path of the text file to read, one entry per line.

    Returns:
        A list with one string per line, in file order.
    """
    # NOTE(review): the file is opened with the platform default encoding,
    # as before; confirm whether an explicit encoding should be passed.
    with open(path, "r") as file:
        return [line.rstrip("\n") for line in file]


left_metadata = _read_metadata_lines("left_metadata.tsv")
right_metadata = _read_metadata_lines("right_metadata.tsv")
center_metadata = _read_metadata_lines("center_metadata.tsv")

In [3]:
def left_embedding(word):
    """Return the row of ``left_vectors`` that corresponds to ``word``.

    The row is selected by the position of the first occurrence of ``word``
    in ``left_metadata``; ``list.index`` raises ``ValueError`` when the word
    is not present.
    """
    row = left_metadata.index(word)
    return left_vectors[row]
def right_embedding(word):
    """Return the row of ``right_vectors`` that corresponds to ``word``.

    The row is selected by the position of the first occurrence of ``word``
    in ``right_metadata``; ``list.index`` raises ``ValueError`` when the word
    is not present.
    """
    row = right_metadata.index(word)
    return right_vectors[row]
def center_embedding(word):
    """Return the row of ``center_vectors`` that corresponds to ``word``.

    The row is selected by the position of the first occurrence of ``word``
    in ``center_metadata``; ``list.index`` raises ``ValueError`` when the
    word is not present.
    """
    row = center_metadata.index(word)
    return center_vectors[row]

In [4]:
import torch.nn as nn
import torch

def normalize(word_vec):
    """Scale ``word_vec`` so that its ``np.linalg.norm`` becomes 1.

    A vector whose norm is exactly zero cannot be rescaled, so it is handed
    back unchanged (the very same object) instead of dividing by zero.

    Args:
        word_vec: NumPy array to rescale.

    Returns:
        ``word_vec`` divided by its norm, or ``word_vec`` itself when the
        norm is zero.
    """
    length = np.linalg.norm(word_vec)
    return word_vec if length == 0 else word_vec / length

def cos_sim(x1, x2):
    """Return the cosine similarity of two NumPy arrays as a torch tensor.

    Each input is first passed through ``normalize`` and converted with
    ``torch.from_numpy``; the similarity is then taken along dimension 0
    with ``nn.CosineSimilarity``.

    Args:
        x1: First NumPy array.
        x2: Second NumPy array, compared against ``x1``.

    Returns:
        The tensor produced by ``nn.CosineSimilarity(dim=0)``.
    """
    unit_first = torch.from_numpy(normalize(x1))
    unit_second = torch.from_numpy(normalize(x2))
    return nn.CosineSimilarity(dim=0)(unit_first, unit_second)


In [5]:
def compute_polarity(word):
    """Return the cosine similarity between the left and right embeddings of ``word``.

    The value comes straight from ``cos_sim``. Any error raised while
    looking up either embedding propagates to the caller.
    """
    left_vec = left_embedding(word)
    right_vec = right_embedding(word)
    return cos_sim(left_vec, right_vec)


In [6]:
# Score every word of the left vocabulary by the similarity of its left and
# right embeddings. Words that cannot be looked up are skipped, but they are
# recorded in `skipped_words` so the omission stays visible.
polarities = []
skipped_words = []
for word in left_metadata:
    try:
        score = float(compute_polarity(word))
    except ValueError:
        # `list.index` raises ValueError when the word is absent from one of
        # the vocabularies. Only that case is skipped; any other error (or a
        # keyboard interrupt) now propagates instead of being swallowed.
        skipped_words.append(word)
        continue
    polarities.append((word, score))

In [10]:
# Rank ascending by the similarity score (second tuple element) and show the
# 30 lowest-scoring (word, score) pairs.
print("The most polarized words:")
print(
    sorted(
        polarities,
        key=lambda entry: entry[1],
    )[:30]
)

The most polarized words:
[('shock', -0.16165042171511027), ('sept', -0.15465793415975654), ('wear', -0.15373365649515847), ('oppose', -0.14771471967310196), ('waiver', -0.14656792222543638), ('emerge', -0.14577900209558248), ('95', -0.14560813091109776), ('israeli', -0.14264038413308422), ('preserve', -0.14094034448799903), ('nation’s', -0.140594931937354), ('broken', -0.13869672208643222), ('“to', -0.1366152340850802), ('signatures', -0.13570636050720555), ('as', -0.13557804620232505), ('punish', -0.135280669101486), ('bullet', -0.13519992928920135), ('biased', -0.1349925657233017), ('amendments', -0.13393766602598609), ('services', -0.1337586971887289), ('pollster', -0.13241387573242216), ('explosion', -0.13164113266212024), ('gain', -0.1309050696440565), ('constituency', -0.13016508548110708), ('thompson', -0.12927168568348668), ('centers', -0.12914537490885622), ('mcmullin', -0.12891626534741102), ('let’s', -0.12741980020052956), ('kids', -0.12557560759133155), ('collusion', -0.12

In [11]:
# Rank descending by the similarity score (second tuple element) and show the
# 30 highest-scoring (word, score) pairs.
print("The least polarized words:")
print(
    sorted(
        polarities,
        key=lambda entry: entry[1],
        reverse=True,
    )[:30]
)

The least polarized words:
[('roberts', 0.17176801502011618), ('charles', 0.16426862501836845), ('ahead', 0.14447660445220384), ('terminal', 0.13965055481207103), ('soldier', 0.1384593199085538), ('average', 0.1382846175239144), ('wasn’t', 0.13383403796605292), ('entity', 0.133502255766015), ('counties', 0.1334761163753366), ('think', 0.13277060797012805), ('uphold', 0.1321175231244901), ('thomas', 0.1314071174031392), ('blumenthal', 0.12927196470459815), ('rose', 0.12916251420641184), ('ahca', 0.12853442042912355), ('looking', 0.12789618263472957), ('instance', 0.12719656352534595), ('budgets', 0.12466873049556915), ('fighter', 0.12306462702743909), ('fewer', 0.12059855967496863), ('she', 0.11929752666986708), ('significantly', 0.11880695325445738), ('stand', 0.11713917096980365), ('moon', 0.11706267682158018), ('recounts', 0.1168227873160246), ('1993', 0.11613757103163516), ('animal', 0.1157013737657889), ('fit', 0.11557157069730967), ('screening', 0.11530995486940411), ('tiny', 0.11