In [65]:
import numpy as np
# Load the three embedding matrices, one np.loadtxt call per TSV file,
# parsing every value as float.
left_vectors, right_vectors, center_vectors = (
    np.loadtxt(tsv_path, dtype=float)
    for tsv_path in ("left_vectors.tsv", "right_vectors.tsv", "center_vectors.tsv")
)

In [66]:
def _read_metadata(path):
    """Return the lines of ``path`` as a list of strings without newlines.

    Only a trailing newline is removed from each line. Slicing with
    ``[:-1]`` would also cut the last real character off a final line
    that is not newline-terminated.
    """
    with open(path, "r") as file:
        return [line.rstrip("\n") for line in file]


# One entry per line; the list position of an entry is used as the row
# index into the matching *_vectors array by the *_embedding lookups.
left_metadata = _read_metadata("left_metadata.tsv")
right_metadata = _read_metadata("right_metadata.tsv")
center_metadata = _read_metadata("center_metadata.tsv")

In [67]:
def left_embedding(word):
    """Return the entry of ``left_vectors`` stored for ``word``.

    The position is the first occurrence of ``word`` in ``left_metadata``;
    ``list.index`` raises ``ValueError`` if the word is not present.
    """
    row = left_metadata.index(word)
    return left_vectors[row]
def right_embedding(word):
    """Return the entry of ``right_vectors`` stored for ``word``.

    The position is the first occurrence of ``word`` in ``right_metadata``;
    ``list.index`` raises ``ValueError`` if the word is not present.
    """
    row = right_metadata.index(word)
    return right_vectors[row]
def center_embedding(word):
    """Return the entry of ``center_vectors`` stored for ``word``.

    The position is the first occurrence of ``word`` in ``center_metadata``;
    ``list.index`` raises ``ValueError`` if the word is not present.
    """
    row = center_metadata.index(word)
    return center_vectors[row]

In [68]:
import torch.nn as nn
import torch

def normalize(word_vec):
    """Scale ``word_vec`` by its norm as computed by ``np.linalg.norm``.

    A vector whose norm is exactly 0 is returned unchanged (the same
    object), which avoids a division by zero.
    """
    magnitude = np.linalg.norm(word_vec)
    return word_vec / magnitude if magnitude != 0 else word_vec

def cos_sim(x1, x2):
    """Cosine similarity of two numpy vectors, returned as a torch tensor.

    Each input is passed through ``normalize``, converted with
    ``torch.from_numpy``, and compared with ``nn.CosineSimilarity`` along
    dimension 0.
    """
    unit_a = torch.from_numpy(normalize(x1))
    unit_b = torch.from_numpy(normalize(x2))
    similarity = nn.CosineSimilarity(dim=0)
    return similarity(unit_a, unit_b)


In [69]:
def compute_polarity(word):
    """Return ``cos_sim`` of the left and right embeddings of ``word``.

    Any exception raised by ``left_embedding`` or ``right_embedding``
    (e.g. for a word missing from one vocabulary) propagates to the caller.
    """
    left_vec = left_embedding(word)
    right_vec = right_embedding(word)
    return cos_sim(left_vec, right_vec)


In [70]:
# Score every word of the left vocabulary with the cosine similarity between
# its left and right embeddings, stored as (word, similarity) pairs.
polarities = []
skipped_words = []  # words that could not be scored
for word in left_metadata:
    try:
        polarities.append((word, float(compute_polarity(word))))
    except ValueError:
        # list.index raises ValueError when the word has no entry in the
        # right vocabulary. Only that case is skipped: a bare `except` would
        # also swallow KeyboardInterrupt and hide genuine errors.
        skipped_words.append(word)

In [71]:
# Ascending by similarity: the 20 pairs with the lowest left/right cosine
# similarity come first.
print("The most polarized words:")
ascending_by_similarity = sorted(polarities, key=lambda pair: pair[1])
print(ascending_by_similarity[:20])

The most polarized words:
[('independents', -0.33137445492034795), ('refusing', -0.27016478278007794), ('proof', -0.2696272574429734), ('ticket', -0.2563066682687268), ('front', -0.2546644095262576), ('statistics', -0.24401645701046681), ('2003', -0.23589265368008608), ('dakota', -0.23425577306044415), ('institute', -0.23176018429762715), ('increasing', -0.2314189935904756), ('provide', -0.23029745115339148), ('establish', -0.223101310546191), ('journalist', -0.22199345665023412), ('products', -0.2155304280852802), ('rubio', -0.2124818594080773), ('cbs', -0.21223217905286174), ('maine', -0.2103628847498687), ('stick', -0.20704330888725292), ('type', -0.20634811576975878), ('lead', -0.20442447804244332)]


In [72]:
# Descending by similarity: the 20 pairs with the highest left/right cosine
# similarity come first.
print("The least polarized words:")
descending_by_similarity = sorted(polarities, key=lambda pair: pair[1], reverse=True)
print(descending_by_similarity[:20])

The least polarized words:
[('basic', 0.3475867057975009), ('robert', 0.3263069936615713), ('again', 0.31756580980308663), ('rhetoric', 0.30845891918928575), ('governors', 0.3079882937589179), ('irs', 0.30540782363014296), ('art', 0.30202571150908625), ('preserve', 0.2946736823351504), ('critic', 0.2881797486255515), ('peace', 0.286346555728567), ('second', 0.2849825950080656), ('operations', 0.27759867673627736), ('workers', 0.26273140499756314), ('to', 0.25987834877237515), ('louisiana', 0.2543764285822426), ('harm', 0.2533405770307847), ('moved', 0.2509741036312394), ('vladimir', 0.24778741869642257), ('loyal', 0.2469642211554301), ('december', 0.2454362571320371)]


In [73]:
# Spot-check two vocabulary entries by position.
# NOTE(review): why indices 3773 and 1904 were chosen is not evident here.
tuple(left_metadata[position] for position in (3773, 1904))

('extensive', 'writes')