# Word Sense Disambiguation using a Sensegram

## Imports and Initializations

We import `numpy` for working with arrays and `pprint` for printing nested data structures in a readable form.

In [1]:
import numpy as np
import pprint

# Shared pretty-printer (2-space indent) used to dump the loaded dictionaries.
pp = pprint.PrettyPrinter(
    indent=2,
)

## Loading the Sensegram

In [2]:
SENSEGRAM_PATH = "/Users/sounak/Documents/clg/nlp/nlp-projects/data/sensegrams_of_wikipedia_cluster"
# Number of rows to read from the top of the file (raise it to load more).
SENSEGRAM_MAX_LINES = 50

# (word, POS tag) -> list of vectors (np.ndarray), in the order the rows appear.
sense_vecs = {}
# Every POS tag encountered in the rows that were read.
pos_tags = set()

# `with` guarantees the handle is closed even if a row fails to parse, and
# iterating over the handle streams rows instead of loading the whole file
# into memory just to keep the first few.
with open(SENSEGRAM_PATH, 'r') as f:
    for line_no, line in enumerate(f):
        if line_no >= SENSEGRAM_MAX_LINES:
            break

        # Row layout is tab-separated: "word#POS", an index field, a vector literal.
        t = line.split('\t')

        # Split on the LAST '#' so a token that itself contains '#' still
        # yields exactly (word, pos).
        word, pos = t[0].rsplit('#', 1)
        pos_tags.add(pos)
        key = (word, pos)

        # An index of '0' starts a fresh list for this key
        # (presumably the first sense of the word -- confirm against the data format).
        if t[1] == '0':
            sense_vecs[key] = []

        # NOTE(review): eval() executes arbitrary code found in the data file.
        # Only run this on a trusted file; if the third field is always a plain
        # list literal, ast.literal_eval would be the safe replacement.
        vector = np.array(eval(t[2]))

        # setdefault keeps rows whose '0' entry was not seen (e.g. cut off by
        # the row limit or missing) from raising KeyError.
        sense_vecs.setdefault(key, []).append(vector)

pp.pprint(sense_vecs)

{ ('alive', 'JJ'): [ array([-1.59063106e-01,  1.10649628e-01,  3.47384148e-02, -3.25175424e-02,
        2.60145298e-03,  5.72741343e-02,  1.70078851e-02,  3.12158184e-01,
        1.13559698e-01, -9.27139072e-01,  7.10475325e-02, -1.88909053e-02,
       -7.35222350e-02,  9.77083318e-03,  1.88126679e-01,  2.45764709e-02,
       -5.74707324e-02, -2.36559028e-02, -2.10848420e-01,  3.42515297e-01,
       -1.30315754e-01,  2.72998521e-01,  1.27925372e-01, -1.24347534e-01,
       -1.15588574e-01, -9.65191186e-02,  6.49304999e-02,  3.18288807e-02,
        2.36995031e-01,  2.11731644e-01, -1.49647304e-02,  3.52423252e-01,
       -3.66150055e-01, -2.37137657e-01, -1.29556232e-01, -2.54648548e-01,
        5.44347884e-01,  1.04886639e-01,  1.14384377e-01,  1.27452830e-01,
        3.24091672e-01, -3.81147447e-01,  1.09404917e-01, -3.31970615e-02,
        6.29102486e-02,  3.02109972e-02,  3.35712453e-01, -2.38386016e-02,
       -1.83925595e-01, -4.85744819e-02, -4.61066330e-02, -2.15525284e-01,
    

  ('live', 'NN'): [ array([ 0.47889   , -0.39152002, -0.11996   ,  0.062961  , -0.16415   ,
       -0.21241   , -0.50242   ,  0.0023416 ,  0.67234   ,  1.1479    ,
        0.045305  ,  0.44034   , -0.27305   , -0.90692   ,  0.097391  ,
        0.60718   ,  0.34625   ,  0.53584   ,  0.20981999,  0.46927   ,
       -0.63331   , -0.40731   , -0.08518   , -0.48626   , -0.089702  ,
       -0.14191   ,  0.21615   ,  0.61224   , -0.41363004, -0.6939    ,
        0.1258    ,  0.07667399,  1.3246999 ,  0.51735   ,  0.36795   ,
        0.29125   , -0.31452   , -0.15412   ,  0.0082182 ,  0.39155   ,
        0.24385   ,  0.25821   , -0.15278   , -0.96564   , -0.18687   ,
        0.10497   ,  0.51135   , -0.11984   ,  0.092575  ,  0.23587   ,
       -0.27206   ,  0.55104   , -0.26553   , -0.35069   , -0.14272   ,
       -0.63484   , -0.28481   , -0.3151    ,  0.61542   ,  0.26223   ,
       -0.2577    , -0.13471   , -0.71835   , -0.36596   ,  0.38334998,
        0.064157  , -0.059127  , -0.40507   

## Loading the GloVe Model

In [3]:
GLOVE_PATH = "/Users/sounak/Documents/clg/nlp/nlp-projects/data/glove.6B.300d.txt"
# Number of rows to read from the top of the file (raise it to load more).
GLOVE_MAX_LINES = 50

# token -> embedding vector (np.ndarray of floats).
word_vecs = {}

# Not used in this cell; kept because later cells may rely on the name existing.
words = {}

# `with` closes the handle even if a row fails to parse; iterating over the
# handle streams rows instead of reading the entire file via readlines()
# only to keep the first few.
with open(GLOVE_PATH, 'r') as f:
    for line_no, line in enumerate(f):
        if line_no >= GLOVE_MAX_LINES:
            break

        # Row layout is space-separated: the token followed by its components.
        t = line.split(' ')
        # float() tolerates the trailing newline left on the last component.
        word_vecs[t[0]] = np.array([float(component) for component in t[1:]])

pp.pprint(word_vecs)

{ '"': array([ 0.6947   ,  0.22184  ,  0.10526  ,  0.012382 , -0.2558   ,
       -0.32645  , -0.48287  ,  0.51755  , -0.0872   , -2.0289   ,
        0.35021  ,  0.045363 , -0.58554  , -0.16147  ,  0.35592  ,
       -0.12698  , -0.389    ,  0.027275 ,  0.10101  ,  0.010271 ,
        0.30711  ,  0.32771  ,  0.11769  ,  0.24432  ,  0.23224  ,
        0.88493  ,  0.084191 ,  0.019132 ,  0.41142  ,  0.17872  ,
        0.28685  ,  0.32401  , -0.1452   ,  0.13602  , -1.0021   ,
        0.15376  ,  0.27661  , -0.24772  , -0.62897  ,  0.47619  ,
       -0.35871  , -0.82761  , -0.41504  ,  0.43143  , -0.0085019,
        0.019796 ,  0.30738  , -0.06202  , -0.41845  , -0.0065542,
       -0.5164   ,  0.14782  , -0.0714   ,  0.14323  , -0.012224 ,
        0.22951  , -0.086194 ,  0.29748  ,  0.46222  , -0.56759  ,
        0.2868   ,  0.068718 ,  0.25969  ,  0.1065   , -0.38421  ,
       -0.24288  ,  0.3652   , -0.28178  ,  0.02779  ,  0.21189  ,
       -0.21838  , -0.26429  , -0.2053   , -0.050045 , 

  'which': array([-2.2232e-01,  2.3856e-01, -4.8047e-02, -2.5248e-01, -8.2310e-02,
        1.0927e-01, -1.5009e-02,  3.6736e-01,  9.7258e-02, -2.1902e+00,
        2.5305e-01,  1.5526e-01,  1.6028e-01, -2.5013e-01,  5.3829e-01,
        2.5968e-01, -3.7403e-01,  2.1711e-01,  1.1890e-01, -2.3249e-01,
       -2.5872e-02,  4.1508e-01,  1.5975e-01,  1.3737e-01, -2.5127e-01,
        2.7905e-02, -4.7097e-02, -1.6902e-01, -3.0578e-01,  1.6238e-01,
        2.7640e-01,  3.7674e-01, -2.1659e-01,  2.8674e-01, -4.1485e-01,
       -1.1651e-01,  1.1826e-01,  3.9304e-02, -1.8228e-01, -3.1823e-01,
       -2.5248e-01,  7.6281e-02, -3.2283e-01,  1.2200e-01,  5.5340e-02,
        9.5988e-02, -1.0934e-01, -1.1307e-01, -1.5736e-01,  2.5971e-01,
       -1.0435e-01, -1.1104e-02,  1.5253e-01,  4.1234e-02,  4.9770e-02,
       -1.5362e-01, -1.5927e-01,  3.2627e-01,  1.9822e-01, -1.6707e-01,
       -7.5370e-02,  9.8912e-02,  1.8531e-01, -2.5441e-01, -1.0133e-01,
       -2.3708e-01,  8.4025e-02,  2.9923e-01, -1.6771