**Reading word embeddings from GloVe model**

- GloVe provides 50-, 100-, 200-, and 300-dimensional embeddings for words.
- These embeddings are simply looked up and re-used in this paper.

In [1]:
import pickle
import time
import numpy as np

In [2]:
class GloVeWordEmbedder:
    """Look up word vectors parsed from a GloVe text file.

    Each non-blank line of the GloVe file is expected to hold a token followed
    by its whitespace-separated vector components. The whole file is parsed
    into ``wordToVectorMap`` when the object is constructed.

    Attributes:
        pathGloveFile: Path of the GloVe text file that is parsed.
        pathPickleFile: Path that ``saveGloVeWordEmbeddingToPickle`` writes to.
        wordToVectorMap: Dict mapping each token to its float32 numpy vector.
    """

    # Fallback dimension, used only when no vectors have been loaded at all.
    DEFAULT_EMBEDDING_DIM = 100

    def __init__(self, pathGloveFile, pathPickleFile):
        """Store both paths and immediately parse the GloVe file.

        Args:
            pathGloveFile: Path to the GloVe text file to read.
            pathPickleFile: Destination path for the pickled word map.
        """
        self.pathGloveFile = pathGloveFile
        self.pathPickleFile = pathPickleFile
        self.wordToVectorMap = {}
        self.loadAndParseGloVeData()

    def loadAndParseGloVeData(self):
        """Parse the GloVe file into ``wordToVectorMap`` and report the time taken.

        Blank lines are skipped so that a stray or trailing empty line does
        not abort the load.
        """
        print("Creating wordToVectorMap for GloVe data:")
        startTime = time.perf_counter()
        with open(self.pathGloveFile, 'r', encoding="utf-8") as file:
            for line in file:
                values = line.strip().split()
                if not values:
                    # Nothing to parse on an empty line.
                    continue
                word = values[0]
                self.wordToVectorMap[word] = np.asarray(values[1:], dtype="float32")
        print("Done - Creating wordToVectorMap for GloVe data")
        loadTime = time.perf_counter() - startTime
        print(f"Took {loadTime} seconds to read GloVe data")

    def _zeroVector(self):
        """Return a fresh all-zero float32 vector sized like the loaded embeddings."""
        # Derive the size from any stored vector so the fallback also fits the
        # 50/200/300-dimensional GloVe files, not only the 100-dimensional one.
        anyVector = next(iter(self.wordToVectorMap.values()), None)
        if anyVector is None:
            dimension = self.DEFAULT_EMBEDDING_DIM
        else:
            dimension = anyVector.shape[0]
        return np.zeros(dimension, dtype="float32")

    def getWordEmbedding(self, word):
        """Return the vector for ``word``, looked up in lower case.

        Args:
            word: The token to look up.

        Returns:
            The stored float32 vector, or an all-zero float32 vector of the
            same dimension when the lower-cased word is not in the map.
        """
        vector = self.wordToVectorMap.get(word.lower())
        if vector is None:
            return self._zeroVector()
        return vector

    def getSentenceEmbedding(self, sentence):
        """Embed every whitespace-separated word of ``sentence``.

        Args:
            sentence: Text to split on whitespace.

        Returns:
            Dict mapping each word's position (0-based) to its embedding.
        """
        return {
            index: self.getWordEmbedding(word)
            for index, word in enumerate(sentence.split())
        }

    def saveGloVeWordEmbeddingToPickle(self):
        """Pickle ``wordToVectorMap`` to ``pathPickleFile``."""
        # The path is stored as ``pathPickleFile`` by ``__init__``.
        with open(self.pathPickleFile, 'wb') as file:
            pickle.dump(self.wordToVectorMap, file)

In [3]:
# Relative path to the GloVe text file (100-dimensional, per its file name).
pathGloVe100dFile = '../BME Corpora/glove.6B/glove.6B.100d.txt'
# Path passed to the embedder as the destination for its pickled word map.
pathPickleFile = "glove100dEmbeddings.pickle"

# Constructing the embedder parses the whole GloVe file right away.
wordEmbedder = GloVeWordEmbedder(pathGloVe100dFile, pathPickleFile)

Creating wordToVectorMap for GloVe data:
Done - Creating wordToVectorMap for GloVe data
Took 7.243454933166504 seconds to read GloVe data


In [4]:
# Embed a short example sentence; the result maps each word position to that word's vector.
exampleSentence = "The cat is sad due to rain"
exampleEmbedding = wordEmbedder.getSentenceEmbedding(exampleSentence)
print(exampleEmbedding)

{0: array([-0.038194, -0.24487 ,  0.72812 , -0.39961 ,  0.083172,  0.043953,
       -0.39141 ,  0.3344  , -0.57545 ,  0.087459,  0.28787 , -0.06731 ,
        0.30906 , -0.26384 , -0.13231 , -0.20757 ,  0.33395 , -0.33848 ,
       -0.31743 , -0.48336 ,  0.1464  , -0.37304 ,  0.34577 ,  0.052041,
        0.44946 , -0.46971 ,  0.02628 , -0.54155 , -0.15518 , -0.14107 ,
       -0.039722,  0.28277 ,  0.14393 ,  0.23464 , -0.31021 ,  0.086173,
        0.20397 ,  0.52624 ,  0.17164 , -0.082378, -0.71787 , -0.41531 ,
        0.20335 , -0.12763 ,  0.41367 ,  0.55187 ,  0.57908 , -0.33477 ,
       -0.36559 , -0.54857 , -0.062892,  0.26584 ,  0.30205 ,  0.99775 ,
       -0.80481 , -3.0243  ,  0.01254 , -0.36942 ,  2.2167  ,  0.72201 ,
       -0.24978 ,  0.92136 ,  0.034514,  0.46745 ,  1.1079  , -0.19358 ,
       -0.074575,  0.23353 , -0.052062, -0.22044 ,  0.057162, -0.15806 ,
       -0.30798 , -0.41625 ,  0.37972 ,  0.15006 , -0.53212 , -0.2055  ,
       -1.2526  ,  0.071624,  0.70565 ,  0.4974