In [1]:
import itertools
import pandas as pd
import numpy as np
import re
import os
from tqdm import tqdm

# Drawing the embeddings
import matplotlib.pyplot as plt

# Deep learning: 
from keras.models import Input, Model
from keras.layers import Dense

from scipy import sparse

In [2]:
# Reading the text from the input folder
texts = pd.read_table('w2v.txt')
# texts = [x for x in texts['text']]
print(texts)

   In linguistics word embeddings were discussed in the research area of distributional semantics. It aims to quantify and categorize semantic similarities between linguistic items based on their distributional properties in large samples of language data. The underlying idea that "a word is characterized by the company it keeps" was popularized by Firth.
0   The technique of representing words as vectors...                                                                                                                                                                                                                                                                                                                
1   There are many branches and many research grou...                                                                                                                                                                                                                                       

In [3]:
my_file = open("w2v.txt", "r")
content_list = my_file.readlines()
print(content_list)

['In linguistics word embeddings were discussed in the research area of distributional semantics. It aims to quantify and categorize semantic similarities between linguistic items based on their distributional properties in large samples of language data. The underlying idea that "a word is characterized by the company it keeps" was popularized by Firth.\n', '\n', 'The technique of representing words as vectors has roots in the 1960s with the development of the vector space model for information retrieval. Reducing the number of dimensions using singular value decomposition then led to the introduction of latent semantic analysis in the late 1980s.In 2000 Bengio et al. provided in a series of papers the "Neural probabilistic language models" to reduce the high dimensionality of words representations in contexts by "learning a distributed representation for words". (Bengio et al, 2003). Word embeddings come in two different styles, one in which words are expressed as vectors of co-occur

In [4]:
my_file = open("w2v.txt", "r")
content = my_file.read()
print(content)

In linguistics word embeddings were discussed in the research area of distributional semantics. It aims to quantify and categorize semantic similarities between linguistic items based on their distributional properties in large samples of language data. The underlying idea that "a word is characterized by the company it keeps" was popularized by Firth.

The technique of representing words as vectors has roots in the 1960s with the development of the vector space model for information retrieval. Reducing the number of dimensions using singular value decomposition then led to the introduction of latent semantic analysis in the late 1980s.In 2000 Bengio et al. provided in a series of papers the "Neural probabilistic language models" to reduce the high dimensionality of words representations in contexts by "learning a distributed representation for words". (Bengio et al, 2003). Word embeddings come in two different styles, one in which words are expressed as vectors of co-occurring words, 

In [5]:
# Defining the window for context
window = 2

# Creating a placeholder for the scanning of the word list
word_lists = []
all_text = []


In [6]:
# Defining the window for context
window = 2

In [7]:

def create_unique_word_dict(text:list) -> dict:
    """
    A method that creates a dictionary where the keys are unique words
    and key values are indices
    """
    # Getting all the unique words from our text and sorting them alphabetically
    words = list(set(text))
    words.sort()

    # Creating the dictionary for the unique words
    unique_word_dict = {}
    for i, word in enumerate(words):
        unique_word_dict.update({
            word: i
        })

    return unique_word_dict    

def text_preprocessing(
    text:list,
    punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_“~''',
    stop_words=['and', 'a', 'is', 'the', 'in', 'be', 'will','on','to','an','are']
    )->list:
    """
    A method to preproces text
    """
    for x in text.lower(): 
        if x in punctuations: 
            text = text.replace(x, "")

    # Removing words that have numbers in them
    text = re.sub(r'\w*\d\w*', '', text)

    # Removing digits
    text = re.sub(r'[0-9]+', '', text)

    # Cleaning the whitespaces
    text = re.sub(r'\s+', ' ', text).strip()

    # Setting every word to lower
    text = text.lower()

    # Converting all our text to a list 
    text = text.split(' ')

    # Droping empty strings
    text = [x for x in text if x!='']

    # Droping stop words
    text = [x for x in text if x not in stop_words]

    return text

# Functions to find the most similar word 
def euclidean(vec1:np.array, vec2:np.array) -> float:
    """
    A function to calculate the euclidean distance between two vectors
    """
    return np.sqrt(np.sum((vec1 - vec2)**2))

def find_similar(word:str, embedding_dict:dict, top_n=10)->list:
    """
    A method to find the most similar word based on the learnt embeddings
    """
    dist_dict = {}
    word_vector = embedding_dict.get(word, [])
    if len(word_vector) > 0:
        for key, value in embedding_dict.items():
            if key!=word:
                dist = euclidean(word_vector, value)
                dist_dict.update({
                    key: dist
                })

        return sorted(dist_dict.items(), key=lambda x: x[1])[0:top_n]       

In [8]:
# Creating a placeholder for the scanning of the word list
word_lists = []
all_text = []

for text in texts:

    # Cleaning the text
    text = text_preprocessing(text)

    # Appending to the all text list
    all_text += text 

    # Creating a context dictionary
    for i, word in enumerate(text):
        for w in range(window):
            # Getting the context that is ahead by *window* words
            if i + 1 + w < len(text): 
                word_lists.append([word] + [text[(i + 1 + w)]])
            # Getting the context that is behind by *window* words    
            if i - w - 1 >= 0:
                word_lists.append([word] + [text[(i - w - 1)]])

In [9]:
word_lists

[['linguistics', 'word'],
 ['linguistics', 'embeddings'],
 ['word', 'embeddings'],
 ['word', 'linguistics'],
 ['word', 'were'],
 ['embeddings', 'were'],
 ['embeddings', 'word'],
 ['embeddings', 'discussed'],
 ['embeddings', 'linguistics'],
 ['were', 'discussed'],
 ['were', 'embeddings'],
 ['were', 'research'],
 ['were', 'word'],
 ['discussed', 'research'],
 ['discussed', 'were'],
 ['discussed', 'area'],
 ['discussed', 'embeddings'],
 ['research', 'area'],
 ['research', 'discussed'],
 ['research', 'of'],
 ['research', 'were'],
 ['area', 'of'],
 ['area', 'research'],
 ['area', 'distributional'],
 ['area', 'discussed'],
 ['of', 'distributional'],
 ['of', 'area'],
 ['of', 'semantics'],
 ['of', 'research'],
 ['distributional', 'semantics'],
 ['distributional', 'of'],
 ['distributional', 'it'],
 ['distributional', 'area'],
 ['semantics', 'it'],
 ['semantics', 'distributional'],
 ['semantics', 'aims'],
 ['semantics', 'of'],
 ['it', 'aims'],
 ['it', 'semantics'],
 ['it', 'quantify'],
 ['it', '

In [10]:
unique_word_dict = create_unique_word_dict(all_text)

# Defining the number of features (unique words)
n_words = len(unique_word_dict)

# Getting all the unique words 
words = list(unique_word_dict.keys())

# Creating the X and Y matrices using one hot encoding
X = []
Y = []

for i, word_list in tqdm(enumerate(word_lists)):
    # Getting the indices
    main_word_index = unique_word_dict.get(word_list[0])
    context_word_index = unique_word_dict.get(word_list[1])

    # Creating the placeholders   
    X_row = np.zeros(n_words)
    Y_row = np.zeros(n_words)

    # One hot encoding the main word
    X_row[main_word_index] = 1

    # One hot encoding the Y matrix words 
    Y_row[context_word_index] = 1

    # Appending to the main matrices
    X.append(X_row)
    Y.append(Y_row)

# Converting the matrices into a sparse format because the vast majority of the data are 0s
# X = sparse.csr_matrix(X)
# Y = sparse.csr_matrix(Y)

# Defining the size of the embedding
embed_size = 2

# Defining the neural network
inp = Input(shape=(len(X),))
x = Dense(units=embed_size, activation='linear')(inp)
x = Dense(units=len(Y), activation='softmax')(x)
model = Model(inputs=inp, outputs=x)
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam')

158it [00:00, 158124.56it/s]


In [11]:
X

[array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 1.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 1.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 1.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.

In [12]:
# Optimizing the network weights
model.fit(
    x=X, 
    y=Y, 
    batch_size=10,
    epochs=1000
    )

Epoch 1/1000


ValueError: in user code:

    C:\Users\nismai01\Anaconda3\lib\site-packages\keras\engine\training.py:830 train_function  *
        return step_function(self, iterator)
    C:\Users\nismai01\Anaconda3\lib\site-packages\keras\engine\training.py:813 run_step  *
        outputs = model.train_step(data)
    C:\Users\nismai01\Anaconda3\lib\site-packages\keras\engine\training.py:770 train_step  *
        y_pred = self(x, training=True)
    C:\Users\nismai01\Anaconda3\lib\site-packages\keras\engine\base_layer.py:989 __call__  *
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    C:\Users\nismai01\Anaconda3\lib\site-packages\keras\engine\input_spec.py:197 assert_input_compatibility  *
        raise ValueError('Layer ' + layer_name + ' expects ' +

    ValueError: Layer model expects 1 input(s), but it received 158 input tensors. Inputs received: [<tf.Tensor 'ExpandDims:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_1:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_2:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_3:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_4:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_5:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_6:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_7:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_8:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_9:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_10:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_11:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_12:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_13:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_14:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_15:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_16:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_17:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_18:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_19:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_20:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_21:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_22:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_23:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_24:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_25:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_26:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_27:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_28:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_29:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_30:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_31:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_32:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_33:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_34:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_35:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_36:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_37:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_38:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_39:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_40:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_41:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_42:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_43:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_44:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_45:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_46:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_47:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_48:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_49:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_50:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_51:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_52:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_53:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_54:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_55:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_56:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_57:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_58:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_59:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_60:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_61:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_62:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_63:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_64:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_65:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_66:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_67:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_68:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_69:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_70:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_71:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_72:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_73:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_74:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_75:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_76:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_77:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_78:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_79:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_80:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_81:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_82:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_83:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_84:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_85:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_86:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_87:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_88:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_89:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_90:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_91:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_92:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_93:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_94:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_95:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_96:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_97:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_98:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_99:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_100:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_101:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_102:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_103:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_104:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_105:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_106:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_107:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_108:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_109:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_110:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_111:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_112:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_113:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_114:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_115:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_116:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_117:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_118:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_119:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_120:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_121:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_122:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_123:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_124:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_125:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_126:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_127:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_128:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_129:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_130:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_131:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_132:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_133:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_134:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_135:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_136:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_137:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_138:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_139:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_140:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_141:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_142:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_143:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_144:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_145:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_146:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_147:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_148:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_149:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_150:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_151:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_152:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_153:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_154:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_155:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_156:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'ExpandDims_157:0' shape=(None, 1) dtype=float32>]


In [None]:
# Obtaining the weights from the neural network. 
# These are the so called word embeddings
# The input layer 
weights = model.get_weights()[0]
print(weights)

In [None]:
# Creating a dictionary to store the embeddings in. The key is a unique word and 
# the value is the numeric vector
embedding_dict = {}
for word in words: 
    embedding_dict.update({
        word: weights[unique_word_dict.get(word)]
        })

In [None]:
# Ploting the embeddings
plt.figure(figsize=(10, 10))
for word in list(unique_word_dict.keys()):
    coord = embedding_dict.get(word)
    plt.scatter(coord[0], coord[1])
    plt.annotate(word, (coord[0], coord[1]))       
