In [0]:
import pandas as pd
import numpy as np
import random
from keras.models import Sequential,Model
from keras.models import Model
from keras.layers import Input, Dense, Activation, Reshape, Dot
from keras.layers import Concatenate, Dropout, Flatten, LSTM
from keras.layers.embeddings import Embedding
from sklearn.preprocessing import OneHotEncoder
from keras.utils import plot_model
#To get repeatable results OR To get stable results use seed as below. In this case we get same set of weights/embedings in every run
# If we omit below 4 lines at each run different weights/embedings are produced.
from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(2)

emotionsList= ['like','antipathy', 'hostility','love','warmth','loathe','abhor','intimacy','dislike','venom','affection',
               'tenderness','animosity','attachment','infatuation','fondness','hate'] 

emoaffect= ['positive','negative', 'negative','positive','positive','negative','negative','positive','negative','negative','positive',
               'positive','negative','positive','positive','positive','negative']                
embedding_size = int(min(np.ceil((len(emotionsList))/2), 50 ))

#Creating a mapping of emotions to integers. When we feed emotions and emoaffect into the embedding neural network, we will have to represent them as numbers,
#and this mapping will let us keep track of the emotions.
emotion_index = {emotion: idx for idx, emotion in enumerate(emotionsList)}
index_emotion = {idx: emotion for emotion, idx in emotion_index.items()}

emoaffect_index = {effect: idx for idx, effect in enumerate(emoaffect)}
index_emoaffect = {idx: effect for effect, idx in emoaffect_index.items()}


In [0]:
#creating data for training
pairs = []
for emotion, effect in zip(emotionsList, emoaffect):
     pairs.append((emotion_index[emotion], emoaffect_index[effect]))    

#Below are for testing purpose
len(pairs), len(emoaffect), len(emotionsList)    
n=11
index_emotion[pairs[n][0]], index_emoaffect[pairs[n][1]]


('tenderness', 'positive')

In [0]:
#In order for any machine learning model to learn, it needs a training set. We are going to create a supervised learning model to train an embedding neural network.
#Given an emotion and its effect/type (Positive Or Negative) get the three closest emotions to given Type of emotion.

#We need to generate positive samples and negative samples to train the neural network. For positive samples: pick a pair from pairs and assign 
#it a 1. For negative samples: pick one random emotion and one random effect, make sure they are not in pairs, and assign them a 0. 

traindata_size = 30
traindata = np.zeros((traindata_size, 3))
for idx, (emotion_id, effect_id) in enumerate(pairs):
            traindata[idx, :] = (emotion_id, effect_id, 1)
idx += 1            
# Add negative samples until traindata_size is reached
while idx < 30:   
     # random selection
      random_emotion = random.randrange(len(emotionsList))
      random_effect = random.randrange(len(emoaffect))  

      # Check to make sure this is not a positive example
      if (random_emotion, random_effect) not in pairs:     
          # Add to batch and increment index
            traindata[idx, :] = (random_emotion, random_effect, 0)
            idx += 1  


np.random.shuffle(traindata)

In [0]:
#Here the objective is not to build the model with best accuracy, but to generate best embeddings. The supervised task is just the method through which we train our network 
#to make the embeddings. We are not going to test our model on new data, so we don't need to evaluate the performance.

#ValueError: Graph disconnected: cannot obtain value for tensor Tensor solved
max_seq_length = 1 
emotion_inputs = Input(shape=(max_seq_length,))
emotion_embedding = Embedding(len(emotionsList), embedding_size, input_length=max_seq_length, name='emotion_embedding')(emotion_inputs)

effect_inputs = Input(shape=(max_seq_length,))
effect_embedding = Embedding(len(emoaffect), embedding_size, input_length=max_seq_length, name='effect_embedding')(effect_inputs)

merged = Dot(name = 'dot_product', normalize = True, axes=2)([emotion_embedding, effect_embedding])
merged = Reshape(target_shape = (max_seq_length,))(merged)
binary_dense = Dense(16, activation='relu')(merged)
output = Dense(1, activation = 'sigmoid')(binary_dense)
model = Model(inputs = [emotion_inputs, effect_inputs], outputs = output)
model.compile(optimizer = 'Adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
model.summary()






Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
emotion_embedding (Embedding)   (None, 1, 9)         153         input_1[0][0]                    
__________________________________________________________________________________________________
effect_embedding (Embedding)    (None, 1, 9)         153         input_2[0][0]              

In [0]:
# Training the Model so that it can learn embeddings
# Extract target variable from traindata
y_train = traindata[:,2]
# Extract all columns except last one
X_train = traindata[:,:-1]
model.fit([X_train[:,0], X_train[:,1]],y_train,epochs=5,verbose=0)
#The list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 2 array(s),
#but instead got the following list of 1 array issue solved
#indices[0,0] = 16 is not in [0, 3) issue is solved

<keras.callbacks.History at 0x7f96c16aca90>

In [0]:


trained_embedding_layer = model.get_layer('emotion_embedding')
trained_emotion_weights = trained_embedding_layer.get_weights()[0]
#We need to normalize the embeddings so that the dot product between two embeddings becomes the cosine similarity.
trained_emotion_weights = trained_emotion_weights/ np.linalg.norm(trained_emotion_weights, axis = 1).reshape((-1, 1))

def find_synonyms(name, weights):
   #Calculate dot product between specific emotion and all others
   try:
       dists = np.dot(weights, weights[emotion_index[name]])       
   except KeyError:
        print("{} Not Found. Please enter an emotion from {}".format(name,emotionsList))
        return
  # Sort distance indexes from smallest to largest
   sorted_dists = np.argsort(dists)   
   closest = sorted_dists[-3:]
   print("Three closest emotions to {} are :".format(name))
   closest = closest[: : -1]
   for i,v in enumerate(closest):        
        print( index_emotion[v])

find_synonyms('hate', trained_emotion_weights)

Three closest emotions to hate are :
hate
venom
loathe
