<a href="https://colab.research.google.com/github/odedgolden/Affective-Computing/blob/master/DimensionsOfEmotions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# imports

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import numpy as np
import pandas as pd
from gensim.models import Word2Vec
import gensim.downloader as api
from sklearn.decomposition import TruncatedSVD, PCA
import random

In [0]:
def load(model_name):
  '''
  Download and load a pretrained embedding model through gensim's downloader.

  model_name: name handed straight to `api.load`, e.g. "glove-wiki-gigaword-300".

  Prints the vocabulary size and returns the object produced by `api.load`
  (the rest of this notebook indexes it by word to obtain that word's vector).
  '''
  wv_from_bin = api.load(model_name)
  # Gensim 4 replaced the `vocab` attribute with `key_to_index`; fall back to
  # `vocab` so the notebook keeps working against Gensim 3.x as well.
  vocab = getattr(wv_from_bin, "key_to_index", None)
  if vocab is None:
    vocab = wv_from_bin.vocab
  print("Loaded vocab size %i" % len(vocab))
  return wv_from_bin
  

In [0]:
# Pretrained embeddings that can be fetched by name; the first entry is the one used below.
model_names = [
    "glove-wiki-gigaword-300",
    "glove-wiki-gigaword-50",
    "word2vec-google-news-300",
    "glove-twitter-200",
    "fasttext-wiki-news-subwords-300",
]
model = load(model_names[0])

In [0]:
def reduce_dim(model, number_of_vectors=10000, required_words=["car"],n_components=2):
  '''
  Project a random subset of the model's word vectors down to `n_components`
  dimensions with PCA.

  Fitting PCA on the full vocabulary is expensive, so only a subset is used:
  model: word-vector model; must expose its vocabulary (`key_to_index` on
    Gensim 4, `vocab` on Gensim 3) and return a vector for `model[word]`.
  number_of_vectors: target number of distinct words to keep, required words
    included. If the vocabulary cannot supply that many, every available word
    is used.
  required_words: words that are always kept (never modified by this function).
    All of them are kept even when there are more than `number_of_vectors`.
    A word missing from the model makes `model[word]` fail.
  n_components: dimensionality of the reduced vectors.

  Returns a dict mapping each kept word to its reduced vector.
  '''
  # Gensim 4 replaced the `vocab` attribute with `key_to_index`.
  vocab = getattr(model, "key_to_index", None)
  if vocab is None:
    vocab = model.vocab
  candidates = list(vocab.keys())
  random.shuffle(candidates)

  # dict.fromkeys drops repeated required words while preserving their order.
  short_list = list(dict.fromkeys(required_words))
  seen = set(short_list)
  # Top up with random words, skipping any already kept: a duplicate would
  # collapse into a single dict key and shrink the result below the requested
  # size. Iterating the candidates (rather than indexing) also means a request
  # larger than the vocabulary simply stops when the words run out.
  for word in candidates:
    if len(short_list) >= number_of_vectors:
      break
    if word not in seen:
      seen.add(word)
      short_list.append(word)
  print(len(short_list))

  X = [model[word] for word in short_list]
  print(len(X))
  reducer = PCA(n_components=n_components)
  reduced_model = reducer.fit_transform(X)

  # Rows of the PCA output are in the same order as short_list.
  return dict(zip(short_list, reduced_model))

In [0]:
# Every word known to the loaded model. Gensim 4 removed `vocab` in favour of
# `key_to_index`; fall back to `vocab` so the cell also runs on Gensim 3.x.
model_list = list(model.key_to_index if hasattr(model, "key_to_index") else model.vocab)
len(model_list)

In [0]:
# How many word vectors take part in the PCA fit (the required words count towards it).
number_of_vectors = 400000

# Emotion words whose reduced vectors are inspected and plotted further down.
required_words = (
    'admiration', 'adoration', 'appreciation',
    'amusement', 'anger', 'anxiety',
    'awe', 'awkwardness', 'boredom',
    'calmness', 'confusion', 'craving',
    'disgust', 'pain', 'delectation',
    'excitement', 'fear', 'horror',
    'interest', 'joy', 'nostalgia',
    'relief', 'romance', 'sadness',
    'satisfaction', 'desire', 'surprise',
)

reduced_model = reduce_dim(
    model,
    number_of_vectors=number_of_vectors,
    required_words=required_words,
    n_components=3,
)

In [0]:
# Number of distinct words that ended up in the reduced model.
len(reduced_model)

In [0]:
# Collect the reduced vector of every emotion word, keyed by the word itself.
words_embeddings = {}
for word in required_words:
    words_embeddings[word] = reduced_model[word]

# The dict yields one column per word; transpose so each word becomes a row,
# then name the three PCA components.
component_names = ['a', 'b', 'c']
df = pd.DataFrame(data=words_embeddings).T
df.columns = component_names

In [0]:
# Show up to the first 30 rows of the emotion-word table.
df.head(n=30)

In [0]:
def show_embeddings(data, labels):
    """Scatter the first two columns of `data` and annotate each point with its label.

    data: 2-D array sliced as data[:, 0] and data[:, 1] (a NumPy array in this notebook).
    labels: iterable of strings, paired positionally with the rows of `data`.
    """
    plt.figure(figsize=(18, 16), dpi=80)

    xs = data[:, 0]
    ys = data[:, 1]
    plt.scatter(xs, ys, marker='o')

    for text, px, py in zip(labels, data[:, 0], data[:, 1]):
        # Label sits up and to the left of the point, joined to it by a straight arrow.
        box_style = dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.2)
        arrow_style = dict(arrowstyle='->', connectionstyle='arc3,rad=0')
        plt.annotate(
            text,
            xy=(px, py),
            xytext=(-20, 20),
            textcoords='offset points',
            ha='right',
            va='bottom',
            bbox=box_style,
            arrowprops=arrow_style,
        )

# Try the 2-D plot on the emotion words (first two PCA components only).
labels = [
    'admiration', 'adoration', 'appreciation',
    'amusement', 'anger', 'anxiety',
    'awe', 'awkwardness', 'boredom',
    'calmness', 'confusion', 'craving',
    'disgust', 'pain', 'delectation',
    'excitement', 'fear', 'horror',
    'interest', 'joy', 'nostalgia',
    'relief', 'romance', 'sadness',
    'satisfaction', 'desire', 'surprise',
]

print(len(labels))
# One row per label, in label order, taken from the reduced model.
label_rows = [reduced_model[word] for word in labels]
vectors = np.array(label_rows)
show_embeddings(vectors, labels)

In [0]:
# Kept for older Matplotlib releases, where this import is what makes the
# '3d' projection available; harmless on newer ones.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import

# Passing `projection=` to `Figure.gca` was deprecated in Matplotlib 3.4 and is
# rejected by current releases; `add_subplot` works on old and new versions.
fig = plt.figure(figsize=(30,20))
ax = fig.add_subplot(111, projection='3d')

# One evenly spaced colour value in [0, 1) per row, derived from the frame's
# length so the cell keeps working if the word list grows or shrinks.
n_points = len(df)
ax.scatter(
    xs=df["a"],
    ys=df["b"],
    zs=df["c"],
    cmap='tab10',
    c=[float(i) / n_points for i in range(n_points)]
)

# Write each word next to its point.
for label, x, y, z in zip(df.index, df['a'], df['b'], df['c']):
    ax.text(x, y, z, label)

ax.set_xlabel('pca-one')
ax.set_ylabel('pca-two')
ax.set_zlabel('pca-three')
plt.show()