# **Prerequisite steps**

In [None]:
# Mount Google Drive at /content/drive so files stored there can be opened from this runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import json

# Load the dataset from the mounted Drive.
# JSON text is UTF-8; passing the encoding explicitly keeps the read independent
# of the runtime's locale default.
with open('/content/drive/MyDrive/goemotions.json', 'r', encoding='utf-8') as f:
  json_file = json.load(f)

In [None]:
import gensim.downloader as api
# Load the pre-trained "word2vec-google-news-300" embeddings through gensim's downloader API.
w2v_model = api.load("word2vec-google-news-300")

In [None]:
import nltk
#nltk.download()

In [None]:
from sklearn.model_selection import train_test_split
import numpy as np

# Convert the loaded records to an array so that columns can be sliced below.
json_file = np.asarray(json_file)

# 2.2 Splitting the dataset
# 80% train / 20% test. random_state is fixed so that restarting the kernel
# reproduces the same split, keeping scores comparable between runs and models.
training_set, test_set = train_test_split(json_file, train_size=0.8, random_state=42)

# training set: column 0 is the model input, the remaining columns are the labels
X_train = training_set[:, 0]
y_train = training_set[:, 1:]

# test set: same column layout as the training set
X_test = test_set[:, 0]
y_test = test_set[:, 1:]

In [None]:
# Tokenizer data for nltk.word_tokenize. Older NLTK releases read the 'punkt'
# resource; NLTK >= 3.8.2 reads 'punkt_tab' instead and raises LookupError
# without it. Fetch both so the cell works on either version.
nltk.download('punkt')
nltk.download('punkt_tab')

# Tokenize every training post (one list of tokens per post).
word_tokens = [nltk.word_tokenize(corpus, language="english") for corpus in X_train]

**3.3 Post embeddings: average of the word vectors of each post**


In [None]:
def get_post_embedding(model, post, tokenizer=None):
    """Return a single vector for ``post``: the mean of its in-vocabulary word vectors.

    Args:
        model: Embedding lookup that supports ``word in model``, ``model[word]``
            and ``model[list_of_words]``, and exposes ``vector_size``.
        post: Text to embed.
        tokenizer: Optional callable mapping a string to a list of tokens.
            Defaults to ``nltk.word_tokenize``.

    Returns:
        The element-wise mean of the vectors of the tokens found in ``model``.
        When no token is found: the vector stored under ``'unk'`` if the model
        has one, otherwise a zero vector of length ``model.vector_size``.
    """
    # Resolve the default lazily so a custom tokenizer never touches nltk.
    tokenize = nltk.word_tokenize if tokenizer is None else tokenizer
    known_words = [word for word in tokenize(post) if word in model]
    if known_words:
        return np.mean(model[known_words], axis=0)
    # Nothing in the post is in the vocabulary. Keep the original 'unk' fallback
    # when that entry exists, but do not raise KeyError for vocabularies without it.
    if 'unk' in model:
        return model['unk']
    return np.zeros(model.vector_size, dtype=np.float32)

Define a helper that prints the classification report and plots/saves the confusion matrix.

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

def generate_stats(Y_test, Y_pred, output_filename):
  '''
  Print a classification report and plot/save the confusion matrix for a model.

  Y_test: true labels.
  Y_pred: predicted labels, in the same order as Y_test.
  output_filename: path stem; the figure is written to output_filename + ".pdf".
  '''
  # report
  print(classification_report(Y_test, Y_pred))

  # Size this figure explicitly instead of through plt.rcParams, so the 20x20
  # setting does not leak into every other plot made in the same kernel.
  fig, ax = plt.subplots(figsize=(20, 20))
  ConfusionMatrixDisplay.from_predictions(Y_test, Y_pred, ax=ax)

  # Save through the figure handle (before show) so the output never depends
  # on which figure pyplot currently considers active.
  fig.savefig(output_filename + ".pdf", format="pdf")
  plt.show()

  # Release the figure; repeated calls would otherwise keep every one open.
  plt.close(fig)

# **3.5**

## Base MLP

In [None]:
# Build the word2vec features for both splits and select the emotion labels
from sklearn.neural_network import MLPClassifier

# first label column holds the emotion used as training target below
emotions_train = y_train[:, 0]

# one averaged embedding per post
vectorized_training_posts = [
    get_post_embedding(w2v_model, train_post)
    for train_post in X_train
]
vectorized_test_posts = [
    get_post_embedding(w2v_model, test_post)
    for test_post in X_test
]

# sanity check: number of targets and number of inputs should match
n_targets = len(emotions_train)
n_inputs = len(vectorized_training_posts)
print("n of outputs emotions", n_targets)
print("input length", n_inputs)

### **Emotions**

In [None]:
# Base MLP (default architecture) trained on the emotion labels.
# random_state pins weight initialisation and batch sampling so a re-run gives
# the same model; max_iter=100 caps the number of training epochs.
mlp_model_base_e = MLPClassifier(max_iter=100, random_state=42)
mlp_model_base_e.fit(vectorized_training_posts, emotions_train)

In [None]:
# Predict the emotion of every test post with the base MLP.
# NOTE: Y_pred is rebound by each later prediction cell; run the report cell right after this one.
Y_pred = mlp_model_base_e.predict(vectorized_test_posts)

In [None]:
# Report against the first label column (emotions); the third argument is the stem of the saved PDF.
generate_stats(y_test[:,0], Y_pred, "Base mlp emotion embedding")

### **Sentiments**

In [None]:
# Train the base MLP on the sentiment labels (second label column).
# random_state pins weight initialisation and batch sampling so a re-run gives
# the same model; max_iter=100 caps the number of training epochs.

mlp_model_base_sen = MLPClassifier(max_iter=100, random_state=42)
mlp_model_base_sen.fit(vectorized_training_posts, y_train[:,1])

In [None]:
# Predict the sentiment of every test post, then report against the second label column (sentiments).
Y_pred = mlp_model_base_sen.predict(vectorized_test_posts)
generate_stats(y_test[:,1], Y_pred, "Base mlp sentiments embedding")

# **3.8 Running the Base MLP on 2 other pre-trained models**

In [None]:
# Load the pre-trained "glove-twitter-25" embeddings through gensim's downloader API.
model_glove_twitter = api.load("glove-twitter-25")

In [None]:
# Load the pre-trained "glove-wiki-gigaword-300" embeddings through gensim's downloader API.
model_glove_wiki_gigaword = api.load("glove-wiki-gigaword-300")

## **Training base MLP with the GloVe Twitter embeddings**

In [None]:
# Sanity check: show the vector stored under "unk" in this model, which
# get_post_embedding uses as fallback when no token of a post is in the vocabulary.
print(model_glove_twitter["unk"])

In [None]:
# Re-embed both splits with the GloVe Twitter vectors.
# NOTE: this rebinds the same names the word2vec section used, so every fit/predict
# cell below this point works on the Twitter features; re-run the word2vec
# embedding cell before re-running the word2vec models.
vectorized_training_posts = [get_post_embedding(model_glove_twitter, post) for post in X_train]
vectorized_test_posts = [get_post_embedding(model_glove_twitter, post) for post in X_test]

### Sentiments

In [None]:
# Train the base MLP on the sentiment labels using the GloVe Twitter features.
# random_state pins weight initialisation and batch sampling so a re-run gives
# the same model; max_iter=100 caps the number of training epochs.
mlp_model_base_sen_twitter = MLPClassifier(max_iter=100, random_state=42)
mlp_model_base_sen_twitter.fit(vectorized_training_posts, y_train[:,1])

In [None]:
# Predict the sentiment of every test post, then report against the second label column (sentiments).
Y_pred = mlp_model_base_sen_twitter.predict(vectorized_test_posts)
generate_stats(y_test[:,1], Y_pred, "Base mlp sentiments embedding - Twitter-25 dimens")

### Emotions

In [None]:
# Train the base MLP on the emotion labels using the GloVe Twitter features.
# random_state pins weight initialisation and batch sampling so a re-run gives
# the same model; max_iter=100 caps the number of training epochs.
mlp_model_base_emo_twitter = MLPClassifier(max_iter=100, random_state=42)
mlp_model_base_emo_twitter.fit(vectorized_training_posts, y_train[:,0])

In [None]:
# Predict the emotion of every test post, then report against the first label column (emotions).
# The label/file stem says "Base" because the model evaluated here is the base MLP;
# the previous "Top" prefix mislabelled the saved PDF.
Y_pred = mlp_model_base_emo_twitter.predict(vectorized_test_posts)
generate_stats(y_test[:,0], Y_pred, "Base mlp emotions embedding - Twitter-25 dimens")

## **Training base MLP with the GloVe Wiki-Gigaword embeddings**

In [None]:
# Sanity check: show the vector stored under "unk" in this model, which
# get_post_embedding uses as fallback when no token of a post is in the vocabulary.
print(model_glove_wiki_gigaword["unk"])

In [None]:
# Re-embed both splits with the GloVe Wiki-Gigaword vectors.
# NOTE: this rebinds the same names the earlier sections used, so every fit/predict
# cell below this point works on the Wiki-Gigaword features; re-run the matching
# embedding cell before re-running an earlier section's models.
vectorized_training_posts = [get_post_embedding(model_glove_wiki_gigaword, post) for post in X_train]
vectorized_test_posts = [get_post_embedding(model_glove_wiki_gigaword, post) for post in X_test]

### Sentiments

In [None]:
# Train the base MLP on the sentiment labels using the GloVe Wiki-Gigaword features.
# random_state pins weight initialisation and batch sampling so a re-run gives
# the same model; max_iter=100 caps the number of training epochs.
mlp_model_base_sen_wiki = MLPClassifier(max_iter=100, random_state=42)
mlp_model_base_sen_wiki.fit(vectorized_training_posts, y_train[:,1])

In [None]:
# Predict the sentiment of every test post, then report against the second label column (sentiments).
Y_pred = mlp_model_base_sen_wiki.predict(vectorized_test_posts)
generate_stats(y_test[:,1], Y_pred, "Base mlp sentiments embedding - Wiki-300 dimens")

### Emotions

In [None]:
# Train the base MLP on the emotion labels using the GloVe Wiki-Gigaword features.
# random_state pins weight initialisation and batch sampling so a re-run gives
# the same model; max_iter=100 caps the number of training epochs.
mlp_model_base_emo_wiki = MLPClassifier(max_iter=100, random_state=42)
mlp_model_base_emo_wiki.fit(vectorized_training_posts, y_train[:,0])

In [None]:
# Predict the emotion of every test post, then report against the first label column (emotions).
Y_pred = mlp_model_base_emo_wiki.predict(vectorized_test_posts)
generate_stats(y_test[:,0], Y_pred, "Base mlp emotions embedding - Wiki-300 dimens")