In [1]:
# import the labeled similarity-matrix dataframe
# apply logistic regression, random forest, SVM, and gradient boosting to it
# report total accuracy
# report emotion-wise accuracy

# import the labeled matrix of similarity dataframe
from google.colab import drive
import pandas as pd
# Mount Google Drive, then read the labeled similarity matrix from it
DRIVE_MOUNT_POINT = '/content/drive/'
FEATURED_DATAFRAME_CSV = "/content/drive/MyDrive/featured_dataframe/featured_dataframe.csv"

drive.mount(DRIVE_MOUNT_POINT)
similarity_matrix = pd.read_csv(FEATURED_DATAFRAME_CSV)

Mounted at /content/drive/


In [2]:
# split dataset to features & labels
import numpy as np
from sklearn.model_selection import train_test_split

# Every column except the last is treated as a feature; the last column is the label
column_names = list(similarity_matrix.columns)
features = column_names[:-1]
labels = column_names[-1]

X = np.array(similarity_matrix[features])           # similarity scores as a 2-D numpy array
Y = np.array(similarity_matrix[labels]).ravel()     # labels flattened to a 1-D array

In [3]:
# import necessary modules for classification
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_validate, cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.ensemble import GradientBoostingClassifier

In [4]:
# function to get metrics from resulting dictionary
# and round them to 3 decimal digits
def getScores(dict):
  """Return the mean of each cross-validation metric, rounded to 3 decimals.

  `dict` is looked up with the keys 'test_accuracy', 'test_f1_macro',
  'test_precision_macro' and 'test_recall_macro'; each value must provide a
  `.mean()` method. The result is a list of the four rounded means in
  exactly that key order.
  """
  metric_keys = ('test_accuracy', 'test_f1_macro', 'test_precision_macro', 'test_recall_macro')
  return [round(dict[key].mean(), 3) for key in metric_keys]

# Summary table: one row per metric, one column per classifier.
# The row order must match the order of the list returned by getScores.
total_metrics_df = pd.DataFrame(index=['Accuracy','F1-score','Precision', 'Recall'], columns=['Logistic_regression','Random_forest', 'SVM', 'Gradient_boosting'])

# Evaluation settings shared by every classifier
scoring = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
n_folds = 10

# Keys are the column names of total_metrics_df.
# Every estimator gets an explicit random_state so that re-running the cell
# reproduces the same numbers (the random forest previously had none).
classifiers = {
  'Logistic_regression': LogisticRegression(random_state=33),
  'Random_forest': RandomForestClassifier(n_estimators=100, random_state=42),
  'SVM': svm.SVC(kernel='rbf', C=1, random_state=42),
  'Gradient_boosting': GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0),
}

# cross_validate / cross_val_predict fit their own clones of the estimator on
# each fold, so no separate fit on the full data set is needed beforehand.
class_reports = {}
for model_name, clf in classifiers.items():
  cv = cross_validate(clf, X, Y, cv=n_folds, scoring=scoring)
  total_metrics_df[model_name] = getScores(cv)
  # per-class report built from out-of-fold predictions, for emotion-wise classification
  class_reports[model_name] = classification_report(Y, cross_val_predict(clf, X, Y, cv=n_folds), output_dict=True)

# Names used by the emotion-wise table further down the notebook
class_report_log_reg = class_reports['Logistic_regression']
class_report_rand_fors = class_reports['Random_forest']
class_report_svm = class_reports['SVM']
class_report_gb = class_reports['Gradient_boosting']

total_metrics_df

Unnamed: 0,Logistic_regression,Random_forest,SVM,Gradient_boosting
Accuracy,0.673,0.695,0.721,0.695
F1-score,0.669,0.694,0.719,0.693
Precision,0.671,0.696,0.723,0.695
Recall,0.673,0.695,0.721,0.695


In [5]:
# create and fill emotion-wise dataframe
iterables = [["anger", "disgust", "fear", "joy", "sadness"], ["accuracy", "f1-score", "precision", "recall"]]

index = pd.MultiIndex.from_product(iterables, names=["Emotion", "Metrics"])

emotion_wise_df = pd.DataFrame(index = index, columns=['(Logistic regression)', '(Random_Forest)', '(SVM)', '(G-Boosted_Tree)'])
list1 = ['anger', 'disgust', 'fear', 'joy', 'sadness']     # emotion labels looked up in each report
list2 = ['f1-score', 'precision', 'recall']                # per-emotion metrics looked up in each report

# Column of emotion_wise_df -> classification report that fills it
reports_by_column = {
  '(Logistic regression)': class_report_log_reg,
  '(Random_Forest)': class_report_rand_fors,
  '(SVM)': class_report_svm,
  '(G-Boosted_Tree)': class_report_gb,
}

# Write each cell with a single .loc[(emotion, metric), column] assignment.
# Chained indexing (df[col][i][j] = value) assigns through an intermediate
# object and is not guaranteed to update the dataframe itself.
for column, report in reports_by_column.items():
  # NOTE: the report's 'accuracy' entry is a single overall value, so the
  # same number is repeated in the 'accuracy' row of every emotion.
  overall_accuracy = round(report['accuracy'], ndigits=2)
  for i in list1:
    emotion_wise_df.loc[(i, 'accuracy'), column] = overall_accuracy
    for j in list2:
      emotion_wise_df.loc[(i, j), column] = round(report[i][j], ndigits=2)

In [6]:
emotion_wise_df

Unnamed: 0_level_0,Unnamed: 1_level_0,(Logistic regression),(Random_Forest),(SVM),(G-Boosted_Tree)
Emotion,Metrics,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
anger,accuracy,0.67,0.69,0.72,0.7
anger,f1-score,0.6,0.6,0.64,0.6
anger,precision,0.58,0.58,0.6,0.59
anger,recall,0.61,0.62,0.69,0.61
disgust,accuracy,0.67,0.69,0.72,0.7
disgust,f1-score,0.55,0.58,0.61,0.59
disgust,precision,0.58,0.6,0.68,0.62
disgust,recall,0.52,0.57,0.56,0.56
fear,accuracy,0.67,0.69,0.72,0.7
fear,f1-score,0.65,0.71,0.73,0.7
