In [1]:
import matplotlib
import pandas as pd
import json
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# General Analysis

Data gathered in the pilot study is included in the analysis.

TL;DR: 
- Overall accuracy is 0.36.
- Accuracy for negative emotions is 0.38.
- Accuracy for positive emotions is 0.35.


In [2]:
from py_sentimotion_mapper.sentimotion_mapper import Mapper
from coding_interpreter.metadata import Metadata

In [3]:
# NOTE(review): this calls an underscore-prefixed (private) method of Mapper.
# Presumably it populates the lookup tables read further down
# (Mapper.emotion_id_to_emotion, Mapper.emotion_to_valence) — confirm, and
# prefer a public entry point if the package exposes one.
Mapper._load_data_if_needed()

In [4]:
# Read the CSV export into `df`; later cells slice it by the `emotion_id`,
# `emotion_id_reply` and `valence` columns. The path is relative, so it is
# resolved against the kernel's current working directory.
df = pd.read_csv("data/completed_export.csv")
# Bare last expression: rendered as the cell's output.
df

## Results overview

In [5]:
# `emotion_id` is used as the true label and `emotion_id_reply` as the
# prediction in every metric below.
# Select plain 1-D Series (single brackets) rather than one-column DataFrames:
# scikit-learn's metrics document their targets as 1-D array-likes, and a
# Series avoids relying on implicit column-vector flattening.
y_true = df["emotion_id"]
y_pred = df["emotion_id_reply"]

In [6]:
# Per-emotion precision / recall / F1 over all rows.
#
# The label set is pinned explicitly to the sorted union of ids that occur in
# the true labels and in the replies, and the display names are derived from
# those same ids. Passing only `target_names` (taken from a dict's iteration
# order) labels the rows correctly only if that order happens to match
# scikit-learn's sorted labels AND every emotion occurs in the data.
labels = np.union1d(np.ravel(y_true), np.ravel(y_pred))
# NOTE(review): assumes Mapper.get_emotion_from_id returns one name per id, in
# input order — the same assumption the confusion-matrix cell already makes.
target_names = list(Mapper.get_emotion_from_id(labels))

report = classification_report(y_true, y_pred,
                               labels=labels,
                               target_names=target_names)

# print() so the report's embedded newlines render as a table.
print(report)

In [7]:
# Row-normalised confusion matrix over all rows (each row sums to 1 for every
# true label that occurs at least once).
#
# confusion_matrix() builds its axes from the sorted union of the labels in
# y_true and y_pred. The axis names must therefore come from that same union:
# using only the ids found in y_true makes the DataFrame construction fail with
# a shape mismatch as soon as a reply contains an id that never appears as a
# true label.
emotion_ids = np.union1d(np.ravel(y_true), np.ravel(y_pred))
conf_matrix = confusion_matrix(y_true, y_pred, labels=emotion_ids, normalize="true")

# NOTE(review): assumes get_emotion_from_id returns names in input order.
emotion_names = list(Mapper.get_emotion_from_id(emotion_ids))
df_cm = pd.DataFrame(conf_matrix, index=emotion_names, columns=emotion_names)

# Explicit Axes object instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(20, 15))
sns.heatmap(df_cm, cmap="Blues", annot=True, fmt=".1f", ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()

## Separating negative and positive emotions

### Negative Emotions

In [8]:
# Keys of Mapper.emotion_to_valence whose value is "neg" or "neu", kept in the
# mapping's iteration order.
negative_emotions = [
    emotion
    for emotion, valence in Mapper.emotion_to_valence.items()
    if valence in ("neg", "neu")
]

In [9]:
# Restrict the analysis to rows whose `valence` column equals 'neg'.
neg_df = df[df['valence'] == 'neg']
# 1-D Series (single brackets): scikit-learn metrics expect 1-D targets.
y_true = neg_df["emotion_id"]
y_pred = neg_df["emotion_id_reply"]

# Derive the label set and its display names from the subset itself.
# `negative_emotions` also contains the "neu" entries and follows the mapping's
# iteration order, so using it as `target_names` without `labels=` is only
# correct if its length and order happen to match the sorted ids present here.
labels = np.union1d(np.ravel(y_true), np.ravel(y_pred))
# NOTE(review): assumes Mapper.get_emotion_from_id returns one name per id, in
# input order — the same assumption the confusion-matrix cell already makes.
target_names = list(Mapper.get_emotion_from_id(labels))

report = classification_report(y_true, y_pred,
                               labels=labels,
                               target_names=target_names)

# print() so the report's embedded newlines render as a table.
print(report)

In [10]:
# Row-normalised confusion matrix for the current y_true / y_pred (the
# negative-valence subset when cells are run top to bottom).
#
# The axis names are taken from the same sorted union of labels that
# confusion_matrix() uses for its rows and columns. Naming the axes from
# np.unique(y_true) alone fails with a shape mismatch whenever a reply id does
# not occur among the true labels of the subset.
emotion_ids = np.union1d(np.ravel(y_true), np.ravel(y_pred))
conf_matrix = confusion_matrix(y_true, y_pred, labels=emotion_ids, normalize="true")

# NOTE(review): assumes get_emotion_from_id returns names in input order.
emotion_names = list(Mapper.get_emotion_from_id(emotion_ids))
df_cm = pd.DataFrame(conf_matrix, index=emotion_names, columns=emotion_names)

# Explicit Axes object instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(df_cm, cmap="Blues", annot=True, fmt=".1f", annot_kws={"size": 8}, ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()

### Positive Emotions

In [11]:
# Keys of Mapper.emotion_to_valence whose value is "pos" or "neu", kept in the
# mapping's iteration order.
positive_emotions = [
    emotion
    for emotion, valence in Mapper.emotion_to_valence.items()
    if valence in ("pos", "neu")
]

In [12]:
# Restrict the analysis to rows whose `valence` column equals 'pos'.
pos_df = df[df['valence'] == 'pos']
# 1-D Series (single brackets): scikit-learn metrics expect 1-D targets.
y_true = pos_df["emotion_id"]
y_pred = pos_df["emotion_id_reply"]

# Derive the label set and its display names from the subset itself.
# `positive_emotions` also contains the "neu" entries and follows the mapping's
# iteration order, so using it as `target_names` without `labels=` is only
# correct if its length and order happen to match the sorted ids present here.
labels = np.union1d(np.ravel(y_true), np.ravel(y_pred))
# NOTE(review): assumes Mapper.get_emotion_from_id returns one name per id, in
# input order — the same assumption the confusion-matrix cell already makes.
target_names = list(Mapper.get_emotion_from_id(labels))

report = classification_report(y_true, y_pred,
                               labels=labels,
                               target_names=target_names)

# print() so the report's embedded newlines render as a table.
print(report)

In [13]:
# Row-normalised confusion matrix for the current y_true / y_pred (the
# positive-valence subset when cells are run top to bottom).
#
# The axis names are taken from the same sorted union of labels that
# confusion_matrix() uses for its rows and columns. Naming the axes from
# np.unique(y_true) alone fails with a shape mismatch whenever a reply id does
# not occur among the true labels of the subset.
emotion_ids = np.union1d(np.ravel(y_true), np.ravel(y_pred))
conf_matrix = confusion_matrix(y_true, y_pred, labels=emotion_ids, normalize="true")

# NOTE(review): assumes get_emotion_from_id returns names in input order.
emotion_names = list(Mapper.get_emotion_from_id(emotion_ids))
df_cm = pd.DataFrame(conf_matrix, index=emotion_names, columns=emotion_names)

# Explicit Axes object instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(df_cm, cmap="Blues", annot=True, fmt=".1f", annot_kws={"size": 8}, ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()