In [None]:
import numpy as np
import pandas as pd
import data_utils as utils
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from scipy.spatial.distance import cdist
import importlib

def get_distances(x1: list, y1: list, x2: list, y2: list, labels: list):
    """Print the Euclidean distance between paired 2-D points, one per label.

    Point ``i`` of the first set is ``(x1[i], y1[i])`` and point ``i`` of the
    second set is ``(x2[i], y2[i])``; ``labels[i]`` names that pair. One line
    is printed per label, followed by the mean of all distances.

    Nothing is returned; the results are only written to stdout.
    """
    separations = []
    for idx, name in enumerate(labels):
        first_point = [[x1[idx], y1[idx]]]
        second_point = [[x2[idx], y2[idx]]]
        # cdist on two single-point inputs yields a 1x1 matrix; take its only cell.
        pairwise = cdist(first_point, second_point, 'euclidean')
        separation = pairwise[0][0]
        print("%s: %f" % (name, separation))
        separations.append(separation)

    mean_separation = np.mean(separations)
    print("Average distance between points: %f" % (mean_separation))

def get_averaged_points(emotion_encoding, x, y):
    """Collapse projected points to one mean point per emotion label.

    ``emotion_encoding.category_list[i]`` is used as the label of the point
    ``(x[i], y[i])``. Points that share a label are averaged with ``np.mean``.

    Returns:
        A tuple ``(x_avg, y_avg, emotion_labels)`` of three parallel lists.
        Labels appear in the order they are first seen in ``category_list``.
    """
    # label -> (x values, y values), filled in first-seen order.
    grouped = {}
    for position, label in enumerate(emotion_encoding.category_list):
        xs, ys = grouped.setdefault(label, ([], []))
        xs.append(x[position])
        ys.append(y[position])

    emotion_labels = list(grouped)
    x_avg = [np.mean(grouped[label][0]) for label in emotion_labels]
    y_avg = [np.mean(grouped[label][1]) for label in emotion_labels]
    return x_avg, y_avg, emotion_labels

def make_plot(axis_file: str,
              emotion_file: str,
              x_category_high: str,
              x_category_low: str,
              y_category_high: str,
              y_category_low: str,
              encoding_mode: str,
              plot_labels: list, #[title, x_label, y_label]
              return_points=False, #[[x_points], [y_points], [emotion labels]]
              roberta_model_name=None,
              sent_model_name=None,
              emotion_filepath=None):
    """Project emotion embeddings onto two category-defined axes and optionally plot them.

    The axes are built from ``axis_file``: for each of the four category names,
    ``utils.generate_averaged_points`` supplies the point used as the high or
    low end of the x or y axis. The emotions in ``emotion_file`` are encoded
    with the same ``encoding_mode``, projected with
    ``utils.project_points_onto_axes`` and averaged per emotion label.

    Args:
        axis_file: Passed to ``utils.Emotion_Category`` to build the axis categories.
        emotion_file: Passed to ``utils.Emotion_Category`` for the emotions to project.
        x_category_high, x_category_low: Category names for the two ends of the x axis.
        y_category_high, y_category_low: Category names for the two ends of the y axis.
        encoding_mode: One of ``"bert"``, ``"fasttext"`` or ``"roberta"``.
        plot_labels: ``[title, x_label, y_label]`` for the figure.
        return_points: When true, skip drawing and only return the points.
        roberta_model_name: Required when ``encoding_mode`` is ``"roberta"``.
        sent_model_name: Forwarded to ``utils.Emotion_Category``.
        emotion_filepath: Forwarded as ``filepath`` for the emotion file only.

    Returns:
        ``[x_avg, y_avg, emotion_labels]`` - three parallel lists with one
        entry per distinct emotion label.

    Raises:
        ValueError: If a required model name is missing, or if
            ``encoding_mode`` has no projection path in this function.
    """
    # Attribute holding the embedding for each mode this function can project.
    embedding_attribute = {
        "bert": "bert_unofficial_embedding",
        "fasttext": "fasttext_unofficial_embedding",
        "roberta": "roberta_unofficial_embedding",
    }

    if(encoding_mode == 'roberta' and roberta_model_name is None):
        raise ValueError("provide a roberta model name")

    if(encoding_mode == 'sentence_transformers' and sent_model_name is None):
        raise ValueError("provide a sentence transformer model name")

    # Fail before any encoding work: previously an unhandled mode (including
    # 'sentence_transformers') ran every encoding step and then crashed with
    # UnboundLocalError because x and y were never assigned.
    if encoding_mode not in embedding_attribute:
        raise ValueError(
            "unsupported encoding_mode %r; make_plot can only project: %s"
            % (encoding_mode, ", ".join(embedding_attribute))
        )

    # Define dimensions and get encodings
    axis_dimensions = utils.Emotion_Category(axis_file, roberta_model_name=roberta_model_name, sent_model_name=sent_model_name)
    axis_dimensions.contextualization("", official=False)
    axis_dimensions.new_encoding(mode=encoding_mode, mean_centered=False, official=False)

    # Define and create axes
    X_high = utils.generate_averaged_points(category=x_category_high, mode=encoding_mode, official=False, args=[axis_dimensions])
    X_low = utils.generate_averaged_points(category=x_category_low, mode=encoding_mode, official=False, args=[axis_dimensions])
    Y_high = utils.generate_averaged_points(category=y_category_high, mode=encoding_mode, official=False, args=[axis_dimensions])
    Y_low = utils.generate_averaged_points(category=y_category_low, mode=encoding_mode, official=False, args=[axis_dimensions])

    # Encode emotions to plot
    emotion_encoding = utils.Emotion_Category(emotion_file, roberta_model_name=roberta_model_name, sent_model_name=sent_model_name, filepath=emotion_filepath)
    emotion_encoding.contextualization("", official=False)
    emotion_encoding.new_encoding(mode=encoding_mode, mean_centered=False, official=False)

    # Only the first two values returned by the projection are used here.
    embedding = getattr(emotion_encoding, embedding_attribute[encoding_mode])
    x,y,_,_ = utils.project_points_onto_axes(embedding, X_low, X_high, Y_low, Y_high)

    x_avg, y_avg, emotion_labels = get_averaged_points(emotion_encoding, x, y)

    # Project the axis end points themselves so they can be drawn and labelled.
    x_axis,y_axis,_,_ = utils.project_points_onto_axes([X_low, X_high, Y_low, Y_high], X_low, X_high, Y_low, Y_high)
    axis_labels = [x_category_low, x_category_high, y_category_low, y_category_high]

    # origin_x, origin_y = utils.line_intersection(x_axis, y_axis)
    # x_avg -= origin_x
    # y_avg -= origin_y
    # x_axis -= origin_x
    # y_axis -= origin_y

    if(not return_points):
        plt.figure(figsize=(7, 7))
        plt.title(plot_labels[0])
        plt.xlabel(plot_labels[1])
        plt.ylabel(plot_labels[2])

        # Mark the origin for reference.
        plt.scatter(0,0)
        plt.annotate("(0,0)", (0,0))

        plt.scatter(x_avg, y_avg)
        for i, emotion in enumerate(emotion_labels):
            plt.annotate(emotion, (x_avg[i], y_avg[i]))

        plt.scatter(x_axis, y_axis)
        for i, emotion in enumerate(axis_labels):
            plt.annotate(emotion, (x_axis[i], y_axis[i]))

        # radius = np.average([abs(x_axis[0]), x_axis[1], abs(y_axis[2]), y_axis[3]])
        # ax = plt.add_subplot()
        # circle1 = patches.Circle((0, 0), radius, color='g', fill=False)
        # ax.add_patch(circle1)
        # ax.axis('equal')

        plt.grid()

    # The same points are returned whether or not a figure was drawn.
    return([x_avg, y_avg, emotion_labels])

In [None]:
# Re-execute the data_utils module (imported above as `utils`) so that the
# cells below use its current source without restarting the kernel.
importlib.reload(utils)

### English

In [None]:
# English emotions projected with the "bert" encoding mode.
sbert_x, sbert_y, sbert_labels = make_plot(
    axis_file="VA_Russell_English",
    emotion_file="English",
    emotion_filepath="Ekman_emotions/Ekman/",
    encoding_mode="bert",
    x_category_low="Negative valence",
    x_category_high="Positive valence",
    y_category_low="Low arousal",
    y_category_high="High arousal",
    plot_labels=[
        "English SBERT embeddings on the Valence/Arousal Plane[Russell]",
        "Valence Dimension",
        "Arousal Dimension",
    ],
)

# Same axes and emotions, encoded with the "roberta" mode and a multilingual checkpoint.
multi_x, multi_y, multi_labels = make_plot(
    axis_file="VA_Russell_English",
    emotion_file="English",
    emotion_filepath="Ekman_emotions/Ekman/",
    encoding_mode="roberta",
    roberta_model_name="cardiffnlp/twitter-xlm-roberta-base-sentiment",
    x_category_low="Negative valence",
    x_category_high="Positive valence",
    y_category_low="Low arousal",
    y_category_high="High arousal",
    plot_labels=[
        "English RoBERTa-XLM embeddings on the Valence/Arousal Plane[Russell]",
        "Valence Dimension",
        "Arousal Dimension",
    ],
)

# Per-emotion distance between the two projections (labels taken from the first run).
get_distances(multi_x, multi_y, sbert_x, sbert_y, sbert_labels)

### Chinese

In [None]:
# Chinese emotions projected with the "bert" encoding mode.
sbert_x, sbert_y, sbert_labels = make_plot(
    axis_file="VA_Russell_Chinese",
    emotion_file="Chinese",
    emotion_filepath="Ekman_emotions/Ekman/",
    encoding_mode="bert",
    x_category_low="Negative valence",
    x_category_high="Positive valence",
    y_category_low="Low arousal",
    y_category_high="High arousal",
    plot_labels=[
        "Chinese SBERT embeddings on the Valence/Arousal Plane[Russell]",
        "Valence Dimension",
        "Arousal Dimension",
    ],
)

# Same axes and emotions, encoded with the "roberta" mode and a multilingual checkpoint.
multi_x, multi_y, multi_labels = make_plot(
    axis_file="VA_Russell_Chinese",
    emotion_file="Chinese",
    emotion_filepath="Ekman_emotions/Ekman/",
    encoding_mode="roberta",
    roberta_model_name="cardiffnlp/twitter-xlm-roberta-base-sentiment",
    x_category_low="Negative valence",
    x_category_high="Positive valence",
    y_category_low="Low arousal",
    y_category_high="High arousal",
    plot_labels=[
        "Chinese RoBERTa-XLM embeddings on the Valence/Arousal Plane[Russell]",
        "Valence Dimension",
        "Arousal Dimension",
    ],
)

# Per-emotion distance between the two projections (labels taken from the first run).
get_distances(multi_x, multi_y, sbert_x, sbert_y, sbert_labels)

### Japanese

In [None]:
# Japanese emotions projected with the "bert" encoding mode.
sbert_x, sbert_y, sbert_labels = make_plot(
    axis_file="VA_Russell_Japanese",
    emotion_file="Japanese",
    emotion_filepath="Ekman_emotions/Ekman/",
    encoding_mode="bert",
    x_category_low="Negative valence",
    x_category_high="Positive valence",
    y_category_low="Low arousal",
    y_category_high="High arousal",
    plot_labels=[
        "Japanese SBERT embeddings on the Valence/Arousal Plane[Russell]",
        "Valence Dimension",
        "Arousal Dimension",
    ],
)

# Same axes and emotions, encoded with the "roberta" mode and a multilingual checkpoint.
multi_x, multi_y, multi_labels = make_plot(
    axis_file="VA_Russell_Japanese",
    emotion_file="Japanese",
    emotion_filepath="Ekman_emotions/Ekman/",
    encoding_mode="roberta",
    roberta_model_name="cardiffnlp/twitter-xlm-roberta-base-sentiment",
    x_category_low="Negative valence",
    x_category_high="Positive valence",
    y_category_low="Low arousal",
    y_category_high="High arousal",
    plot_labels=[
        "Japanese RoBERTa-XLM embeddings on the Valence/Arousal Plane[Russell]",
        "Valence Dimension",
        "Arousal Dimension",
    ],
)

# Per-emotion distance between the two projections (labels taken from the first run).
get_distances(multi_x, multi_y, sbert_x, sbert_y, sbert_labels)

### Spanish

In [None]:
# Spanish emotions encoded with the "roberta" mode and the xlm-roberta-base checkpoint.
multi_x, multi_y, multi_labels = make_plot(
    axis_file="VA_Russell_Spanish",
    emotion_file="Spanish",
    emotion_filepath="Ekman_emotions/Ekman/",
    encoding_mode="roberta",
    roberta_model_name="xlm-roberta-base",
    x_category_low="Negative valence",
    x_category_high="Positive valence",
    y_category_low="Low arousal",
    y_category_high="High arousal",
    plot_labels=[
        "Spanish RoBERTa-XLM embeddings on the Valence/Arousal Plane[Russell]",
        "Valence Dimension",
        "Arousal Dimension",
    ],
)

# Same axes and emotions, encoded with a Spanish-specific RoBERTa checkpoint.
mono_x, mono_y, mono_labels = make_plot(
    axis_file="VA_Russell_Spanish",
    emotion_file="Spanish",
    emotion_filepath="Ekman_emotions/Ekman/",
    encoding_mode="roberta",
    roberta_model_name="bertin-project/bertin-roberta-base-spanish",
    x_category_low="Negative valence",
    x_category_high="Positive valence",
    y_category_low="Low arousal",
    y_category_high="High arousal",
    plot_labels=[
        "Spanish RoBERTa embeddings on the Valence/Arousal Plane[Russell]",
        "Valence Dimension",
        "Arousal Dimension",
    ],
)

# Per-emotion distance between the two projections (labels taken from the second run).
get_distances(multi_x, multi_y, mono_x, mono_y, mono_labels)