In [None]:
import numpy as np
import pandas as pd
import data_utils as utils
import matplotlib.pyplot as plt
import importlib

In [None]:
def get_averaged_points(emotion_encoding, x, y):
    """Average projected coordinates per emotion label.

    ``emotion_encoding.category_list`` supplies one label per position. The
    coordinates ``x[k]`` and ``y[k]`` found at the same position are collected
    under that label, and every group is then reduced with ``np.mean``.

    Args:
        emotion_encoding: Object exposing an indexable ``category_list``.
        x: Indexable of x coordinates, at least as long as ``category_list``.
        y: Indexable of y coordinates, at least as long as ``category_list``.

    Returns:
        A tuple ``(x_avg, y_avg, emotion_labels)`` of three parallel lists,
        ordered by the first appearance of each label in ``category_list``.
    """
    # label -> (x values, y values); dict insertion order keeps first-seen order.
    grouped = {}
    for idx in range(len(emotion_encoding.category_list)):
        label = emotion_encoding.category_list[idx]
        xs, ys = grouped.setdefault(label, ([], []))
        xs.append(x[idx])
        ys.append(y[idx])

    emotion_labels = list(grouped)
    x_avg = [np.mean(grouped[label][0]) for label in emotion_labels]
    y_avg = [np.mean(grouped[label][1]) for label in emotion_labels]
    return x_avg, y_avg, emotion_labels

def make_plot(axis_files: list,
              emotion_files: list,
              languages: list,
              x_category_high: str,
              x_category_low: str,
              y_category_high: str,
              y_category_low: str,
              encoding_mode: str,
              plot_labels: list, #[title, x_label, y_label]
              roberta_model_name=None,
              sent_model_name=None,
              emotion_filepath=None):
    """Project emotion embeddings onto a 2-D plane and draw a scatter plot.

    The plane is spanned by two axes, each defined by a low and a high
    endpoint obtained from ``utils.generate_averaged_points`` over the
    encodings of ``axis_files``. Every file in ``emotion_files`` is encoded,
    projected with ``utils.project_points_onto_axes``, averaged per emotion
    label and drawn as one labelled scatter series on a new 10x10 figure.

    Args:
        axis_files: Names passed to ``utils.Emotion_Category`` to build the
            axis-defining encodings.
        emotion_files: Names passed to ``utils.Emotion_Category`` (together
            with ``emotion_filepath``) for the emotions to plot.
        languages: Legend label for each entry of ``emotion_files``; must be
            at least as long as ``emotion_files``.
        x_category_high: Category used for the high end of the x axis.
        x_category_low: Category used for the low end of the x axis.
        y_category_high: Category used for the high end of the y axis.
        y_category_low: Category used for the low end of the y axis.
        encoding_mode: One of ``"bert"``, ``"fasttext"`` or ``"roberta"``.
        plot_labels: ``[title, x_label, y_label]``.
        roberta_model_name: Forwarded to ``utils.Emotion_Category``; required
            when ``encoding_mode`` is ``"roberta"``.
        sent_model_name: Forwarded to ``utils.Emotion_Category``.
        emotion_filepath: Forwarded as ``filepath`` for the emotion files only.

    Returns:
        ``(x_points, y_points)``: two lists holding, per emotion file, the
        list of per-emotion averaged x and y coordinates.

    Raises:
        ValueError: If ``encoding_mode`` is unsupported, if ``"roberta"`` is
            requested without ``roberta_model_name``, if ``languages`` is
            shorter than ``emotion_files``, or if ``plot_labels`` has fewer
            than three entries.
    """
    # Attribute on the encoded object that holds the embedding for each mode.
    embedding_attrs = {
        "bert": "bert_unofficial_embedding",
        "fasttext": "fasttext_unofficial_embedding",
        "roberta": "roberta_unofficial_embedding",
    }

    # Validate everything up front so bad arguments fail before any model work.
    if encoding_mode == 'roberta' and roberta_model_name is None:
        raise ValueError("provide a roberta model name")
    if encoding_mode not in embedding_attrs:
        raise ValueError(
            f"unsupported encoding_mode {encoding_mode!r}; "
            f"expected one of {sorted(embedding_attrs)}"
        )
    if len(languages) < len(emotion_files):
        raise ValueError("languages must provide a label for every entry in emotion_files")
    if len(plot_labels) < 3:
        raise ValueError("plot_labels must be [title, x_label, y_label]")

    def _encode(file_name, **category_kwargs):
        # Build one category object and compute its mean-centered encoding.
        category = utils.Emotion_Category(file_name,
                                          roberta_model_name=roberta_model_name,
                                          sent_model_name=sent_model_name,
                                          **category_kwargs)
        category.contextualization("", official=False)
        category.new_encoding(mode=encoding_mode, mean_centered=True, official=False)
        return category

    def _axis_endpoint(category):
        # Averaged point for one axis category across all axis encodings.
        return utils.generate_averaged_points(category=category, mode=encoding_mode,
                                              official=False, args=axis_dimensions)

    # Define dimensions and get encodings
    axis_dimensions = [_encode(axis_file) for axis_file in axis_files]

    # Define and create axes
    X_high = _axis_endpoint(x_category_high)
    X_low = _axis_endpoint(x_category_low)
    Y_high = _axis_endpoint(y_category_high)
    Y_low = _axis_endpoint(y_category_low)

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.grid()
    ax.set_title(plot_labels[0])
    ax.set_xlabel(plot_labels[1])
    ax.set_ylabel(plot_labels[2])

    # NOTE(review): the projected axis endpoints are only needed by an
    # origin-shift step that is currently disabled; the call is kept so the
    # sequence of calls into utils is unchanged.
    utils.project_points_onto_axes([X_low, X_high, Y_low, Y_high], X_low, X_high, Y_low, Y_high)

    x_points = []
    y_points = []
    # zip stops at the end of emotion_files; languages was checked above.
    for language, emotion_file in zip(languages, emotion_files):
        # Encode emotions to plot
        emotion_encoding = _encode(emotion_file, filepath=emotion_filepath)
        embedding = getattr(emotion_encoding, embedding_attrs[encoding_mode])
        x, y, _, _ = utils.project_points_onto_axes(embedding, X_low, X_high, Y_low, Y_high)
        x_avg, y_avg, emotion_labels = get_averaged_points(emotion_encoding, x, y)
        x_points.append(x_avg)
        y_points.append(y_avg)
        ax.scatter(x_avg, y_avg, label=language)
        for emotion, x_pos, y_pos in zip(emotion_labels, x_avg, y_avg):
            ax.annotate(emotion, (x_pos, y_pos))

    # Mark the origin for reference.
    ax.scatter(0, 0)
    ax.annotate("(0,0)", (0, 0))
    ax.legend(loc='upper right')
    return x_points, y_points

### Projection onto the Valence-Arousal Plane

In [None]:
# Keyword arguments common to both Valence-Arousal projections in this cell.
va_plane_kwargs = dict(
    axis_files=['VA_Russell_English', 'VA_Russell_Japanese'],
    emotion_files=['English', 'Japanese'],
    languages=['English', 'Japanese'],
    emotion_filepath="Ekman_emotions/pride_shame/",
    x_category_high="Positive valence",
    x_category_low="Negative valence",
    y_category_high="High arousal",
    y_category_low="Low arousal",
)

# First plot: "bert" encoding mode with an explicit sentence model name.
make_plot(
    **va_plane_kwargs,
    encoding_mode="bert",
    sent_model_name="paraphrase-multilingual-mpnet-base-v2",
    plot_labels=[
        "Multilingual Paraphrase-RoBERTa-XLM embeddings on the VA Plane[Russell]",
        "Valence Dimension",
        "Arousal Dimension",
    ],
)

# Second plot: "roberta" encoding mode, which requires roberta_model_name.
make_plot(
    **va_plane_kwargs,
    encoding_mode="roberta",
    roberta_model_name="cardiffnlp/twitter-xlm-roberta-base-sentiment",
    plot_labels=[
        "RoBERTa-XLM embeddings on the VA Plane[Russell]",
        "Valence Dimension",
        "Arousal Dimension",
    ],
)

### Projection onto the Valence-IWE Plane

In [None]:
make_plot(
    # Encoder selection
    encoding_mode="bert",
    # Axis definitions: x spans valence, y spans I -> WE
    axis_files=[
        'IWE_valence_English',
        'IWE_valence_Chinese',
        'IWE_valence_Spanish',
        'IWE_valence_Hindi',
        'IWE_valence_Japanese',
    ],
    x_category_low="Negative valence",
    x_category_high="Positive valence",
    y_category_low="I",
    y_category_high="WE",
    # Emotion files to project, with the legend label used for each
    emotion_filepath="Ekman_emotions/Ekman/",
    emotion_files=['English', 'Chinese', 'Spanish', 'Hindi', 'Japanese'],
    languages=['English', 'Chinese', 'Spanish', 'Hindi', 'Japanese'],
    # [title, x_label, y_label]
    plot_labels=[
        "Multilingual SBERT embeddings on the Valence/IWE Plane[Russell]",
        "Valence Dimension",
        "I/WE Dimension (Higher value = WE)",
    ],
)


### Projection onto the IWE-Arousal Plane

In [None]:
make_plot(
    # Encoder selection
    encoding_mode="bert",
    # Axis definitions: x spans I -> WE, y spans arousal
    axis_files=[
        'IWE_arousal_English',
        'IWE_arousal_Chinese',
        'IWE_arousal_Spanish',
        'IWE_arousal_Hindi',
        'IWE_arousal_Japanese',
    ],
    x_category_low="I",
    x_category_high="WE",
    y_category_low="Low arousal",
    y_category_high="High arousal",
    # Emotion files to project, with the legend label used for each
    emotion_filepath="Ekman_emotions/Ekman/",
    emotion_files=['English', 'Chinese', 'Spanish', 'Hindi', 'Japanese'],
    languages=['English', 'Chinese', 'Spanish', 'Hindi', 'Japanese'],
    # [title, x_label, y_label]
    plot_labels=[
        "Multilingual SBERT embeddings on the IWE/Arousal Plane[Russell]",
        "I/WE Dimension (Higher value = WE)",
        "Arousal Dimension",
    ],
)