In [80]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.preprocessing import LabelEncoder
import os
import json

In [2]:
# Load the train/test conversation annotations. JSON is UTF-8 by specification, so the
# encoding is pinned here instead of being left to the platform's locale default.
with open('/project/msoleyma_1026/ecp/data/train.json', 'r', encoding='utf-8') as file:
    train_data = json.load(file)
with open('/project/msoleyma_1026/ecp/data/test.json', 'r', encoding='utf-8') as file:
    test_data = json.load(file)

In [3]:
# Peek at the first utterance of the first training conversation to see the record layout.
train_data[0]['conversation'][0]

{'utterance_ID': 1,
 'text': 'Alright , so I am back in high school , I am standing in the middle of the cafeteria , and I realize I am totally naked .',
 'speaker': 'Chandler',
 'emotion': 'neutral',
 'video_name': 'dia1utt1.mp4'}

In [4]:
# Text modality. Walk every utterance of every conversation and keep the ones that have a
# saved embedding; the emotion label (and, for the test split, the clip name) is recorded
# in the same pass so features and labels stay index-aligned.
X_text_train, y_train = [], []
X_text_test, y_test, filenames_test = [], [], []

for dialogue in train_data:
    for utt in dialogue['conversation']:
        npy_path = os.path.join('/project/msoleyma_1026/ecp/data/text/train-emotion', utt['video_name'].replace('.mp4', '.npy'))
        if not os.path.exists(npy_path):
            continue  # no embedding on disk for this utterance
        X_text_train.append(np.load(npy_path))
        y_train.append(utt['emotion'])

for dialogue in test_data:
    for utt in dialogue['conversation']:
        npy_path = os.path.join('/project/msoleyma_1026/ecp/data/text/test-emotion', utt['video_name'].replace('.mp4', '.npy'))
        if not os.path.exists(npy_path):
            continue  # no embedding on disk for this utterance
        X_text_test.append(np.load(npy_path))
        y_test.append(utt['emotion'])
        filenames_test.append(utt['video_name'])

In [5]:
# Sanity check: number of text embeddings, labels and test filenames collected per split.
len(X_text_train), len(X_text_test), len(y_train), len(y_test), len(filenames_test)

(11053, 2566, 11053, 2566, 2566)

In [13]:
# Video modality: one embedding per utterance, visited in the same conversation/utterance
# order as the text pass so that row i is meant to line up with y_train[i] / y_test[i].
X_video_train = []
X_video_test = []

for conversation in train_data:
    for utterance in conversation['conversation']:
        file_path = os.path.join('/project/msoleyma_1026/ecp/data/video/train', utterance['video_name'].replace('.mp4', '.npy'))
        if os.path.exists(file_path):
            embedding = np.load(file_path)
            X_video_train.append(embedding)
for conversation in test_data:
    for utterance in conversation['conversation']:
        file_path = os.path.join('/project/msoleyma_1026/ecp/data/video/test', utterance['video_name'].replace('.mp4', '.npy'))
        if os.path.exists(file_path):
            embedding = np.load(file_path)
            X_video_test.append(embedding)

# Utterances without a saved embedding are skipped silently, and the labels were collected
# while scanning the text folder. A file missing here but present for text would shift every
# later row against its label, so fail loudly on the cheapest detectable symptom: a count
# mismatch.
assert len(X_video_train) == len(y_train), (
    f'video/train: {len(X_video_train)} embeddings for {len(y_train)} labels')
assert len(X_video_test) == len(y_test), (
    f'video/test: {len(X_video_test)} embeddings for {len(y_test)} labels')

In [15]:
# Number of video embeddings loaded for the train and test splits.
len(X_video_train), len(X_video_test)

(11053, 2566)

In [9]:
# Audio modality: one embedding per utterance, visited in the same conversation/utterance
# order as the text pass so that row i is meant to line up with y_train[i] / y_test[i].
X_audio_train = []
X_audio_test = []

for conversation in train_data:
    for utterance in conversation['conversation']:
        file_path = os.path.join('/project/msoleyma_1026/ecp/data/audio/train-emotion', utterance['video_name'].replace('.mp4', '.npy'))
        if os.path.exists(file_path):
            embedding = np.load(file_path)
            X_audio_train.append(embedding)
for conversation in test_data:
    for utterance in conversation['conversation']:
        file_path = os.path.join('/project/msoleyma_1026/ecp/data/audio/test-emotion', utterance['video_name'].replace('.mp4', '.npy'))
        if os.path.exists(file_path):
            embedding = np.load(file_path)
            X_audio_test.append(embedding)

# Utterances without a saved embedding are skipped silently, and the labels were collected
# while scanning the text folder. A file missing here but present for text would shift every
# later row against its label, so fail loudly on the cheapest detectable symptom: a count
# mismatch.
assert len(X_audio_train) == len(y_train), (
    f'audio/train: {len(X_audio_train)} embeddings for {len(y_train)} labels')
assert len(X_audio_test) == len(y_test), (
    f'audio/test: {len(X_audio_test)} embeddings for {len(y_test)} labels')

In [10]:
# Number of audio embeddings loaded for the train and test splits.
len(X_audio_train), len(X_audio_test)

(11053, 2566)

In [16]:
# Stack each modality's embeddings into one matrix with a row per utterance; the -1
# collapses whatever per-embedding shape was saved on disk into a single feature axis.
X_text_train = np.array(X_text_train).reshape(len(X_text_train), -1)
X_text_test = np.array(X_text_test).reshape(len(X_text_test), -1)

X_video_train = np.array(X_video_train).reshape(len(X_video_train), -1)
X_video_test = np.array(X_video_test).reshape(len(X_video_test), -1)

X_audio_train = np.array(X_audio_train).reshape(len(X_audio_train), -1)
X_audio_test = np.array(X_audio_test).reshape(len(X_audio_test), -1)

# Labels become 1-D arrays, one entry per utterance.
y_train, y_test = np.array(y_train), np.array(y_test)

In [17]:
# Shapes of every feature matrix and label vector after stacking/flattening.
X_text_train.shape, X_text_test.shape, X_video_train.shape, X_video_test.shape, X_audio_train.shape, X_audio_test.shape, y_train.shape, y_test.shape

((11053, 768),
 (2566, 768),
 (11053, 768),
 (2566, 768),
 (11053, 1024),
 (2566, 1024),
 (11053,),
 (2566,))

In [18]:
# Learn the emotion <-> integer mapping from the training labels only, then apply that same
# mapping to the test labels. Re-fitting on the test split would assign different integers
# whenever the two splits do not contain exactly the same set of emotions, and it would make
# label_encoder.inverse_transform(...) disagree with what the models were trained on.
# transform() raises ValueError if the test split contains an emotion never seen in training.
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)

In [19]:
# Emotion classifier on the text embeddings: two hidden layers (300, 100), Adam, fixed seed.
mlp_text = MLPClassifier(
    hidden_layer_sizes=(300, 100),
    activation='relu',
    solver='adam',
    learning_rate_init=0.001,
    max_iter=300,
    random_state=42,
)

mlp_text.fit(X_text_train, y_train)

In [20]:
# Emotion classifier on the video embeddings: two hidden layers (300, 100), Adam, fixed seed.
mlp_video = MLPClassifier(
    hidden_layer_sizes=(300, 100),
    activation='relu',
    solver='adam',
    learning_rate_init=0.001,
    max_iter=300,
    random_state=42,
)

mlp_video.fit(X_video_train, y_train)

In [21]:
# Emotion classifier on the audio embeddings: two hidden layers (300, 100), Adam, fixed seed.
mlp_audio = MLPClassifier(
    hidden_layer_sizes=(300, 100),
    activation='relu',
    solver='adam',
    learning_rate_init=0.001,
    max_iter=300,
    random_state=42,
)

mlp_audio.fit(X_audio_train, y_train)

In [22]:
# Evaluate the text-only emotion classifier on the test utterances.
y_text_pred = mlp_text.predict(X_text_test)

# Compute every metric first, then report them in the usual order.
accuracy_text = accuracy_score(y_test, y_text_pred)
class_report_text = classification_report(y_test, y_text_pred)
f1_score_text = f1_score(y_test, y_text_pred, average='weighted')

print(f'Accuracy: {accuracy_text * 100:.2f}%')
print(class_report_text)
print(f'F1 Score: {f1_score_text * 100:.2f}%')

Accuracy: 53.74%
              precision    recall  f1-score   support

           0       0.39      0.41      0.40       333
           1       0.13      0.08      0.10        79
           2       0.21      0.14      0.17        56
           3       0.48      0.48      0.48       429
           4       0.65      0.73      0.69      1121
           5       0.31      0.21      0.25       241
           6       0.56      0.50      0.53       307

    accuracy                           0.54      2566
   macro avg       0.39      0.37      0.37      2566
weighted avg       0.52      0.54      0.53      2566

F1 Score: 52.56%


In [23]:
# Evaluate the video-only emotion classifier on the test utterances.
y_video_pred = mlp_video.predict(X_video_test)

# Compute every metric first, then report them in the usual order.
accuracy_video = accuracy_score(y_test, y_video_pred)
class_report_video = classification_report(y_test, y_video_pred)
f1_score_video = f1_score(y_test, y_video_pred, average='weighted')

print(f'Accuracy: {accuracy_video * 100:.2f}%')
print(class_report_video)
print(f'F1 Score: {f1_score_video * 100:.2f}%')

Accuracy: 31.61%
              precision    recall  f1-score   support

           0       0.21      0.19      0.20       333
           1       0.02      0.03      0.02        79
           2       0.12      0.12      0.12        56
           3       0.26      0.23      0.25       429
           4       0.48      0.49      0.48      1121
           5       0.14      0.14      0.14       241
           6       0.17      0.20      0.18       307

    accuracy                           0.32      2566
   macro avg       0.20      0.20      0.20      2566
weighted avg       0.32      0.32      0.32      2566

F1 Score: 31.59%


In [24]:
# Evaluate the audio-only emotion classifier on the test utterances.
y_audio_pred = mlp_audio.predict(X_audio_test)

# Compute every metric first, then report them in the usual order.
accuracy_audio = accuracy_score(y_test, y_audio_pred)
class_report_audio = classification_report(y_test, y_audio_pred)
f1_score_audio = f1_score(y_test, y_audio_pred, average='weighted')

print(f'Accuracy: {accuracy_audio * 100:.2f}%')
print(class_report_audio)
print(f'F1 Score: {f1_score_audio * 100:.2f}%')

Accuracy: 45.95%
              precision    recall  f1-score   support

           0       0.34      0.24      0.28       333
           1       0.13      0.11      0.12        79
           2       0.07      0.02      0.03        56
           3       0.33      0.41      0.36       429
           4       0.56      0.70      0.62      1121
           5       0.46      0.14      0.22       241
           6       0.40      0.32      0.36       307

    accuracy                           0.46      2566
   macro avg       0.33      0.28      0.28      2566
weighted avg       0.44      0.46      0.44      2566

F1 Score: 43.62%


In [25]:
# Late fusion: average the per-class probabilities of the three single-modality models and
# take the most probable class. The three models were fit on the same y_train, so their
# probability columns describe the same classes; because the labels are the encoder's
# integers, the argmax column index is the predicted label itself.
prob_audio = mlp_audio.predict_proba(X_audio_test)
prob_video = mlp_video.predict_proba(X_video_test)
prob_text = mlp_text.predict_proba(X_text_test)

prob_average = (prob_audio + prob_video + prob_text) / 3

final_predictions = np.argmax(prob_average, axis=1)

accuracy = accuracy_score(y_test, final_predictions)
print(f'Accuracy: {accuracy * 100:.2f}%')
class_report = classification_report(y_test, final_predictions)
print(class_report)
# Keep the score under its own name: assigning it to `f1_score` would replace the imported
# sklearn function with a float and break every later f1_score(...) call in the notebook.
f1_score_fused = f1_score(y_test, final_predictions, average='weighted')
print(f'F1 Score: {f1_score_fused * 100:.2f}%')

Accuracy: 54.29%
              precision    recall  f1-score   support

           0       0.46      0.31      0.37       333
           1       0.15      0.04      0.06        79
           2       0.36      0.07      0.12        56
           3       0.47      0.44      0.45       429
           4       0.59      0.84      0.69      1121
           5       0.39      0.14      0.20       241
           6       0.52      0.39      0.44       307

    accuracy                           0.54      2566
   macro avg       0.42      0.32      0.33      2566
weighted avg       0.51      0.54      0.50      2566

F1 Score: 50.37%


In [26]:
# Turn the fused integer predictions back into emotion names.
y_test_final_labels = label_encoder.inverse_transform(final_predictions)

# One row per test clip: its file name next to the predicted emotion.
results_df = pd.DataFrame(
    {
        'filename': filenames_test,
        'predicted_label': y_test_final_labels,
    }
)

# Lookup table clip name -> predicted emotion, used when building the cause-pair features.
label_by_filename = results_df.set_index('filename')['predicted_label']
predicted_emotions = label_by_filename.to_dict()

# results_df.to_csv('/content/drive/MyDrive/CSCI535 Project/Dataset/Processed/late_fusion_predictions_with_filenames.csv', index=False)

In [27]:
# Show the clip name -> predicted emotion mapping.
# NOTE(review): this prints the entire dictionary; consider displaying only a small sample.
print(predicted_emotions)

{'dia16utt1.mp4': 'neutral', 'dia16utt2.mp4': 'sadness', 'dia16utt3.mp4': 'joy', 'dia16utt4.mp4': 'neutral', 'dia16utt5.mp4': 'neutral', 'dia20utt1.mp4': 'neutral', 'dia20utt2.mp4': 'neutral', 'dia20utt3.mp4': 'neutral', 'dia20utt4.mp4': 'joy', 'dia20utt5.mp4': 'joy', 'dia20utt6.mp4': 'neutral', 'dia20utt7.mp4': 'neutral', 'dia20utt8.mp4': 'sadness', 'dia20utt9.mp4': 'joy', 'dia20utt10.mp4': 'neutral', 'dia26utt1.mp4': 'disgust', 'dia26utt2.mp4': 'neutral', 'dia26utt3.mp4': 'neutral', 'dia26utt4.mp4': 'neutral', 'dia26utt5.mp4': 'anger', 'dia43utt1.mp4': 'neutral', 'dia43utt2.mp4': 'neutral', 'dia43utt3.mp4': 'joy', 'dia46utt1.mp4': 'neutral', 'dia46utt2.mp4': 'neutral', 'dia46utt3.mp4': 'neutral', 'dia46utt4.mp4': 'neutral', 'dia46utt5.mp4': 'neutral', 'dia47utt1.mp4': 'neutral', 'dia47utt2.mp4': 'surprise', 'dia47utt3.mp4': 'joy', 'dia47utt4.mp4': 'neutral', 'dia47utt5.mp4': 'neutral', 'dia47utt6.mp4': 'neutral', 'dia47utt7.mp4': 'neutral', 'dia47utt8.mp4': 'neutral', 'dia47utt9.mp4'

In [28]:
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import json
import os

In [29]:
# The seven emotion categories, kept sorted so each one-hot column has a stable position.
emotion_list = sorted(['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness','surprise'])

# scikit-learn renamed the `sparse` argument to `sparse_output` in 1.2 and removed the old
# spelling in 1.4. Try the current keyword first and fall back to the old one so the cell
# runs on either side of the rename; both request a dense array from transform().
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
encoder.fit(np.array(emotion_list).reshape(-1, 1))

def encode_emotion(emotion):
    """Return the one-hot encoding of a single emotion name as a flat 1-D array.

    The vector has one entry per category in ``emotion_list``. The encoder is built with
    its default unknown-category handling, so a name it was not fitted on is expected to
    raise ``ValueError`` rather than produce an all-zero vector.
    """
    encoded = encoder.transform([[emotion]])
    return encoded.flatten()



In [30]:
# def generate_utterance_pairs_emotion_cause_flags_train(conversations):
#     pairs_emotion_flags_train = []
#     for conversation in conversations:
#         if str(conversation['conversation_ID']) in train_data:
#             cause_pair_ids = {(int(pair[0].split('_')[0]), int(pair[1])) for pair in conversation['emotion-cause_pairs']}
#             utterances = conversation['conversation']
#             for i, utterance_i in enumerate(utterances):
#                 if utterance_i['emotion'] == 'neutral':
#                     continue
#                 for j, utterance_j in enumerate(utterances[:i+1]):
#                     pair_key = (utterance_i['utterance_ID'], utterance_j['utterance_ID'])
#                     is_cause_pair = 1 if pair_key in cause_pair_ids else 0
#                     pairs_emotion_flags_train.append((conversation['conversation_ID'],pair_key, utterance_i['emotion'], is_cause_pair))
#     return pairs_emotion_flags_train

# def generate_utterance_pairs_emotion_cause_flags_test(conversations):
#     pairs_emotion_flags_test = []
#     for conversation in conversations:
#         if str(conversation['conversation_ID']) in test_data:
#             cause_pair_ids = {(int(pair[0].split('_')[0]), int(pair[1])) for pair in conversation['emotion-cause_pairs']}
#             utterances = conversation['conversation']
#             for i, utterance_i in enumerate(utterances):
#                 predicted_emotion = predicted_emotions.get(utterance_i['video_name'], utterance_i['emotion'])
#                 if predicted_emotion == 'neutral':
#                     continue
#                 for j, utterance_j in enumerate(utterances[:i+1]):
#                     pair_key = (utterance_i['utterance_ID'], utterance_j['utterance_ID'])
#                     is_cause_pair = 1 if pair_key in cause_pair_ids else 0
#                     pairs_emotion_flags_test.append((conversation['conversation_ID'],pair_key, predicted_emotion, is_cause_pair))
#     return pairs_emotion_flags_test

# file_path = '/content/drive/MyDrive/CSCI535 Project/Dataset/text/dev.json'

# with open(file_path, 'r') as file:
#     data = json.load(file)

# utterance_pairs_emotion_flags_train = generate_utterance_pairs_emotion_cause_flags_train(data)
# utterance_pairs_emotion_flags_test = generate_utterance_pairs_emotion_cause_flags_test(data)
# Load the precomputed (emotion utterance, candidate cause utterance) pairs and their
# validity flags for both splits. The encoding is pinned to UTF-8, the encoding JSON is
# specified in, instead of depending on the platform's locale default.
with open('/project/msoleyma_1026/ecp/data/utterance_pairs_emotion_flags_train.json', 'r', encoding='utf-8') as f:
    utterance_pairs_emotion_flags_train = json.load(f)
with open('/project/msoleyma_1026/ecp/data/utterance_pairs_emotion_flags_test.json', 'r', encoding='utf-8') as f:
    utterance_pairs_emotion_flags_test = json.load(f)

In [32]:
# Inspect the first entry of the training pair list to see its structure.
[utterance_pairs_emotion_flags_train[0]]

[{'1': [{'utterance_pair': [3, 1], 'emotion': 'surprise', 'valid_pair': 1},
   {'utterance_pair': [3, 2], 'emotion': 'surprise', 'valid_pair': 0},
   {'utterance_pair': [3, 3], 'emotion': 'surprise', 'valid_pair': 1},
   {'utterance_pair': [4, 1], 'emotion': 'surprise', 'valid_pair': 1},
   {'utterance_pair': [4, 2], 'emotion': 'surprise', 'valid_pair': 0},
   {'utterance_pair': [4, 3], 'emotion': 'surprise', 'valid_pair': 1},
   {'utterance_pair': [4, 4], 'emotion': 'surprise', 'valid_pair': 1},
   {'utterance_pair': [5, 1], 'emotion': 'anger', 'valid_pair': 1},
   {'utterance_pair': [5, 2], 'emotion': 'anger', 'valid_pair': 0},
   {'utterance_pair': [5, 3], 'emotion': 'anger', 'valid_pair': 1},
   {'utterance_pair': [5, 4], 'emotion': 'anger', 'valid_pair': 1},
   {'utterance_pair': [5, 5], 'emotion': 'anger', 'valid_pair': 0}]}]

In [None]:
# with open('/content/drive/MyDrive/CSCI535 Project/Dataset/Processed/utterance_pairs_emotion_flags_train.json', 'w') as f:
#     json.dump(utterance_pairs_emotion_flags_train, f)

# with open('/content/drive/MyDrive/CSCI535 Project/Dataset/Processed/utterance_pairs_emotion_flags_test.json', 'w') as f:
#     json.dump(utterance_pairs_emotion_flags_test, f)

In [43]:
def process_pairs(input_folder_path, utterance_pairs_emotion_flags, output_folder_path):
    """Build and save one concatenated embedding per utterance pair.

    For every pair ``[utt1_id, utt2_id]`` of every conversation, loads
    ``dia<conv>utt<utt1>.npy`` and ``dia<conv>utt<utt2>.npy`` from ``input_folder_path``,
    joins them along axis 1 and writes the result to
    ``conv_<conv>_utterance_pair_<utt1>_<utt2>.npy`` in ``output_folder_path``.

    Args:
        input_folder_path: Directory holding the per-utterance ``.npy`` embeddings. The
            arrays must be at least 2-D because they are concatenated along axis 1.
        utterance_pairs_emotion_flags: Iterable of dicts whose first key is a conversation
            id mapped to a list of pair records. Only each record's ``'utterance_pair'``
            entry is used here; the emotion and validity flag are not needed to build
            the embedding.
        output_folder_path: Directory that receives the pair files; created if missing.

    Raises:
        FileNotFoundError: If an utterance embedding needed for a not-yet-written pair
            does not exist.

    Pairs whose output file already exists are skipped before anything is loaded, so
    re-running the cell is cheap and never overwrites earlier results.
    """
    os.makedirs(output_folder_path, exist_ok=True)

    for conversation in utterance_pairs_emotion_flags:
        conv_id = list(conversation.keys())[0]
        for pair in conversation[conv_id]:
            utt1_id, utt2_id = pair['utterance_pair']

            output_filename = f'conv_{conv_id}_utterance_pair_{utt1_id}_{utt2_id}.npy'
            output_path = os.path.join(output_folder_path, output_filename)
            if os.path.exists(output_path):
                continue  # already generated by an earlier run

            utt1_embedding = np.load(os.path.join(input_folder_path, f'dia{conv_id}utt{utt1_id}.npy'))
            utt2_embedding = np.load(os.path.join(input_folder_path, f'dia{conv_id}utt{utt2_id}.npy'))

            combined_embedding = np.concatenate([utt1_embedding, utt2_embedding], axis=1)
            np.save(output_path, combined_embedding)

# Folders with the per-utterance text embeddings, and the folders that receive the
# concatenated pair embeddings. Train and test pairs are written to separate folders; the
# test folder must be the `test-emotion` one, which is where the test pairs are read back
# from later in the notebook (it previously pointed at the train folder).
text_folder_path_train = '/project/msoleyma_1026/ecp/data/text/train-emotion'
text_folder_path_test = '/project/msoleyma_1026/ecp/data/text/test-emotion'
text_pair_embeddings_folder_path_train = '/project/msoleyma_1026/ecp/data/pair_embeddings/text/train-emotion'
text_pair_embeddings_folder_path_test = '/project/msoleyma_1026/ecp/data/pair_embeddings/text/test-emotion'



In [44]:
# Build and save the text pair embeddings for the training conversations.
process_pairs(text_folder_path_train, utterance_pairs_emotion_flags_train, text_pair_embeddings_folder_path_train)

In [45]:
# Build and save the text pair embeddings for the test conversations.
process_pairs(text_folder_path_test, utterance_pairs_emotion_flags_test, text_pair_embeddings_folder_path_test)

In [49]:
# Video modality: where the per-utterance embeddings live and where the pair files go.
video_folder_path_train = '/project/msoleyma_1026/ecp/data/video/train'
video_folder_path_test = '/project/msoleyma_1026/ecp/data/video/test'
video_pair_embeddings_folder_path_train = '/project/msoleyma_1026/ecp/data/pair_embeddings/video/train-reshaped'
video_pair_embeddings_folder_path_test = '/project/msoleyma_1026/ecp/data/pair_embeddings/video/test-reshaped'

# Generate the pair embeddings for the train split, then for the test split.
for source_dir, pair_records, target_dir in (
    (video_folder_path_train, utterance_pairs_emotion_flags_train, video_pair_embeddings_folder_path_train),
    (video_folder_path_test, utterance_pairs_emotion_flags_test, video_pair_embeddings_folder_path_test),
):
    process_pairs(source_dir, pair_records, target_dir)

In [46]:
# Audio modality: where the per-utterance embeddings live and where the pair files go.
audio_folder_path_train = '/project/msoleyma_1026/ecp/data/audio/train-emotion'
audio_folder_path_test = '/project/msoleyma_1026/ecp/data/audio/test-emotion'
audio_pair_embeddings_folder_path_train = '/project/msoleyma_1026/ecp/data/pair_embeddings/audio/train-emotion'
audio_pair_embeddings_folder_path_test = '/project/msoleyma_1026/ecp/data/pair_embeddings/audio/test-emotion'

# Generate the pair embeddings for the train split, then for the test split.
for source_dir, pair_records, target_dir in (
    (audio_folder_path_train, utterance_pairs_emotion_flags_train, audio_pair_embeddings_folder_path_train),
    (audio_folder_path_test, utterance_pairs_emotion_flags_test, audio_pair_embeddings_folder_path_test),
):
    process_pairs(source_dir, pair_records, target_dir)

In [50]:
import numpy as np

# Spot-check one saved pair embedding per modality (text, audio, video) by printing its shape.
for sample_path in (
    "/project/msoleyma_1026/ecp/data/pair_embeddings/text/train-emotion/conv_18_utterance_pair_9_2.npy",
    "/project/msoleyma_1026/ecp/data/pair_embeddings/audio/train-emotion/conv_18_utterance_pair_9_2.npy",
    "/project/msoleyma_1026/ecp/data/pair_embeddings/video/train-reshaped/conv_18_utterance_pair_9_2.npy",
):
    print(np.load(sample_path).shape)

(1, 1536)
(1, 2048)
(1, 1536)


In [None]:
# import numpy as np
# import os

# # Paths to your training and testing directories
# train_dir = '/project/msoleyma_1026/ecp/data/video/train'
# test_dir = '/project/msoleyma_1026/ecp/data/video/test'

# def reshape_and_save(file_path):
#     embedding = np.load(file_path)
#     # Reshape only if necessary
#     if embedding.shape != (1, 768):
#         embedding = embedding.reshape(1, 768)
#         np.save(file_path, embedding)  # Save the reshaped array back to the same file

# # Process train data
# for conversation in train_data:
#     for utterance in conversation['conversation']:
#         video_name = utterance['video_name'].replace('.mp4', '.npy')
#         file_path = os.path.join(train_dir, video_name)
#         if os.path.exists(file_path):
#             reshape_and_save(file_path)

# # Process test data
# for conversation in test_data:
#     for utterance in conversation['conversation']:
#         video_name = utterance['video_name'].replace('.mp4', '.npy')
#         file_path = os.path.join(test_dir, video_name)
#         if os.path.exists(file_path):
#             reshape_and_save(file_path)

# print("All embeddings have been reshaped and saved back to their respective files.")


In [None]:
# !ls -1 /content/drive/MyDrive/CSCI535\ Project/Dataset/Processed/split/train/pair_embeddings/audio | wc -l
# !ls -1 /content/drive/MyDrive/CSCI535\ Project/Dataset/Processed/split/test/pair_embeddings/audio | wc -l

In [51]:
# import numpy as np

# file_path = '/content/drive/MyDrive/CSCI535 Project/Dataset/Processed/split/test/pair_embeddings/text/conv_1166_utterance_pair_10_1.npy'

# embeddings = np.load(file_path)

# embeddings, embeddings.shape

In [None]:
# with open('/project/msoleyma_1026/ecp/data/utterance_pairs_emotion_flags_train.json', 'r') as f:
#     utterance_pairs_emotion_flags_train = json.load(f)
# with open('/project/msoleyma_1026/ecp/data/utterance_pairs_emotion_flags_test.json', 'r') as f:
#     utterance_pairs_emotion_flags_test = json.load(f)
# with open('/content/drive/MyDrive/CSCI535 Project/Dataset/Processed/utterance_pairs_emotion_flags_train.json', 'r') as file:
#     pairs_train = json.load(file)

# with open('/content/drive/MyDrive/CSCI535 Project/Dataset/Processed/utterance_pairs_emotion_flags_test.json', 'r') as file:
#     pairs_test = json.load(file)

In [65]:
# Spot-check the predicted-emotion lookup for one test clip.
predicted_emotions.get('dia16utt3.mp4')

'joy'

In [66]:
def get_pair_embeddings(pairs, base_path, no_y=False, predicted_emotions=None, is_test=False):
    """Load saved pair embeddings and append a one-hot emotion vector to each.

    Args:
        pairs: Iterable of dicts whose first key is a conversation id mapped to a list of
            records with ``'utterance_pair'``, ``'emotion'`` and ``'valid_pair'`` entries.
        base_path: Directory containing ``conv_<id>_utterance_pair_<u1>_<u2>.npy`` files.
        no_y: When true, only the feature list is returned.
        predicted_emotions: Optional mapping from clip name (``dia<id>utt<u1>.mp4``) to an
            emotion name.
        is_test: When true and ``predicted_emotions`` is non-empty, the mapped emotion
            replaces the record's own emotion; clips absent from the mapping keep it.

    Returns:
        ``(X, y)`` where ``X`` is a list of arrays (embedding followed by the emotion
        one-hot, joined along axis 1) and ``y`` the matching ``'valid_pair'`` flags, or
        just ``X`` when ``no_y`` is true. Pairs whose file is missing are reported with
        a printed message and left out.
    """
    use_predictions = bool(is_test and predicted_emotions)
    features, labels = [], []

    for conversation in pairs:
        conv_id = list(conversation)[0]
        for record in conversation[conv_id]:
            pair_ids, emotion, flag = record['utterance_pair'], record['emotion'], record['valid_pair']
            utt1_id, utt2_id = pair_ids
            filename = f"conv_{conv_id}_utterance_pair_{utt1_id}_{utt2_id}.npy"
            file_path = os.path.join(base_path, filename)

            if not os.path.exists(file_path):
                print(f"File not found: {file_path}")
                continue

            embedding = np.load(file_path)
            if use_predictions:
                # The emotion utterance is the first id of the pair.
                emotion = predicted_emotions.get(f"dia{conv_id}utt{utt1_id}.mp4", emotion)
            emotion_row = encode_emotion(emotion).reshape(1, -1)
            features.append(np.concatenate([embedding, emotion_row], axis=1))
            if not no_y:
                labels.append(flag)

    if no_y:
        return features
    return features, labels

In [54]:
# Start every text pair container empty.
X_text_cause_pair_train = []
X_text_cause_pair_test = []
y_cause_pair_train = []
y_cause_pair_test = []

# Folders holding the saved text pair embeddings.
text_base_path_train = '/project/msoleyma_1026/ecp/data/pair_embeddings/text/train-emotion'
text_base_path_test = '/project/msoleyma_1026/ecp/data/pair_embeddings/text/test-emotion'

# Training pairs: features together with their valid-pair flags.
X_text_cause_pair_train, y_cause_pair_train = get_pair_embeddings(
    pairs=utterance_pairs_emotion_flags_train,
    base_path=text_base_path_train,
)

In [67]:
# Test pairs: features and flags, with the emotion taken from the predicted-emotion lookup.
X_text_cause_pair_test, y_cause_pair_test = get_pair_embeddings(
    pairs=utterance_pairs_emotion_flags_test,
    base_path=text_base_path_test,
    predicted_emotions=predicted_emotions,
    is_test=True,
)

In [56]:
# Start both video pair containers empty.
X_video_cause_pair_train = []
X_video_cause_pair_test = []

# Folders holding the saved video pair embeddings.
video_base_path_train = '/project/msoleyma_1026/ecp/data/pair_embeddings/video/train-reshaped'
video_base_path_test = '/project/msoleyma_1026/ecp/data/pair_embeddings/video/test-reshaped'

# Training pairs, features only (no_y=True makes the loader return just the feature list).
X_video_cause_pair_train = get_pair_embeddings(
    pairs=utterance_pairs_emotion_flags_train,
    base_path=video_base_path_train,
    no_y=True,
)

In [68]:
# Test pairs, features only, with the emotion taken from the predicted-emotion lookup.
X_video_cause_pair_test = get_pair_embeddings(
    pairs=utterance_pairs_emotion_flags_test,
    base_path=video_base_path_test,
    no_y=True,
    predicted_emotions=predicted_emotions,
    is_test=True,
)

In [57]:
# Start both audio pair containers empty.
X_audio_cause_pair_train = []
X_audio_cause_pair_test = []

# Folders holding the saved audio pair embeddings.
audio_base_path_train = '/project/msoleyma_1026/ecp/data/pair_embeddings/audio/train-emotion'
audio_base_path_test = '/project/msoleyma_1026/ecp/data/pair_embeddings/audio/test-emotion'

# Training pairs, features only (no_y=True makes the loader return just the feature list).
X_audio_cause_pair_train = get_pair_embeddings(
    pairs=utterance_pairs_emotion_flags_train,
    base_path=audio_base_path_train,
    no_y=True,
)

In [69]:
# Test pairs, features only, with the emotion taken from the predicted-emotion lookup.
X_audio_cause_pair_test = get_pair_embeddings(
    pairs=utterance_pairs_emotion_flags_test,
    base_path=audio_base_path_test,
    no_y=True,
    predicted_emotions=predicted_emotions,
    is_test=True,
)

In [70]:
# Stack each modality's pair features into one matrix with a row per pair; the -1 collapses
# the remaining axes of every stored array into a single feature axis.
X_text_cause_pair_train = np.array(X_text_cause_pair_train).reshape(len(X_text_cause_pair_train), -1)
X_text_cause_pair_test = np.array(X_text_cause_pair_test).reshape(len(X_text_cause_pair_test), -1)

X_video_cause_pair_train = np.array(X_video_cause_pair_train).reshape(len(X_video_cause_pair_train), -1)
X_video_cause_pair_test = np.array(X_video_cause_pair_test).reshape(len(X_video_cause_pair_test), -1)

X_audio_cause_pair_train = np.array(X_audio_cause_pair_train).reshape(len(X_audio_cause_pair_train), -1)
X_audio_cause_pair_test = np.array(X_audio_cause_pair_test).reshape(len(X_audio_cause_pair_test), -1)

# Valid-pair flags become 1-D arrays, one entry per pair.
y_cause_pair_train, y_cause_pair_test = np.array(y_cause_pair_train), np.array(y_cause_pair_test)

In [71]:
# Shapes of every cause-pair feature matrix and flag vector after stacking/flattening.
X_text_cause_pair_train.shape, X_text_cause_pair_test.shape, X_video_cause_pair_train.shape, X_video_cause_pair_test.shape, X_audio_cause_pair_train.shape, X_audio_cause_pair_test.shape, y_cause_pair_train.shape, y_cause_pair_test.shape

((44702, 1543),
 (10332, 1543),
 (44702, 1543),
 (10332, 1543),
 (44702, 2055),
 (10332, 2055),
 (44702,),
 (10332,))

In [117]:
# Cause-pair classifier on text pair features: two hidden layers of 1024 units, Adam,
# fixed seed, capped at 10 training iterations.
mlp_cause_pair_text = MLPClassifier(
    hidden_layer_sizes=(1024, 1024),
    activation='relu',
    solver='adam',
    learning_rate_init=0.001,
    max_iter=10,
    random_state=42,
)

mlp_cause_pair_text.fit(X_text_cause_pair_train, y_cause_pair_train)



In [118]:
# Evaluate the text-only cause-pair classifier on the test pairs.
y_cause_pair_text_pred = mlp_cause_pair_text.predict(X_text_cause_pair_test)

# Compute every metric first, then report them in the usual order.
accuracy_cause_pair_text = accuracy_score(y_cause_pair_test, y_cause_pair_text_pred)
class_report_cause_pair_text = classification_report(y_cause_pair_test, y_cause_pair_text_pred)
f1_score_cause_pair_text = f1_score(y_cause_pair_test, y_cause_pair_text_pred, average='weighted')

print(f'Accuracy: {accuracy_cause_pair_text * 100:.2f}%')
print(class_report_cause_pair_text)
print(f'F1 Score: {f1_score_cause_pair_text * 100:.2f}%')

Accuracy: 85.30%
              precision    recall  f1-score   support

           0       0.90      0.93      0.91      8539
           1       0.59      0.48      0.53      1793

    accuracy                           0.85     10332
   macro avg       0.74      0.71      0.72     10332
weighted avg       0.84      0.85      0.85     10332

F1 Score: 84.69%


In [113]:
# Cause-pair classifier on video pair features: two hidden layers of 512 units, Adam,
# fixed seed, capped at 10 training iterations.
mlp_cause_pair_video = MLPClassifier(
    hidden_layer_sizes=(512, 512),
    activation='relu',
    solver='adam',
    learning_rate_init=0.001,
    max_iter=10,
    random_state=42,
)

mlp_cause_pair_video.fit(X_video_cause_pair_train, y_cause_pair_train)



In [114]:
# Evaluate the video-only cause-pair classifier on the test pairs.
y_cause_pair_video_pred = mlp_cause_pair_video.predict(X_video_cause_pair_test)

# Compute every metric first, then report them in the usual order.
accuracy_cause_pair_video = accuracy_score(y_cause_pair_test, y_cause_pair_video_pred)
class_report_cause_pair_video = classification_report(y_cause_pair_test, y_cause_pair_video_pred)
f1_score_cause_pair_video = f1_score(y_cause_pair_test, y_cause_pair_video_pred, average='weighted')

print(f'Accuracy: {accuracy_cause_pair_video * 100:.2f}%')
print(class_report_cause_pair_video)
print(f'F1 Score: {f1_score_cause_pair_video * 100:.2f}%')

Accuracy: 85.89%
              precision    recall  f1-score   support

           0       0.87      0.97      0.92      8539
           1       0.69      0.34      0.46      1793

    accuracy                           0.86     10332
   macro avg       0.78      0.65      0.69     10332
weighted avg       0.84      0.86      0.84     10332

F1 Score: 83.86%


In [104]:
# Cause-pair classifier on audio pair features: two hidden layers of 512 units, Adam,
# fixed seed, capped at 10 training iterations.
mlp_cause_pair_audio = MLPClassifier(
    hidden_layer_sizes=(512, 512),
    activation='relu',
    solver='adam',
    learning_rate_init=0.001,
    max_iter=10,
    random_state=42,
)

mlp_cause_pair_audio.fit(X_audio_cause_pair_train, y_cause_pair_train)



In [None]:
# Evaluate the audio-only cause-pair classifier on the test pairs.
y_cause_pair_audio_pred = mlp_cause_pair_audio.predict(X_audio_cause_pair_test)

# Compute every metric first, then report them in the usual order.
accuracy_cause_pair_audio = accuracy_score(y_cause_pair_test, y_cause_pair_audio_pred)
class_report_cause_pair_audio = classification_report(y_cause_pair_test, y_cause_pair_audio_pred)
f1_score_cause_pair_audio = f1_score(y_cause_pair_test, y_cause_pair_audio_pred, average='weighted')

print(f'Accuracy: {accuracy_cause_pair_audio * 100:.2f}%')
print(class_report_cause_pair_audio)
print(f'F1 Score: {f1_score_cause_pair_audio * 100:.2f}%')

Accuracy: 87.00%
              precision    recall  f1-score   support

           0       0.88      0.98      0.93      8539
           1       0.76      0.37      0.50      1793

    accuracy                           0.87     10332
   macro avg       0.82      0.67      0.71     10332
weighted avg       0.86      0.87      0.85     10332

F1 Score: 85.08%


In [None]:
# Late fusion for cause-pair detection: average the three models' class probabilities and
# take the most probable class for every test pair.
prob_cause_pair_audio = mlp_cause_pair_audio.predict_proba(X_audio_cause_pair_test)
prob_cause_pair_video = mlp_cause_pair_video.predict_proba(X_video_cause_pair_test)
prob_cause_pair_text = mlp_cause_pair_text.predict_proba(X_text_cause_pair_test)

prob_cause_pair_average = (prob_cause_pair_audio + prob_cause_pair_video + prob_cause_pair_text) / 3
final_cause_pair_predictions = prob_cause_pair_average.argmax(axis=1)

# Compute every metric first, then report them in the usual order.
accuracy_cause_pair = accuracy_score(y_cause_pair_test, final_cause_pair_predictions)
class_report_cause_pair = classification_report(y_cause_pair_test, final_cause_pair_predictions)
f1_score_cause_pair = f1_score(y_cause_pair_test, final_cause_pair_predictions, average='weighted')

print(f'Accuracy: {accuracy_cause_pair * 100:.2f}%')
print(class_report_cause_pair)
print(f'F1 Score: {f1_score_cause_pair * 100:.2f}%')

Accuracy: 88.23%
              precision    recall  f1-score   support

           0       0.89      0.98      0.93      8539
           1       0.81      0.42      0.55      1793

    accuracy                           0.88     10332
   macro avg       0.85      0.70      0.74     10332
weighted avg       0.88      0.88      0.87     10332

F1 Score: 86.67%
