In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score, balanced_accuracy_score, confusion_matrix
from speechbrain.inference.interfaces import foreign_class

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
classifier = foreign_class(source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP", pymodule_file="custom_interface.py", classname="CustomEncoderWav2vec2Classifier", run_opts={"device":"cuda"})

Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-base and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
speechbrain.lobes.models.huggingface_transformers.huggingface - Wav2Vec2Model is frozen.


## IEMOCAP confirmation

In [3]:
iemocap_1 = pd.read_csv('../IEMOCAP_full_release/labels/session_1_labels.csv')
iemocap_2 = pd.read_csv('../IEMOCAP_full_release/labels/session_2_labels.csv')
iemocap_3 = pd.read_csv('../IEMOCAP_full_release/labels/session_3_labels.csv')
iemocap_4 = pd.read_csv('../IEMOCAP_full_release/labels/session_4_labels.csv')
iemocap_5 = pd.read_csv('../IEMOCAP_full_release/labels/session_5_labels.csv')

iemocap_1 = iemocap_1[iemocap_1['affect'].isin(['Happiness', 'Sadness', 'Neutral', 'Anger'])]
iemocap_2 = iemocap_2[iemocap_2['affect'].isin(['Happiness', 'Sadness', 'Neutral', 'Anger'])]
iemocap_3 = iemocap_3[iemocap_3['affect'].isin(['Happiness', 'Sadness', 'Neutral', 'Anger'])]
iemocap_4 = iemocap_4[iemocap_4['affect'].isin(['Happiness', 'Sadness', 'Neutral', 'Anger'])]
iemocap_5 = iemocap_5[iemocap_5['affect'].isin(['Happiness', 'Sadness', 'Neutral', 'Anger'])]

combined_sessions = pd.concat([iemocap_1, iemocap_2, iemocap_3, iemocap_4, iemocap_5])

In [4]:
out_probs = []
scores = []
indexes = []
text_labs = []
for index, file in combined_sessions.iterrows():
    out_prob, score, index, text_lab = classifier.classify_file(f"../{file['file_location']}")
    out_probs.append(out_prob)
    scores.append(score)
    indexes.append(index)
    text_labs.append(text_lab)

In [15]:
predictions = [label for file in text_labs for label in file]
predictions = [file.replace('neu', 'Neutral') for file in predictions]
predictions = [file.replace('sad', 'Sadness') for file in predictions]
predictions = [file.replace('hap', 'Happiness') for file in predictions]
predictions = [file.replace('ang', 'Anger') for file in predictions]

In [17]:
combined_sessions['predictions'] = predictions

In [19]:
combined_sessions.to_csv('iemocap_confirmation_4_class.csv', index=False)

In [22]:
print(f"UA score: {accuracy_score(combined_sessions['affect'], combined_sessions['predictions'])}")
print(f"WA score: {balanced_accuracy_score(combined_sessions['affect'], combined_sessions['predictions'])}")

UA score: 0.7519463408791472
WA score: 0.7463301288119966


## BERSt test

In [3]:
berst_test = pd.read_csv('../shout-data/test_data.csv')
berst_train = pd.read_csv('../shout-data/train_data.csv')
berst_validation = pd.read_csv('../shout-data/validation_data.csv')

berst_test = berst_test[berst_test['affect'].isin(['happiness', 'sadness', 'neutral', 'anger'])]
berst_train = berst_train[berst_train['affect'].isin(['happiness', 'sadness', 'neutral', 'anger'])]
berst_validation = berst_validation[berst_validation['affect'].isin(['happiness', 'sadness', 'neutral', 'anger'])]

combined_berst = pd.concat([berst_test, berst_train, berst_validation])

In [4]:
out_probs = []
scores = []
indexes = []
text_labs = []
for index, file in combined_berst.iterrows():
    out_prob, score, index, text_lab = classifier.classify_file(f"../{file['file_location']}")
    out_probs.append(out_prob)
    scores.append(score)
    indexes.append(index)
    text_labs.append(text_lab)

In [29]:
predictions = [label for file in text_labs for label in file]
predictions = [file.replace('neu', 'neutral') for file in predictions]
predictions = [file.replace('sad', 'sadness') for file in predictions]
predictions = [file.replace('hap', 'happiness') for file in predictions]
predictions = [file.replace('ang', 'anger') for file in predictions]

In [30]:
combined_berst['predictions'] = predictions

In [31]:
combined_berst.to_csv('zero_shot_berst_4_class.csv', index=False)

In [32]:
print(f"UA score: {accuracy_score(combined_berst['affect'], combined_berst['predictions'])}")
print(f"WA score: {balanced_accuracy_score(combined_berst['affect'], combined_berst['predictions'])}")

UA score: 0.3440974060349391
WA score: 0.3321385902031063




In [35]:
cf_matrix = confusion_matrix(combined_berst['predictions'], combined_berst['affect'])
print(cf_matrix)

[[504   0 498 441]
 [  7   0  13  10]
 [140   0 146 130]
 [  0   0   0   0]]


## Retrain on 7 classes with IEMOCAP