In [2]:
import torch
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# from utils import *
import os

In [58]:
input = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5).softmax(dim=1)
print(input)
print(target)

tensor([[ 1.0593, -0.5259, -0.4126, -1.2237, -1.0999],
        [ 0.2725,  0.1745,  1.4604, -0.2560,  0.4010],
        [ 0.3831,  0.0097,  1.1761, -0.1994,  2.6107]], requires_grad=True)
tensor([[0.2939, 0.3446, 0.2175, 0.0216, 0.1223],
        [0.0919, 0.1213, 0.1271, 0.4115, 0.2481],
        [0.3606, 0.0967, 0.1421, 0.3822, 0.0183]])


In [57]:
cm1 = torch.tensor([[1,0,3],[4,0,6],[7,0,9]])
cm2 = np.array([[1,0,3],[4,0,6],[7,0,9]])
cs1 = cm1.sum(dim=1)
cs1 = cs1.masked_fill_(cs1 == 0, 1e-10)
cs2 = cm2.sum(axis=1, keepdims=True)
cs2 = np.where(cs2 == 0, 1e-10, cs2)
print(cm1/cs1)
print(cm2/cs2)
print(cs1, cs2)

tensor([[0.2500, 0.0000, 0.1875],
        [1.0000, 0.0000, 0.3750],
        [1.7500, 0.0000, 0.5625]])
[[0.25   0.     0.75  ]
 [0.4    0.     0.6   ]
 [0.4375 0.     0.5625]]
tensor([ 4, 10, 16]) [[ 4.]
 [10.]
 [16.]]


In [30]:
seed = 42
corpus = "MSPPODCAST"
text_feature_extractor = 'roberta-large-UTT'
audio_feature_extractor = 'whisper-large-v3-UTT'
seed = 42
batch = 16
# Data Preprocessing
corpus_path = config[corpus]["PATH_TO_LABEL"]
corpus_df = pd.read_csv(corpus_path)
corpus_df["FileName"]= corpus_df["FileName"].str.replace('.wav', '')
# Remove non consensus labels
main_corpus_df = corpus_df[~corpus_df["EmoClass"].isin(["X", "O"])]
# Create train/val splits
train_df = main_corpus_df[corpus_df["Split_Set"] == "Train"]
val_df = main_corpus_df[corpus_df["Split_Set"] == "Development"]
# test_df = pd.read_csv(config[corpus]["PATH_TO_TEST"]) For evaluate.py
text_feature = get_feature_dir(corpus,text_feature_extractor)
audio_feature = get_feature_dir(corpus,audio_feature_extractor)

# Create datasets
train_dataset = CustomDataset(train_df, text_feature, audio_feature, seed=seed)
valid_dataset = CustomDataset(val_df, text_feature, audio_feature, seed=seed)
print(f"Number of training samples: {train_dataset.total_samples}")
print(f"Number of validation samples: {valid_dataset.total_samples}")

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=batch, num_workers=16)
valid_loader = DataLoader(valid_dataset, batch_size=batch, num_workers=16)

Number of training samples: 66992
Number of validation samples: 25258


  train_df = main_corpus_df[corpus_df["Split_Set"] == "Train"]
  val_df = main_corpus_df[corpus_df["Split_Set"] == "Development"]


In [32]:
def indexEmotion(index_list):
    category_list = []
    emotions = ["Angry", "Sad", "Happy", "Surprise", "Fear", "Disgust", "Contempt", "Neutral"]
    for index in index_list:
        category_list.append(emotions[index])
    return category_list

In [None]:
# Check distribution of emotion categories
total_categories = []
for batch_idx, (data, label) in enumerate(train_loader):
    category = label["category"]
    total_categories.extend(torch.argmax(category, dim=1).tolist())
category_list = indexEmotion(total_categories)

# Create distribution plot
plt.figure(figsize=(6, 4))
category_counts = pd.Series(category_list).value_counts()
sns.barplot(x=category_counts.index, y=category_counts.values)
plt.title('Distribution of Emotion Categories')
plt.xlabel('Emotion')
plt.ylabel('Count')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [41]:
# Calculate class weights inversely proportional to class frequencies
total_samples = len(category_list)
class_weights = []
emotions = ["Angry", "Sad", "Happy", "Surprise", "Fear", "Disgust", "Contempt", "Neutral"]
for emotion in emotions:
    count = category_counts.get(emotion, 0)
    if count == 0:
        weight = 1.0  # Handle case where class has no samples
    else:
        weight = total_samples / (len(emotions) * count)
    class_weights.append(weight)

# Convert to tensor for CrossEntropyLoss
class_weights = torch.FloatTensor(class_weights).cuda()
print("\nClass weights for CrossEntropyLoss:")
for emotion, weight in zip(emotions, class_weights):
    print(f"{emotion}: {weight:.3f}")


Class weights for CrossEntropyLoss:
Angry: 1.247
Sad: 1.308
Happy: 0.503
Surprise: 2.798
Fear: 7.383
Disgust: 5.900
Contempt: 3.324
Neutral: 0.287


In [8]:
m = nn.AdaptiveAvgPool1d(5)
input = torch.randint(1, 64, (8,))
input = input.float()

In [18]:
input

tensor([21., 58., 39., 51., 33., 56., 13., 28.])

In [21]:
test = []
for i in range(0,4):
    test.append(input[i:i+4].mean())
test


[tensor(42.2500), tensor(45.2500), tensor(44.7500), tensor(38.2500)]

In [19]:
output = m(input.unsqueeze(0))
output

tensor([[39.5000, 49.3333, 42.0000, 34.0000, 20.5000]])

In [20]:
m2 = nn.AvgPool1d(4, stride=1)
output2 = m2(input.unsqueeze(0))
output2

tensor([[42.2500, 45.2500, 44.7500, 38.2500, 32.5000]])

In [24]:
x = torch.rand(32,512)
x = x.unsqueeze(1)
x.shape

torch.Size([32, 1, 512])

In [16]:
8//3

2