In [1]:
# Colab setup: put download.sh and this notebook in the same Google Drive folder,
# and edit `path` below so that it points at that folder.
from google.colab import drive
drive.mount('/content/gdrive')

import os

# Later cells use relative paths (download.sh, processed_data.pkl), so the
# working directory is switched to the project folder on the mounted drive.
path = "/content/gdrive/MyDrive/project"
os.chdir(path)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
import librosa
import os
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
import numpy as np
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
#from torchsummaryX import summary
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
import csv
import torch.optim as optim
from tqdm import tqdm
from os.path import exists

# **FEATURE EXTRACTION**

In [3]:
# STFT settings for the mel spectrograms computed in get_features().
# Every row is [n_fft, hop_length, win_length]; one spectrogram is produced
# per row and the results are stacked along the first axis of the features.
configs = [
    [2048,  512, 2048],
    [2048,  512, 1024],
    [2048, 1024, 1024],
    [1024,  512, 1024],
    [1024,  256, 1024],
]

def process_data(personality_dir="Personality_Scores", metadata_dir="Metadata", audio_dir="Audio_clips"):
    """Assemble one table holding scores, metadata and audio features per clip.

    Args:
        personality_dir: Directory passed to ``get_personality_scores``.
        metadata_dir: Directory passed to ``get_metadata``.
        audio_dir: Directory passed to ``get_features``.

    Returns:
        DataFrame produced by joining the three tables on ``Clip_ID`` with
        pandas' default (inner) merge, so only clips present in all three
        sources are kept.
    """
    scores = get_personality_scores(personality_dir)
    metadata = get_metadata(metadata_dir)
    audio_features = get_features(audio_dir)

    return (
        scores
        .merge(metadata, on="Clip_ID")
        .merge(audio_features, on="Clip_ID")
    )

def get_personality_scores(data_dir_path):
    """Average the five trait scores over the eleven score sheets.

    Reads ``Score_001.csv`` ... ``Score_011.csv`` from ``data_dir_path``. The
    trait columns are summed row by row (pandas aligns on the row index, not on
    ``Clip_ID``) and divided by 11. All other columns are taken unchanged from
    ``Score_011.csv``.

    Args:
        data_dir_path: Directory containing the score CSV files.

    Returns:
        DataFrame with the layout of ``Score_011.csv`` whose trait columns hold
        the mean over the eleven files.
    """
    traits = ("Extraversion", "Agreeableness", "Conscientiousness", "Neuroticism", "Openness")

    # Sheet 11 seeds the running totals; sheets 1-10 are added on top of it.
    totals = pd.read_csv(data_dir_path + "/Score_011.csv")
    remaining_sheets = [data_dir_path + f"/Score_0{i:02d}.csv" for i in range(1, 11)]

    for sheet_path in remaining_sheets:
        sheet = pd.read_csv(sheet_path)
        for trait in traits:
            totals[trait] = totals[trait] + sheet[trait]

    for trait in traits:
        totals[trait] = totals[trait] / 11

    return totals

def get_metadata(data_dir_path):
    """Load ``Metadata.csv`` from ``data_dir_path`` and return it as a DataFrame."""
    return pd.read_csv(f"{data_dir_path}/Metadata.csv")

def get_features(data_dir_path):
    """Compute stacked mel-spectrogram features for every audio file in a directory.

    For each file one mel spectrogram is computed per entry of the module-level
    ``configs`` list (each entry is ``[n_fft, hop_length, win_length]``). The
    spectrograms can differ in their number of frames, so each one is
    zero-padded at the end of its second axis to the longest of them before
    they are stacked into one array whose first axis indexes ``configs``.

    Directory entries are visited in sorted order so the row order of the
    result (and therefore any seeded shuffle applied to it later) is
    reproducible. Entries that are not regular files, or whose name starts
    with a dot (e.g. a ``.ipynb_checkpoints`` directory), are skipped instead
    of being handed to ``librosa.load``.

    Args:
        data_dir_path: Directory containing the audio clips.

    Returns:
        DataFrame with columns ``Clip_ID`` (the file name up to its first
        ``"."``) and ``features`` (the stacked spectrogram array).
    """
    data = []

    for filename in sorted(os.listdir(data_dir_path)):
        f = os.path.join(data_dir_path, filename)

        # Hidden entries and sub-directories are not audio clips.
        if filename.startswith(".") or not os.path.isfile(f):
            continue

        y, sr = librosa.load(f)
        spectrograms = [
            librosa.feature.melspectrogram(
                y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, win_length=win_length
            )
            for n_fft, hop_length, win_length in configs
        ]

        # Pad every spectrogram along its second (frame) axis to a common length.
        pad_len = max((s.shape[1] for s in spectrograms), default=0)
        output = np.array([
            np.pad(s, ((0, 0), (0, pad_len - s.shape[1])))
            for s in spectrograms
        ])

        data.append([filename.split(".")[0], output])

    return pd.DataFrame(data, columns=["Clip_ID", "features"])


In [4]:
# Reuse the cached feature table when it exists in the working directory;
# otherwise run download.sh (presumably fetches the raw data - confirm), build
# the table with process_data() and cache it for the next run.
if exists("./processed_data.pkl"):
  with open("./processed_data.pkl", 'rb') as f:
    # NOTE(review): pickle.load can execute arbitrary code; only load a cache
    # file that this notebook wrote itself.
    data = pickle.load(f)
else:
  ! bash download.sh
  data = process_data()
  data.to_pickle("processed_data.pkl")

# **HYPER PARAMETERS**

In [5]:
# Label aggregation mode and the trait columns used as prediction targets.
agg_type = 'numerical'
features = [
    'Extraversion',
    'Agreeableness',
    'Conscientiousness',
    'Neuroticism',
    'Openness',
]

# Suffix derived from the aggregation mode: empty for numerical labels.
agg = '' if agg_type == 'numerical' else '_categorical'

# Optimisation settings: batch size, initial learning rate, number of epochs.
batch_size, lr, epochs = 16, 1e-3, 200

# **DATA LOADER**

In [6]:
if agg_type == 'numerical':
    # Turn each averaged trait score into a binary label:
    # 1 when the score is strictly above the cutoff, 0 otherwise.
    cutoff = 0
    for trait in ('Extraversion', 'Agreeableness', 'Conscientiousness', 'Neuroticism', 'Openness'):
        data[trait] = np.where(data[trait] > cutoff, 1, 0)


In [7]:
# Split dataframe into train (first 70%), validation (next 15%) and test
# (remaining 15%) sets after one shuffle with a fixed seed.
# Positional .iloc slicing is used instead of np.split: np.split on a DataFrame
# goes through DataFrame.swapaxes, which pandas has deprecated.
shuffled = data.sample(frac=1, random_state=101)
train_end = int(.7*len(data))
val_end = int(.85*len(data))

train = shuffled.iloc[:train_end]
val = shuffled.iloc[train_end:val_end]
test = shuffled.iloc[val_end:]

In [8]:

class LibriSamplesTrain(torch.utils.data.Dataset):
    """Dataset yielding ``(features, labels)`` pairs from a DataFrame.

    Each row's ``features`` entry is a 3-D array laid out as
    ``channels * mel bins * frames``; it is returned with its axes reversed so
    that the (variable-length) frame axis comes first and can be padded by
    ``collate_fn``.

    Args:
        dataframe: Table with a ``features`` column and one column per label.
        label_columns: Names of the label columns. When ``None`` (default) the
            module-level ``features`` list is used, as before.
    """

    def __init__(self, dataframe, label_columns=None):
        self.dataframe = dataframe
        self.label_columns = label_columns

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.dataframe.index)

    def __getitem__(self, ind):
        """Return ``(X, Y)`` for row ``ind``: reversed-axes features and float labels."""
        X = torch.tensor(np.array(self.dataframe['features'].iloc[ind]), dtype=torch.float32) # 5 * 128 * T
        # Explicit permute instead of ``X.T``: ``.T`` on tensors that are not
        # 2-D is deprecated in PyTorch.
        X = X.permute(2, 1, 0) # T * 128 * 5

        columns = features if self.label_columns is None else self.label_columns
        ys = [self.dataframe[c].iloc[ind] for c in columns]
        Y = torch.tensor(np.array(ys), dtype=torch.float32) # one value per label column
        return X, Y

    @staticmethod
    def collate_fn(batch):
        """Pad a list of ``(X, Y)`` items into batch tensors.

        Returns:
            ``(batch_x_pad, batch_y_pad, lengths_x, lengths_y)`` where the
            padded tensors are batch-first and the two length tensors hold the
            unpadded leading-axis sizes of every item as floats.
        """
        batch_x = [x for x, _ in batch] # (B * T * 128 * 5)
        batch_y = [y for _, y in batch] # (B * num_labels)

        lengths_x = [x.shape[0] for x in batch_x]
        batch_x_pad = pad_sequence(batch_x, batch_first=True) # B, T(max time dimension in the batch), 128, 5

        lengths_y = [y.shape[0] for y in batch_y]
        batch_y_pad = pad_sequence(batch_y, batch_first=True) # B * num_labels

        return batch_x_pad, batch_y_pad, torch.Tensor(lengths_x), torch.Tensor(lengths_y)

class LibriSamplesTest(torch.utils.data.Dataset):
    """Label-free counterpart of ``LibriSamplesTrain``: yields features only.

    Items use the same layout as the training dataset (frame axis first), so
    batches from both datasets can be fed to the same model.

    Args:
        dataframe: Table with a ``features`` column holding 3-D arrays laid out
            as ``channels * mel bins * frames``.
    """

    def __init__(self, dataframe):
        self.dataframe = dataframe

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.dataframe.index)

    def __getitem__(self, ind):
        """Return the features of row ``ind`` with the frame axis first."""
        X = torch.tensor(np.array(self.dataframe['features'].iloc[ind]), dtype=torch.float32) # 5 * 128 * T
        # Reverse the axes so the variable-length frame axis leads; without
        # this pad_sequence cannot pad clips of different lengths.
        return X.permute(2, 1, 0) # T * 128 * 5

    @staticmethod
    def collate_fn(batch):
        """Pad a list of feature tensors into one batch-first tensor.

        Every item is a single tensor (there is no label to unpack).

        Returns:
            ``(batch_x_pad, lengths_x)`` where ``lengths_x`` holds the unpadded
            frame count of every item as floats.
        """
        batch_x = list(batch) # (B * T * 128 * 5)

        lengths_x = [x.shape[0] for x in batch_x]
        batch_x_pad = pad_sequence(batch_x, batch_first=True) # B, T(max time dimension in the batch), 128, 5

        return batch_x_pad, torch.Tensor(lengths_x)


In [9]:
# Wrap each split in its dataset class; train/val carry labels, test does not.
train_data, val_data = LibriSamplesTrain(train), LibriSamplesTrain(val)
test_data = LibriSamplesTest(test)

# Only the training loader shuffles; every loader pads with its dataset's collate_fn.
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=LibriSamplesTrain.collate_fn,
)
val_loader = torch.utils.data.DataLoader(
    val_data,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=LibriSamplesTrain.collate_fn,
)
test_loader = torch.utils.data.DataLoader(
    test_data,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=LibriSamplesTest.collate_fn,
)

# Report sample and batch counts for every split.
print("Batch size: ", batch_size)
print("Train dataset samples = {}, batches = {}".format(len(train_data), len(train_loader)))
print("Val dataset samples = {}, batches = {}".format(len(val_data), len(val_loader)))
print("Test dataset samples = {}, batches = {}".format(len(test_data), len(test_loader)))

Batch size:  16
Train dataset samples = 448, batches = 28
Val dataset samples = 96, batches = 6
Test dataset samples = 96, batches = 6


# **MODEL**

In [10]:
class Network(nn.Module):
    """CNN that maps stacked mel spectrograms to five sigmoid outputs.

    The model has three stages:

    * ``embedding``: three Conv2d/BatchNorm/GELU stages (5 -> 128 -> 256 -> 512
      channels) with average pooling and channel dropout in between.
    * ``flatten``: global average pooling to one 512-vector per sample.
    * ``classification``: an MLP (512 -> 2048 -> 512 -> 5) with batch norm,
      GELU and dropout.

    ``forward`` applies a sigmoid, so the outputs are probabilities suitable
    for ``nn.BCELoss``.
    """

    def __init__(self):

        super().__init__()

        self.embedding = nn.Sequential(
            nn.Conv2d(in_channels=5, out_channels=128, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.GELU(),
            nn.AvgPool2d((3, 3)),
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.GELU(),
            nn.Dropout2d(0.2),
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.GELU(),
            nn.AvgPool2d((3, 3))
        )

        self.flatten = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten()
        )

        self.classification = nn.Sequential(
            nn.Linear(512, 2048),
            nn.BatchNorm1d(2048),
            nn.GELU(),
            nn.Linear(2048, 512),
            nn.BatchNorm1d(512),
            nn.GELU(),
            # Plain Dropout: the input here is a 2-D (B, 512) tensor, and
            # feeding 2-D input to Dropout2d is deprecated in PyTorch.
            nn.Dropout(0.3),
            nn.Linear(512, 5),
        )

    def forward(self, x):
        """Run the network on a channels-last batch.

        Args:
            x: Tensor laid out as ``B * T * 128 * 5`` (as produced by the
                datasets' ``collate_fn``); axes 1 and 3 are swapped so the 5
                spectrogram variants become Conv2d input channels.

        Returns:
            Tensor of shape ``B * 5`` with values in ``[0, 1]``.
        """
        x = x.transpose(1, 3) # B * 5 * 128 * T
        out = self.embedding(x) # B * 512 * H * W (H and W are from CNN)
        out = self.flatten(out) # B * 512
        out = self.classification(out) # B * num_class
        out = torch.sigmoid(out)
        return out

In [11]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Instantiate the network on the chosen device and show its layer layout.
model = Network()
model = model.to(device)
print(model)

Network(
  (embedding): Sequential(
    (0): Conv2d(5, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): GELU()
    (3): AvgPool2d(kernel_size=(3, 3), stride=(3, 3), padding=0)
    (4): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (5): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): GELU()
    (7): Dropout2d(p=0.2, inplace=False)
    (8): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (9): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): GELU()
    (11): AvgPool2d(kernel_size=(3, 3), stride=(3, 3), padding=0)
  )
  (flatten): Sequential(
    (0): AdaptiveAvgPool2d(output_size=(1, 1))
    (1): Flatten(start_dim=1, end_dim=-1)
  )
  (classification): Sequential(
    (0): Linear(in_features=512, out_features=2048, bi

In [12]:
# Binary cross-entropy over the network's sigmoid outputs.
criterion = nn.BCELoss()

# Adam with a small weight decay, starting from the configured learning rate.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=lr,
    weight_decay=5e-6,
)

# Halve the learning rate every 20 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

# **TRAIN**

In [None]:
# Per-epoch history, filled by the loop below.
train_acc_list = list()
train_loss_list = list()
lr_list = list()
validation_acc_list = list()

print('train_loader: ', len(train_loader))

for epoch in range(epochs):

    batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train')
    total_loss = 0

    # train
    model.train()
    # The loop variable is named `batch` (not `data`) so that the notebook's
    # `data` DataFrame is not overwritten by the last mini-batch.
    for step, batch in enumerate(train_loader):
        optimizer.zero_grad()

        x, y, xlens, ylens = batch
        x = x.to(device)
        y = y.to(device)

        output = model(x)

        loss = criterion(output, y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        batch_bar.set_postfix(
          loss="{:.04f}".format(float(total_loss/(step+1)))
        )
        batch_bar.update()

    batch_bar.close()
    torch.cuda.empty_cache()

    # validate
    model.eval()
    val_num_correct = 0
    val_num_labels = 0
    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for batch in val_loader:
            x, y, xlens, ylens = batch
            x = x.to(device)
            y = y.to(device)

            val_outputs = model(x)
            prediction_val = (val_outputs>0.5).float()

            # Count matching labels over the whole batch at once, and track how
            # many labels were actually seen so a partial last batch is handled.
            val_num_correct += int((prediction_val == y).sum().item())
            val_num_labels += y.numel()

    torch.cuda.empty_cache()

    train_loss = float(total_loss / len(train_loader))
    train_loss_list.append(train_loss)
    # Kept in its own name so the `lr` hyper-parameter is not overwritten by
    # the decayed value.
    current_lr = float(optimizer.param_groups[0]['lr'])
    lr_list.append(current_lr)
    # Per-label accuracy in percent; the same value is stored and printed.
    validation_acc = 100 * val_num_correct / max(val_num_labels, 1)
    validation_acc_list.append(validation_acc)

    print("Epoch {}/{}: Train Loss {}, Learning Rate {}, Val Acc {:.04f}%".format(
        epoch + 1,
        epochs,
        train_loss,
        current_lr,
        validation_acc))

    scheduler.step()