# Installs

## Imports

In [4]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torchsummaryX import summary
from torch.utils.data import Dataset, DataLoader
import zipfile
import pandas as pd
from tqdm import tqdm
import os
import datetime
import warnings

#Dataset Imports
import csv
from IPython.display import Audio, display
import torchaudio
from transformers import Wav2Vec2FeatureExtractor
from torch.nn.utils.rnn import pad_sequence

# Silence all Python warnings for the rest of the session.
warnings.filterwarnings('ignore')

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print("Device: ", device)

Device:  cuda


# Dataset and Dataloader

In [5]:
#Set directory to the Audios folder in MSP dataset.
#The MSP_AUDIO_ROOT environment variable overrides the default so the notebook can run on
#another machine without editing this cell. Keep the trailing path separator: other cells
#may append file names to this string directly.
AUDIO_ROOT = os.environ.get(
    'MSP_AUDIO_ROOT',
    'C:\\Users\\Justin\\Documents\\idl\\2022\\Project\\Audios_fixed\\Audios\\',
)
#Set path to the labels_concensus.csv file in MSP dataset (despite the name, this is a
#file path, not a directory). The MSP_LABELS_PATH environment variable overrides the default.
LABELS_DIR = os.environ.get(
    'MSP_LABELS_PATH',
    'C:\\Users\\Justin\\Documents\\idl\\2022\\Project\\labels\\labels\\labels_concensus.csv',
)

In [6]:
#Load the directory (sorted so the file order lines up with the sorted label rows below)
names = sorted(os.listdir(AUDIO_ROOT))
#Sanity-load the first clip to confirm the audio backend works and to see shape / sample rate
data1 = os.path.join(AUDIO_ROOT, names[0])
#torchaudio.load requires you to install some programs if you get 'No audio I/O backend is available' error
#https://stackoverflow.com/questions/62543843/cannot-import-torch-audio-no-audio-backend-is-available
waveform, sample_rate = torchaudio.load(data1)
print(waveform.shape)
print(sample_rate)

#Load label csv file from the configured LABELS_DIR path instead of relying on a copy of
#the file sitting in the current working directory.
#newline='' is the mode the csv module documents for files passed to csv.reader.
with open(LABELS_DIR, newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  #drop the first row (column header)
    labels = sorted(reader)
print(labels[0])

torch.Size([1, 167814])
16000
['MSP-PODCAST_0001_0008.wav', 'N', '2.2', '4.0', '2.6', '30', 'Male', 'Test1']


In [7]:
#Emotion Classifier Map (Emotion tag to int for model)
EMOMAP = {
    'A': 1,  #Angry
    'S': 2,  #Sad
    'H': 3,  #Happy
    'U': 4,  #Surprise
    'F': 5,  #Fear
    'D': 6,  #Disgust
    'C': 7,  #Contempt
    'N': 8,  #Neutral
    'O': 9,  #Other
}

In [13]:
class MSPDataset(torch.utils.data.Dataset):
    """One partition of the MSP audio clips, loaded lazily from disk.

    Only file paths and integer emotion labels are kept in memory; the waveform
    is read with torchaudio when an item is requested.

    The class reads the module-level AUDIO_ROOT, LABELS_DIR, names, labels and
    EMOMAP objects, so those must be defined before instantiation.
    """

    #Column positions inside one row of the labels csv
    _COL_NAME = 0     #audio file name
    _COL_EMOTION = 1  #emotion tag (key of EMOMAP, or 'X')
    _COL_SPLIT = 7    #partition name: 'Train', 'Validation', 'Test1' or 'Test2'

    #Initialize the dataset based on the recommended split in MSP dataset.
    def __init__(self, train = False, valid = False, test1 = False, test2 = False):
        """Collect the file paths and labels belonging to one partition.

        Args:
            train, valid, test1, test2: set at most one to True to choose the
                partition. With no flag set the 'Train' partition is used.

        Raises:
            ValueError: if more than one partition flag is set.
            AssertionError: if the audio listing and the label rows do not
                line up one-to-one.
        """
        #What type of dataset are we making.
        #Conflicting flags used to be resolved silently by priority; reject them instead.
        requested = [
            split_name
            for flag, split_name in (
                (train, 'Train'),
                (valid, 'Validation'),
                (test1, 'Test1'),
                (test2, 'Test2'),
            )
            if flag
        ]
        if len(requested) > 1:
            raise ValueError(
                'Choose a single partition, got: ' + ', '.join(requested)
            )
        setType = requested[0] if requested else 'Train'
        print(setType)

        self.audio_dir = AUDIO_ROOT
        self.labels_dir = LABELS_DIR
        self.audio_names = sorted(names)
        self.labels_list = labels
        self.EMOMAP = EMOMAP

        self.audio = []
        self.labels = []

        #Sanitycheck1
        assert len(self.audio_names) == len(self.labels_list), \
            'number of audio files and number of label rows differ'

        for audio_name, row in tqdm(zip(self.audio_names, self.labels_list),
                                    total=len(self.audio_names)):
            assert audio_name == row[self._COL_NAME], \
                'audio file and label row are misaligned: ' + audio_name
            #Keep only rows of the requested partition
            if row[self._COL_SPLIT] != setType:
                continue
            #Rows tagged 'X' have no entry in EMOMAP, so they cannot be used as a class
            if row[self._COL_EMOTION] == 'X':
                continue
            #43 Audio files from 1904 podcast seems to be broken. Torchaudio load returns 'no data chunk'
            if row[self._COL_NAME].startswith('MSP-PODCAST_1904'):
                continue
            self.audio.append(os.path.join(self.audio_dir, audio_name))
            self.labels.append(self.EMOMAP[row[self._COL_EMOTION]])

        self.length = len(self.audio)

    def __len__(self):
        """Return the number of clips kept for this partition."""
        return self.length

    def __getitem__(self, ind):
        """Return (waveform, label) for the clip at position ind."""
        audio = self.audio[ind]
        label = self.labels[ind]
        #load audio when getting the item. If we do it in init, computer blue screens.
        waveform, _ = torchaudio.load(audio)
        return waveform, label

    def collate_fn(self, batch):
        """Merge (waveform, label) pairs into padded batch tensors.

        Each waveform is flattened to 1-D, then the batch is zero-padded on the
        right to the longest clip.

        Returns:
            (batch_audio, audio_lengths, labels): the padded batch-first audio
            tensor, a LongTensor of the unpadded lengths, and a tensor of labels.
        """
        batch_audio = [x[0].reshape(-1) for x in batch]
        audio_lengths = torch.LongTensor([len(x) for x in batch_audio])
        batch_audio = pad_sequence(batch_audio, padding_value=0.0, batch_first = True)
        batch_label = [x[1] for x in batch]

        return batch_audio, audio_lengths, torch.tensor(batch_label)


### Data loaders

In [5]:
# Force a garbage-collection pass to free memory; the integer shown as the cell
# output is gc.collect()'s return value.
import gc 
gc.collect()

482

In [15]:
# Build one MSPDataset per partition (Train, Validation, Test1).
train_data = MSPDataset(train = True)
val_data = MSPDataset(valid = True)
test_data = MSPDataset(test1 = True)

# Report how many clips each partition kept, one count per line.
print(len(train_data), len(val_data), len(test_data), sep='\n')

Train


100%|███████████████████████████████████████████████████████████████████████| 73042/73042 [00:00<00:00, 1162494.65it/s]


Validation


100%|███████████████████████████████████████████████████████████████████████| 73042/73042 [00:00<00:00, 2092453.85it/s]


Test1


100%|███████████████████████████████████████████████████████████████████████| 73042/73042 [00:00<00:00, 1786326.49it/s]

36011
6346
12371





In [3]:
#Set directory to the labelled_emotion folder in NSC dataset
NSC_Root = 'C:\\Users\\Justin\\Documents\\idl\\2022\\Project\\NSC_part5_labelled_emotion\\'
#quick way of looping subdirectories. Dataset doesn't have Depressed category.
#NOTE(review): label 3 is unused here, presumably reserved for Depressed - confirm.
subdirectory = [('Anger\\', 0), ('Sad\\', 1), ('Happy\\', 2), ('Neutral\\', 4)]

#Normally, we would have train/valid/test but we only have test for now, so we split it outside of our dataset class to make it workable.
#I'll update this once we get the full dataset.
#Percentages of each emotion folder assigned to each split.
trainsplit = 70
validsplit = 10
testsplit = 100 - trainsplit - validsplit

train_names = []
train_labels = []
valid_names = []
valid_labels = []
test_names = []
test_labels = []

#Split every emotion folder separately, so each class is divided with the same percentages.
for sub, label in subdirectory:
    #os.listdir returns entries in arbitrary order; sort so the same files land in the
    #same split on every run and on every machine.
    NSCaudios = sorted(os.listdir(NSC_Root + sub))
    NSClabels = [label]*len(NSCaudios)
    n = len(NSCaudios)
    trainlen = trainsplit * n //100
    validlen = validsplit * n //100
    train_names += NSCaudios[0:trainlen]
    train_labels += NSClabels[0:trainlen]
    valid_names += NSCaudios[trainlen:trainlen+validlen]
    valid_labels += NSClabels[trainlen:trainlen+validlen]
    #Whatever remains after the train and valid slices goes to test
    test_names += NSCaudios[trainlen+validlen:]
    test_labels += NSClabels[trainlen+validlen:]


In [4]:
class NSCDataset(torch.utils.data.Dataset):
    """Lazy-loading dataset over a list of NSC audio file names and their labels.

    Files are located as NSC_Root + <emotion folder> + <file name>, where the
    emotion folder is looked up from the integer label.
    """

    def __init__(self, audio_names, labels, train = False):
        """Store the parallel lists of file names and integer labels.

        Args:
            audio_names: file names, one per clip.
            labels: integer labels, same length as audio_names.
            train: accepted but not used by this class.
        """
        #Sanitycheck1
        assert len(audio_names) == len(labels)
        #Integer label -> sub-folder that holds the clips of that emotion
        self.Map = {0:'Anger\\', 1:'Sad\\', 2:'Happy\\', 4:'Neutral\\'}
        self.length = len(labels)
        self.audio = audio_names
        self.labels = labels

    def __len__(self):
        """Return the number of clips."""
        return self.length

    def __getitem__(self, ind):
        """Return (waveform, label) for the clip at position ind."""
        file_name = self.audio[ind]
        label = self.labels[ind]
        #load audio when getting the item. If we do it in init, computer blue screens.
        clip_path = NSC_Root + self.Map[label] + file_name
        waveform, _ = torchaudio.load(clip_path)
        return waveform, label


NameError: name 'torch' is not defined

In [None]:
# Force a garbage-collection pass to free memory before building the NSC datasets.
# NOTE(review): this repeats an earlier cell, including the `import gc`.
import gc 
gc.collect()

In [None]:
# Wrap the train / valid / test file-name and label lists in NSCDataset objects.
# NOTE(review): NSC_test_Data is capitalised differently from its two siblings; left
# unchanged because cells outside this view may refer to it by that name.
NSC_train_data = NSCDataset(train_names, train_labels)
NSC_valid_data = NSCDataset(valid_names, valid_labels)
NSC_test_Data = NSCDataset(test_names, test_labels)