In [1]:
# Import necessary packages

import numpy as np
import torch 
import torch.nn as nn 
import pandas as pd
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import os 
from sklearn.model_selection import train_test_split
import random
from sklearn.preprocessing import normalize

In [2]:
# Navigate to the directory that stores all the project files.
# Set the AUDIO_PROJECT_DIR environment variable to point at your own copy of
# the data; the original author's path is only used as the fallback default.
PROJECT_DIR = os.environ.get(
    'AUDIO_PROJECT_DIR',
    '/home/ian/Documents/Penn_Fall_2020/CIS_519/CIS 519 Audio Classification Project Local Files',
)
os.chdir(PROJECT_DIR)
os.getcwd()

'/home/ian/Documents/Penn_Fall_2020/CIS_519/CIS 519 Audio Classification Project Local Files'

In [3]:
# Read the precomputed feature array back from disk (takes a couple of minutes).
# allow_pickle=True lets NumPy unpickle object arrays, so only load files you trust.
features = np.load(file='features_with_gender.npy', allow_pickle=True)
# Alternative feature file (disabled):
# features = np.load('features/features_spectrogram.npy',allow_pickle=True)

In [4]:
# Turn the loaded array into a plain Python list with one entry per example
# (original author's note: necessary for it to work with pytorch).

features = list(features)

In [5]:
# Read the per-example labels from disk.
# allow_pickle=True lets NumPy unpickle object arrays, so only load files you trust.
labels = np.load(file='labels.npy', allow_pickle=True)

In [6]:
# Encode the labels as integers and keep lookup tables in both directions so
# that numeric predictions can be mapped back to label names later.

labels_unique = np.unique(labels)

# index -> label (index is the position of the label in labels_unique)
labels_dict = dict(enumerate(labels_unique))

# label -> index
labels_dict_reversed = {name: idx for idx, name in labels_dict.items()}

def label_to_num(x):
    """Return the integer index assigned to label ``x``.

    Raises KeyError if ``x`` is not a key of ``labels_dict_reversed``.
    """
    return labels_dict_reversed[x]

labels_numeric = list(map(label_to_num, labels))

In [7]:
# Work on the first 1000 examples only, to keep debugging runs small
example_features = features[:1000]
example_labels = labels_numeric[:1000]
example_features

# Earlier fixed-range split, kept for reference (disabled):
# x_train = features[0:1000]
# y_train = labels_numeric[0:1000]
# x_test = features[1200:1700]
# y_test = labels_numeric[1200:1700]

[array([-13.34187343, -19.36829073, -42.49815068, ..., -43.39887251,
        -43.39887251,   0.        ]),
 array([-23.34872194, -29.37783568, -54.49912388, ..., -76.85649021,
        -54.45778821,   1.        ]),
 array([-28.60158189, -34.66182907, -51.98141101, ..., -54.56450314,
        -54.56450314,   0.        ]),
 array([-14.29097605, -20.31143719, -44.91353647, ..., -46.83912689,
        -46.83912689,   0.        ]),
 array([-14.35837638, -20.37688539, -64.76234986, ..., -80.15478261,
        -64.79328144,   0.        ]),
 array([-18.73061716, -24.84106592, -64.37371491, ..., -82.7641478 ,
        -72.82986199,   0.        ]),
 array([-18.38457192, -24.41054313, -52.75038135, ..., -79.18400266,
        -79.18400266,   0.        ]),
 array([  8.55978477,   6.74288844,   1.54926666, ..., -59.10018164,
        -53.12519039,   0.        ]),
 array([-22.26235543, -28.25166349, -55.86211006, ..., -86.78397627,
        -73.03322246,   0.        ]),
 array([-100., -100., -100., ..., -10

In [8]:
# Normalize features: with these settings (scikit-learn's defaults, spelled out)
# every example (row) is rescaled to unit L2 norm.
example_features = normalize(example_features, norm='l2', axis=1)

In [9]:
# Split the examples into a training set (80% of the indices, drawn at random)
# and a test set (every index that was not drawn).
random.seed(19)  # fixed seed so the same indices are drawn on every run
all_indices = range(0, len(example_features))
train_size = int(len(example_features)*.8)
train_indices = random.sample(all_indices, train_size)
test_indices = np.setdiff1d(all_indices, train_indices)

x_train, y_train = [], []
for idx in train_indices:
    x_train.append(example_features[idx])
    y_train.append(example_labels[idx])

x_test, y_test = [], []
for idx in test_indices:
    x_test.append(example_features[idx])
    y_test.append(example_labels[idx])

x_train

[array([-4.49472468e-05, -9.72499609e-07,  3.97053328e-05, ...,
        -2.76824705e-03, -2.30333028e-03,  6.24921193e-05]),
 array([-0.00057058, -0.00080475, -0.00245819, ..., -0.0029672 ,
        -0.00293065,  0.        ]),
 array([-2.62464061e-03, -2.79165579e-03, -2.79165579e-03, ...,
        -2.81886194e-03, -2.77160283e-03,  6.18251943e-05]),
 array([-0.0005942 , -0.00088469, -0.0025648 , ..., -0.00312846,
        -0.00263359,  0.        ]),
 array([-1.70131124e-03, -1.97822063e-03, -2.38302002e-03, ...,
        -3.21184103e-03, -2.80857571e-03,  4.52481332e-05]),
 array([-0.00063553, -0.00096799, -0.00278541, ..., -0.0028949 ,
        -0.0028949 ,  0.        ]),
 array([-1.05936873e-03, -1.35791662e-03, -2.71718009e-03, ...,
        -2.81518520e-03, -2.52013917e-03,  4.95837402e-05]),
 array([-0.00028155, -0.00064618, -0.00269039, ..., -0.00285793,
        -0.00285793,  0.        ]),
 array([-0.00130773, -0.00154441, -0.0021718 , ..., -0.00338284,
        -0.00279274,  0.       

In [10]:
# Load the metadata table and preview its first ten rows.
# NOTE(review): the saved output of this cell shows a single column whose name
# contains "\t" sequences - confirm the tab separator is really splitting the
# columns of this file.

meta = pd.read_csv('audioclassification_meta.csv', sep='\t')
meta.head(10)

Unnamed: 0,VoxCeleb1 ID\tVGGFace1 ID\tGender\tNationality\tSet
0,id10001\tA.J._Buckley\tm\tIreland\tdev
1,id10002\tA.R._Rahman\tm\tIndia\tdev
2,id10003\tAamir_Khan\tm\tIndia\tdev
3,id10004\tAaron_Tveit\tm\tUSA\tdev
4,id10005\tAaron_Yoo\tm\tUSA\tdev
5,id10006\tAbbie_Cornish\tf\tAustralia\tdev
6,id10007\tAbigail_Breslin\tf\tUSA\tdev
7,id10008\tAbigail_Spencer\tf\tUSA\tdev
8,id10009\tAdam_Beach\tm\tCanada\tdev
9,id10010\tAdam_Brody\tm\tUSA\tdev


In [11]:
# Number of outputs for the neural network: one per distinct label value

output_label_num = labels_unique.size

In [12]:
# Creating neural network class

class Net(nn.Module):
    """Fully connected classifier with two hidden ReLU layers.

    Args:
        in_features: length of each input feature vector. Defaults to 145701,
            the value that used to be hard-coded.
        hidden1: width of the first hidden layer (default 500).
        hidden2: width of the second hidden layer (default 100).
        num_classes: number of output scores. When None, the module-level
            ``output_label_num`` is looked up at construction time, which is
            what the original definition always did.
    """

    def __init__(self, in_features=145701, hidden1=500, hidden2=100, num_classes=None):
        super().__init__()

        if num_classes is None:
            # Backward-compatible default: size the output from the notebook global.
            num_classes = output_label_num

        self.fc1 = nn.Linear(in_features, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, num_classes)

    def forward(self, x):
        """Return the raw output of the last linear layer for ``x`` (no softmax is applied)."""
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = self.fc3(x)

        return x 

In [13]:
# Creating function to compute loss/accuracy

def test_model(net, test_dataloader):

    total_loss = 0
    total_classified_correct = 0
    total_attempted = 0

    criterion = nn.CrossEntropyLoss()

    for x, y in test_dataloader:
        output = net(x)
        _, predicted = torch.max(output, 1)
        loss = criterion(output, y)
        total_loss += loss.item()
        total_classified_correct += (predicted==y).sum().item()
        total_attempted += len(predicted)
        
    print('Test Average Loss: ' + str(total_loss/total_attempted))
    print('Test Accuracy: ' + str(total_classified_correct/total_attempted))


In [14]:
# Initializing model and training parameters; the model is moved to the GPU
# when CUDA is available and stays on the CPU otherwise.

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

net = Net()
net = net.to(device)

batch = 1  # batch size used when the dataloaders are built
optimizer = optim.SGD(net.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()


In [15]:
# Creating training function

def train_model(epochs, net, train_dataloader, optimizer, criterion):
    """Train ``net`` for ``epochs`` passes over ``train_dataloader``.

    After every epoch the per-example average training loss and the training
    accuracy (both measured on the batches as they were trained on) are printed.

    Args:
        epochs: number of passes over ``train_dataloader``.
        net: an ``nn.Module`` mapping a batch of inputs to per-class scores.
        train_dataloader: iterable yielding ``(x, y)`` batches, where ``y``
            holds the integer class index of each example in ``x``.
        optimizer: optimizer over ``net``'s parameters.
        criterion: loss called as ``criterion(output, y)``.

    Returns:
        None.

    Raises:
        ValueError: if ``train_dataloader`` yields no examples.
    """
    net.train()

    # A criterion with reduction='sum' already returns a per-batch total; any
    # other setting is treated as a per-batch mean and re-weighted below.
    sums_over_batch = getattr(criterion, 'reduction', 'mean') == 'sum'

    for epoch in range(0, epochs):

        train_total_loss = 0.0
        train_total_correct = 0
        train_total_attempted = 0

        for x, y in train_dataloader:
            optimizer.zero_grad()
            output = net(x)
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()

            # Bookkeeping uses the scores computed before this step's update.
            _, predicted = torch.max(output, 1)
            batch_size = len(predicted)
            batch_loss = loss.item()
            if not sums_over_batch:
                # Turn the batch mean into a batch total so the division by
                # the number of examples below gives a per-example average.
                batch_loss *= batch_size
            train_total_loss += batch_loss
            train_total_correct += (predicted == y).sum().item()
            train_total_attempted += batch_size

        if train_total_attempted == 0:
            raise ValueError('train_dataloader yielded no examples')

        print('Epoch ' + str(epoch) + ':')
        print('Train Average Loss: ' + str(train_total_loss/train_total_attempted))
        print('Train Accuracy: ' + str(train_total_correct/train_total_attempted))

    return None

# train_model(1, net, train_dataloader, optimizer, criterion)

In [16]:
# Create dataloader objects; tensors are placed on the GPU when CUDA is
# available and stay on the CPU otherwise.

def _to_dataset_tensors(inputs, targets, target_device):
    """Return (float inputs tensor, long targets tensor), both on ``target_device``."""
    # Stack the per-example arrays into one ndarray first: torch.tensor on a
    # Python list of ndarrays converts element by element and is very slow.
    inputs_tensor = torch.tensor(np.asarray(inputs), dtype=torch.float)
    targets_tensor = torch.tensor(targets, dtype=torch.long)
    return inputs_tensor.to(target_device), targets_tensor.to(target_device)

_data_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

x_train_tensor, y_train_tensor = _to_dataset_tensors(x_train, y_train, _data_device)

train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
# NOTE(review): batches are served in a fixed order every epoch; consider
# shuffle=True for training if that is not intentional.
train_dataloader = DataLoader(train_dataset, batch_size=batch)

x_test_tensor, y_test_tensor = _to_dataset_tensors(x_test, y_test, _data_device)

test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_dataloader = DataLoader(test_dataset, batch_size=batch)


In [17]:
# for x, y in train_dataloader:
#   _, predicted = torch.max(net(x), 1)
#   print(predicted==y)
#   print(len(y))

# torch.cuda.empty_cache()

In [18]:
# Train for 10 epochs with the model, data, optimizer and loss set up above
train_model(
    epochs=10,
    net=net,
    train_dataloader=train_dataloader,
    optimizer=optimizer,
    criterion=criterion,
)

Epoch 0:
Train Average Loss: 3.259448919296265
Train Accuracy: 0.44625
Epoch 1:
Train Average Loss: 2.76243302077055
Train Accuracy: 0.5
Epoch 2:
Train Average Loss: 2.077234193384647
Train Accuracy: 0.49875
Epoch 3:
Train Average Loss: 1.2312619260698556
Train Accuracy: 0.5
Epoch 4:
Train Average Loss: 0.838072184547782
Train Accuracy: 0.50875
Epoch 5:
Train Average Loss: 0.7524749963730574
Train Accuracy: 0.51375
Epoch 6:
Train Average Loss: 0.7269529186189174
Train Accuracy: 0.51125
Epoch 7:
Train Average Loss: 0.7159790027886629
Train Accuracy: 0.515
Epoch 8:
Train Average Loss: 0.7101530817151069
Train Accuracy: 0.515
Epoch 9:
Train Average Loss: 0.7066337883472442
Train Accuracy: 0.51875


In [19]:
# Report loss and accuracy of the trained model on the held-out examples
test_model(net=net, test_dataloader=test_dataloader)

Test Average Loss: 0.7100214299559593
Test Accuracy: 0.485
