In [1]:
# Import necessary packages

import numpy as np
import torch 
import torch.nn as nn 
import pandas as pd
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import os 
from sklearn.model_selection import train_test_split
import random
from sklearn.preprocessing import normalize
import torch.nn.functional as F

In [2]:
# Navigate to directory that stores all the files - will need to be changed for each person running this script
os.chdir('/home/ian/Documents/Penn_Fall_2020/CIS_519/CIS 519 Audio Classification Project Local Files')
os.getcwd()

'/home/ian/Documents/Penn_Fall_2020/CIS_519/CIS 519 Audio Classification Project Local Files'

In [3]:
# Reload features (takes a couple minutes)
# features = np.load('features_with_gender.npy',allow_pickle=True)
features = np.load('features/features_spectrogram.npy',allow_pickle=True)

In [4]:
# Changing array type of features (Necessary for it to work with pytorch)

features = [example for example in features]

In [5]:
# Load in labels 
labels = np.load('labels.npy', allow_pickle=True)

In [6]:
# Turn labels into numeric values, create dictionary to map back later 

labels_unique = np.unique(labels)
labels_dict = {}
for i in range (0, len(labels_unique)):
    labels_dict.update({i:labels_unique[i]})

labels_dict_reversed = {value:key for key, value in labels_dict.items()}

def label_to_num(x):
    return labels_dict_reversed[x]

labels_numeric = [label_to_num(country) for country in labels]

In [7]:
# Smaller subset of features/labels for debuggging
example_features = features[0:8000]
example_labels = labels_numeric[0:8000]
example_features

# x_train = features[0:1000]
# y_train = labels_numeric[0:1000]
# x_test = features[1200:1700]
# y_test = labels_numeric[1200:1700]

[array([[-13.34187343, -19.36829073, -42.49815068, ..., -42.49815068,
         -42.49815068, -17.31214748],
        [-13.34148203, -19.3621462 , -42.49815068, ..., -42.49815068,
         -42.49815068, -17.30967156],
        [-13.31530769, -19.34300726, -42.49815068, ..., -42.49815068,
         -42.49815068, -17.29370092],
        ...,
        [-42.49815068, -42.49815068, -42.49815068, ..., -42.49815068,
         -42.49815068, -42.49815068],
        [-42.49815068, -42.49815068, -42.49815068, ..., -42.49815068,
         -42.49815068, -42.49815068],
        [-42.49815068, -42.49815068, -42.49815068, ..., -42.49815068,
         -42.49815068, -42.49815068]]),
 array([[-23.34872194, -29.37783568, -54.49912388, ..., -54.49912388,
         -54.49912388, -28.15392076],
        [-23.34843555, -29.36979102, -54.49912388, ..., -54.49912388,
         -54.49912388, -28.16820798],
        [-23.33722839, -29.35470342, -54.49912388, ..., -54.49912388,
         -54.49912388, -28.16610406],
        ...,


In [8]:
# Normalize features
# example_features = normalize(example_features)

In [9]:
# Splitting into train test batches
random.seed(19)
train_indices = random.sample(range(0,len(example_features)), int(len(example_features)*.8))
test_indices = np.setdiff1d(range(0,len(example_features)), train_indices)

x_train = [example_features[x] for x in train_indices]
y_train = [example_labels[y] for y in train_indices]
x_test = [example_features[x] for x in test_indices]
y_test = [example_labels[y] for y in test_indices]

len(x_train[0])

1025

In [10]:
# Reading in metadata

meta = pd.read_csv('audioclassification_meta.csv', delimiter='\t')
meta.head(10)

Unnamed: 0,VoxCeleb1 ID\tVGGFace1 ID\tGender\tNationality\tSet
0,id10001\tA.J._Buckley\tm\tIreland\tdev
1,id10002\tA.R._Rahman\tm\tIndia\tdev
2,id10003\tAamir_Khan\tm\tIndia\tdev
3,id10004\tAaron_Tveit\tm\tUSA\tdev
4,id10005\tAaron_Yoo\tm\tUSA\tdev
5,id10006\tAbbie_Cornish\tf\tAustralia\tdev
6,id10007\tAbigail_Breslin\tf\tUSA\tdev
7,id10008\tAbigail_Spencer\tf\tUSA\tdev
8,id10009\tAdam_Beach\tm\tCanada\tdev
9,id10010\tAdam_Brody\tm\tUSA\tdev


In [11]:
# Initializing number of outputs for neural network

output_label_num = len(labels_unique)

In [12]:
# Creating neural network class

# Simple Feedforward

# class Net(nn.Module):
#     def __init__(self):
#         super().__init__()

#         self.fc1 = nn.Linear(145701, 500)
#         self.fc2 = nn.Linear(500, 100)
#         self.fc3 = nn.Linear(100, output_label_num)

#     def forward(self, x):
#         x = nn.functional.relu(self.fc1(x))
#         x = nn.functional.relu(self.fc2(x))
#         x = self.fc3(x)

#         return x 
    
# Convolutional 

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = torch.nn.Conv2d(in_channels=1,out_channels=6,kernel_size=3,stride=1,padding=0)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = torch.nn.Conv2d(in_channels=6,out_channels=10,kernel_size=3,stride=1,padding=0)
        self.fc1 = nn.Linear(10*254*10*1, 300)
        self.fc2 = nn.Linear(300, 100)
        self.fc3 = nn.Linear(100, output_label_num)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 1*10*10*254)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    
    

In [39]:
# Creating function to compute loss/accuracy

def test_model(net, test_dataloader):

    total_loss = 0
    total_classified_correct = 0
    total_attempted = 0
    pred_vs_actual = []

    criterion = nn.CrossEntropyLoss()

    for x, y in test_dataloader:
        output = net(x)
        _, predicted = torch.max(output, 1)
        loss = criterion(output, y)
        total_loss += loss.item()
        total_classified_correct += (predicted==y).sum().item()
        total_attempted += len(predicted)
        pred_vs_actual.append((predicted.item(), y.item()))
        
    print('Test Average Loss: ' + str(total_loss/total_attempted))
    print('Test Accuracy: ' + str(total_classified_correct/total_attempted))
    
    
    return total_attempted, pred_vs_actual

In [67]:
def check_preds(pred_vs_acc):
    pred_list = []
    acc_list = []
    total_list = []
    for pred, acc in pred_vs_acc:
        pred_list.append(pred)
        acc_list.append(acc)
        total_list.append(1)
        
    df = pd.DataFrame({'predicted':pred_list, 'actual':acc_list, 'total':total_list})
    df = df.groupby(['predicted', 'actual']).count().reset_index()
    
    def get_correct(row):
        if row.predicted==row.actual:
            return 1 
        else:
            return 0
        
    df['is_correct'] = df.apply(lambda x: get_correct(x), axis=1)
    df['predicted_country'] = df.predicted.apply(lambda x: labels_dict[x])
    df['actual_country'] = df.actual.apply(lambda x: labels_dict[x])
    
    return df

In [43]:
df = pd.DataFrame(columns=['1','3'])
df

Unnamed: 0,1,3


In [28]:
# Initializing model, parameters, move to GPU

net = Net()
net = net.cuda()

batch = 1
optimizer = optim.SGD(net.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()


In [29]:
# Creating training function

def train_model(epochs, net, train_dataloader, optimizer, criterion):

    train_loss_per_epoch = []
    train_acc_per_epoch = []

    for epoch in range (0, epochs):

        train_total_loss = 0
        train_total_correct = 0
        train_total_attempted = 0

        for x, y in train_dataloader:
            optimizer.zero_grad()
            output = net(x)
            _, predicted = torch.max(output, 1)
            loss = criterion(output, y)
            train_total_loss += loss.item()
            train_total_correct += (predicted==y).sum().item()
            train_total_attempted += len(predicted)
            loss.backward()
            optimizer.step()
    
        print('Epoch ' + str(epoch) + ':')
        print('Train Average Loss: ' + str(train_total_loss/train_total_attempted))
        print('Train Accuracy: ' + str(train_total_correct/train_total_attempted))

    return None

# train_model(1, net, train_dataloader, optimizer, criterion)

In [30]:
# Create dataloader objects, move to GPU

x_train_tensor = torch.tensor(x_train, dtype=torch.float)
x_train_tensor = x_train_tensor.unsqueeze(dim=1)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)

x_train_tensor = x_train_tensor.cuda()
y_train_tensor = y_train_tensor.cuda()

train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
train_dataloader = DataLoader(train_dataset, batch_size=batch)

x_test_tensor = torch.tensor(x_test, dtype=torch.float)
x_test_tensor = x_test_tensor.unsqueeze(dim=1)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

x_test_tensor = x_test_tensor.cuda()
y_test_tensor = y_test_tensor.cuda()

test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_dataloader = DataLoader(test_dataset, batch_size=batch)


In [31]:
x_train_tensor.shape

torch.Size([6400, 1, 1025, 47])

In [32]:
train_model(20, net, train_dataloader, optimizer, criterion)

Epoch 0:
Train Average Loss: 3.2031020141020417
Train Accuracy: 0.0965625
Epoch 1:
Train Average Loss: 2.89861531652743
Train Accuracy: 0.1671875
Epoch 2:
Train Average Loss: 2.5817747192265235
Train Accuracy: 0.24546875
Epoch 3:
Train Average Loss: 2.157073152951116
Train Accuracy: 0.36734375
Epoch 4:
Train Average Loss: 1.645644987329244
Train Accuracy: 0.5196875
Epoch 5:
Train Average Loss: 1.2351327674321015
Train Accuracy: 0.63640625
Epoch 6:
Train Average Loss: 0.9742152449741163
Train Accuracy: 0.708125
Epoch 7:
Train Average Loss: 0.7950826493070609
Train Accuracy: 0.7621875
Epoch 8:
Train Average Loss: 0.6738721960834116
Train Accuracy: 0.80140625
Epoch 9:
Train Average Loss: 0.5234476108404078
Train Accuracy: 0.8446875
Epoch 10:
Train Average Loss: 0.47705423382791046
Train Accuracy: 0.85921875
Epoch 11:
Train Average Loss: 0.42508762772201325
Train Accuracy: 0.87890625
Epoch 12:
Train Average Loss: 0.41606336789989046
Train Accuracy: 0.8775
Epoch 13:
Train Average Loss: 0.36

In [68]:
pred_vs_acc = test_model(net, test_dataloader)[1]

Test Average Loss: 9.813248497753166
Test Accuracy: 0.118125


In [76]:
check_preds(pred_vs_acc)[check_preds(pred_vs_acc)['is_correct']==1].sort_values(by='total', ascending=False)

Unnamed: 0,predicted,actual,total,is_correct,predicted_country,actual_country
311,17,17,22,1,New Zealand,New Zealand
175,10,10,17,1,India,India
0,0,0,16,1,Australia,Australia
29,1,1,16,1,Austria,Austria
148,8,8,13,1,Germany,Germany
210,12,12,13,1,Ireland,Ireland
58,3,3,12,1,Canada,Canada
469,30,30,12,1,Sweden,Sweden
399,23,23,9,1,Russia,Russia
550,34,34,8,1,USA,USA


In [77]:
check_preds(pred_vs_acc)[check_preds(pred_vs_acc)['is_correct']==0].sort_values(by='total', ascending=False)

Unnamed: 0,predicted,actual,total,is_correct,predicted_country,actual_country
298,17,0,15,0,New Zealand,Australia
214,12,17,14,0,Ireland,New Zealand
7,0,8,14,0,Australia,Germany
300,17,3,14,0,New Zealand,Canada
177,10,12,14,0,India,Ireland
...,...,...,...,...,...,...
260,15,1,1,0,Mexico,Austria
258,14,34,1,0,Italy,USA
255,14,20,1,0,Italy,Philippines
254,14,18,1,0,Italy,Norway
