In [31]:
# 0. import packages
	
import time
import numpy as np
import torch
import torch.cuda
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import matplotlib.pyplot as plt

In [32]:
#1.1 GPU stuff

print ("cuda: ", torch.cuda.is_available())
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print ("current device: ", device)
print ("count: ", torch.cuda.device_count())

if torch.cuda.is_available():
    print ("device name: ", torch.cuda.get_device_name(0))
    torch.cuda.set_device(0)

cuda:  True
current device:  cuda:0
count:  1
device name:  NVIDIA GeForce GTX 1660 Ti


In [33]:
# 1.2 load provided dataset

data = np.load('lab2_dataset.npz')
# training and testing features
train_feats = torch.tensor(data['train_feats'], requires_grad=True)
test_feats = torch.tensor(data['test_feats'], requires_grad=True)
# training and testing labels
train_labels = torch.tensor(data['train_labels'])
test_labels = torch.tensor(data['test_labels'])
# phonemes
phone_labels = data['phone_labels']

print ("train_feats.shape: ", train_feats.shape)
#print ("train_feats: ", train_feats)
print ("--------")

print ("test_feats.shape: ", test_feats.shape)
# print ("test_feats: ", test_feats) 
print ("--------")

print ("train_labels.shape: ", train_labels.shape)
print ("train_labels: ", train_labels) 
print ("--------")

print ("test_labels.shape: ", test_labels.shape)
print ("test_labels: ", test_labels)
print ("--------")

print ("phone_labels.shape: ", phone_labels.shape)
print ("phone_labels: ", phone_labels)

train_feats.shape:  torch.Size([44730, 11, 40])
--------
test_feats.shape:  torch.Size([4773, 11, 40])
--------
train_labels.shape:  torch.Size([44730])
train_labels:  tensor([ 0,  0,  0,  ..., 47, 47, 47])
--------
test_labels.shape:  torch.Size([4773])
test_labels:  tensor([ 0,  0,  0,  ..., 40, 40, 40])
--------
phone_labels.shape:  (48,)
phone_labels:  ['sil' 's' 'ao' 'l' 'r' 'iy' 'vcl' 'd' 'eh' 'cl' 'p' 'ix' 'z' 'ih' 'sh'
 'n' 'v' 'aa' 'y' 'uw' 'w' 'ey' 'dx' 'b' 'ay' 'ng' 'k' 'epi' 'ch' 'dh'
 'er' 'en' 'g' 'aw' 'hh' 'ae' 'ow' 't' 'ax' 'm' 'zh' 'ah' 'el' 'f' 'jh'
 'uh' 'oy' 'th']


In [34]:
#1.3 place tensors on GPU

if torch.cuda.is_available():
    train_feats = train_feats.cuda()
    test_feats = test_feats.cuda()
    train_labels = train_labels.cuda()
    test_labels = test_labels.cuda()

    print ("train_feats.device: ", train_feats.get_device())
    print ("test_feats.device: ", test_feats.get_device())
    print ("train_labels.device: ", train_labels.get_device())
    print ("test_labels.device: ", test_labels.get_device())

train_feats.device:  0
test_feats.device:  0
train_labels.device:  0
test_labels.device:  0


In [35]:
# 2.  Set up the dataloaders
batch_size = 64

train_dataset = torch.utils.data.TensorDataset(train_feats, train_labels)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = batch_size, shuffle = True)

test_dataset = torch.utils.data.TensorDataset(test_feats, test_labels)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size = batch_size, shuffle = False)

Define the model architecture: /*feed-forward*/ | /*convolutional*/ | /*recurrent*/
try: tensor.view(), .reshape(), .transpose(), and .permute()
output: 48-dimensional vector

In [36]:
# 3.1 Feed-Forward Neural Network

class MyFFNN(nn.Module):
    def __init__(self, model_type, input_dim, output_dim):
        super(MyFFNN, self).__init__()
        # store model type
        self.model_type = model_type
        # store input size
        self.input_size = input_dim
        # batch 1
        self.linear1 = nn.Linear(input_dim, 2048)
        self.relu1 = nn.ReLU()
        # batch 2
        self.linear2 = nn.Linear(2048, 2048)
        self.relu2 = nn.ReLU()
        # batch 3
        self.linear3 = nn.Linear(2048, 2048)
        self.relu3 = nn.ReLU()
        # batch 4
        self.linear4 = nn.Linear(2048, 2048)
        self.relu4 = nn.ReLU()
        # batch 5 output
        self.linearOut = nn.Linear(2048, output_dim)

    def forward(self, x):
        # reshape data to work with model
        out = x.reshape(-1, self.input_size)
        # batch 1
        out = self.linear1(out)
        out = self.relu1(out)
        # batch 2
        out = self.linear2(out)
        out = self.relu2(out)
        # batch 3
        out = self.linear3(out)
        out = self.relu3(out)
        # batch 4
        out = self.linear4(out)
        out = self.relu4(out)
        # batch 5 output
        out = self.linearOut(out)
        return out

In [37]:
# 3.2 Convolutional Neural Network

class MyCNN(nn.Module):
    def __init__(self, model_type, output_dim):
        super(MyCNN, self).__init__()
        # store model type
        self.model_type = model_type
        # batch 1:
        self.conv1 = nn.Conv2d(1, 128, kernel_size=3)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        # batch 2
        self.conv2 = nn.Conv2d(128, 128, kernel_size=3)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        # batch 3
        self.flatten = nn.Flatten()
        self.linear3 = nn.Linear(1024, 1024)
        self.relu3 = nn.ReLU()
        # batch 4
        self.linear4 = nn.Linear(1024, 1024)
        self.relu4 = nn.ReLU()
        # batch 5
        self.linear5 = nn.Linear(1024, output_dim)
        
    def forward(self, x):
        # reshape input
        print ("x.shape: ", x.shape)
        x = x[:, None, :, :]
        print ("x.shape: ", x.shape)
        # layer 1
        out = self.conv1(x)
        print ("[conv1] out.shape: ", out.shape)
        out = self.relu1(out)
        print ("[relu1] out.shape: ", out.shape)
        out = self.pool1(out)
        print ("[pool1] out.shape: ", out.shape)
        # layer 2
        out = self.conv2(out)
        print ("[conv2] out.shape: ", out.shape)
        out = self.relu2(out)
        print ("[relu2] out.shape: ", out.shape)
        out = self.pool2(out)
        print ("[pool2] out.shape: ", out.shape)
        # layer 3
        out = self.flatten(out)
        print ("[flatten] out.shape: ", out.shape)
        out = self.linear3(out)
        print ("[linear3] out.shape: ", out.shape)
        out = self.relu3(out)
        print ("[relu3] out.shape: ", out.shape)
        # layer 4
        out = self.linear4(out)
        print ("[linear4] out.shape: ", out.shape)
        out = self.relu4(out)
        print ("[relu4] out.shape: ", out.shape)
        #layer 5
        out = self.linear5(out)
        print ("[linear5] out.shape: ", out.shape)
        return out

In [38]:
# 4. Instantiate the model, loss function, and optimizer

total_examples = train_feats.shape[0]
input_size =  train_feats.shape[1] * train_feats.shape[2]
hidden_dim = 1000
output_size = 48

print ("examples: ", total_examples)
print ("input_size: ", input_size)
print ("output_size: ", output_size)


model = MyFFNN("FFNN", input_size, output_size)
#model = MyCNN("CNN", output_size)

model = model.to(device)
print ("model.device: ", next(model.parameters()).device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.001, momentum = 0.9)

examples:  44730
input_size:  440
output_size:  48
model.device:  cuda:0


In [39]:
#  6. Evaluate the model on the held-out test data.

def test_network(model, test_loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            inputs, labels = data
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total

In [40]:
# 7. determine accuracy for each phoneme individually

def compare_lists(list1, list2):
    return [x == y for x, y in zip(list1, list2)]

# https://www.geeksforgeeks.org/python-program-to-sort-a-list-of-tuples-by-second-item/
def sort_tuple_list(tuple_list):
    # reverse = None (Sorts in Ascending order)
    # key is set to sort using second element of
    # sublist lambda has been used
    tuple_list.sort(key = lambda x: x[0])
    return tuple_list

def calculate_phoneme_accuracy(myModel, test_loader):
    correct_list = [0] * 48
    total_list = [0] * 48
    phoneme_accuracy = [0.0] * 48
    with torch.no_grad():
        for data in test_loader:
            # get input data and labels
            inputs, labels = data
            # calculate predictions
            outputs = myModel(inputs)
            _, predicted = torch.max(outputs.data, 1)
            # determine which are correct
            correct = compare_lists(labels, predicted)
            # update lists
            for i in range(len(labels)):
                total_list[labels[i]] += 1
                if (correct[i]):
                    correct_list[labels[i]] += 1
                
    for i in range(48):
        if (total_list[i] != 0):
            phoneme_accuracy[i] = correct_list[i] / total_list[i]
                    
    # print ("total_list: ", total_list)
    # print ("correct_list: ", correct_list)
    # print ("phoneme_accuracy: ", phoneme_accuracy)
    
    return phoneme_accuracy

In [41]:
# 8. print out statistics and plots

def print_stats(myModel, iteration_list, accuracy_list, loss_list):
    # final accuracy plot        
    plt.plot(iteration_list, accuracy_list)
    plt.title("accuracy over time")
    plt.xlabel("iterations")
    plt.ylabel("accuracy")
    plt.show()
    
    # final loss plot        
    plt.plot(iteration_list, loss_list)
    plt.title("loss over time")
    plt.xlabel("iterations")
    plt.ylabel("loss")
    plt.show()
    
    # calculate phoneme accuracy        
    phoneme_accuracy = calculate_phoneme_accuracy(myModel, test_loader)
    # create phoneme and label list
    phoneme_accuracy_labeled = []
    for i in range(48):
        phoneme_accuracy_labeled.append([phoneme_accuracy[i], phone_labels[i]])
    phoneme_accuracy_labeled = sort_tuple_list(phoneme_accuracy_labeled)
    # split back into two
    a, b = zip(*phoneme_accuracy_labeled)
    
    # https://www.geeksforgeeks.org/bar-plot-in-matplotlib/
    # creating the bar plot
    fig, ax = plt.subplots(figsize =(16, 16))
    # Horizontal Bar Plot
    ax.barh(b, a)
    # Add padding between axes and labels
    ax.xaxis.set_tick_params(pad = 5)
    ax.yaxis.set_tick_params(pad = 5)
    # Add x, y gridlines
    ax.grid(visible = True, color ='grey', linestyle ='-.', linewidth = 0.5, alpha = 0.5)
    # Show top values
    ax.invert_yaxis()
    # Add annotation to bars
    for i in ax.patches:
        plt.text(i.get_width(), i.get_y() + 0.6, str(round((i.get_width()), 4)), fontsize = 10, fontweight ='bold', color ='grey')
    # Add Plot Title
    ax.set_title('accuracy of each phoneme', loc ='left')
    # Show Plot
    # Remove x, y Ticks
    plt.xlabel("accuracy")
    plt.ylabel("phonemes")
    plt.show()

In [42]:
# 5. Train the model with stochastic gradient descent, iterating over the training dataset several times

def train_network(epochs, iterations, current_model, train_loader, criterion, optimizer, print_iteration):
    
    startTime = time.time()

    print ("device name: ", torch.cuda.get_device_name(0))
    print ("model.type: ", current_model.model_type)
    print ("model.device: ", next(current_model.parameters()).device)
    
    current_iteration = 0
    
    # lists for data collection
    iteration_list = []
    accuracy_list = []
    loss_list = []

    for epoch in range(epochs):
        print ("epoch: ", epoch)
        for i, (inputs, labels) in enumerate(train_loader, 0):
            print ("inputs: ", inputs.shape)
            print ("labels: ", labels.shape)
            current_iteration = i + ((epoch) * iterations)
            # forward pass
            outputs = current_model(inputs)
            print ("outputs: ", outputs.shape)
            loss = criterion(outputs, labels)
            # backward pass and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # calculate accuracy
            if current_iteration % 100 == 0:  
                test_accuracy = test_network(current_model, test_loader)
                accuracy_list.append(test_accuracy)
                iteration_list.append(current_iteration)
                loss_list.append(loss.item())
            # print stats during training
            if current_iteration % print_iteration == 0:
                test_accuracy = test_network(current_model, test_loader)
                print(f'\t iteration: {current_iteration}\t loss: {loss.item():.3f}\t accuracy: {test_accuracy:.3f} %')
               
            current_iteration += 1         
    # print out stats
    print_stats(current_model, iteration_list, accuracy_list, loss_list)
    print ("time elapsed: ", round((time.time() - startTime), 2), " sec")

In [43]:
# all together now!

epochs = 100
iterations = total_examples / batch_size
iterations = int(iterations)
input_size =  train_feats.shape[1] * train_feats.shape[2]
output_size = 48

print ("epochs: ", epochs)
print ("total_examples: ", total_examples)
print ("iterations per epoch: ", iterations)
print ("batch_size: ", batch_size)
print ("input_size: ", input_size)
print ("output_size: ", output_size)
print ("---------------------------------")


# feed forward
# myModel = MyFFNN("FFNN", input_size, output_size)
# myModel = myModel.to(device)
# myCriterion = nn.CrossEntropyLoss()
# mOptimizer = optim.SGD(myModel.parameters(), lr = 0.001, momentum = 0.9)
# train_network(epochs, iterations, myModel, train_loader, myCriterion, mOptimizer, int(iterations / 3))

print ("---------------------------------")

# convolutional
myModel = MyCNN("CNN", output_size)
myModel = myModel.to(device)
myCriterion = nn.CrossEntropyLoss()
mOptimizer = optim.SGD(myModel.parameters(), lr = 0.001, momentum = 0.9)
train_network(epochs, iterations, myModel, train_loader, myCriterion, mOptimizer, int(iterations / 3))

epochs:  100
total_examples:  44730
iterations per epoch:  698
batch_size:  64
input_size:  440
output_size:  48
---------------------------------
---------------------------------
device name:  NVIDIA GeForce GTX 1660 Ti
model.type:  CNN
model.device:  cuda:0
epoch:  0
inputs:  torch.Size([64, 11, 40])
labels:  torch.Size([64])
x.shape:  torch.Size([64, 11, 40])
x.shape:  torch.Size([64, 1, 11, 40])
[conv1] out.shape:  torch.Size([64, 128, 9, 38])
[relu1] out.shape:  torch.Size([64, 128, 9, 38])
[pool1] out.shape:  torch.Size([64, 128, 4, 19])
[conv2] out.shape:  torch.Size([64, 128, 2, 17])
[relu2] out.shape:  torch.Size([64, 128, 2, 17])
[pool2] out.shape:  torch.Size([64, 128, 1, 8])
[flatten] out.shape:  torch.Size([64, 1024])
[linear3] out.shape:  torch.Size([64, 1024])
[relu3] out.shape:  torch.Size([64, 1024])
[linear4] out.shape:  torch.Size([64, 1024])
[relu4] out.shape:  torch.Size([64, 1024])
[linear5] out.shape:  torch.Size([64, 48])
outputs:  torch.Size([64, 48])
x.shape:

KeyboardInterrupt: 