In [1]:
import torchaudio
import torch
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np
from ast import literal_eval

from helpers.AudioVisionDataset import AudioVisionDataset
from helpers.ModelLayout import AudioVisionModel
from torchsummary import summary

In [2]:
features_file = "../DataAnalysis/8KFeatures.csv"

In [3]:
# featuresdf = pd.read_csv(features_file)
# temp = literal_eval(featuresdf['feature'][0])
# print(temp)
# print(len(temp))

In [4]:
av_dataset = AudioVisionDataset(features_file)

In [5]:
train_size = int(0.667 * len(av_dataset))
test_size = len(av_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(av_dataset, [train_size, test_size])

In [6]:
model = AudioVisionModel()

In [14]:
import torch.utils.data as data
train_data_loader = data.DataLoader(train_dataset, shuffle = True)
test_data_loader = data.DataLoader(test_dataset, shuffle = True)

In [15]:
summary(model, (1,14, 109))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 14, 13, 108]              70
         MaxPool2d-2            [-1, 14, 6, 54]               0
            Conv2d-3            [-1, 28, 5, 53]           1,596
         MaxPool2d-4            [-1, 28, 2, 26]               0
            Linear-5                   [-1, 64]          93,248
           Dropout-6                   [-1, 64]               0
            Linear-7                   [-1, 10]             650
Total params: 95,564
Trainable params: 95,564
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.25
Params size (MB): 0.36
Estimated Total Size (MB): 0.62
----------------------------------------------------------------


In [35]:
from torch.autograd import Variable
import torch.optim as optim
import torch.nn as nn

loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.7)

In [36]:
import time

def trainNet(net, n_epochs):
    
    #Print all of the hyperparameters of the training iteration:
    print("===== HYPERPARAMETERS =====")
    print("batch_size=", 174)
    print("epochs=", n_epochs)
    print("learning_rate=", 0.001)
    print("=" * 30)
    
    n_batches = len(train_data_loader)
    
    #Time for printing
    training_start_time = time.time()
    
    #Loop for n_epochs
    for epoch in range(n_epochs):
        
        running_loss = 0.0
        print_every = n_batches // 10
        start_time = time.time()
        total_train_loss = 0
        count = 0
        
        for i, data in enumerate(train_data_loader, 0):
            count+= 1
            
            #Get inputs
            inputs, labels = data
            
            #Wrap them in a Variable object
            inputs, labels = Variable(inputs.unsqueeze(0)), Variable(labels)

            #Set the parameter gradients to zero
            optimizer.zero_grad()
            
            #Forward pass, backward pass, optimize
            outputs = net(inputs)
            loss_size = loss(outputs, labels)
            loss_size.backward()
            optimizer.step()
            
            #Print statistics
            running_loss += loss_size.data
            total_train_loss += loss_size.data
            
            #Print every 10th batch of an epoch
            if (i + 1) % (print_every + 1) == 0:
                print("Epoch {}, {:d}% \t train_loss: {:.2f} took: {:.2f}s".format(
                        epoch+1, int(100 * (i+1) / n_batches), running_loss / print_every, time.time() - start_time))
                #Reset running loss and time
                running_loss = 0.0
                start_time = time.time()
            
        #At the end of the epoch, do a pass on the validation set
        total_val_loss = 0
        for inputs, labels in test_data_loader:
            
            count += 1
            #Wrap tensors in Variables
            inputs, labels = Variable(inputs.unsqueeze(0)), Variable(labels)
            
            #Forward pass
            val_outputs = net(inputs)
            val_loss_size = loss(val_outputs, labels)
            total_val_loss += val_loss_size.data
            
        print("Validation loss = {:.2f}".format(total_val_loss / len(test_data_loader)))
        print(count)
        
    print("Training finished, took {:.2f}s".format(time.time() - training_start_time))

In [37]:
trainNet(model, 2)

===== HYPERPARAMETERS =====
batch_size= 174
epochs= 2
learning_rate= 0.001
Epoch 1, 10% 	 train_loss: 2.28 took: 4.51s
Epoch 1, 20% 	 train_loss: 2.26 took: 4.47s
Epoch 1, 30% 	 train_loss: 2.26 took: 4.64s
Epoch 1, 40% 	 train_loss: 2.25 took: 5.73s
Epoch 1, 50% 	 train_loss: 2.26 took: 6.98s
Epoch 1, 60% 	 train_loss: 2.27 took: 4.69s
Epoch 1, 70% 	 train_loss: 2.25 took: 4.95s
Epoch 1, 80% 	 train_loss: 2.27 took: 4.54s
Epoch 1, 90% 	 train_loss: 2.26 took: 4.64s
Validation loss = 2.26
8732
Epoch 2, 10% 	 train_loss: 2.26 took: 4.69s
Epoch 2, 20% 	 train_loss: 2.25 took: 4.89s
Epoch 2, 30% 	 train_loss: 2.27 took: 4.67s
Epoch 2, 40% 	 train_loss: 2.26 took: 4.74s
Epoch 2, 50% 	 train_loss: 2.25 took: 4.46s
Epoch 2, 60% 	 train_loss: 2.28 took: 4.51s
Epoch 2, 70% 	 train_loss: 2.27 took: 4.38s
Epoch 2, 80% 	 train_loss: 2.25 took: 4.40s
Epoch 2, 90% 	 train_loss: 2.26 took: 4.41s
Validation loss = 2.26
8732
Training finished, took 127.05s
