In [1]:
import torchaudio
import torch
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np
from ast import literal_eval

from helpers.AudioVisionDataset import AudioVisionDataset
from helpers.ModelLayout import AudioVisionModel
from torchsummary import summary

In [2]:
features_file = "../DataAnalysis/Features.csv"

In [3]:
av_dataset = AudioVisionDataset(features_file)

In [4]:
train_size = int(0.667 * len(av_dataset))
test_size = len(av_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(av_dataset, [train_size, test_size])

In [5]:
model = AudioVisionModel()

In [6]:
import torch.utils.data as data
train_data_loader = data.DataLoader(train_dataset, shuffle = True)
test_data_loader = data.DataLoader(test_dataset, shuffle = True)

In [7]:
summary(model, (1,14))

torch.Size([2, 1, 14])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1               [-1, 14, 14]              28
         MaxPool1d-2                [-1, 14, 7]               0
            Conv1d-3                [-1, 28, 7]             420
         MaxPool1d-4                [-1, 28, 3]               0
            Linear-5                   [-1, 64]           5,440
           Dropout-6                   [-1, 64]               0
            Linear-7                    [-1, 2]             130
Total params: 6,018
Trainable params: 6,018
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.01
Params size (MB): 0.02
Estimated Total Size (MB): 0.03
----------------------------------------------------------------


In [8]:
from torch.autograd import Variable
import torch.optim as optim
import torch.nn as nn

loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [20]:
import time

def trainNet(net, n_epochs):
    
    #Print all of the hyperparameters of the training iteration:
    print("===== HYPERPARAMETERS =====")
    print("batch_size=", 174)
    print("epochs=", n_epochs)
    print("learning_rate=", 0.001)
    print("=" * 30)
    
    n_batches = len(train_data_loader)
    
    #Time for printing
    training_start_time = time.time()
    
    #Loop for n_epochs
    for epoch in range(n_epochs):
        
        running_loss = 0.0
        print_every = n_batches // 10
        start_time = time.time()
        total_train_loss = 0
        
        for i, data in enumerate(train_data_loader, 0):
            
            #Get inputs
            inputs, labels = data
            
            #Wrap them in a Variable object
            inputs, labels = Variable(inputs.unsqueeze(0)), Variable(labels)

            #Set the parameter gradients to zero
            optimizer.zero_grad()
            
            #Forward pass, backward pass, optimize
            outputs = net(inputs)
            loss_size = loss(outputs, labels)
            loss_size.backward()
            optimizer.step()
            
            #Print statistics
            running_loss += loss_size.data[0]
            total_train_loss += loss_size.data[0]
            
            #Print every 10th batch of an epoch
            if (i + 1) % (print_every + 1) == 0:
                print("Epoch {}, {:d}% \t train_loss: {:.2f} took: {:.2f}s".format(
                        epoch+1, int(100 * (i+1) / n_batches), running_loss / print_every, time.time() - start_time))
                #Reset running loss and time
                running_loss = 0.0
                start_time = time.time()
            
        #At the end of the epoch, do a pass on the validation set
        total_val_loss = 0
        for inputs, labels in test_data_loader:
            
            #Wrap tensors in Variables
            inputs, labels = Variable(inputs), Variable(labels)
            
            #Forward pass
            val_outputs = net(inputs)
            val_loss_size = loss(val_outputs, labels)
            total_val_loss += val_loss_size.data[0]
            
        print("Validation loss = {:.2f}".format(total_val_loss / len(test_data_loader)))
        
    print("Training finished, took {:.2f}s".format(time.time() - training_start_time))

In [21]:
trainNet(model, 2)

===== HYPERPARAMETERS =====
batch_size= 174
epochs= 2
learning_rate= 0.001
torch.Size([1, 1, 14])


RuntimeError: Expected object of scalar type Long but got scalar type Float for argument #2 'target'