In [71]:
import os
import numpy as np
import pandas as pd

In [72]:
import soundfile as sf
from python_speech_features import mfcc, logfbank

In [73]:
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

In [74]:
# Training split: folder prefix for the audio files, and the pickle that is
# loaded with pd.read_pickle and read through its 'filename' / 'label' columns.
train_folder_path='Dataset/train/'
train_pickle='train_map.pickle'

In [75]:
# Test split: same layout as the training split (audio folder prefix plus a
# pickle with 'filename' / 'label' columns).
test_folder_path='Dataset/test/'
test_pickle='test_map.pickle'

In [76]:
def mfcc_calculation(y,rate):
    """Reduce the 1 s - 9 s window of a signal to a 1-D MFCC summary.

    Parameters
    ----------
    y : array of samples, sliced along its first axis.
    rate : sampling rate in Hz; used only to turn the 1 s and 9 s bounds
        into sample indices.

    Returns
    -------
    The MFCC matrix of the window, transposed and averaged over axis 0.
    Presumably ``mfcc`` returns (frames, coefficients), in which case the
    result holds one value per frame - TODO confirm.

    NOTE(review): ``rate`` is not forwarded to ``mfcc``, so the library's
    default sample rate drives the framing. Confirm this is intended;
    changing it alters the length of the returned vector.
    """
    first_sample = int(1*rate)
    last_sample = int(9*rate)
    window = y[first_sample:last_sample]
    coefficients = mfcc(window)
    return np.mean(coefficients.T,axis=0)

In [77]:
class Dataset(Dataset):
    """Map-style dataset that yields ``(features, label)`` for one audio file.

    Parameters
    ----------
    folder_path : directory that contains the audio files.
    pickle : path to a pickled pandas table with 'filename' and 'label'
        columns.

    NOTE: the class reuses the name of the imported torch base class, so
    re-running this cell makes it subclass its own previous definition.
    The name is kept because other cells instantiate ``Dataset(...)``.
    """
    def __init__(self,folder_path,pickle):
        # SECURITY: unpickling can execute arbitrary code - only load
        # mapping files from a trusted source.
        self.pickle=pd.read_pickle(pickle)
        self.folder_path=folder_path

    def __len__(self):
        """Number of rows in the filename/label table."""
        return self.pickle.shape[0]

    def __getitem__(self,idx):
        """Return ``(features, label)`` for the row at position ``idx``."""
        # Samplers hand out positions 0..len-1, so the lookup must be
        # positional (.iloc); a label-based .loc breaks as soon as the table
        # index is not exactly 0..n-1 (e.g. after filtering or shuffling).
        filename=self.pickle['filename'].iloc[idx]
        label=self.pickle['label'].iloc[idx]
        # join instead of '+' so folder_path works with or without a
        # trailing separator
        file_path=os.path.join(self.folder_path,filename)
        signal,sample_rate = sf.read(file_path)
        features=mfcc_calculation(signal,sample_rate).T
        return features,label

In [78]:
# Training dataset built from the training folder and its filename/label pickle.
train=Dataset(train_folder_path,train_pickle)

In [79]:
# Spot-check one sample: indexing the dataset returns a (features, label) pair.
train[2500]

(array([ 0.16466813,  0.4475323 , -7.44830319, ..., -6.58984447,
        -5.5760523 , -5.40538994]), 2)

In [80]:
# Mini-batches of 10 samples; shuffle=True reshuffles the data every epoch.
train_loader=DataLoader(train,batch_size=10,shuffle=True)

In [81]:
# Manual iterator over the loader, used to pull out a batch for inspection.
dataiter=iter(train_loader)

In [82]:
# Fetch one batch for inspection. The built-in next() is used because recent
# PyTorch DataLoader iterators no longer provide a .next() method.
data,label=next(dataiter)

In [83]:
# Inspect the feature batch; note that it is float64 (see the dtype below).
data

tensor([[ -4.6546,  -4.8721,  -3.2247,  ...,   1.9475,  -0.5086,  -7.7759],
        [ -6.6595,  -4.2734,  -4.5027,  ...,  -3.2101,  -6.7514,  -6.5088],
        [ -0.7924,   0.1814,  -0.4557,  ...,  -1.6499,  -2.0457,  -3.3162],
        ...,
        [ -2.7792,  -4.3989,  -3.2863,  ...,  -1.5822,  -2.9771,  -9.8651],
        [  0.5290,   1.5440,   1.0050,  ...,   1.7289,  -1.0923,   0.0842],
        [ -6.1970,  -4.6316, -10.2945,  ...,  -3.6216,  -7.7341,  -9.8853]],
       dtype=torch.float64)

In [84]:
# Length of one feature vector; the first Linear layer of the network must
# use this as its input size.
data[0].shape

torch.Size([1101])

In [85]:
# Class label of the first sample in the batch.
label[0]

tensor(2)

In [86]:
import torch.nn as nn
import torch.nn.functional as F

In [87]:
import torch.optim

In [88]:
# define the NN architecture
class Net(nn.Module):
    """Fully connected classifier: features -> hidden_1 -> hidden_2 -> scores.

    Parameters
    ----------
    n_features : length of one input feature vector (default 1101).
    hidden_1 : width of the first hidden layer (default 512).
    hidden_2 : width of the second hidden layer (default 256).
    n_classes : number of output classes (default 3).

    ``forward`` returns raw class scores (no softmax); the notebook feeds
    them to ``nn.CrossEntropyLoss``.
    """
    def __init__(self, n_features=1101, hidden_1=512, hidden_2=256, n_classes=3):
        super(Net, self).__init__()
        # linear layer (n_features -> hidden_1)
        self.fc1 = nn.Linear(n_features, hidden_1)
        # linear layer (hidden_1 -> hidden_2)
        self.fc2 = nn.Linear(hidden_1, hidden_2)
        # linear layer (hidden_2 -> n_classes)
        self.fc3 = nn.Linear(hidden_2, n_classes)

    def forward(self, x):
        """Map a (batch, n_features) float tensor to (batch, n_classes) scores."""
        # first hidden layer, with relu activation function
        x = F.relu(self.fc1(x))
        # second hidden layer, with relu activation function
        x = F.relu(self.fc2(x))
        # output layer: raw scores, no activation
        x = self.fc3(x)
        return x

# initialize the NN
model = Net()
print(model)

Net(
  (fc1): Linear(in_features=1101, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=3, bias=True)
)


In [89]:
# specify loss function (categorical cross-entropy)
# CrossEntropyLoss applies log-softmax itself, so it is given the raw scores
# that Net.forward returns.
criterion = nn.CrossEntropyLoss()

# specify optimizer (stochastic gradient descent) and learning rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [90]:
# Re-display the batch fetched earlier (same tensor as shown before).
data

tensor([[ -4.6546,  -4.8721,  -3.2247,  ...,   1.9475,  -0.5086,  -7.7759],
        [ -6.6595,  -4.2734,  -4.5027,  ...,  -3.2101,  -6.7514,  -6.5088],
        [ -0.7924,   0.1814,  -0.4557,  ...,  -1.6499,  -2.0457,  -3.3162],
        ...,
        [ -2.7792,  -4.3989,  -3.2863,  ...,  -1.5822,  -2.9771,  -9.8651],
        [  0.5290,   1.5440,   1.0050,  ...,   1.7289,  -1.0923,   0.0842],
        [ -6.1970,  -4.6316, -10.2945,  ...,  -3.6216,  -7.7341,  -9.8853]],
       dtype=torch.float64)

In [91]:
# The batch is float64 (see its dtype above); .float() casts it to float32
# before it is passed through the model.
output=model(data.float())

In [92]:
# Raw class scores: one row per sample in the batch, one column per class.
output

tensor([[-0.9521,  0.7267, -0.8000],
        [-0.6991,  0.9954, -0.7815],
        [-0.8080,  0.5246, -0.4593],
        [-0.0091,  0.6066, -0.2689],
        [-0.9201,  0.7026, -0.1274],
        [-0.7994,  1.0447, -0.6364],
        [-0.0884,  1.2978, -0.3389],
        [-0.4192, -0.5365, -0.2734],
        [-0.5232,  0.2706, -0.0190],
        [-0.5301,  0.2486, -0.1899]], grad_fn=<AddmmBackward>)

In [93]:
# Cross-entropy of the model on the inspected batch, before the training loop runs.
criterion(output,label)

tensor(1.3551, grad_fn=<NllLossBackward>)

In [94]:
# number of epochs to train the model
n_epochs = 10

# Tracker for the minimum validation loss. No validation pass is run in this
# cell; the name is only kept defined in case other cells read it.
# np.inf is used because the np.Inf alias was removed in NumPy 2.0.
valid_loss_min = np.inf

for epoch in range(n_epochs):
    # running sum of per-sample training loss for this epoch
    train_loss = 0.0
    # never updated: this loop performs no validation
    valid_loss = 0.0

    ###################
    # train the model #
    ###################
    model.train() # prep model for training
    for data, target in train_loader:
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: the loader yields float64 features, cast to float32
        output = model(data.float())
        # calculate the (batch-mean) loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # weight the batch-mean loss by the batch size so that a smaller
        # final batch does not skew the epoch average
        train_loss += loss.item()*data.size(0)

    # average per-sample training loss over the epoch
    train_loss = train_loss/len(train_loader.sampler)

    print('Epoch: {} \tTraining Loss: {:.6f} '.format(
        epoch+1,
        train_loss
        ))

Epoch: 1 	Training Loss: 1.122048 
Epoch: 2 	Training Loss: 1.049255 
Epoch: 3 	Training Loss: 0.994045 
Epoch: 4 	Training Loss: 0.938933 
Epoch: 5 	Training Loss: 0.862856 
Epoch: 6 	Training Loss: 0.778864 
Epoch: 7 	Training Loss: 0.717588 
Epoch: 8 	Training Loss: 0.672055 
Epoch: 9 	Training Loss: 0.598805 
Epoch: 10 	Training Loss: 0.559573 


In [95]:
# Persist only the learned parameters (state_dict), not the model object itself.
torch.save(model.state_dict(), 'model.pt')

In [96]:
# Test dataset built from the test folder and its filename/label pickle.
test=Dataset(test_folder_path,test_pickle)

In [97]:
# Batches of 16 test samples. Shuffling only changes the batch order; the
# aggregated loss and accuracy do not depend on it.
test_loader=DataLoader(test,batch_size=16,shuffle=True)

In [98]:
# initialize accumulators for test loss and per-class accuracy
test_loss = 0.0
# NOTE: the lists have 10 slots although the network has 3 outputs; the
# unused slots simply stay at 0. The size is kept for cells that index them.
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))


model.eval() # prep model for evaluation

# evaluation needs no gradients; skipping the autograd graph saves memory and time
with torch.no_grad():
    for data, target in test_loader:
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data.float())
        # calculate the (batch-mean) loss
        loss = criterion(output, target)
        # update test loss, weighted by batch size
        test_loss += loss.item()*data.size(0)
        # predicted class = index of the largest score
        _, pred = torch.max(output, 1)
        # element-wise comparison with the true labels. The result is kept
        # 1-D: squeezing would turn a final batch of size 1 into a 0-d tensor
        # that cannot be indexed below.
        correct = pred.eq(target.view_as(pred))
        # tally correct predictions and sample counts per class
        for i in range(len(target)):
            label = target[i].item()
            class_correct[label] += correct[i].item()
            class_total[label] += 1


# overall accuracy; total_true is derived here instead of relying on a
# variable left over from another cell
total_true = sum(class_correct)
acc=total_true/len(test_loader.sampler)
# calculate and print avg test loss
test_loss = test_loss/len(test_loader.sampler)
print('Test Loss: {:.6f}\n'.format(test_loss))

Test Loss: 4.523189

