In [1]:
%matplotlib inline
import IPython.display

# Basic Imports
import gzip
import cPickle as pickle
import pandas as pd
import random
import seaborn
# import librosa
import sklearn
import numpy as np
from matplotlib import pyplot as plt

# PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as data_utils
from torch.autograd import Variable

## Load Data

In [2]:
# Loaded Spoken Digits Dataset
dbfile ='../vae/SpokenDigitDB.pkl.gz'
with gzip.open(dbfile, 'rb') as ifile:
    df = pd.read_pickle(ifile)
    print('File loaded as '+ dbfile)

File loaded as ../vae/SpokenDigitDB.pkl.gz


In [3]:
# Padding & Truncating
maxlen = 84
pad    = lambda a, n: a[:,0: n] if a.shape[1] > n else np.hstack((a, np.min(a[:])*np.ones([a.shape[0],n - a.shape[1]])))
df.Magnitude = df.Magnitude.apply(pad,args=(maxlen,))  # MaxLen Truncation Voodoo :D
print(np.unique([np.shape(x)[1] for x in df.Magnitude]))

[84]


In [4]:
# Random Sample
sr = 8000  
j  = random.randrange(len(df))
IPython.display.Audio(data=df.Wave[j], rate=sr)

## Data Preparation

In [5]:
# Prepare Data
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

# Train Scaler
x_data = df.Magnitude.values
normsc = np.hstack(x_data)
scaler = MinMaxScaler().fit(normsc.T)

# Transform Data using Scaler
x_data = [scaler.transform(arr.T).T for arr in df.Magnitude.values]
x_data = np.dstack(x_data).transpose(2,0,1)

# Add Singleton
# x_data = x_data[...,None]         # Add singleton class
# y_data = pd.get_dummies(df.Class).values # One Hot Encoding
y_data = df.Class.cat.codes.values.astype('int')

# Shuffle & Split
x_train,x_test,y_train,y_test=train_test_split(x_data,y_data,
                              test_size=0.33, random_state=32)

# Print Dimensions
print 'Training Feature size:', x_train.shape
print 'Training Target  size:', y_train.shape
print ''
print 'Testing  Feature size:', x_test.shape
print 'Testing  Target  size:', y_test.shape

Training Feature size: (335, 64, 84)
Training Target  size: (335,)

Testing  Feature size: (166, 64, 84)
Testing  Target  size: (166,)


In [6]:
# Create Torch DataLoader
# Training Set
feats  = torch.from_numpy(x_train)
targs  = torch.from_numpy(y_train)
dtrain = data_utils.TensorDataset(feats, targs)

# Testing Set
feats  = torch.from_numpy(x_test)
targs  = torch.from_numpy(y_test)
dtest  = data_utils.TensorDataset(feats, targs)

# Loaders
tr_loader = data_utils.DataLoader(dtrain,batch_size=10,shuffle=True,drop_last=True)
ts_loader = data_utils.DataLoader(dtest, batch_size=5, shuffle=True,drop_last=True)

## Recurrent Network Model

In [7]:
# Input Dimensions
_,fbins,steps = tr_loader.dataset.data_tensor.shape
nclass = len(np.unique(tr_loader.dataset.target_tensor))

# Parameters
L1 = 32
L2 = 20
L3 = 16

num_epochs = 20
batch_size = tr_loader.batch_size

In [8]:
# StateLess
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__() 
        self.gru1 = nn.GRUCell(fbins,L1).double()
        self.gru2 = nn.GRUCell(L1,L2).double()
        self.fc3  = nn.Linear(L2,L3).double()
        self.fc4  = nn.Linear(L3,nclass).double()
              
    def forward(self,inputs):
        h1 = Variable(torch.zeros(1, L1)).double()
        h2 = Variable(torch.zeros(1, L2)).double()
        for x in inputs:
            h1 = self.gru1(x,h1)
            h2 = self.gru2(h1,h2)            
        ofc3 = F.relu(self.fc3(h2))
        out = self.fc4(ofc3)
        return out

In [32]:
# StateFul
class Network2(nn.Module):
    def __init__(self):
        super(Network2, self).__init__() 
        self.gru1 = nn.GRUCell(fbins,L1).double()
        self.gru2 = nn.GRUCell(L1,L2).double()
        self.fc3  = nn.Linear(L2,L3).double()
        self.fc4  = nn.Linear(L3,nclass).double()
        
        self.h1   = Variable(torch.zeros(1, L1)).double()
        self.h2   = Variable(torch.zeros(1, L2)).double()
              
    def forward(self,inputs):
        h1 = self.h1
        h2 = self.h2
        for x in inputs:
            h1 = self.gru1(x,h1)
            h2 = self.gru2(h1,h2)
        
        self.h1 = h1
        self.h2 = h2
        
        ofc3 = F.relu(self.fc3(self.h2))
        out = self.fc4(ofc3)
        return out
    
    def init_hidden(self):
        self.h1 = Variable(torch.zeros(1, L1)).double()
        self.h2 = Variable(torch.zeros(1, L2)).double()
        return

In [36]:
# Loss and Optimizer
model = Network2()
lr    = 1e-3
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [37]:
# Train Model
for epoch in range(num_epochs):
    model.init_hidden()
    for i, (x,y) in enumerate(tr_loader):
        x = Variable(x.permute(2,0,1)) 
        y = Variable(y)   # Make variables

        optimizer.zero_grad()     # Zero gradients
        out  = model(x)           # Forward
        loss = criterion(out,y)   # Compute Loss
#         loss.backward()           # Backward
        loss.backward(retain_graph=True)           # Backward
        optimizer.step()          # Optimize
        
    print ('Epoch [%d/%d], Loss: %.4f' %(epoch+1, num_epochs, loss.data[0]))

print('Finished Training')

Epoch [1/20], Loss: 2.2925
Epoch [2/20], Loss: 2.2337
Epoch [3/20], Loss: 2.2246
Epoch [4/20], Loss: 2.2445
Epoch [5/20], Loss: 2.0657
Epoch [6/20], Loss: 1.7517
Epoch [7/20], Loss: 1.5179
Epoch [8/20], Loss: 1.7324
Epoch [9/20], Loss: 2.0176
Epoch [10/20], Loss: 2.0413
Epoch [11/20], Loss: 2.1895
Epoch [12/20], Loss: 1.6557
Epoch [13/20], Loss: 1.5920
Epoch [14/20], Loss: 1.2620
Epoch [15/20], Loss: 1.3822
Epoch [16/20], Loss: 1.2661
Epoch [17/20], Loss: 1.3792
Epoch [18/20], Loss: 1.3578
Epoch [19/20], Loss: 1.8078
Epoch [20/20], Loss: 1.2015
Finished Training
