In [4]:
%matplotlib inline
import IPython.display

# Basic Imports
import gzip
import cPickle as pickle
import pandas as pd
import random
import seaborn
# import librosa
import sklearn
import numpy as np
from matplotlib import pyplot as plt

# PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as data_utils
from torch.autograd import Variable

In [54]:
ls -lah ../vae/

total 44M
drwxr-xr-x   32 Junior 1.0K Nov  4 17:56 ./
drwxr-xr-x+ 118 Junior 3.7K Dec 16 12:36 ../
-rw-r--r--    1 Junior 6.1K Jun  1  2017 .DS_Store
drwxr-xr-x   15 Junior  480 Dec 16 12:36 .git/
drwxr-xr-x   11 Junior  352 Nov  3 09:47 .ipynb_checkpoints/
-rw-r--r--    1 Junior 481K Oct 11 08:35 Audio VAE Mag & Phase.ipynb
-rw-r--r--    1 Junior 2.8M Sep 17 18:39 DCT Tricks.ipynb
-rw-r--r--    1 Junior 186K Oct 12 12:43 GAN Tutorial.ipynb
-rw-r--r--    1 Junior   57 May  2  2017 README.md
-rw-r--r--    1 Junior 447K Nov  2 13:04 RecoNet Model2.ipynb
-rw-r--r--    1 Junior 445K Nov  3 09:47 RecoNet.ipynb
-rw-r--r--    1 Junior 5.8K Oct 22 21:50 RecoNet.py
-rw-r--r--    1 Junior 157K Oct 24 22:43 Robustness of DNN Activations.ipynb
-rw-r--r--    1 Junior  21K Oct 22 21:46 Seq2Seq Tutorial.ipynb
-rw-r--r--    1 Junior 1.2M Oct 12 12:36 Spoken Digits VAE.ipynb
-rw-r--r--    1 Junior  19M May  3  2017 SpokenDigitDB.pkl.gz
-rw-r--r--    1 Junior  35K May 16  2017 VAE Audio

## Load Data

In [80]:
# Loaded Spoken Digits Dataset
dbfile ='../vae/SpokenDigitDB.pkl.gz'
with gzip.open(dbfile, 'rb') as ifile:
    df = pd.read_pickle(ifile)
    print('File loaded as '+ dbfile)

File loaded as ../vae/SpokenDigitDB.pkl.gz


In [189]:
# Padding & Truncating
maxlen = 84
pad    = lambda a, n: a[:,0: n] if a.shape[1] > n else np.hstack((a, np.min(a[:])*np.ones([a.shape[0],n - a.shape[1]])))
df.Magnitude = df.Magnitude.apply(pad,args=(maxlen,))  # MaxLen Truncation Voodoo :D
print(np.unique([np.shape(x)[1] for x in df.Magnitude]))

[84]


In [86]:
# Random Sample
sr = 8000  
j  = random.randrange(len(df))
IPython.display.Audio(data=df.Wave[j], rate=sr)

## Data Preparation

In [262]:
# Prepare Data
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

# Train Scaler
x_data = df.Magnitude.values
normsc = np.hstack(x_data)
scaler = MinMaxScaler().fit(normsc.T)

# Transform Data using Scaler
x_data = [scaler.transform(arr.T).T for arr in df.Magnitude.values]
x_data = np.dstack(x_data).transpose(2,0,1)

# Add Singleton
# x_data = x_data[...,None]         # Add singleton class
# y_data = df.Class.cat.codes.values
y_data = pd.get_dummies(df.Class).values
# y_data = y_data.astype('int')

# Shuffle & Split
x_train,x_test,y_train,y_test=train_test_split(x_data,y_data,
                              test_size=0.33, random_state=32)

# Print Dimensions
print 'Training Feature size:', x_train.shape
print 'Training Target  size:', y_train.shape
print ''
print 'Testing  Feature size:', x_test.shape
print 'Testing  Target  size:', y_test.shape

Training Feature size: (335, 64, 84)
Training Target  size: (335, 10)

Testing  Feature size: (166, 64, 84)
Testing  Target  size: (166, 10)


In [261]:
x_data.transpose(1,2,0).shape

(84, 501, 64)

In [118]:
# Create Torch DataLoader
feats = torch.from_numpy(x_train)
targs = torch.from_numpy(y_train)

dtrain = data_utils.TensorDataset(features, targets)
loader = data_utils.DataLoader(train,batch_size=10,shuffle=True)

In [56]:
## Recurrent Network Model

In [247]:
# Input Dimensions
_,fbins,steps = x_data.shape
nclass = len(np.unique(y_data))

# Parameters
L1 = 32
L2 = 20
L3 = 16

In [120]:
# Model
model = torch.nn.Sequential(
    nn.GRU(fbins,L1,1),
    nn.GRU(L1,L2,1),
    nn.GRU(L2,L3,1),
    nn.Linear(L3,nclass)
)

model.double()

Sequential(
  (0): GRU(64, 32)
  (1): GRU(32, 20)
  (2): GRU(20, 16)
  (3): Linear(in_features=16, out_features=2)
)

In [342]:
# Using GRU layers, Without Time Loop
gru1 = nn.GRU(fbins,L1).double()
gru2 = nn.GRU(L1,L2).double()
fc3  = nn.Linear(L2,L3).double()
fc4  = nn.Linear(L3,nclass).double()


for i, data in enumerate(loader):
    outputs = []
    x,y = data
    x,y = Variable(x.permute(2,0,1)),Variable(y)
    
    h1 = Variable(torch.zeros(1,x.size(1), L1)).double()
    h2 = Variable(torch.zeros(1,x.size(1), L2)).double()

    o1,h1 = gru1(x,h1)     # return output sequence o1
    o2,h2 = gru2(o1,h2)    # return output sequence o1
    lin  = F.relu(fc3(h2)) # use last state
    out  = F.softmax(fc4(lin),dim=0)

In [309]:
# Using Cell, With Time Loop 
gru1 = nn.GRUCell(fbins,L1).double()
gru2 = nn.GRUCell(L1,L2).double()
fc3  = nn.Linear(L2,L3).double()
fc4  = nn.Linear(L3,nclass).double()

for i, data in enumerate(loader):
    outputs = []
    x,y = data
    x,y = Variable(x.permute(2,0,1)),Variable(y)
    
    ht1 = Variable(torch.zeros(x.size(1), L1)).double()
    ht2 = Variable(torch.zeros(x.size(1), L2)).double()
    
    for xt1 in x:
        ht1 = gru1(xt1,ht1)
        ht2 = gru2(ht1,ht2)
        ot3 = F.relu(fc3(ht2))
        out = F.softmax(fc4(ot3),dim=-1)
        outputs += [out]

In [None]:
class Sequence(nn.Module):
    def __init__(self):
        super(Sequence, self).__init__() 
        self.gru1 = nn.GRU(fbins,L1)
        self.gru2 = nn.GRU(L1,L2)
        self.fc3  = nn.Linear(L3,nclass)
        
        
    def forward(self,inputs):
        for i in inputs:
            inp = i.view(1,1,-1)
            h1  = self.gru1(inp,h1)
            h2  = self.gru2(h1,h2)
            out = self.fc3(h2)

In [122]:
# Loss and Optimizer
lr = 1e-4
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [123]:
for epoch in range(2):  # loop over the dataset multiple times
    rloss = 0.0    # running loss
    for i, data in enumerate(loader):
        x, y = data
        x, y = Variable(x), Variable(y)  # Make variable

        optimizer.zero_grad()
        
        out  = model(x)           # Forward
        loss = criterion(out,y)   # Compute Loss
        loss.backward()           # Backward
        optimizer.step()          # Optimize
        
        # print statistics
        rloss += loss.data[0]
        if i % 10 == 0:           # Print every 10 batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, rloss / 10))
            rloss = 0.0

print('Finished Training')

RuntimeError: size mismatch, m1: [64 x 84], m2: [64 x 96] at /Users/soumith/minicondabuild3/conda-bld/pytorch_1512379211386/work/torch/lib/TH/generic/THTensorMath.c:1416