In [8]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score
from scipy.signal import lfilter
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable

from ss_perf_utils import *

# Seed both RNGs used in this notebook (NumPy for train_numpy, torch for
# train_pytorch) so weight initialisation is repeatable across runs.
np.random.seed(2)
torch.manual_seed(2)

# Run on the first CUDA GPU when one is usable, otherwise fall back to the CPU
# so the notebook still executes on machines without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Floating-point type used for every tensor created in the notebook.
dtype = torch.float

In [9]:
def train_numpy(X,y,layer_dims,num_iters,lr=0.01,add_del=False):
    """Full-batch gradient descent for a one-hidden-layer network, in NumPy.

    The network is ReLU hidden layer -> sigmoid output, trained on the binary
    cross-entropy loss with hand-written backpropagation.

    Args:
        X: 2-D array holding one sample per column (din x m).
        y: 0/1 targets laid out like the network output (dout x m); the loss
            is reduced to a single scalar, which only works for dout == 1.
        layer_dims: sequence (din, dh, dout) of layer widths.
        num_iters: number of gradient-descent steps.
        lr: learning rate.
        add_del: when True, once the step index exceeds ``tau`` (taken from
            ``init_add_del``) the hidden layer is passed through
            ``delete_neurons_numpy`` and ``add_neurons_numpy`` after every
            update.

    Returns:
        (losses, num_neurons): the loss recorded at every step, and the
        number of rows of ``b1`` (hidden width) after every step.
    """
    def _logistic(z):
        return 1./(1+np.exp(-z))

    n_in, n_hidden, n_out = tuple(layer_dims)
    n_samples = X.shape[1]
    delta, prob, epsilon, max_hidden_size, tau = init_add_del()
    loss_history = []
    width_history = []

    # Draw order (W1, b1, W2, b2) is kept so a fixed seed gives the same init.
    W1 = np.random.randn(n_hidden, n_in)
    b1 = np.random.randn(n_hidden, 1)
    W2 = np.random.randn(n_out, n_hidden)
    b2 = np.random.randn(n_out, 1)

    # Progress is reported roughly twenty times over the whole run.
    report_every = max(1, num_iters // 20)

    for step in range(num_iters):
        # ---- forward pass ----
        Z1 = np.dot(W1, X) + b1
        hidden = np.clip(Z1, 0, None)  # relu
        Z2 = np.dot(W2, hidden) + b2
        prob_raw = _logistic(Z2)
        # Keep predictions away from 0 and 1 so the logs below stay finite.
        yhat = prob_raw.clip(1e-6, 1.-1e-6)

        pos_term = np.dot(y, np.log(yhat).T)
        neg_term = np.dot(1-y, np.log(1-yhat).T)
        cost = 1./n_samples*(-pos_term-neg_term)
        loss_history.append(cost.squeeze().item())

        # ---- backward pass ----
        dyhat = -(y/yhat - (1-y)/(1-yhat))
        dZ2 = dyhat*prob_raw*(1-prob_raw)
        dW2 = 1./n_samples*np.dot(dZ2, hidden.T)
        db2 = 1./n_samples*np.sum(dZ2, 1, keepdims=True)
        # ReLU passes gradient only where its input was non-negative.
        dZ1 = np.where(Z1 < 0, 0, np.dot(W2.T, dZ2))
        dW1 = 1./n_samples*np.dot(dZ1, X.T)
        db1 = 1./n_samples*np.sum(dZ1, 1, keepdims=True)

        # ---- parameter update (in place) ----
        for param, grad in ((W1, dW1), (b1, db1), (W2, dW2), (b2, db2)):
            param -= lr*grad

        # ---- optional structural adaptation of the hidden layer ----
        if add_del and t_exceeds(step, tau):
            W1, b1, W2, b2 = delete_neurons_numpy(W1, b1, W2, b2, delta, prob)
            W1, b1, W2, b2 = add_neurons_numpy(W1, b1, W2, b2, loss_history,
                                               epsilon, delta,
                                               max_hidden_size, tau, prob)
        width_history.append(b1.shape[0])

        if step % report_every == 0:
            print('loss after iteration %i: %f' % (step, loss_history[-1]))
            if add_del:
                print('# neurons after iteration %i: %d' % (step, width_history[-1]))

    return loss_history, width_history


def t_exceeds(step, tau):
    """Return True once the step index is strictly past the warm-up ``tau``."""
    return step > tau

In [10]:
def train_pytorch(X,y,layer_dims,num_iters,lr=0.01,add_del=False):
    """Full-batch gradient descent for a one-hidden-layer network, in PyTorch.

    Mirrors ``train_numpy``: ReLU hidden layer -> sigmoid output, binary
    cross-entropy loss, hand-written backpropagation (autograd is not used).
    Parameters are created on the same device and with the same dtype as
    ``X``, so the function does not depend on notebook-level globals.

    Args:
        X: 2-D floating-point tensor holding one sample per column (din x m).
        y: 0/1 target tensor with the same shape as the network output
            (dout x m), on the same device as ``X``.
        layer_dims: sequence (din, dh, dout) of layer widths.
        num_iters: number of gradient-descent steps.
        lr: learning rate.
        add_del: when True, once the step index exceeds ``tau`` (taken from
            ``init_add_del``) the hidden layer is passed through
            ``delete_neurons_pytorch`` and ``add_neurons_pytorch`` after every
            update.

    Returns:
        (losses, num_neurons): the loss (Python float) recorded at every
        step, and the number of rows of ``b1`` (hidden width) after every step.
    """
    din,dh,dout = tuple(layer_dims)
    m = X.shape[1]
    delta,prob,epsilon,max_hidden_size,tau = init_add_del()
    losses = []
    num_neurons = []

    # Follow the input tensor rather than global state, so a CPU tensor is
    # never multiplied with GPU parameters (or float32 with float64).
    device = X.device
    param_opts = dict(dtype=X.dtype, device=device, requires_grad=False)
    W1 = torch.randn(dh, din, **param_opts)
    b1 = torch.randn(dh, 1, **param_opts)
    W2 = torch.randn(dout, dh, **param_opts)
    b2 = torch.randn(dout, 1, **param_opts)

    # Loop-invariant objects are built once instead of on every iteration.
    criterion = nn.BCELoss()
    report_every = max(1,num_iters // 20)

    for t in range(num_iters):
        # Forwardprop
        Z1 = torch.mm(W1,X)+b1
        A = Z1.clamp(min=0) # relu
        Z2 = torch.mm(W2,A)+b2
        sig = torch.sigmoid(Z2)  # evaluated once, reused in the backward pass
        # Keep predictions away from 0 and 1 so the divisions below stay finite.
        yhat = sig.clamp(1e-6,1.-1e-6)

        # BCELoss returns a 0-dim tensor; .item() converts it to a float.
        losses.append(criterion(yhat,y).item())

        # Backprop
        dyhat = -(torch.div(y,yhat) - torch.div(1-y, 1-yhat))
        dZ2 = dyhat*sig*(1-sig)
        dW2 = 1./m*torch.mm(dZ2,A.t())
        db2 = 1./m*torch.sum(dZ2,1,keepdim=True)
        dZ1 = torch.mm(W2.t(),dZ2)
        dZ1[Z1 < 0] = 0  # ReLU blocks gradient where its input was negative
        dW1 = 1./m*torch.mm(dZ1,X.t())
        db1 = 1./m*torch.sum(dZ1,1,keepdim=True)

        # gradient descent
        W1 -= lr*dW1
        b1 -= lr*db1
        W2 -= lr*dW2
        b2 -= lr*db2

        if add_del and t>tau:
            W1,b1,W2,b2 = delete_neurons_pytorch(W1,b1,W2,b2,delta,prob)
            W1,b1,W2,b2 = add_neurons_pytorch(W1,b1,W2,b2,losses,epsilon,delta,max_hidden_size,tau,prob,device)
        num_neurons.append(b1.shape[0])

        if t % report_every == 0:
            print('loss after iteration %i: %f' % (t, losses[-1]))
            if add_del:
                print('# neurons after iteration %i: %d' % (t, num_neurons[-1]))

    return losses,num_neurons

In [11]:
# Experiment configuration shared by the NumPy and PyTorch runs below.
num_iters = 10000       # gradient-descent steps per run
num_samples = 1000      # passed to gen_data as `samples`
num_features = 2        # input width (din)
num_hidden = 100        # initial hidden-layer width (dh)
num_classes = 1         # output width (dout)
lr = 0.1                # learning rate
layer_dims = [
    num_features,
    num_hidden,
    num_classes,
]

# gen_data comes from ss_perf_utils; X and y feed both training runs.
# NOTE(review): the meaning of x1/x2 and of `var` is not visible here - confirm.
X, y, x1, x2 = gen_data(samples=num_samples, var=0.01)

In [12]:
# train_numpy expects one sample per column and targets as a single row.
X_np = X.T
y_np = y.reshape(1,-1)

# time.clock() was removed in Python 3.8; perf_counter() is the
# high-resolution wall-clock timer intended for measuring elapsed time.
tin = time.perf_counter()
losses,num_neurons = train_numpy(X_np,y_np,layer_dims,num_iters,lr=lr,add_del=False)
tout = time.perf_counter()
tdiff = tout-tin
print('\ntime for numpy = %f' % tdiff)

loss after iteration 0: 5.490647
loss after iteration 500: 0.027052
loss after iteration 1000: 0.013905
loss after iteration 1500: 0.009344
loss after iteration 2000: 0.007019
loss after iteration 2500: 0.005496
loss after iteration 3000: 0.004309
loss after iteration 3500: 0.003697
loss after iteration 4000: 0.003256
loss after iteration 4500: 0.002912
loss after iteration 5000: 0.002634
loss after iteration 5500: 0.002404
loss after iteration 6000: 0.002211
loss after iteration 6500: 0.002047
loss after iteration 7000: 0.001905
loss after iteration 7500: 0.001781
loss after iteration 8000: 0.001672
loss after iteration 8500: 0.001576
loss after iteration 9000: 0.001490
loss after iteration 9500: 0.001413
time for numpy = 128.919129


In [13]:
# train_pytorch expects one sample per column and targets as a single row.
X_pt = torch.tensor(X,device=device,dtype=dtype).t()
y_pt = torch.tensor(y,device=device,dtype=dtype).reshape(1,-1)

# CUDA kernels are launched asynchronously: drain the queue before starting
# and before stopping the clock so the measurement covers all queued work.
if X_pt.is_cuda:
    torch.cuda.synchronize()
# time.clock() was removed in Python 3.8; perf_counter() is the
# high-resolution wall-clock timer intended for measuring elapsed time.
tin = time.perf_counter()
losses,num_neurons = train_pytorch(X_pt,y_pt,layer_dims,num_iters,lr=lr,add_del=False)
if X_pt.is_cuda:
    torch.cuda.synchronize()
tout = time.perf_counter()
tdiff = tout-tin
print('\ntime for pytorch = %f' % tdiff)

loss after iteration 0: 3.980994
loss after iteration 500: 0.026215
loss after iteration 1000: 0.013299
loss after iteration 1500: 0.008580
loss after iteration 2000: 0.006365
loss after iteration 2500: 0.005047
loss after iteration 3000: 0.004166
loss after iteration 3500: 0.003537
loss after iteration 4000: 0.003071
loss after iteration 4500: 0.002710
loss after iteration 5000: 0.002425
loss after iteration 5500: 0.002196
loss after iteration 6000: 0.002006
loss after iteration 6500: 0.001849
loss after iteration 7000: 0.001715
loss after iteration 7500: 0.001599
loss after iteration 8000: 0.001498
loss after iteration 8500: 0.001409
loss after iteration 9000: 0.001330
loss after iteration 9500: 0.001260
time for pytorch = 9.194320


In [14]:
#losses = np.array(losses)
#filt_neurons = lfilter([1.0/50]*50,1,num_neurons)
#filt_neurons[filt_neurons<1] = num_hidden

#plt.plot(losses,color='blue')
#plt.title('Loss')
#plt.show()

#plt.plot(filt_neurons,color='green')
#plt.title('# Neurons')
#plt.show()