# HW6 Neural Nets

By Maia Rosengarten <br/>
SID: 23572580 <br/>
Login: cs-<br/>
April 14, 2017

In [1]:
import matplotlib
import numpy as np
from numpy import linalg as LA
import matplotlib.cm as cm
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import math
import scipy as sp
from scipy import io
from scipy.stats import logistic as sig
import pandas as pd

# Print arrays in full, without summarization. NumPy rejects threshold=np.nan
# with a ValueError; sys.maxsize is the value it recommends for
# "never truncate".
import sys
np.set_printoptions(threshold=sys.maxsize)

import sklearn
from sklearn.preprocessing import normalize
import csv
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
%matplotlib inline

# NumPy Helper Functions

In [2]:
def vec(arr):
    '''
    Reshapes an array into a column: (d,) --> (d, 1).
    Args:
        arr (ndarray) - array whose total size equals its first dimension
    Returns:
        ndarray of shape (d, 1), where d = arr.shape[0]
    '''
    numRows = arr.shape[0]
    column = arr.reshape((numRows, 1))
    return column

In [3]:
def arr(vec):
    '''
    Collapses an array into one dimension, e.g. (1, d) or (d, 1) --> (d,).
    Args:
        vec (ndarray) - array of any shape
    Returns:
        1-D copy of the input (the result of ndarray.flatten)
    '''
    flat = vec.flatten()
    return flat

# Utility Functions

In [4]:
def plotAccuracies(accuracies, depths, title="Prediction Evaluation Decision Tree On Census Data"):
    '''
        Plots accuracies as a function of depth
        Args:
            accuracies (sequence) - accuracy values, plotted on the y axis
            depths (sequence) - max_depth values, plotted on the x axis
            title (str) - figure title; defaults to the original hard-coded title
    '''

    plt.plot(depths, accuracies)
    plt.title(title)
    plt.xlabel('max_depth')
    plt.ylabel('accuracy')
    plt.show()

In [5]:
def generateCsv(aryPredictions, strCsvName):
    '''
    Writes predictions of testSet to csv file, one "index,prediction" row per entry
    Args:
        aryPredictions (sequence) - n predictions for a size n test (or valid) set
        strCsvName (str) - name of csv file to write to, without the '.csv' extension
    '''
    # newline='' hands line-ending control to the csv module; without it the
    # writer's '\r\n' terminator is translated again on Windows, which puts a
    # blank line between rows. The with-block closes the file on exit.
    with open(strCsvName + '.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows([i, prediction] for i, prediction in enumerate(aryPredictions))

# Training

In [6]:
# def computeCost(X, y, w, regConst):
#     prob = sp.special.expit(np.dot(X, w))
#     ret = 1/X.shape[0] * (regConst * np.linalg.norm(w)**2) - (vec(y).T.dot(np.log(prob + 0.000001)) + (1-vec(y)).T.dot(np.log(1 - prob + 0.000001)))
#     return arr(ret)

In [7]:
class NeuralNet:
    '''
    Fully connected network with one tanh hidden layer and sigmoid outputs.

    Weights:
        V - (sizeH, numFeatures) input-to-hidden weights
        W - (numClasses, sizeH + 1) hidden-to-output weights; the extra column
            multiplies a constant 1 appended to the hidden activations (bias)
    Training uses the output error (z - y) back-propagated through both layers.
    '''

    def __init__(self, trainX, trainY, V=None, W=None, l2=None, numHiddenLayers=800, learnRateW=0.01, learnRateV=0.01, hasDecay=True, hasDropOut=False, momentum=None, batchSize=None, numIterations=10000):
        '''
        Args:
            trainX (ndarray) - (n, numFeatures) training samples
            trainY (ndarray) - (n, numClasses) one-hot training targets
            V, W (ndarray) - optional initial weights; any weight left as None
                             is drawn randomly by initWeights
            l2 (float) - L2 regularization constant (used by trainMiniBatch only)
            numHiddenLayers (int) - number of hidden UNITS (historic name)
            learnRateW, learnRateV (float) - step sizes for W and V
            hasDecay (bool) - periodically reset both learning rates to 0.001
            hasDropOut (bool) - enable dropout in trainSGD
            momentum - stored but not used by any training routine
            batchSize (int) - mini-batch size (required by trainMiniBatch)
            numIterations (int) - number of samples to process during training
        '''
        self.X = trainX
        self.Y = trainY
        self.sizeH = numHiddenLayers
        self.learnRateW = learnRateW
        self.learnRateV = learnRateV
        self.hasDecay = hasDecay
        self.hasDropOut = hasDropOut
        self.momentum = momentum
        self.batchSize = batchSize
        self.numIter = numIterations
        self.V = V
        self.W = W
        self.predictions = []
        self.l2 = l2
        # Identity test, not `== None`: comparing an ndarray with == is
        # elementwise and its truth value is ambiguous (raises ValueError).
        if V is None or W is None:
            self.initWeights()
            # Keep whichever weight matrix the caller did supply.
            if V is not None:
                self.V = V
            if W is not None:
                self.W = W

    def setLearningRates(self, learnRateW, learnRateV):
        '''Sets the step sizes used for the W and V updates.'''
        self.learnRateW = learnRateW
        self.learnRateV = learnRateV

    def setWeights(self, W, V):
        '''Replaces both weight matrices. Note the argument order is (W, V).'''
        self.V = V
        self.W = W

    def setHasDecay(self, hasDecay):
        '''Enables or disables the learning-rate decay step.'''
        self.hasDecay = hasDecay

    def setMomentum(self, momentum):
        '''Stores the momentum value (not used by the training routines).'''
        self.momentum = momentum

    def initWeights(self):
        '''
        Draws V and W from N(0, 0.01^2).
        The number of outputs is taken from the one-hot targets when they are
        2-D, otherwise it falls back to 26 (one unit per letter).
        '''
        numFeatures = self.X.shape[1]
        numClasses = np.shape(self.Y)[1] if np.ndim(self.Y) == 2 else 26
        self.V = np.random.normal(loc=0.0, scale=0.01, size=(self.sizeH, numFeatures))
        self.W = np.random.normal(loc=0.0, scale=0.01, size=(numClasses, self.sizeH + 1))

    def _forward(self, samples, hiddenDropOutIndices=None):
        '''
        Forward pass for a batch.
        Args:
            samples (ndarray) - (n, numFeatures)
            hiddenDropOutIndices - optional indices of hidden units to zero
        Returns:
            h (sizeH + 1, n) hidden activations with a trailing row of ones,
            z (numClasses, n) sigmoid outputs
        '''
        h = np.tanh(np.dot(self.V, samples.T))
        if hiddenDropOutIndices is not None:
            h[hiddenDropOutIndices] = 0
        h = np.vstack((h, np.ones((1, h.shape[1]))))
        z = sp.special.expit(np.dot(self.W, h))
        return h, z

    def _gradV(self, samples, y, z, h):
        '''
        Gradient of the loss with respect to V.
        Shapes: samples (n, numFeatures), y (n, numClasses),
                z (numClasses, n), h (sizeH + 1, n); returns (sizeH, numFeatures).
        '''
        dLdH = np.dot(z.T - y, self.W)
        prod = np.multiply(dLdH.T, 1 - np.square(h))
        # Last row belongs to the constant bias unit, which has no incoming weights.
        return np.dot(prod, samples)[:-1]

    def trainSGD(self):
        '''
        Stochastic gradient descent, one sample per step, cycling through the
        training set in order. Returns (V, W).
        '''
        epoch = self.X.shape[0]
        numFeatures = self.X.shape[1]
        for step in range(1, self.numIter + 1):
            index = (step - 1) % epoch
            # Work on a copy: dropout zeroes inputs and must not alter self.X.
            sample = np.array(self.X[index], dtype=float).reshape(1, -1)
            y = np.reshape(self.Y[index], (1, -1))

            hiddenDropOutIndices = None
            if self.hasDropOut and step % 10000 == 0:
                inputDropOutIndices = np.random.choice(numFeatures, size=int(math.ceil(0.10 * numFeatures)), replace=False)
                hiddenDropOutIndices = np.random.choice(self.sizeH, size=int(math.ceil(0.5 * self.sizeH)), replace=False)
                sample[0, inputDropOutIndices] = 0

            h, z = self._forward(sample, hiddenDropOutIndices)

            grad_w = np.dot(z - y.T, h.T)
            grad_v = self._gradV(sample, y, z, h)

            if self.hasDecay and step % 50000 == 0:
                self.learnRateW = 0.001
                self.learnRateV = 0.001

            self.W = self.W - self.learnRateW * grad_w
            self.V = self.V - self.learnRateV * grad_v
        return self.V, self.W

    def trainMiniBatch(self):
        '''
        Mini-batch gradient descent. Each step draws batchSize samples with
        replacement and adds the L2 term when self.l2 is set. Returns (V, W).
        Raises:
            ValueError - if batchSize is not a positive integer
        '''
        if not self.batchSize or self.batchSize < 1:
            raise ValueError('trainMiniBatch requires a positive integer batchSize')
        epoch = self.X.shape[0]
        t = 0

        while t < self.numIter:
            if t % 20000 == 0:
                print('iter ' + str(t))
            indices = np.random.choice(epoch, size=self.batchSize, replace=True)
            samples = self.X[indices]
            y = self.Y[indices]

            h, z = self._forward(samples)

            grad_w = np.dot(z - y.T, h.T)
            grad_v = self._gradV(samples, y, z, h)
            if self.l2:
                grad_w = grad_w + 2 * self.l2 * self.W
                grad_v = grad_v + 2 * self.l2 * self.V

            # NOTE(review): this also fires at t == 0, so with hasDecay=True the
            # configured learning rates are replaced by 0.001 before the first
            # update. Kept as-is to preserve existing training behaviour.
            if self.hasDecay and t % 50000 == 0:
                self.learnRateW = 0.001
                self.learnRateV = 0.001

            self.W = self.W - (self.learnRateW * grad_w)
            self.V = self.V - (self.learnRateV * grad_v)

            t += self.batchSize
        return self.V, self.W

    def computeGradV(self, index, y, z, h, sample=None):
        '''
        Single-sample gradient of the loss with respect to V.
        Args:
            index (int) - row of self.X to use when `sample` is not given
            y - one-hot target for the sample (any shape with numClasses entries)
            z (ndarray) - (numClasses, 1) outputs
            h (ndarray) - (sizeH + 1, 1) hidden activations
            sample - optional explicit input row (e.g. after dropout)
        '''
        if sample is None:
            sample = self.X[index]
        return self._gradV(np.reshape(sample, (1, -1)), np.reshape(y, (1, -1)), z, h)

    def computeGradVBatch(self, indices, y, z, h):
        '''
            BATCH
                samples: (50, 785)
                y: (50,26)
                z: (26,50)
                h: (801,50)
                W: (26, 801)
                V: (800, 785)
                dLdH: (50,801)

                ret grad (800x785)
        '''
        return self._gradV(self.X[indices], y, z, h)

    def predict(self, testX):
        '''
        Predicts a 1-based class label for every row of testX.
        Args:
            testX (ndarray) - (m, numFeatures), same feature layout as trainX
        Returns:
            list of m labels (argmax of the outputs, plus 1); the same list is
            also stored in self.predictions, replacing any previous result
        '''
        sizeData = testX.shape[0]
        predictions = []
        for i in range(sizeData):
            if i % 50000 == 0:
                print('iter ' + str(i))
            _, z = self._forward(np.reshape(testX[i], (1, -1)))
            predictions.append(np.argmax(z) + 1)
        self.predictions = predictions
        return self.predictions


# Prediction

# Kaggle

In [8]:
from sklearn.preprocessing import StandardScaler

In [9]:
# Load the letters dataset (.mat file) into a dict keyed by variable name.
dictLetters = io.loadmat("hw6_data_dist/letters_data.mat")

In [10]:
train_x = dictLetters['train_x']
train_y = dictLetters['train_y']
test_x = dictLetters['test_x']

# Standardize features with statistics fitted on the training set only, then
# apply the same transform to the test set.
scaler = StandardScaler()
normalizer = scaler.fit(train_x)
train_x = normalizer.transform(train_x)
test_x = normalizer.transform(test_x)

# Shuffle samples and labels together by stacking the label as the last column.
combined = np.hstack((train_x, train_y))
np.random.shuffle(combined)
train_x = combined[:, :-1]
train_y = combined[:, -1]

# Append a constant-1 bias feature. The test set needs the same column so its
# width matches the network's input weights.
train_x = np.hstack((train_x, np.ones(shape=(train_x.shape[0], 1))))
test_x = np.hstack((test_x, np.ones(shape=(test_x.shape[0], 1))))

train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y, test_size=.2, random_state=42)

# DataFrame.as_matrix() was removed in pandas 1.0; .values works on every
# version. Cast to float so the targets are numeric regardless of the dummy
# dtype (uint8 or bool depending on the pandas version).
one_hot_train_y = pd.get_dummies(arr(train_y)).values.astype(float)



In [11]:
# Network configuration for the Kaggle run: 800 hidden units, mini-batches of
# 50 samples, L2 regularization and learning-rate decay enabled.
net = NeuralNet(
    train_x,
    one_hot_train_y,
    numHiddenLayers=800,
    l2=0.04,
    batchSize=50,
    numIterations=100000,
    hasDecay=True,
)

In [None]:
# Train with mini-batch gradient descent; returns the learned input-to-hidden
# (V) and hidden-to-output (W) weight matrices. Prints progress as 'iter <t>'.
V, W = net.trainMiniBatch()

iter 0


In [None]:
# Predict labels for the training split (labels are argmax + 1, i.e. 1-based).
predictions = net.predict(train_x)

In [None]:
# Training accuracy. accuracy_score is documented as (y_true, y_pred); the
# value is the same either way, but the arguments now follow the signature.
accuracy = accuracy_score(train_y, predictions)
accuracy

In [None]:
# train acc 78% with batch 100, numiter 100,000, l2=0.04

In [123]:
#numIterations = 10000, decayRate is every ceil(epoch/k) *=0.8 --> 79%

In [81]:
 

    
    
#     def computeHiddenValues(self, indexRanSample, inputDropOutIndices=None, hiddenDropOutIndices=None):
#         '''
#             BATCH:
#                 sample: 50x785
#                 V: (800, 785)
                
#                 ret hidden: (801, 50)
#             SGD:
#                 sample: 1x785
#                 V: (800, 785)
                
#                 ret hidden: (800, 1)
#         '''
#         sample = self.X[indexRanSample]
#         hidden = np.tanh(np.dot(self.V, sample.T))  
#         if self.hasDropOut:
#             sample[inputDropOutIndices]=0 
#             hidden[hiddenDropOutIndices]=0
#         if (self.batchSize):
#             hidden = np.vstack((hidden, np.array([1]*hidden.shape[1])))
#         else:    
#             hidden = np.vstack((vec(hidden), np.array(1)))
#         return hidden

#     def computeOutputValues(self, vectHidden):
#         '''
#             BATCH
#                 self.W: (26x801)
#                 vectHidden: (801x50)         
#                 ret output: (26x50)
#             SGD
#                 self.W: (26x801)
#                 vectHidden: (801x1)            
#                 ret output: (26x1)
                
#         '''
#         if (self.batchSize):
#             output = sp.special.expit(np.dot(self.W, vectHidden))
#         else:
#             output = sp.special.expit(np.dot(self.W, vec(vectHidden)))
#         return output


#     def computeGradW(self, z, y, h):
#         '''
#             BATCH
#                 z: (26,50)
#                 y: (50,26)
#                 h: (801x50)  
#                 ret (26x801)
            
#             SGD
#                 z = (26x1)
#                 y = (26x1)
#                 h = (801x1)
#                 ret (26x801)
#         '''
#         if (self.batchSize):
#             grad = np.dot(z-y.T, h.T)
#         else:
#             grad = np.dot(z-vec(y), h.T)
#         return grad