# HW6 Neural Nets

By Maia Rosengarten <br/>
SID: 23572580 <br/>
Login: cs-<br/>
April 14, 2017

In [3]:
import matplotlib
import numpy as np
from numpy import linalg as LA
import matplotlib.cm as cm
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import math
import scipy as sp
from scipy import io
from scipy.stats import logistic as sig
import pandas as pd

np.set_printoptions(threshold=np.nan)

import sklearn
from sklearn.preprocessing import normalize
# from sklearn.feature_extraction import DictVectorizer as dv

# from sklearn.preprocessing import Imputer as imp
import csv
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
%matplotlib inline

# NP FNS

In [4]:
def vec(arr): # d b 1 --> (d, 1)
    return arr.reshape((arr.shape[0], 1))

In [5]:
def arr(vec): # 1 by d --> (d, )
    return vec.flatten()

# UTIL FNS

In [43]:
def plotAccuracies(accuracies, depths):
    '''
        Plots accuracies as a function od depth
        Args:
            costs (ndarray) - lst of costs per iteration of gradient descent
    '''
   
    plt.plot(depths, accuracies)
    plt.title("Prediction Evaluation Decision Tree On Census Data")
    plt.xlabel('max_depth')
    plt.ylabel('accuracy')
    plt.show()

In [44]:
def generateCsv(aryPredictions, strCsvName):
    '''
    Writes predictions of testSet to csv file
    Args:
        aryPredictions (ndarray) - (nx1)-array of predictions given size n test (or valid) set
        strCsvName (str) - name of csv file to write to
    '''
    with open(strCsvName + '.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        for i in range(len(aryPredictions)):
            writer.writerow([i, aryPredictions[i]])
    csvfile.close()

In [68]:
def computeGradW(z, y, h):
    grad = np.dot(vec(z-y), vec(h).T)
#     print('grad W shape ' + str(grad.shape))
    return grad

In [69]:
def computeGradV(x, y, z, h, W, V):
    '''
        Args
            x - sample point (785 x 1)
            y - true labels for point x (26,)
            z - predicted labels for point x (26,)
            h = hidden layer vector
            W - weight matrix to output layer
            V - weight matrix to hidden layer
    
    '''
    #diagaonal expects a 0-diemnsional thing.
    diag = np.diag(np.dot(z-y, W))
    dLdH = 1-np.square(vec(h))
    grad = np.dot(np.dot(diag.T, dLdH), vec(x).T)[:-1]
#     print('grad V shape ' + str(dLdH.shape))
    return grad

# Training

In [101]:
def trainNeuralNetwork(X, Y, numIterations, learnRateW=0.01, learnRateV=0.01, decayRate=None):
    '''
        X: training images (images)
        y: training labels (labels)
        params: hyperparameters, e.g., learning rate ε, weight decay rate λ for L2 regularization, etc.
        1. Initialize the weights V and W randomly
        2. while (some stopping criterion)
            a. pick one image/label pair (Xi, yi) at random from the training set
            b. perform forward pass (compute hidden & output values and predicted labels) 
            c. perform backward pass (compute partial derivatives needed for gradient descent) 
            d. perform stochastic gradient descent update
        3. store V,W  
    '''
    
    V = np.random.normal(loc=0.0, scale=1/np.sqrt(785), size=(800, 785))
    W = np.random.normal(loc=0.0, scale=1/np.sqrt(801), size=(26, 801))
    for i in range(numIterations):
        randNum = np.random.choice(X.shape[0])
        sample = X[randNum]
        vectTrueLabel = Y[randNum]
        #forward_pass
        vectHidden = computeHiddenValues(X, V, randNum)
        vectPredicted = computeOutputValues(vectHidden, W)
        #backward_pass
        grad_w = computeGradW(vectPredicted, vectTrueLabel, vectHidden)
        grad_v = computeGradV(sample, vectTrueLabel, vectPredicted, vectHidden, W, V)
        W = W - learnRateW * grad_w
        V = V - learnRateV * grad_v
    return V, W

In [48]:
def preProcessData(data, meanMatrix, normalizingConstant):
    '''
    Center and normalize all your features. You are encouraged to try other preprocessing methods. 
    Shuffle the data!
    The mean vector that you subtract from the training data must be the same as the vector you 
    subtract from the test data! 
    This also applies to the values you divide by to normalize.     
    '''


In [49]:
# def computeCost(X, y, w, regConst):
#     prob = sp.special.expit(np.dot(X, w))
#     ret = 1/X.shape[0] * (regConst * np.linalg.norm(w)**2) - (vec(y).T.dot(np.log(prob + 0.000001)) + (1-vec(y)).T.dot(np.log(1 - prob + 0.000001)))
#     return arr(ret)

In [50]:
def computeHiddenValues(X, V, index):
    sample = X[index]
    hidden = np.tanh(np.dot(V, sample.T))
    hidden = np.vstack((vec(hidden), np.array(1)))
    return arr(hidden)
    

In [97]:
def computeOutputValues(vectHidden, W):
    output = sp.special.expit(np.dot(W, vec(vectHidden)))
    return output

In [52]:
def saveCheckPoint():
    '''
    Write your code so that it saves its progress in a file when you terminate the program 
    (see the Python signal package for how to capture an interrupt signal) 
    and/or after every fixed number of iterations, and to write code that loads this file 
    and allows you to resume training. 
    
    You might find it useful to look into the Python pickle module, or numpy.save. 
    This allows you to save and load arbitrary Python or numpy objects 
    (such as your neural network weights) as files.
    
    
    '''


In [53]:
def miniBatchGradDescent(data, k):
    '''
    Sample k data points instead of one data point and average the gradient update over those data points. 
    If k = n this becomes batch gradient descent. 
    
    Typically k is some number between 16 and 256 (50 is a good starting point). 
    Note that larger batches use more memory.
    
    '''

# Prediction

In [82]:
def predictNeuralNetwork(test_X, V, W):
    '''
    test_X: test images
    V, W: network weights (previously trained)
    
    1. for each test image x
        with V & W, perform forward pass (compute hidden & output values and predicted labels)
    2. return all the predicted labels
    
    '''
    predictions = []
    for i in range(test_X.shape[0]):
        vectHidden = computeHiddenValues(test_X, V, i)
        vectPredict = computeOutputValues(vectHidden, W)
        prediction = np.argmax(vectPredict)
        predictions.append(prediction + 1)
    return predictions

# Kaggle

In [55]:
from sklearn.preprocessing import StandardScaler

In [56]:
dictLetters = sp.io.loadmat("hw6_data_dist/letters_data.mat")

In [57]:
train_x = dictLetters['train_x']
train_y = dictLetters['train_y']
test_x = dictLetters['test_x']


# DO WE NORMALIZE BEFORE WE ADD THE BIAS OR AFTER?
scaler = StandardScaler()
normalizer = scaler.fit(train_x)
train_x = normalizer.transform(train_x)
test_x = normalizer.transform(test_x)

combined = np.hstack((train_x, train_y))
np.random.shuffle(combined)
train_x = combined[:, :-1]
train_y = combined[:, -1]



In [102]:
train_x = np.hstack((train_x, np.ones(shape=(train_x.shape[0], 1))))

In [103]:
train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y, test_size=.2, random_state=42)

In [104]:
one_hot_train_y = pd.get_dummies(arr(train_y)).as_matrix()

In [105]:
V, W = trainNeuralNetwork(train_x, one_hot_train_y, 100000, decayRate=None)

ValueError: shapes (800,785) and (786,) not aligned: 785 (dim 1) != 786 (dim 0)

In [98]:
predictions = predictNeuralNetwork(valid_x, V, W)

In [99]:
accuracy = accuracy_score(valid_y, predictions)

In [100]:
accuracy

0.79326923076923073