# COMS 4995_002 Deep Learning Assignment 1
Due on Monday, Oct 9, 11:59pm

This assignment can be done in groups of at most 3 students. Everyone must submit on Courseworks individually.

Write down the UNIs of your group (if applicable)

Member 1: Yibo Jiang, yj2460

Member 2: Yiran Shi, ys3077

Member 3: Xiangan Liu, xl2699

In [62]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy.misc
import glob
import sys
# you shouldn't need to make any more imports

In [63]:
class NeuralNetwork(object):
    """
    Abstraction of neural network.
    Stores parameters, activations, cached values. 
    Provides necessary functions for training and prediction. 
    """
    def __init__(self, layer_dimensions, drop_prob=0.0, reg_lambda=0.0,use_batchnorm = False):
        """
        Initializes the weights and biases for each layer
        :param layer_dimensions: (list) number of nodes in each layer
        :param drop_prob: drop probability for dropout layers. Only required in part 2 of the assignment
        :param reg_lambda: regularization parameter. Only required in part 2 of the assignment
        """
        np.random.seed(1)
        
        self.parameters = {}
        self.t = 0
        self.M = {}
        self.V = {}
        self.dropMask = {}
        self.num_layers = len(layer_dimensions)
        self.drop_prob = drop_prob
        self.reg_lambda = reg_lambda
        self.beta1 = 0.9
        self.beta2 = 0.99
        self.epsilon = 10**(-8)
        self.bn_epsilon = 10**(-8)
        self.use_batchnorm = use_batchnorm
        for i in xrange(self.num_layers-1):
            #self.parameters['W'+str(i+1)]=np.random.normal(0,0.01,size=(layer_dimensions[i+1], layer_dimensions[i])
            if self.use_batchnorm:
                self.parameters['W'+str(i+1)] = np.resize(np.random.normal(0, np.sqrt(1.0/layer_dimensions[i]),layer_dimensions[i]*layer_dimensions[i+1]),(layer_dimensions[i+1],layer_dimensions[i])  )
            else:
                self.parameters['W'+str(i+1)]=np.random.normal(0,np.sqrt(2.0/layer_dimensions[i]),size=(layer_dimensions[i+1], layer_dimensions[i]))
            self.M ['W'+str(i+1)] = np.zeros((layer_dimensions[i+1],layer_dimensions[i]))
            self.V ['W'+str(i+1)] = np.zeros((layer_dimensions[i+1],layer_dimensions[i]))
            self.parameters['b'+str(i+1)]=np.random.normal(0,0.01,size=(layer_dimensions[i+1],1))
            self.M['b'+str(i+1)] = np.zeros((layer_dimensions[i+1],1))
            self.V['b'+str(i+1)] = np.zeros((layer_dimensions[i+1],1))
            if self.use_batchnorm and i<(self.num_layers-2):     
                self.M ['gamma'+str(i+1)] = np.zeros((1,layer_dimensions[i+1]))
                self.V ['gamma'+str(i+1)] = np.zeros((1,layer_dimensions[i+1]))
                self.M ['beta'+str(i+1)] = np.zeros((1,layer_dimensions[i+1]))
                self.V ['beta'+str(i+1)] = np.zeros((1,layer_dimensions[i+1]))
                self.parameters['gamma'+str(i+1)]=np.ones((1,layer_dimensions[i+1]))
                self.parameters['beta'+str(i+1)]=np.zeros((1,layer_dimensions[i+1]))
        # init parameters
        

    def affineForward(self, A, W, b):
        """
        Forward pass for the affine layer.
        :param A: input matrix, shape (L, S), where L is the number of hidden units in the previous layer and S is
        the number of samples
        :returns: the affine product WA + b, along with the cache required for the backward pass
        """
        out = None
        out = np.dot(W,A)+b
        cache = A
        return out,cache
        

    def activationForward(self, A, activation="relu"):
        """
        Common interface to access all activation functions.
        :param A: input to the activation function
        :param prob: activation funciton to apply to A. Just "relu" for this assignment.
        :returns: activation(A)
        """ 
        out,cache = self.relu(A)
        return out,cache


    def relu(self, X):
        out = None
        out=np.maximum(0,X)
        cache = X
        return out,cache
            
    def dropout(self, A, prob):
        """
        :param A: 
        :param prob: drop prob
        :returns: tuple (A, M) 
            WHERE
            A is matrix after applying dropout
            M is dropout mask, used in the backward pass
        Need to double check on that
        """
        retain_prob=1.0-prob
        M=np.random.binomial(n=1,p=retain_prob,size=A.shape)
        A=A*M
        A=A/retain_prob 
        return A, M

    def forwardPropagation(self, X):
        """
        Runs an input X through the neural network to compute activations
        for all layers. Returns the output computed at the last layer along
        with the cache required for backpropagation.
        :returns: (tuple) AL, cache
            WHERE 
            AL is activation of last layer
            cache is cached values for each layer that
                     are needed in further steps
        """
        cache = {}
        for i in range(self.num_layers-1):
            X_forward,cache_forward = self.affineForward(X, self.parameters['W'+str(i+1)], self.parameters['b'+str(i+1)])
            cache['X_Forward'+str(i+1)] = cache_forward
            if i == self.num_layers-2:
                X,cache_activate = X_forward,X_forward
                cache['X_ACTIVATE'+str(i+1)] = cache_activate
            else:
                if self.use_batchnorm:
                    X_forward,cache_batchnorm = self.batchnorm_forward(X_forward, self.parameters['gamma'+str(i+1)], self.parameters['beta'+str(i+1)],self.bn_epsilon)
                    cache['X_batchnorm'+str(i+1)] = cache_batchnorm
                X,cache_activate = self.activationForward(X_forward)
                if self.drop_prob>0:
                    X,cache_dropout = self.dropout(X,self.drop_prob)
                    self.dropMask[str(i+1)]=cache_dropout
                cache['X_ACTIVATE'+str(i+1)] = cache_activate
        AL = X
        return AL, cache
    
    def batchnorm_forward(self,X_forward, gamma, beta, eps):
        D, S = X_forward.shape
          #step1: calculate mean
        mu = 1./S * np.sum(X_forward, axis = 1)
      #step2: subtract mean vector of every trainings example
        xmu = (X_forward.transpose() - mu).transpose()
      #step3: following the lower branch - calculation denominator
        sq = xmu ** 2
      #step4: calculate variance
        var = 1./S * np.sum(sq, axis = 1)
      #step5: add eps for numerical stability, then sqrt
        sqrtvar = np.sqrt(var + eps)
      #step6: invert sqrtwar     
        ivar = 1./sqrtvar
      #step7: execute normalization
        xhat = (xmu.transpose() * ivar).transpose()
      #step8: Nor the two transformation steps
        gammax = (gamma.transpose() * xhat).transpose()
      #step9
        out = (gammax + beta).transpose()
      #store intermediate
        cache = (xhat,gamma,xmu,ivar,sqrtvar,var,eps)
        return out, cache
    
    def batchnorm_backward(self,dout, cache):

      #unfold the variables stored in cache
        xhat,gamma,xmu,ivar,sqrtvar,var,eps = cache

      #get the dimensions of the input/output
        D,S = dout.shape
      #step9
        dbeta = np.sum(dout, axis=1)
        dgammax = dout #not necessary, but more understandable
      #step8
        dgamma = np.sum(dgammax*xhat, axis=1)
        dxhat = (dgammax.transpose() * gamma).transpose()
      #step7
        divar = np.sum(dxhat*xmu, axis=1)
        dxmu1 = (dxhat.transpose() * ivar).transpose()
      #step6
        dsqrtvar = -1. /(sqrtvar**2) * divar
      #step5
        dvar = 0.5 * 1. /np.sqrt(var+eps) * dsqrtvar
      #step4
        dsq = 1. /S * (np.ones((S,D)) * dvar).transpose()
      #step3
        dxmu2 = 2 * xmu * dsq
      #step2
        dx1 = (dxmu1 + dxmu2)
        dmu = -1 * np.sum(dxmu1+dxmu2, axis=1)
      #step1
        dx2 = 1. /S * np.ones((S,D)) * dmu
      #step0
        dx = (dx1.transpose() + dx2).transpose()
        return dx, dgamma, dbeta
    
    def softmax_loss(self, x ,y):
        e_x = np.exp(x-np.tile(np.max(x,axis=0),(x.shape[0],1)))
        e_x = np.exp(x)
        #scores = (e_x / e_x.sum(axis=0)).T
        
        y=np.array(y)
        y=y.reshape(y.shape[0],1)
        i=np.indices(y.shape)[0]
        j=y.astype(int)
        label_scores=scores[i,j]
        
        dscores=np.multiply(scores,-1*label_scores)
        dscores[i,j]+=label_scores
        
        return 1-label_scores,-dscores.T
    
    def costFunction(self, AL, y):
        """
        :param AL: Activation of last layer, shape (num_classes, S)
        :param y: labels, shape (S)
        :param alpha: regularization parameter
        :returns cost, dAL: A scalar denoting cost and the gradient of cost
        """
        # compute loss
        
        data_loss,dscores=self.softmax_log(AL,y)
        reg_loss=0
        if self.reg_lambda > 0:
            # add regularization
            for i in xrange(self.num_layers-1):
                reg_loss+=self.reg_lambda*np.sum(self.parameters['W'+str(i+1)]*self.parameters['W'+str(i+1)])
            
        cost=data_loss+reg_loss
        
        # gradient of cost
        
        dAL = dscores
        return cost, dAL
    
    def affineBackward(self, dA_prev, cache, W):
        """
        Backward pass for the affine layer.
        :param dA_prev: gradient from the next layer.
        :param cache: cache returned in affineForward
        :returns dA: gradient on the input to this layer
                 dW: gradient on the weights
                 db: gradient on the bias
        """
        A=cache
        dA,dW,db=None,None,None
        dA=np.dot(W.T,dA_prev)

        A_new=A.reshape(A.shape[0],-1)
        dW=np.dot(A_new,dA_prev.T).T
        #dW = np.einsum('ij,kj->ikj',dA_prev,A_new)
        #db= np.expand_dims(dA_prev,axis=1)
        db = np.sum(dA_prev,axis=1,keepdims=True) 
        return dA, dW, db

    def activationBackward(self, dA, cache, activation="relu"):
        """
        Interface to call backward on activation functions.
        In this case, it's just relu. 
        """
        dA=self.relu_derivative(dA,cache)
        return dA
        
    def relu_derivative(self, dx, cached_x):
        dx,x=dx,cached_x
        dx[x<=0]=0
        return dx

    def dropout_backward(self, dA, cache):
        M=cache
        dA=M*dA
        
        return dA
    
    def softmax_log(self,x,y):
        e_x = np.exp(x-np.tile(np.max(x,axis=0),(x.shape[0],1)))
        scores = (e_x / e_x.sum(axis=0)).T
        
        y=np.array(y)
        y=y.reshape(y.shape[0],1)
        i=np.indices(y.shape)[0]
        j=y.astype(int)
        
        y_l=np.zeros_like(scores)
        y_l[i,j]=1.0
        
        loss= -np.sum(y_l*np.log(scores))/x.shape[1]
        scores[i,j]-=1
        
        return loss,scores.T/x.shape[1]
    
    def backPropagation(self, dAL, Y, cache):
        """
        Run backpropagation to compute gradients on all paramters in the model
        :param dAL: gradient on the last layer of the network. Returned by the cost function.
        :param Y: labels
        :param cache: cached values during forwardprop
        :returns gradients: dW and db for each weight/bias
        """
        gradients = {}
        
        for i in range(self.num_layers-1):
           
            
            layer_ind = self.num_layers - i - 1
            if i == 0:
                activate_cache = cache['X_ACTIVATE'+str(layer_ind)]
                dA_prev = dAL
                back_cache = cache['X_Forward'+str(layer_ind)]
                dAL, dW, db = self.affineBackward(dA_prev, back_cache,self.parameters['W'+str(layer_ind)])
                gradients['W'+str(layer_ind)] = dW
                gradients['b'+str(layer_ind)] = db
            else:
                if self.drop_prob>0:
                    M=self.dropMask[str(layer_ind)]
                    dAL=self.dropout_backward(dAL,M)
                activate_cache = cache['X_ACTIVATE'+str(layer_ind)]
                dA_prev = self.activationBackward(dAL, activate_cache)
                back_cache = cache['X_Forward'+str(layer_ind)]
                if self.use_batchnorm:
                    bn_cacbe = cache['X_batchnorm'+str(layer_ind)]
                    dA_prev, dgamma, dbeta = self.batchnorm_backward(dA_prev, bn_cacbe)
                    gradients['gamma'+str(layer_ind)] = dgamma
                    gradients['beta'+str(layer_ind)] = dbeta
                    gradients['gamma'+str(layer_ind)] = np.resize(dgamma,(1,layer_dimensions[layer_ind]))
                    gradients['beta'+str(layer_ind)] = np.resize(dbeta,(1,layer_dimensions[layer_ind]))

                dAL, dW, db = self.affineBackward(dA_prev, back_cache,self.parameters['W'+str(layer_ind)])
                gradients['W'+str(layer_ind)] = dW
                gradients['b'+str(layer_ind)] = db

            
            if self.drop_prob > 0:
                pass
                #call dropout_backward
           
            
        if self.reg_lambda > 0:
            for var in self.parameters:
                gradients[var] += self.reg_lambda*self.parameters[var]
                
            # add gradients from L2 regularization to each dW
        
        return gradients


    def updateParameters(self, gradients, alpha):
        """
        :param gradients: gradients for each weight/bias
        :param alpha: step size for gradient descent 
        """
        self.t += 1
        lr_t = alpha * np.sqrt(1 - (self.beta2)**self.t) / (1 - (self.beta1)**self.t)
        for var in self.parameters:
            
            #gradients[var] = np.sum(gradients[var],axis=2)#/(gradients[var].shape[2])
            #gradients[var] = np.clip(gradients[var],-1,1)
            
            self.M[var] = self.beta1 * self.M[var] + (1 - self.beta1) * gradients[var]
            self.V[var] = self.beta2 * self.V[var] + (1 - self.beta2) * (gradients[var] ** 2)
            self.parameters[var] -= lr_t*self.M[var]/(np.sqrt(self.V[var])+self.epsilon)
            #self.parameters[var] -= alpha*gradients[var]

    def train(self, X, y, iters=1000, alpha=0.0001, batch_size=100, print_every=100):
        #lengyue
        """
        :param X: input samples, each column is a sample
        :param y: labels for input samples, y.shape[0] must equal X.shape[1]
        :param iters: number of training iterations
        :param alpha: step size for gradient descent
        :param batch_size: number of samples in a minibatch
        :param print_every: no. of iterations to print debug info after
        """
        for i in range(0, iters):
            for j in range(len(y)/batch_size):
                # get minibatch
                #X_batch, y_batch = self.get_batch(X, y, batch_size)
                X_batch, y_batch = X[:,(j)*batch_size:(j+1)*batch_size],y[(j)*batch_size:(j+1)*batch_size]
                #print(X_batch.shape)
                #print(y_batch.shape)
                # forward prop
                AL, cache = self.forwardPropagation(X_batch)
                # compute loss
                cost, dAL = self.costFunction(AL, y_batch)
                # compute gradients
                gradients = self.backPropagation(dAL, y_batch, cache)
                # update weights and biases based on gradient
                self.updateParameters(gradients, alpha)

                if ((i*int(len(y)/batch_size)+j)+1) % print_every == 0 or (i==0 and j == 0):
                    #######compute train accuracy####### 
                    temp = self.drop_prob
                    self.drop_prob = 0
                    prediction_label = np.argmax(self.predict(X_train),axis=0)
                    #print(sum(prediction_label))
                    train_acc = sum(prediction_label == y_train)
                    #print(train_acc)
                    train_acc = float(train_acc)/float(len(y_train))

                    #######compute validation accuracy####### 
                    prediction_label = np.argmax(self.predict(X_validation),axis=0)
                    validation_acc = sum(prediction_label == y_validation)
                    validation_acc = float(validation_acc)/float(len(y_validation))
                    
                    self.drop_prob = temp
                    print ("Iteration "+str(int(i*len(y)/batch_size)+j+1)+", cost: "+str(np.sum(cost))+ ", train accuracy: "+str(train_acc) + ", validation accuracy: "+str(validation_acc) )
                    # print cost, train and validation set accuracies
                
    def predict(self, X):
        #lengyue
        """
        
        Make predictions for each sample
        """

        AL, cache = self.forwardPropagation(X)
        return AL
        #return y_pred

    
    def get_batch(self, X, y, batch_size):
        #lengyue
        """
        Return minibatch of samples and labels
        
        :param X, y: samples and corresponding labels
        :parma batch_size: minibatch size
        :returns: (tuple) X_batch, y_batch
        """
        idx = np.random.choice(len(y),batch_size)
        ind = 0
        X_batch = np.zeros((X.shape[0],batch_size))
        y_batch = np.zeros((batch_size,))
        for i in idx:
            # Get pair of (X, y) of the current minibatch/chunk
            X_batch[:,ind] = X[:,i]
            y_batch[ind] = y[i]
            ind += 1
        return X_batch, y_batch
        

In [64]:
# Helper functions, DO NOT modify this

def get_img_array(path):
    """
    Given path of image, returns it's numpy array
    """
    return scipy.misc.imread(path)

def get_files(folder):
    """
    Given path to folder, returns list of files in it
    """
    filenames = [file for file in glob.glob(folder+'*/*')]
    filenames.sort()
    return filenames

def get_label(filepath, label2id):
    """
    Files are assumed to be labeled as: /path/to/file/999_frog.png
    Returns label for a filepath
    """
    tokens = filepath.split('/')
    label = tokens[-1].split('_')[1][:-4]
    if label in label2id:
        return label2id[label]
    else:
        sys.exit("Invalid label: " + label)

In [65]:
# Functions to load data, DO NOT change these

def get_labels(folder, label2id):
    """
    Returns vector of labels extracted from filenames of all files in folder
    :param folder: path to data folder
    :param label2id: mapping of text labels to numeric ids. (Eg: automobile -> 0)
    """
    files = get_files(folder)
    y = []
    for f in files:
        y.append(get_label(f,label2id))
    return np.array(y)

def one_hot(y, num_classes=10):
    """
    Converts each label index in y to vector with one_hot encoding
    """
    y_one_hot = np.zeros((y.shape[0], num_classes))
    y_one_hot[y] = 1
    return y_one_hot.T

def get_label_mapping(label_file):
    """
    Returns mappings of label to index and index to label
    The input file has list of labels, each on a separate line.
    """
    with open(label_file, 'r') as f:
        id2label = f.readlines()
        id2label = [l.strip() for l in id2label]
    label2id = {}
    count = 0
    for label in id2label:
        label2id[label] = count
        count += 1
    return id2label, label2id

def get_images(folder):
    """
    returns numpy array of all samples in folder
    each column is a sample resized to 30x30 and flattened
    """
    files = get_files(folder)
    images = []
    count = 0
    
    for f in files:
        count += 1
        if count % 10000 == 0:
            print("Loaded {}/{}".format(count,len(files)))
        img_arr = get_img_array(f)
        img_arr = img_arr.flatten() / 255.0
        images.append(img_arr)
    X = np.column_stack(images)

    return X

def get_train_data(data_root_path):
    """
    Return X and y
    """
    train_data_path = data_root_path + 'train'
    id2label, label2id = get_label_mapping(data_root_path+'labels.txt')
    print(label2id)
    X = get_images(train_data_path)
    y = get_labels(train_data_path, label2id)
    return X, y

def get_labels_augmented(folder, label2id):
    """
    Returns vector of labels extracted from filenames of all files in folder
    :param folder: path to data folder
    :param label2id: mapping of text labels to numeric ids. (Eg: automobile -> 0)
    """
    files = get_files(folder)
    y = []
    for f in files:
        for i in range(2):
            y.append(get_label(f,label2id))
    return np.array(y)

def get_images_augmented(folder):
    """
    returns numpy array of all samples in folder
    each column is a sample resized to 30x30 and flattened
    """
    files = get_files(folder)
    images = []
    count = 0
    
    for f in files:
        count += 1
        if count % 10000 == 0:
            print("Loaded {}/{}".format(count,len(files)))
            
        original_img_arr = get_img_array(f)
        original_img_arr_ct = original_img_arr[4:28,4:28,:]
        original_img_arr_nw = original_img_arr[0:24,0:24,:]
        original_img_arr_ne = original_img_arr[0:24,8:32,:]
        original_img_arr_se = original_img_arr[8:32,8:32,:]
        original_img_arr_sw = original_img_arr[8:32,0:24,:]
        
        images.append(original_img_arr.flatten() / 255.0)
        
        original_img_arr_fliplr = np.fliplr(original_img_arr)
        original_img_arr_fliplr_ct = original_img_arr_fliplr[4:28,4:28,:]
        original_img_arr_fliplr_nw = original_img_arr_fliplr[0:24,0:24,:]
        original_img_arr_fliplrr_ne = original_img_arr_fliplr[0:24,8:32,:]
        original_img_arr_fliplr_se = original_img_arr_fliplr[8:32,8:32,:]
        original_img_arr_fliplr_sw = original_img_arr_fliplr[8:32,0:24,:]
        
        images.append(original_img_arr_fliplr.flatten() / 255.0)
    X = np.column_stack(images)
    
    return X

def get_train_data_augmented(data_root_path):
    """
    Return X and y
    """
    train_data_path = data_root_path + 'train'
    id2label, label2id = get_label_mapping(data_root_path+'labels.txt')
    print(label2id)
    X = get_images_augmented(train_data_path)
    y = get_labels_augmented(train_data_path, label2id)
    return X, y
def save_predictions(filename, y):
    """
    Dumps y into .npy file
    """
    np.save(filename, y)

In [66]:
# Load the data
data_root_path = '../cifar10-hw1/'
X, y = get_train_data(data_root_path) # this may take a few minutes

p = np.random.permutation(50000)
X_shuffle = X[:,p] 
y_shuffle = y[p]
#separate train data and train label
X_train = X[:,0:45000]
y_train = y[0:45000] 

#separate validation data and validation label
X_validation = X[:,45000:]
y_validation = y[45000:] 

X_test = get_images(data_root_path + 'test')
print('Data loading done')

{'horse': 7, 'automobile': 1, 'deer': 4, 'dog': 5, 'frog': 6, 'cat': 3, 'truck': 9, 'ship': 8, 'airplane': 0, 'bird': 2}
Loaded 10000/50000
Loaded 20000/50000
Loaded 30000/50000
Loaded 40000/50000
Loaded 50000/50000
Loaded 10000/10000
Data loading done


## Part 1

#### Simple fully-connected deep neural network

In [67]:
layer_dimensions = [X_train.shape[0],512,300,10]  # including the input and output layers
NN = NeuralNetwork(layer_dimensions)
NN.train(X_train, y_train, iters=60, alpha=0.001, batch_size=450, print_every=1000)

Iteration 1, cost: 2.65966746401, train accuracy: 0.101133333333, validation accuracy: 0.0968
Iteration 1000, cost: 1.42942072882, train accuracy: 0.5066, validation accuracy: 0.4844
Iteration 2000, cost: 1.23472314353, train accuracy: 0.560822222222, validation accuracy: 0.5104
Iteration 3000, cost: 1.09623097582, train accuracy: 0.608488888889, validation accuracy: 0.519
Iteration 4000, cost: 0.985076147579, train accuracy: 0.616311111111, validation accuracy: 0.513
Iteration 5000, cost: 0.879331030403, train accuracy: 0.634844444444, validation accuracy: 0.5072
Iteration 6000, cost: 0.81553886022, train accuracy: 0.645911111111, validation accuracy: 0.504


In [1]:
y_predicted = NN.predict(X_test)
save_predictions('ans1-xl2699', y_predicted)

NameError: name 'NN' is not defined

In [2]:
# test if your numpy file has been saved correctly
loaded_y = np.load('ans1-xl2699.npy')
print(loaded_y.shape)
loaded_y[:10]

NameError: name 'np' is not defined

## Part 2: Regularizing the neural network
#### Add dropout and L2 regularization

In [70]:
NN2 = NeuralNetwork(layer_dimensions, drop_prob=0.2, reg_lambda=0.001)
NN2.train(X_train, y_train, iters=80, alpha=0.001, batch_size=450, print_every=1000)

Iteration 1, cost: 4.44419548232, train accuracy: 0.103511111111, validation accuracy: 0.0994
Iteration 1000, cost: 1.77768767005, train accuracy: 0.483622222222, validation accuracy: 0.4696
Iteration 2000, cost: 1.73877025375, train accuracy: 0.506444444444, validation accuracy: 0.487
Iteration 3000, cost: 1.63541858491, train accuracy: 0.525466666667, validation accuracy: 0.5088
Iteration 4000, cost: 1.64792352779, train accuracy: 0.532044444444, validation accuracy: 0.5044
Iteration 5000, cost: 1.6298738109, train accuracy: 0.546911111111, validation accuracy: 0.5028
Iteration 6000, cost: 1.59858158053, train accuracy: 0.550844444444, validation accuracy: 0.5144
Iteration 7000, cost: 1.64465560129, train accuracy: 0.5496, validation accuracy: 0.5112
Iteration 8000, cost: 1.60452214182, train accuracy: 0.559355555556, validation accuracy: 0.5184


In [3]:
y_predicted2 = NN2.predict(X_test)
save_predictions('ans2-xl2699',y_predicted2)

NameError: name 'NN2' is not defined

In [4]:
# test if your numpy file has been saved correctly
loaded_y = np.load('ans2-xl2699.npy')
print(loaded_y.shape)
loaded_y[:10]

NameError: name 'np' is not defined

## Part 3: Extra Credit

Implement data augmentation and Batch normalization

In [73]:
# Data augmentation
data_root_path = '../cifar10-hw1/'
X_aug, y_aug = get_train_data_augmented(data_root_path) # this may take a few minutes

X_test = get_images(data_root_path + 'test')
print('Data loading done')

{'horse': 7, 'automobile': 1, 'deer': 4, 'dog': 5, 'frog': 6, 'cat': 3, 'truck': 9, 'ship': 8, 'airplane': 0, 'bird': 2}
Loaded 10000/50000
Loaded 20000/50000
Loaded 30000/50000
Loaded 40000/50000
Loaded 50000/50000
Loaded 10000/10000
Data loading done


In [74]:
X_train = X_aug[:,0:90000]
y_train = y_aug[0:90000]


#separate validation data and validation label
X_validation = []
y_validation = []
for i in range(5000):
    X_validation.append(X_aug[:,90000+i*2])
    y_validation.append(y_aug[90000+i*2])
X_validation = np.column_stack(X_validation)
y_validation = np.array(y_validation)

In [75]:
layer_dimensions = [X_train.shape[0],512,300,10]  # including the input and output layers
NN3 = NeuralNetwork(layer_dimensions,use_batchnorm = True,drop_prob=0.2, reg_lambda=0.001)
NN3.train(X_train, y_train, iters=70, alpha=0.001, batch_size=450, print_every=1000)

Iteration 1, cost: 3.41839278899, train accuracy: 0.238966666667, validation accuracy: 0.243
Iteration 1000, cost: 1.61249873852, train accuracy: 0.561066666667, validation accuracy: 0.5308
Iteration 2000, cost: 1.52207655137, train accuracy: 0.590577777778, validation accuracy: 0.554
Iteration 3000, cost: 1.49840337971, train accuracy: 0.601666666667, validation accuracy: 0.552
Iteration 4000, cost: 1.4674742215, train accuracy: 0.613566666667, validation accuracy: 0.5578
Iteration 5000, cost: 1.41807320586, train accuracy: 0.618733333333, validation accuracy: 0.5586
Iteration 6000, cost: 1.46224228672, train accuracy: 0.621577777778, validation accuracy: 0.5586
Iteration 7000, cost: 1.50645017809, train accuracy: 0.618544444444, validation accuracy: 0.5584
Iteration 8000, cost: 1.43191663466, train accuracy: 0.626866666667, validation accuracy: 0.5626
Iteration 9000, cost: 1.46994167048, train accuracy: 0.6246, validation accuracy: 0.5586
Iteration 10000, cost: 1.46946296436, train a

In [5]:
y_predicted3 = NN3.predict(X_test)
save_predictions('ans3_xl2699',y_predicted3)

NameError: name 'NN3' is not defined

In [6]:
# test if your numpy file has been saved correctly
loaded_y = np.load('ans3-xl2699.npy')
print(loaded_y.shape)
loaded_y[:10]

NameError: name 'np' is not defined