# COMS 4995_002 Deep Learning Assignment 1
Due on Monday, Oct 9, 11:59pm

This assignment can be done in groups of at most 3 students. Everyone must submit on Courseworks individually.

Write down the UNIs of your group (if applicable)

Member 1: Rohit Banerjee, rb3246

In [7]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy.misc
import glob
import sys
# you shouldn't need to make any more imports

In [59]:
class NeuralNetwork(object):
    def __init__(self, layer_dimensions, drop_prob=0.0, reg_lambda=0.0):
        np.random.seed(1)

        self.parameters = {}
        self.parameters["batch_index"] = 0
        self.num_layers = len(layer_dimensions)
        self.drop_prob = drop_prob
        self.reg_lambda = reg_lambda

        print("Num Layers: ", self.num_layers)
        for l in range(1, self.num_layers):
            self.parameters["W" + str(l)] = np.random.randn(layer_dimensions[l], layer_dimensions[l-1])
            print("W"+str(l), " -- ", self.parameters["W" + str(l)].shape)
            self.parameters["b" + str(l)] = np.zeros((layer_dimensions[l], 1))
            print("b"+str(l), " -- ", self.parameters["b" + str(l)].shape)

    def affineForward(self, A, W, b):
        assert(W.shape[1] == A.shape[0])
        cache = (A, W, b)
        return np.dot(W,A)+b, cache

    def activationForward(self, A, activation="relu"):
        if activation=="relu":
            return self.relu(A)

    def relu(self, X):
        return np.maximum(0, X)

    def dropout(self, A, prob):
        M = None
        return A, M

    def forwardPropagation(self, X):
        AL = None
        cache = []

        AL = X
        for l in range (1, self.num_layers):
            W = self.parameters["W"+str(l)]
            b = self.parameters["b"+str(l)]
            AL, cacheL = self.affineForward(AL, W, b)
            print("affineForward for layer ", l, " -- shape of output: ", AL.shape)
            AL = self.activationForward(AL, "relu")
            print("activationForward for layer ", l, " -- shape of output: ", AL.shape)
            cache.append(cacheL)

        print("forwardPropagation finished -- cacheSize: ", len(cache))
        return AL, cache

    def affineBackward(self, dA_prev, cache):
        A_prev, W, b = cache
        dA, dW, db = None, None, None

        m = A_prev.shape[1]
        dA = np.dot(W.T,dA_prev)
        dW = (1/m) * np.dot(dA_prev, A_prev.T)
        db = (1/m) * np.sum(dA_prev, axis=1, keepdims=True)

        return dA, dW, db

    def relu_derivative(self, dx):
        dx[dx<=0] = 0
        return dx

    def activationBackward(self, dA, cache, activation="relu"):
        if activation=="relu":
            return self.relu_derivative(dA)

    def dropout_backward(self, dA, cache):
        return dA

    def backPropagation(self, dAL, Y, cache):
        gradients = {}

        for l in reversed(range(1, self.num_layers)):
            current_cache = cache[l-1]
            dAL_nonlinear = self.activationBackward(dAL, current_cache, "relu")
            dAL, dW, db = self.affineBackward(dAL_nonlinear, current_cache)
            gradients["dW"+str(l)] = dW
            gradients["db"+str(l)] = db

            #if self.drop_prob > 0:
            #call dropout_backward

        #if self.reg_lambda > 0:
        # add gradients from L2 regularization to each dW

        return gradients

    def updateParameters(self, gradients, alpha):
        for l in range(1, self.num_layers-1):
            dW = gradients["dW"+str(l)]
            db = gradients["db"+str(l)]
            print("updateParameters -- dW.shape: ", dW.shape, ", db.shape: ", db.shape)
            
            W_idx = "W"+str(l)
            b_idx = "b"+str(l)
            assert(dW.shape == self.parameters[W_idx].shape)
            assert(db.shape == self.parameters[b_idx].shape)

            self.parameters[W_idx] = self.parameters[W_idx] - alpha*dW
            self.parameters[b_idx] = self.parameters[b_idx] - alpha*db

    def get_batch(self, X, y, batch_size):
        bi = self.parameters["batch_index"]
        self.parameters["batch_index"] = self.parameters["batch_index"] + batch_size
        return X[:,bi:bi+batch_size], y[bi:bi+batch_size]
    
    def costFunction(self, AL, y):
        cost = (-1/AL.shape[1]) * np.sum(np.log(AL)*y + np.log(1-AL)*(1-y))
        cost = np.squeeze(cost)
        print("Cost: ", cost)
        
        #if self.reg_lambda > 0:
        # add regularization

        dAL = -(np.divide(y, AL) - np.divide(1-y, 1-AL))
        print("dAL: ", dAL)
        
        return cost, dAL

    def train(self, X, y, iters=1000, alpha=0.0001, batch_size=100, print_every=100):
        for i in range(0, iters):
            X_batch, y_batch = self.get_batch(X, y, batch_size)
            print(X_batch.shape)
            print(y_batch.shape)
            
            y_hat, cache = self.forwardPropagation(X_batch)
            print(y_hat.shape)
            print(len(cache))
            
            cost, dAL = self.costFunction(y_hat, y_batch)            
            gradients = self.backPropagation(dAL, y_batch, cache)
            self.updateParameters(gradients, alpha)

            if i % print_every == 0:
                print("Cost: ", cost)

    def predict(self, X):
        y_pred, cache = self.forwardPropagation(X)
        return y_pred

In [12]:
# Helper functions, DO NOT modify this

def get_img_array(path):
    """
    Given path of image, returns it's numpy array
    """
    return scipy.misc.imread(path)

def get_files(folder):
    """
    Given path to folder, returns list of files in it
    """
    filenames = [file for file in glob.glob(folder+'*/*')]
    filenames.sort()
    return filenames

def get_label(filepath, label2id):
    """
    Files are assumed to be labeled as: /path/to/file/999_frog.png
    Returns label for a filepath
    """
    tokens = filepath.split('/')
    label = tokens[-1].split('_')[1][:-4]
    if label in label2id:
        return label2id[label]
    else:
        sys.exit("Invalid label: " + label)

In [9]:
# Functions to load data, DO NOT change these

def get_labels(folder, label2id):
    """
    Returns vector of labels extracted from filenames of all files in folder
    :param folder: path to data folder
    :param label2id: mapping of text labels to numeric ids. (Eg: automobile -> 0)
    """
    files = get_files(folder)
    y = []
    for f in files:
        y.append(get_label(f,label2id))
    return np.array(y)

def one_hot(y, num_classes=10):
    """
    Converts each label index in y to vector with one_hot encoding
    """
    y_one_hot = np.zeros((y.shape[0], num_classes))
    y_one_hot[y] = 1
    return y_one_hot.T

def get_label_mapping(label_file):
    """
    Returns mappings of label to index and index to label
    The input file has list of labels, each on a separate line.
    """
    with open(label_file, 'r') as f:
        id2label = f.readlines()
        id2label = [l.strip() for l in id2label]
    label2id = {}
    count = 0
    for label in id2label:
        label2id[label] = count
        count += 1
    return id2label, label2id

def get_images(folder):
    """
    returns numpy array of all samples in folder
    each column is a sample resized to 30x30 and flattened
    """
    files = get_files(folder)
    images = []
    count = 0
    
    for f in files:
        count += 1
        if count % 10000 == 0:
            print("Loaded {}/{}".format(count,len(files)))
        img_arr = get_img_array(f)
        img_arr = img_arr.flatten() / 255.0
        images.append(img_arr)
    X = np.column_stack(images)

    return X

def get_train_data(data_root_path):
    """
    Return X and y
    """
    train_data_path = data_root_path + 'train'
    id2label, label2id = get_label_mapping(data_root_path+'labels.txt')
    print(label2id)
    X = get_images(train_data_path)
    y = get_labels(train_data_path, label2id)
    return X, y

def save_predictions(filename, y):
    """
    Dumps y into .npy file
    """
    np.save(filename, y)

In [10]:
# Load the data
data_root_path = '/home/rbanerjee/Downloads/cifar10-hw1/'
X_train, y_train = get_train_data(data_root_path) # this may take a few minutes
X_test = get_images(data_root_path + 'test')
print('Data loading done')

{'frog': 6, 'bird': 2, 'deer': 4, 'airplane': 0, 'automobile': 1, 'ship': 8, 'dog': 5, 'horse': 7, 'truck': 9, 'cat': 3}
Loaded 10000/50000
Loaded 20000/50000
Loaded 30000/50000
Loaded 40000/50000
Loaded 50000/50000
Loaded 10000/10000
Data loading done


## Part 1

#### Simple fully-connected deep neural network

In [60]:
layer_dimensions = [X_train.shape[0], 5, 10]  # including the input and output layers
NN = NeuralNetwork(layer_dimensions)
NN.train(X_train, y_train, iters=1, alpha=0.001, batch_size=2, print_every=100)

Num Layers:  3
W1  --  (5, 3072)
b1  --  (5, 1)
W2  --  (10, 5)
b2  --  (10, 1)
(3072, 2)
(2,)
affineForward for layer  1  -- shape of output:  (5, 2)
activationForward for layer  1  -- shape of output:  (5, 2)
affineForward for layer  2  -- shape of output:  (10, 2)
activationForward for layer  2  -- shape of output:  (10, 2)
forwardPropagation finished -- cacheSize:  2
(10, 2)
2
Cost:  nan
dAL:  [[       -inf        -inf]
 [       -inf        -inf]
 [       -inf -0.13088206]
 [       -inf        -inf]
 [-0.03467389 -0.03393816]
 [-0.04190047        -inf]
 [       -inf        -inf]
 [-0.01221051 -0.02269812]
 [-0.01219686 -0.01211497]
 [-0.01380152        -inf]]
updateParameters -- dW.shape:  (5, 3072) , db.shape:  (5, 1)
Cost:  nan




In [None]:
y_predicted = NN.predict(X_test)
save_predictions('ans1-rb3246', y_predicted)

In [None]:
# test if your numpy file has been saved correctly
loaded_y = np.load('ans1-rb3246.npy')
print(loaded_y.shape)
loaded_y[:10]

## Part 2: Regularizing the neural network
#### Add dropout and L2 regularization

In [None]:
NN2 = NeuralNetwork(layer_dimensions, drop_prob=0, reg_lambda=0)
NN2.train(X_train, y_train, iters=1000, alpha=0.00001, batch_size=1000, print_every=10)

In [None]:
y_predicted2 = NN2.predict(X)
save_predictions(y_predicted, 'ans2-uni')