In [1]:
import cloudpickle as pickle
import numpy as np
import pandas as pd
import os
import math
from sklearn.model_selection import train_test_split
from sklearn import datasets, linear_model
import scipy.sparse
import copy
import random
import matplotlib 
import matplotlib.pyplot as plt

In [2]:
def get_CIFAR10_data(cifar10_dir, num_training=49000, num_validation=1000, num_test=1000):
    '''
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the neural net classifier.
    '''
    # Load the raw CIFAR-10 data
    X_train, y_train, X_test, y_test = load(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    X_train = X_train.astype(np.float64)
    X_val = X_val.astype(np.float64)
    X_test = X_test.astype(np.float64)

    # Transpose so that channels come first
    X_train = X_train.transpose(0, 3, 1, 2)
    X_val = X_val.transpose(0, 3, 1, 2)
    X_test = X_test.transpose(0, 3, 1, 2)
    mean_image = np.mean(X_train, axis=0)
    std = np.std(X_train)

    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    X_train /= std
    X_val /= std
    X_test /= std
    '''
    # covert N x 3 x 32 x 32 to N x 3072
    X_train = np.reshape(X_train, (len(X_train), 3 * 32 * 32))
    X_val = np.reshape(X_val, (len(X_val), 3 * 32 * 32))
    X_test = np.reshape(X_test, (len(X_test), 3 * 32 * 32))
    '''
    return {
        'X_train': X_train, 'y_train': y_train,
        'X_val': X_val, 'y_val': y_val,
        'X_test': X_test, 'y_test': y_test,
        'mean': mean_image, 'std': std
    }


def load_CIFAR_batch(filename):
    ''' load single batch of cifar '''
    with open(filename, 'rb') as f:
        datadict = pickle.load(f, encoding ='bytes')
        X = datadict[b'data']
        Y = datadict[b'labels']
        X = X.reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1)
        Y = np.array(Y)
        return X, Y


def get_CIFAR10(ROOT):
    ''' load all of cifar '''
    xs = []
    ys = []
    for b in range(1, 6):
        f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
        X, Y = load_CIFAR_batch(f)
        xs.append(X)
        ys.append(Y)
    Xtr = np.concatenate(xs)
    Ytr = np.concatenate(ys)
    del X, Y
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Xte, Ytr, Yte


In [3]:
def load(file_name):
        train_x, test_x, train_y, test_y = get_CIFAR10("")
        # covert N x 3 x 32 x 32 to N x 3072
        train_x = np.reshape(train_x, (len(train_x), 3 * 32 * 32))
        test_x = np.reshape(test_x, (len(test_x), 3 * 32 * 32))
        
        return train_x, test_x, train_y, test_y 

In [4]:
#COST FUNCTION

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


def sigmoid_prime(z):
    return z * (1 - z)


def softmax(z):
    z -= np.max(z)
    sm = (np.exp(z).T / np.sum(np.exp(z), axis=1)).T
    return sm


def softmax_prime(z):
    return


def relu(z):
    return np.maximum(z, 0)


def relu_prime(z):
    dz = np.ones_like(z)
    dz[z < 0] = 0
    return dz


In [5]:
#WEIGHTS INITIALIZATION FUNCTION
def relu_weight(m, n):
    np.random.seed(0)
    return np.random.rand(m, n) * np.sqrt(2) / np.sqrt(m)


def xavier(m, n):
    np.random.seed(0)
    return np.random.rand(m, n) / np.sqrt(m)


def he(m, n):
    np.random.seed(0)
    return np.random.rand(m, n) * np.sqrt(2) / np.sqrt(m + n)


In [6]:
class Layers(object):

    def __init__(self, n_in, n_out=10, activation_function="relu"):
        self.n_in = n_in
        self.n_out = n_out
        self.set_activation_functions(act_function_name=activation_function)

    def set_activation_functions(self, act_function_name="relu"):
        if act_function_name == "relu":
            self.activation_function = relu
            self.function = relu_prime
            self.set_weight_function(weight_name="he")
        elif act_function_name == "sigmoid":
            self.activation_function = sigmoid
            self.function = sigmoid_prime
            self.set_weight_function(weight_name="xavier")
        elif act_function_name == "softmax":
            self.activation_function = softmax
            self.function = softmax_prime
            self.set_weight_function(weight_name="he")

    def set_weight_function(self, weight_name):
        if weight_name == "relu":
            self.weight_function = relu_weight
        elif weight_name == "xavier":
            self.weight_function = xavier
        elif weight_name == "he":
            self.weight_function = he


In [7]:
class Neural_Network(object):
    
    def __init__(self, n_in=784, n_out=10, l_rate=1.0):

        self.n_in = n_in
        self.n_out = n_out
        self.initial_lrate = l_rate
        self.l_rate = l_rate

        self.weights = []
        self.biases = []
        self.previous_weights = []
        self.previous_biases = []
        self.layers = []
        self.losses = []
        

    def layer(self, activation_function="relu", n_neurons=4):
        if len(self.layers) <= 0:
            n_previous_neurons = self.n_in
        else:
            n_previous_neurons = self.layers[-1].n_out

        L = Layers(n_in=n_previous_neurons, n_out=n_neurons, activation_function=activation_function)
        self.layers.append(L)
        
    
    def backpropogation_weights(self):
        self.previous_weights.append(self.weights)
        self.previous_biases.append(self.biases)
    
    
    def forward_propagation(self, X):
        a = [X]
        for l in range(len(self.layers)):
            z = a[l].dot(self.weights[l]) + self.biases[l]
            activation = self.layers[l].activation_function(z)
            a.append(activation)

        return a
    
    def backpropagation(self, x,y_mat,a):
        m = x.shape[0]
        output = a[-1]
        
        loss = (-1 / m) * np.sum(y_mat * np.log(output))
        
        deltas = []
        delta = y_mat - output
        deltas.append(delta)
        
        for l in range(len(self.layers)-1):
            prime = self.layers[-2 - l].function(a[-2 - l])
            w = self.weights[-1-l]
            delta = np.dot(delta, w.T) * prime
            deltas.append(delta)
        
        prev_weights = self.previous_weights.pop(0)
        prev_biases = self.previous_biases.pop(0)

        
        for l in range(len(self.layers)-1):
            dw = (2/m) * np.dot(a[l].T,deltas[-1-l])
            self.weights[l] += self.l_rate * dw
            
            db = (1/m) * np.sum(deltas[-1-l], axis=0, keepdims=True)
            self.biases[l] += self.l_rate * db
   
        self.backpropogation_weights()
        return loss
    
    
    def initialize_weights(self):
        for i in range(len(self.layers)):
            n_cur_layer_neurons = self.layers[i].n_out
            n_prev_layer_neurons = self.layers[i].n_in

            weights = self.layers[i].weight_function(n_prev_layer_neurons, n_cur_layer_neurons)
            self.weights.append(weights)

            biases = np.zeros((1, n_cur_layer_neurons))
            self.biases.append(biases)

    def train(self, x, y, n_epoch=50000):
        
        self.initialize_weights()
        self.backpropogation_weights()
        
        y_mat = self.oneHotIt(y)
        
        for i in range(n_epoch):
            a = self.forward_propagation(x)
            loss = self.backpropagation(x,y_mat,a)
                       
            if i%1000==0:
                print('Iteration: {0}  --  Loss: {1}'.format(i,loss))
                self.losses.append([i,loss])
            
    #Encode Target Label IDs to one hot vector of size m where m is the number of unique labels
    def oneHotIt(self, Y):
        m = Y.shape[0]
        label = scipy.sparse.csr_matrix((np.ones(m), (Y, np.array(range(m)))))
        label = np.array(label.todense()).T
        return label
    
    def Predicted_value(self, x):
        probs = self.forward_propagation(x)[-1]
        preds = np.argmax(probs,axis=1)      
        return probs,preds

    def Accuracy(self, x,y):
        prob,predicted_val = self.Predicted_value(x)
        accuracy = sum(predicted_val == y)/(float(len(y)))
        percentage = accuracy*100
        return percentage
    
    def loss_graph(self):
        errors = np.array(self.losses)
        plt.plot(errors[:, 0], errors[:, 1], 'r--')
        plt.title("(CIFAR-10) Loss vs Epoch (Learning rate = 1.0)")
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.show()
        



In [None]:

# mnist data set
train_x, test_x, train_y, test_y = load(file_name="cifar")
    
X_train = train_x[:1000] / 255.
y_train = train_y[:1000].astype(int)
X_test = test_x[:100] / 255.
y_test = test_y[:100].astype(int)

n_in = X_train.shape[1]

nn = Neural_Network(n_in=n_in, n_out=10)

#Randomly assigning activation functions to the hidden layers
List = ["relu", "sigmoid"]

activation_func = random.choice(List)
nn.layer(activation_func, n_neurons=80)
nn.layer(activation_func, n_neurons=30)
nn.layer(activation_function="softmax", n_neurons=10)

nn.train(X_train, y_train, n_epoch=500000)

train_accuracy = nn.Accuracy(X_train,y_train)
test_accuracy = nn.Accuracy(X_test,y_test)



Iteration: 0  --  Loss: 2.337716768928859
Iteration: 1000  --  Loss: 2.3001272682558196
Iteration: 2000  --  Loss: 2.300048924060527
Iteration: 3000  --  Loss: 2.3000439573777216
Iteration: 4000  --  Loss: 2.3000418468709296
Iteration: 5000  --  Loss: 2.300040447189312
Iteration: 6000  --  Loss: 2.300039442458885
Iteration: 7000  --  Loss: 2.3000387190120386
Iteration: 8000  --  Loss: 2.3000382079572623
Iteration: 9000  --  Loss: 2.300037845812053
Iteration: 10000  --  Loss: 2.3000375742415264
Iteration: 11000  --  Loss: 2.3000373501718414
Iteration: 12000  --  Loss: 2.300037147359945
Iteration: 13000  --  Loss: 2.3000369514236203
Iteration: 14000  --  Loss: 2.300036754566792
Iteration: 15000  --  Loss: 2.3000365521235926
Iteration: 16000  --  Loss: 2.3000363406439512
Iteration: 17000  --  Loss: 2.3000361168497365
Iteration: 18000  --  Loss: 2.300035876975245
Iteration: 19000  --  Loss: 2.3000356161868867
Iteration: 20000  --  Loss: 2.3000353278456784
Iteration: 21000  --  Loss: 2.3000

Iteration: 175000  --  Loss: 1.8816153218194493
Iteration: 176000  --  Loss: 1.8720101187334974
Iteration: 177000  --  Loss: 1.872560987335447
Iteration: 178000  --  Loss: 1.877934002703146
Iteration: 179000  --  Loss: 1.8730036549602864
Iteration: 180000  --  Loss: 1.8711940458626113
Iteration: 181000  --  Loss: 1.8714094859208998
Iteration: 182000  --  Loss: 1.869240003306478
Iteration: 183000  --  Loss: 1.86985377997679
Iteration: 184000  --  Loss: 1.8753350244042077
Iteration: 185000  --  Loss: 1.8678326060913815
Iteration: 186000  --  Loss: 1.8686803676571784
Iteration: 187000  --  Loss: 1.8680599461511007
Iteration: 188000  --  Loss: 1.8670966734803829
Iteration: 189000  --  Loss: 1.8783618986613586
Iteration: 190000  --  Loss: 1.8660395254301816
Iteration: 191000  --  Loss: 1.8656915181894516
Iteration: 192000  --  Loss: 1.87084532078327
Iteration: 193000  --  Loss: 1.8977597969175017
Iteration: 194000  --  Loss: 1.8654767417001978
Iteration: 195000  --  Loss: 1.8660521771473981

Iteration: 347000  --  Loss: 1.8443869090622196
Iteration: 348000  --  Loss: 1.8451562945026967
Iteration: 349000  --  Loss: 1.844486426844278
Iteration: 350000  --  Loss: 1.8464172763747704
Iteration: 351000  --  Loss: 1.845532251837677
Iteration: 352000  --  Loss: 1.844378772086917
Iteration: 353000  --  Loss: 1.8447518787833794
Iteration: 354000  --  Loss: 1.8445588949503324
Iteration: 355000  --  Loss: 1.844652014334708
Iteration: 356000  --  Loss: 1.8444371687581897
Iteration: 357000  --  Loss: 1.8448967414012392
Iteration: 358000  --  Loss: 1.844709008225108
Iteration: 359000  --  Loss: 1.8455942124714608
Iteration: 360000  --  Loss: 1.8448779603222003
Iteration: 361000  --  Loss: 1.8440854797681754
Iteration: 362000  --  Loss: 1.8448748269999689
Iteration: 363000  --  Loss: 1.84473573216639
Iteration: 364000  --  Loss: 1.844315123145152
Iteration: 365000  --  Loss: 1.8449291344490548
Iteration: 366000  --  Loss: 1.8435323405465789
Iteration: 367000  --  Loss: 1.8442820947705219


In [None]:
print('Training Accuracy: {0:0.2f} %'.format(train_accuracy))
print('Test Accuracy: {0:0.2f} %'.format(test_accuracy))
nn.loss_graph()