# <b> Fully connected Neural Network (Vanilla Code in Python) </b>
## Submitted by : 
## Namit Mohale (nm3191)
## Karanpreet Wadhwa (ksw352)

In [1]:
import torch 
import torchvision
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score

In [2]:
transform = transforms.Compose([ transforms.ToTensor () ,
                            transforms.Normalize ((0.5 , 0.5 , 0.5) , (0.5 , 0.5 , 0.5) ) ])

In [3]:
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
            download=True, transform=transform) 
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
            download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
import pickle
import os
import numpy as np

<b> Below are some functions mde to preprocess the data batches </b>

In [29]:
data_path = "./data/"

# Constants

# Image size
img_size = 32

# 3 channels: Red, Green, Blue.
channel_number = 3

oneimgsize = img_size * img_size
# Length of an flattened image array.
img_size_flat = oneimgsize * channel_number

# Number of classes.
num_classes = 10

# Constants used to allocate arrays

# Number of files for the training-set.
_images_per_file = 10000

train_num_files = int(50000 / _images_per_file)

# Total number of images in the training-set.
# This is used to pre-allocate arrays for efficiency.
total_images_train = int(train_num_files * _images_per_file)

In [30]:
def get_path(filename=""):
    """
    Return the full path of a data-file for the data-set.
    If filename=="" then return the directory of the files.
    """

    return os.path.join(data_path, "cifar-10-batches-py/", filename)

In [31]:
# Convert the data into a numpy array
def unpickle(filename):
    """
    Unpickle the given file and return the data.
    Note that the appropriate dir-name is prepended the filename.
    """

    # Create full path for the file.
    file_path = get_path(filename)

    print("Loading data: " + file_path)

    with open(file_path, mode='rb') as file:
        # In Python 3.X it is important to set the encoding, to prevent exception
        data = pickle.load(file, encoding='bytes')

    return data

In [32]:
def convertimg(raw):
    raw_float = np.array(raw,dtype=float)/255
    #raw_float = np.array(raw,dtype=float)
    #Reshape the image into 4 dimension
    
    images = raw_float.reshape([-1, channel_number, img_size, img_size])
    images = images.transpose([0,2,3,1])
    return images

In [33]:
def one_hot_encoded(class_numbers, num_classes=None):
    """
    Generate the One-Hot encoded class-labels from an array of integers.
    This way, each class is represented by an array of integers, instead of a single integer
    """

    # Find the number of classes if None is provided.
    # Assumes the lowest class-number is zero.
    if num_classes is None:
        num_classes = np.max(class_numbers) + 1

    return np.eye(num_classes, dtype=float)[class_numbers]

In [34]:
def data_loader(filename):
    """
    Load a pickled data-file from the CIFAR-10 data-set
    and return the converted imagesb and the class-number
    for each image.
    """

    # Load the pickled data-file.
    data = unpickle(filename)

    # Get the raw images.
    images = data[b'data']

    # Get the class-numbers for each image. Convert to numpy-array.
    classlabels = np.array(data[b'labels'])
    
    images = convertimg(images)

    return images, classlabels

In [35]:
def load_class_names():
    """
    Load the names for the classes in the data-set.
    Returns a list with the names. Example: names[3] is the name
    associated with class-number 3.
    """

    # Load the class-names from the pickled file.
    raw = unpickle(filename="batches.meta")[b'label_names']

    # Convert from binary strings.
    names = [x.decode('utf-8') for x in raw]

    return names

In [36]:
def load_training_data():
    """
    Load all the training-data for the data-set.
    The data-set is split into 5 data-files which are merged here.
    Returns the images, class-numbers and one-hot encoded class-labels.
    """

    # Pre-allocate the arrays for the images and class-numbers for efficiency.
    images = np.zeros(shape=[total_images_train, img_size, img_size, channel_number], dtype=float)
    classlabel = np.zeros(shape=[total_images_train], dtype=int)

    # Begin-index for the current batch.
    begin = 0

    # For each data-file.
    for i in range(train_num_files):
        # Load the images and class-numbers from the data-file.
        images_batch, class_batch = data_loader(filename="data_batch_" + str(i + 1))

        # Number of images in this batch.
        num_images = len(images_batch)

        # End-index for the current batch.
        end = begin + num_images
        
        print(end)
        # Store the images into the array.
        images[begin:end, :] = images_batch

        # Store the class-numbers into the array.
        classlabel[begin:end] = class_batch

        # The begin-index for the next batch is the current end-index.
        begin = end

    return images, classlabel, one_hot_encoded(class_numbers = classlabel, num_classes=num_classes)
    

In [37]:
def load_test_data():
    """
    Load all the test-data for the CIFAR-10 data-set.
    Returns the images, class-numbers and one-hot encoded class-labels.
    """

    images, classlabel = data_loader(filename="test_batch")

    return images, classlabel, one_hot_encoded(class_numbers = classlabel, num_classes=num_classes)

In [38]:
class_names = load_class_names()
class_names

Loading data: ./data/cifar-10-batches-py/batches.meta


['airplane',
 'automobile',
 'bird',
 'cat',
 'deer',
 'dog',
 'frog',
 'horse',
 'ship',
 'truck']

In [39]:
images_train, train_class, labels_train = load_training_data()

Loading data: ./data/cifar-10-batches-py/data_batch_1
10000
Loading data: ./data/cifar-10-batches-py/data_batch_2
20000
Loading data: ./data/cifar-10-batches-py/data_batch_3
30000
Loading data: ./data/cifar-10-batches-py/data_batch_4
40000
Loading data: ./data/cifar-10-batches-py/data_batch_5
50000


In [46]:
images_test, test_class, labels_test = load_test_data()

Loading data: ./data/cifar-10-batches-py/test_batch


In [47]:
print("Size of:")
print("Training-set:\t\t{}".format(len(images_train)))
print("Test-set:\t\t{}".format(len(images_test)))

Size of:
- Training-set:		50000
- Test-set:		10000


In [48]:
print("Type of")
print("-Training images:\t{}, shape {}".format(type(images_train),images_train.shape))
print("-Test images:\t\t{}, shape {}".format(type(images_test),images_test.shape))

Type of
-Training images:	<class 'numpy.ndarray'>, shape (50000, 3072)
-Test images:	<class 'numpy.ndarray'>, shape (10000, 32, 32, 3)


<h3>Reshaping the images</h3>

In [49]:
print('Original Dimenions..')
print('Training data size- Data,Labels {},{}'.format(images_train.shape,train_class.shape))
print('Test data size- Data,Labels {},{}'.format(images_test.shape,test_class.shape))

Original Dimenions..
Training data size- Data,Labels (50000, 3072),(50000, 1)
Test data size- Data,Labels (10000, 32, 32, 3),(10000,)


In [50]:
images_train = images_train.reshape(50000,-1) # -1 indicates reshaping with the leftover dimensions
train_class = train_class.reshape(50000,-1)

images_test = images_test.reshape(10000,-1)
test_class = test_class.reshape(10000,-1)

print('Post Reshaping Dimenions..')
print('Training data size- Data,Labels {},{}'.format(images_train.shape,train_class.shape))
print('Test data size- Data,Labels {},{}'.format(images_test.shape,test_class.shape))

Post Reshaping Dimenions..
Training data size- Data,Labels (50000, 3072),(50000, 1)
Test data size- Data,Labels (10000, 3072),(10000, 1)


<h3>Neural Network Implementation </h3>

In [51]:
number_of_images = 10000;

images_train = images_train[:number_of_images].T
len(images_train)
labels_train = labels_train[:number_of_images]
len(labels_train)
images_train.shape
#labels_train.shape

(3072, 10000)

In [52]:

"""Each activation layer will indicate the number of features of one images i.e 1000 neurons 
in the first hidden layer will have 1000 features per image. matrix would be 10000* num of images
Translating it to a different plane to yield linear models.
second would have 500 features per images

"""

#Paramters for the neural network
numberOfLayers = 3
neurons_layer_one = 250
neurons_layer_two = 100
neurons_output_layer = 10;

layer_dim = [images_train.shape[0],neurons_layer_one,neurons_layer_two,neurons_output_layer]
parameters = {
}


## Network training code here

In [53]:
class NeuralNetwork:
    
    
    def __init__(self,dimensions):
        self.layer_dimensions = dimensions;
        np.random.seed(1);
        self.parameters = parameters;
        L = len(layer_dim)
        
        for i in range(0,L-1):
            a = str(i+1)
            parameters["W"+a] = (np.random.randn(dimensions[i+1],dimensions[i])*0.01)
            parameters["b"+a] = np.zeros((dimensions[i+1],1)) 
            
                
    def affineForward(self,A_prev,W,b):
        #print('Inside affineForward defn.......')
        Z_dot = np.dot(W,A_prev)
        Z = Z_dot+b
        
        cache = (A_prev,W,b)
        return Z,cache
    
    
    def activationForward(self, A,layer,activation_function="relu"):
        parameters = self.parameters
        a = str(layer)
        W = parameters["W"+a]
        b = parameters["b"+a]
        Z,linear_cache = self.affineForward(A,W,b)
        
        #linear_cache is for A,W,b
        #activation_cache is the Z
        #print("Layer,Size of the input,Weight,Bias,Z {},{},{},{},{}".format(layer,A.shape,W.shape,b.shape,Z.shape))
        if(activation_function == "relu"):
            A,activation_cache = relu(Z)
        elif (activation_function == "softmax"):
            A,activation_cache = _softmax(Z)
        
        cache = (linear_cache,activation_cache)
        return A,cache
        

    def forwardPropagation(self,X):
        
        #print('......Inside forwardPropagation......')
        L = len(self.layer_dimensions)
        A = X
        caches =[]
        #print('Dimension length..',L)
        
        for i in range(1,L-1):
            #print('......Starting layer....',i)
            A_prev = A
            A,cache = self.activationForward(A_prev,i,"relu")         
            caches.append(cache)     
        
        Al,cache = self.activationForward(A,L-1,"softmax")
        caches.append(cache)
        #print('Post applying softmax function for layer,Y,Z..{},{},{}'.format(L-1,Al.shape,cache.shape))           
        return Al,caches
    
    def costFunction(self,Al,y):
        Al = Al.T
        m = y.shape[0]
        cost = -(1/m)*((np.sum((y*np.log(Al)))+((1-y)*(np.log(1-Al)))))
       # print('...Shape of cost function {}....'.format(cost.shape))
        return cost
    
    
    def affineBackward(self,dZ,linear_cache):
        #delta = g'(z) = dZ
        #dZ would represent the g'(z)
        #a=A_prev ; dw3= a.dZ
        
        A_prev,W,b = linear_cache
        m = A_prev.shape[1]
        
        dW = (1/m)*np.dot(dZ,A_prev.T)
        db = (1/m)*np.sum(dZ,axis=1,keepdims=True)
        
        dA_prev = np.dot(W.T,dZ)
        
        return dA_prev,dW,db
        
        
    
    def activationBackward(self, dA, cache,activation_fn):
        linear_cache,activation_cache = cache;
        
        if activation_fn == "relu":
            dZ = relu_gradient(dA,activation_cache)
            dA_prev, dW,db = self.affineBackward(dZ,linear_cache)
        
        elif activation_fn == "_softmax":
            dZ = _softmax_gradient(dA,activation_cache)
            dA_prev, dW,db = self.affineBackward(dZ,linear_cache)
            
        
        return dA_prev, dW,db
    
    
    
    def backwardPropagation(self,Al,y,caches):
        L = len(caches)
        #print('Length of cache...',L)
        grads ={}
        dAl = np.divide(Al - y.T, np.multiply(Al, 1 - Al))
        
        #Use different activation function at the last layer
        grads["dA"+str(L-1)],grads["dW"+str(L)],grads["db"+str(L)] = self.activationBackward(dAl,caches[L-1],"_softmax")
        
        for i in range(L-1,0,-1):
            current_cache = caches[i - 1]
            grads["dA"+str(i-1)],grads["dW"+str(i)],grads["db"+str(i)] = self.activationBackward(grads["dA"+str(i)],current_cache,"relu")
            
        return grads  
    
    # define the function to update both weight matrices and bias vectors
    def update_parameters(self, grads, learning_rate):
        #print(grads)
        parameters = self.parameters
        
        L = len(parameters)
        
       
        for l in range (1,4):
            a = str(l)
            parameters["W" + a] = parameters["W" + a] - learning_rate * grads["dW" + a]
            parameters["b" + a] = parameters["b" + a] - learning_rate * grads["db" + a]
        
        return parameters
         
        
      
    def train(self, X,y, number_of_iterations,learning_rate,activation_fn):
        print('....Training the network for total iteration of of {}'.format(number_of_iterations))
        
        for i in range (1,number_of_iterations):
            print('....Starting Iteration {}..........'.format(i))
            Al,caches = self.forwardPropagation(X);
            Y_pred = Al
            
            print('.......Calculating the cost function.......')
            cost = self.costFunction(Al,y)
            if i % 5 == 0:
              print ("Cost: ", cost.mean())
            
            print('.......Calculating gradients.......')
            gradients = self.backwardPropagation(Al,y,caches)
            
            print('.......Updating paramaters.......')
            paramters = self.update_parameters(gradients,learning_rate)
            
            score = accuracy_score(train_class[:10000],np.argmax(Y_pred.T,axis=1),normalize=True) *100;
            print('ACCURACY -----Training Data : {}%'.format(score))
            
            
        print('....Training completed....')       
        
        return Y_pred,paramters
    
    
    def predict(self,X):
        y_pred,caches = self.forwardPropagation(X);
        return y_pred
        

In [54]:
len(layer_dim)

4

In [55]:
#Define activation function

def relu(Z):
    A = np.maximum(0,Z)
    return A,Z

def leaky_relu(Z):
    A = np.maximum(0.1*Z,Z)
    return A,Z

def _softmax(Z):
    A = 1/(1+np.exp(-Z))
    return A,Z

def tanh(Z):
    A = (np.exp(Z)-np.exp(-Z))/(np.exp(Z)+np.exp(-Z))
    return A,Z

def softmax(Z):
    Z = Z.T
    A = (np.exp(Z))/float(sum(np.exp(Z)))
    return A,Z

In [56]:
# Derivative functions

def _softmax_gradient(dA,Z):
    A,Z = _softmax(Z)
    dZ = dA * A * (1-A)
    return dZ


def tanh_gradient(dA, Z):
    A, Z = tanh(Z)
    dZ = dA * (1 - np.square(A))

    return dZ


def relu_gradient(dA, Z):
    A, Z = relu(Z)
    dZ = np.multiply(dA, np.int64(A > 0))

    return dZ
    

In [57]:
model = NeuralNetwork(layer_dim)
print(model.parameters["W1"].shape,model.parameters["W2"].shape,model.parameters["W3"].shape)
print(model.parameters["b1"].shape,model.parameters["b2"].shape,model.parameters["b3"].shape)


(250, 3072) (100, 250) (10, 100)
(250, 1) (100, 1) (10, 1)


In [58]:
images_train.shape

(3072, 10000)

<b> Training the model </b>

In [None]:
Y_pred,paramters = model.train(images_train,labels_train,7000, 0.03,"relu")

....Training the network for total iteration of of 7000
....Starting Iteration 1..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 10.190000000000001%
....Starting Iteration 2..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 10.45%
....Starting Iteration 3..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 10.639999999999999%
....Starting Iteration 4..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 10.48%
....Starting Iteration 5..........
.......Calculating the cost function.......
Cost:  0.7210892169583164
.......Calculating gradients.......
.......Updating paramaters.......

.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 10.05%
....Starting Iteration 86..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 10.040000000000001%
....Starting Iteration 87..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 10.040000000000001%
....Starting Iteration 88..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 10.040000000000001%
....Starting Iteration 89..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 10.040000000000001%
....Starting Iteration 90..........
.......Calculating the cost function.......
Cost:  2.2809

ACCURACY -----Training Data : 10.03%
....Starting Iteration 168..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 10.01%
....Starting Iteration 169..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 10.01%
....Starting Iteration 170..........
.......Calculating the cost function.......
Cost:  2.2810262641147663
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 10.01%
....Starting Iteration 171..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 10.01%
....Starting Iteration 172..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 10.0

.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 9.879999999999999%
....Starting Iteration 252..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 9.91%
....Starting Iteration 253..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 9.91%
....Starting Iteration 254..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 9.91%
....Starting Iteration 255..........
.......Calculating the cost function.......
Cost:  2.2789292976700137
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 9.91%
....Starting Iteration 256..........
.......Calculating the cos

ACCURACY -----Training Data : 11.25%
....Starting Iteration 335..........
.......Calculating the cost function.......
Cost:  2.2702473619776695
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 11.29%
....Starting Iteration 336..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 11.33%
....Starting Iteration 337..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 11.379999999999999%
....Starting Iteration 338..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 11.459999999999999%
....Starting Iteration 339..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY

.......Updating paramaters.......
ACCURACY -----Training Data : 17.080000000000002%
....Starting Iteration 418..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 17.11%
....Starting Iteration 419..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 17.130000000000003%
....Starting Iteration 420..........
.......Calculating the cost function.......
Cost:  2.22093089953618
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 17.21%
....Starting Iteration 421..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 17.2%
....Starting Iteration 422..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Upda

....Starting Iteration 501..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 18.5%
....Starting Iteration 502..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 18.5%
....Starting Iteration 503..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 18.57%
....Starting Iteration 504..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 18.58%
....Starting Iteration 505..........
.......Calculating the cost function.......
Cost:  2.118245493039899
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 18.55%
....Starting Iteration 506..........


ACCURACY -----Training Data : 19.05%
....Starting Iteration 584..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 19.07%
....Starting Iteration 585..........
.......Calculating the cost function.......
Cost:  2.0830958909488237
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 19.07%
....Starting Iteration 586..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 19.08%
....Starting Iteration 587..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 19.11%
....Starting Iteration 588..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 19.1

.......Updating paramaters.......
ACCURACY -----Training Data : 19.52%
....Starting Iteration 668..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 19.5%
....Starting Iteration 669..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 19.53%
....Starting Iteration 670..........
.......Calculating the cost function.......
Cost:  2.0614908019301956
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 19.52%
....Starting Iteration 671..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 19.52%
....Starting Iteration 672..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
A

.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 19.900000000000002%
....Starting Iteration 752..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 19.88%
....Starting Iteration 753..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 19.900000000000002%
....Starting Iteration 754..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 19.91%
....Starting Iteration 755..........
.......Calculating the cost function.......
Cost:  2.04737189563888
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 19.93%
....Starting Iteration 756..........
.......Cal

....Starting Iteration 835..........
.......Calculating the cost function.......
Cost:  2.036154254793181
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 20.66%
....Starting Iteration 836..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 20.7%
....Starting Iteration 837..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 20.72%
....Starting Iteration 838..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 20.72%
....Starting Iteration 839..........
.......Calculating the cost function.......
.......Calculating gradients.......
.......Updating paramaters.......
ACCURACY -----Training Data : 20.78%
....Starting Iteration 840..........

In [47]:
from sklearn.metrics import accuracy_score
score = accuracy_score(train_class[:10000],np.argmax(Y_pred.T,axis=1),normalize=True) *100;
print('Accuracy -----Training Data : {}%'.format(score))

Accuracy -----Training Data : 57.69%


In [48]:
X = images_test.T
X.shape

(3072, 10000)

<b>Predictions</b>

In [51]:
y_pred = model.predict(X)
y_pred = y_pred.T

In [58]:
y_true = test_class
Y_pred_max = np.argmax(y_pred,axis=1)

In [61]:
np.savetxt('predictions.npy', Y_pred_max)

In [62]:
score = accuracy_score(y_true,Y_pred_max,normalize=True)*100
print('Accuracy for Test Data..... : {}%'.format(score))

Accuracy for Test Data..... : 42.699999999999996%


## Note for Professor/TA
The network was initially tried on higher number of neurons in the hidden layers which gave 60%+ accuracy in training set and 55% (approx) on test set but while trying new things with the network, we ended up making changes in the original file and lost the outputs. We were on time crunch and hence could not re-run the network on the original decided upon parameters as this lower number of neurons network itself take over 5 hours to run. I hope you would consider it as an honest mistake. Thank you. Below we are sharing our results of the smaller network that we had settle with.

The network can be made up of different number of layers and by specifying the number of neurons in those layers. Initial results showed that training the network for at least 1000 loops started giving meaningful results. Increasing the neurons increase the accuracy by a lot. After a learning rate was decided upon, the network ran for 7000 loops after which the training accuracy went up to 57% while the test accuracy was around 43%.