In [86]:
%matplotlib inline
import sys
import matplotlib.pyplot as plt
import numpy as np
from numpy import exp, array, random, dot
import torch
import torchvision
import torchvision.transforms as transforms

In [87]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [275]:
class NeuralNetwork(object):
    
    def __init__(self, layer_dimensions, drop_prob = 0.0,reg_lambda = 0.0):
        np.random.seed(1)
        self.parameters = {}
        self.layer_dimensions = layer_dimensions
        self.num_layers = len(layer_dimensions) - 1
        self.drop_prob = drop_prob
        self.reg_lambda = reg_lambda
        #self.biases = [np.random.randn(y, 1) for y in layer_dimensions[1:]]
        #self.weights = [np.random.randn(y, x) for x, y in zip(layer_dimensions[:-1], layer_dimensions[1:])]
        for l in range(1,len(layer_dimensions)):
            self.parameters['W' + str(l)] = np.random.randn(layer_dimensions[l],layer_dimensions[l-1])*0.01 + .01
            self.parameters['b' + str(l)] = np.zeros((layer_dimensions[l],1))
    
    def affineForward(self, A, W, b):
        z = np.dot(W,A)+b
        if np.isnan(W[0][0]):
            print("w = ",W)
            print("A=",A)
        cache = (A,W,b,z)
        return z,cache
    
    def activationForward(self, A, activation='relu'):
        if activation=="relu":
            return self.relu(A)
        elif activation=="softmax":
            temp = self.softmax(A)
            if temp.size==0:
                print(A)
            else:
                return temp
    
    def relu(self, X):
        return np.maximum(0, X)
    
    def forwardPropagation(self, X):
        caches = []
        z = X
        layer_no = 0
        L = len(self.parameters) // 2
        for l in range(1,L):
            z, cache = self.affineForward(z, self.parameters['W' + str(l)],self.parameters['b' + str(l)])
            A = self.activationForward(z, activation="relu")
            caches.append(cache)
        
        ZL, cache_final_layer = self.affineForward(A, self.parameters['W' + str(L)],self.parameters['b' + str(L)])
        AL = self.activationForward(ZL, activation="softmax")

        caches.append(cache_final_layer)
        return AL, caches
    
    def costFunction(self, AL, y):
        L = AL
        shape = y.shape[0]
        correct_label_prob = L[y,range(shape)]
        cost = - np.sum(np.log(correct_label_prob))/shape
        cost = np.squeeze(cost)
        if(self.reg_lambda > 0):
            pass
        dAL = self.softmax_prime(AL,y)
        return cost,dAL
    
    def affineBackward(self, dA_prev, cache):
        A_prev,W,b,Z = cache
        m = A_prev.shape[1]
        dW = np.dot(dA_prev,A_prev.T)/m
        db = np.sum(dA_prev,axis=1, keepdims=True)/m
        dA = np.dot(W.T,dA_prev)
        return dA, dW, db
    
    def activationBackward(self, dA, cache, activation='relu'):
        A_prev,W,b,Z = cache
        if activation=="relu":
            return self.relu_derivative(dA,Z)
    
    def relu_derivative(self, dX, cached_x):
        dX[cached_x < 0] = 0
        dX[cached_x >= 0] = 1
        return dX
    
    def backPropagation(self, dAL, y, cache):
        gradients = {}
        L = len(cache)
        m = dAL.shape[1]
        current_cache = cache[L-1]
        gradients["dA" + str(L)], gradients["dW" + str(L)], gradients["db" + str(L)] = \
        self.affineBackward(dAL,current_cache)
        
        for l in reversed(range(L-1)):
            current_cache = cache[l]
            dA_prev_non_linear = self.activationBackward(gradients["dA" + str(l + 2)],current_cache,"relu")
            dA_prev, dW_temp, db_temp = self.affineBackward(dA_prev_non_linear,current_cache)
            gradients["dA" + str(l + 1)] = dA_prev
            gradients["dW" + str(l + 1)] = dW_temp
            gradients["db" + str(l + 1)] = db_temp
            
        return gradients
    
    def updateParameters(self, gradients, alpha):
        L = len(self.parameters) // 2
        for l in range(L):
            self.parameters["W" + str(l+1)] = self.parameters["W" + str(l+1)] - alpha*gradients["dW" + str(l+1)]
            self.parameters["b" + str(l+1)] = self.parameters["b" + str(l+1)] - alpha*gradients["db" + str(l+1)]
    
    def get_batch(self, X, y, batch_size):
        current_index=self.parameters["batch_index"]
        self.parameters["batch_index"]=self.parameters["batch_index"]+batch_size
        X_batch,y_batch = X[:,current_index:current_index+batch_size], y[current_index:current_index+batch_size]

        return X_batch, y_batch
    
    def train(self, X, y, iters=100, alpha=0.1, batch_size=100, print_every=10):
        batch_iters = (int)(X.shape[1]/batch_size)
        cost = 0
        for i in range(0, iters):
            self.parameters["batch_index"] = 0
            for j in range(batch_iters):
                X_batch,Y_batch=self.get_batch( X, y, batch_size)
                AL, all_layer_cache = self.forwardPropagation(X_batch)
                cost, dAL = self.costFunction(AL, Y_batch)
                gradients = self.backPropagation(dAL, Y_batch, all_layer_cache)
                self.updateParameters(gradients, alpha)
            if i % print_every == 0:
                print "Cost: ", cost
    
    def predict(self, X):
        y_pred, cache = self.forwardPropagation(X)
        y_pred = self.softmax(y_pred)
        predicted_labels=[]
        for i in range(y_pred.shape[1]):
            label_predicted=max(y_pred[:,i])
            predicted_labels.append(y_pred[:,i].tolist().index(label_predicted))
        predicted_labels = np.array(predicted_labels)
        return predicted_labels
    
    def softmax(self, X):
        return np.exp(X)/ np.sum(np.exp(X), axis = 0)
    
    def softmax_prime(self, X, y):
        return X - self.one_hot_encoding(y)
    
    def one_hot_encoding(self, x, n_classes = 10):
        one_hot = np.zeros((n_classes, x.shape[0]))
        one_hot[x, range(x.shape[0])] = 1
        return one_hot

In [254]:
NN = NeuralNetwork([5,30,20,10])
X = np.matrix([[1,1],[1,1],[1,1],[1,1],[1,1]])
last, cache = NN.forwardPropagation(X)
print last
Y = np.matrix([[1],[0]])
AL, all_layer_cache = NN.forwardPropagation(X)
cost,dAL = NN.costFunction(AL, Y)
gradients=NN.backPropagation(dAL, Y, all_layer_cache)

[[ 0.09999595  0.09999595]
 [ 0.0999962   0.0999962 ]
 [ 0.10000244  0.10000244]
 [ 0.100002    0.100002  ]
 [ 0.10000016  0.10000016]
 [ 0.09999968  0.09999968]
 [ 0.10000376  0.10000376]
 [ 0.09999776  0.09999776]
 [ 0.09999943  0.09999943]
 [ 0.10000262  0.10000262]]


TypeError: sum() got an unexpected keyword argument 'keepdims'

In [255]:
NN.train(X,Y)

Cost:  0
Cost:  0
Cost:  0
Cost:  0
Cost:  0
Cost:  0
Cost:  0
Cost:  0
Cost:  0
Cost:  0


In [256]:
trainset[0][0].numpy().shape

(3, 32, 32)

In [231]:
def make_output_from_int(a):
    out = np.zeros((10,1))
    out[a ,:] = 1
    return out

In [239]:
def make_numpy_from_CIFAR10_data(data, no_of_samples):
    out = []
    inp = np.zeros((3072, no_of_samples))
    y = []
    for tensor in range(no_of_samples):
        inp[:, tensor] = data[tensor][0].view(3072, -1).numpy()[:,0]
        y.append(int((data[tensor])[1]))
    out = np.matrix(y)
    out = out.reshape(out.shape[1],out.shape[0])
    return inp, out

In [240]:
x_train, y_train= make_numpy_from_CIFAR10_data(trainset, 50000)

In [257]:
y_train.shape

(50000, 1)

In [259]:
x_train, y_train= make_numpy_from_CIFAR10_data(trainset, 50000)
X_validate = x_train[:,45001:50000]
Y_validate = y_train[45001:50000]

X_train_temp = x_train[:,:45000]
Y_train_temp = y_train[:45000]

In [281]:
NN = NeuralNetwork([3072,32,16,10],drop_prob=0.2, reg_lambda=0.1)
NN.train(X_train_temp, Y_train_temp,iters=5, alpha=.01, batch_size=1, print_every=1)

Cost:  2.38487664436
Cost:  2.38487664436
Cost:  2.38487664436
Cost:  2.38487664436
Cost:  2.38487664436


In [282]:
y_predicted_validate = NN.predict(X_validate)
print(y_predicted_validate)
len(y_predicted_validate)
y_predicted_validate.shape

[0 0 0 ..., 0 0 0]


(4999,)

In [283]:
from sklearn.metrics import accuracy_score
accuracy_score(predicted_labels,Y_validate)*100

NameError: name 'predicted_labels' is not defined