In [19]:
#NETWORK MODEL

import random
from random import randint
import numpy as np
from scipy.sparse import csr_matrix
import csv

# Helper functions
def sig(x):
    return 1.0/(1.0+np.exp(-x))

def sigderivative(x):
    return sig(x)*(1-sig(x))

class NeuralNet(object):

    def __init__(self):
        self.neurons = [4096, 100, 10]
        self.layers = len(self.neurons)
        self.w = [np.random.randn(y, x) for x, y in zip(self.neurons[:-1], self.neurons[1:])]
        self.b = [np.random.randn(y, 1) for y in self.neurons[1:]]
        self.err = 0

    def train(self, data, epochs, batchsize, lr):
        print('starting...')
        for j in range(epochs):
            print('epoch:' + str(j))
            random.shuffle(data)
            batches = [data[z:z+batchsize] for z in range(0, len(data), batchsize)]
            for i, batch in enumerate(batches):
                if i % 500 == 0:
                    print('epoch:' + str(j) + ' ... ' + str(100*i/len(batches)) + '% ... err:' + str(self.err/500))
                    self.err = 0
                self.calcgradient(batch, lr)

    def forward(self, x):
        for b, w in zip(self.b, self.w):
            x = sig(np.dot(w, x)+b)
        return x

    def calcgradient(self, batch, lr):
        b = [np.zeros(b.shape) for b in self.b]
        w = [np.zeros(w.shape) for w in self.w]
        for x, y in batch:
            db, dw = self.back(x, y)
            b = [bval+dbval for bval, dbval in zip(b, db)]
            w = [wval+dwval for wval, dwval in zip(w, dw)]
        self.w = [wval-(lr/len(batch))*nwval for wval, nwval in zip(self.w, w)]
        self.b = [bval-(lr/len(batch))*nbval for bval, nbval in zip(self.b, b)]

    def back(self, x, y):
        nb = [np.zeros(b.shape) for b in self.b]
        nw = [np.zeros(w.shape) for w in self.w]
        net = x
        nets = [x]
        outputs = []
        for b, w in zip(self.b, self.w):
            o = np.dot(w, net)+b
            outputs.append(o)
            net = sig(o)
            nets.append(net)
        delta = self.errderivative(nets[-1], y) * sigderivative(outputs[-1])
        nb[-1] = delta
        nw[-1] = np.dot(delta, nets[-2].transpose())
        for layer in range(2, self.layers):
            o = outputs[-layer]
            do = sigderivative(o)
            delta = np.dot(self.w[-layer+1].transpose(), delta) * do
            nb[-layer] = delta
            nw[-layer] = np.dot(delta, nets[-layer-1].transpose())
        return (nb, nw)

    def predict(self, data):
        predictions = [(self.forward(x), y) for (x, y) in data]
        return sum(int(p == y) for (p, y) in predictions)

    def errderivative(self, o, y):
        errvec = (o-y)
        err = 0
        for e in errvec:
            err += (e**2)/2
        self.err += err
        return errvec

In [20]:
# LOAD DATA
traindata = np.load('train_x_sparse.npz')
traindata = csr_matrix((traindata['data'], traindata['indices'], traindata['indptr']), shape=traindata['shape'])

with open('train_y.csv') as f:
    trainy = f.readlines()
totaly = []
for i, s in enumerate(trainy):
    index = int(trainy[i].replace('\n', ''))
    tempy = np.zeros(10)
    tempy[index] = 1.0
    totaly.append(tempy)
trainy = totaly
        
datax = []
datay = []
for x in range(0, traindata.shape[0]-1):
    tup = []
    datax.append(np.reshape(traindata.getrow(x).toarray(), (4096, 1)))
    datay.append(np.reshape(trainy[x], (10, 1)))
data = list(zip(datax, datay))
random.shuffle(data)
testdata = data[:10000]
data = data[10000:50000]

In [21]:
#TRAINING
net = NeuralNet()
net.train(data, 10, 10, 0.7)

starting...
epoch:0
epoch:0 ... 0.0% ... err:0.0
epoch:0 ... 10.0% ... err:[5.32503363]
epoch:0 ... 20.0% ... err:[5.00535392]
epoch:0 ... 30.0% ... err:[5.00819138]
epoch:0 ... 40.0% ... err:[5.00398292]
epoch:0 ... 50.0% ... err:[4.99941735]
epoch:0 ... 60.0% ... err:[5.00224466]
epoch:0 ... 70.0% ... err:[4.99834222]
epoch:0 ... 80.0% ... err:[4.99850272]
epoch:0 ... 90.0% ... err:[5.00213036]
epoch:1
epoch:1 ... 0.0% ... err:[4.99770489]
epoch:1 ... 10.0% ... err:[4.9964541]
epoch:1 ... 20.0% ... err:[4.99565416]
epoch:1 ... 30.0% ... err:[4.99761091]
epoch:1 ... 40.0% ... err:[4.99564777]
epoch:1 ... 50.0% ... err:[4.99752272]
epoch:1 ... 60.0% ... err:[4.99550436]
epoch:1 ... 70.0% ... err:[4.99722789]
epoch:1 ... 80.0% ... err:[4.99190906]
epoch:1 ... 90.0% ... err:[4.99142041]
epoch:2
epoch:2 ... 0.0% ... err:[4.99395988]
epoch:2 ... 10.0% ... err:[4.99058891]
epoch:2 ... 20.0% ... err:[4.98488039]
epoch:2 ... 30.0% ... err:[4.98649685]
epoch:2 ... 40.0% ... err:[4.99165757]
ep

In [23]:
np.save('wNN6-100-10', net.w)
np.save('bNN6-100-10', net.b)

In [24]:
#LOAD SAVED MODEL AND TEST

def forward(x, w, b):
    for tb, tw in zip(b, w):
        x = sig(np.dot(tw, x)+tb)
    return x

def predict(data, w, b):
    pr = [(forward(x, w, b), y) for (x, y) in data]
    predictions = [(np.argmax(forward(x, w, b)), np.argmax(y)) for (x, y) in data]
    return sum(int(p == y) for (p, y) in predictions)

w = np.load('wNN6-100-10.npy')
b = np.load('bNN6-100-10.npy')

with open('train_y.csv') as f:
    trainy = f.readlines()

print(predict(testdata, w, b)/10000)


0.1208
