In [1]:
import tensorflow as tf
tf.test.gpu_device_name()

  from ._conv import register_converters as _register_converters


''

In [2]:

import math
import random
import numpy as np 
import scipy.misc


In [29]:
URL_ENDPOINT = "dataset/"

train_x = np.loadtxt(URL_ENDPOINT+"train_x.csv", delimiter=",")
train_y = np.loadtxt(URL_ENDPOINT+"train_y.csv", delimiter=",")
test_x = np.loadtxt(URL_ENDPOINT+"test_x.csv", delimiter=",")

train_x = train_x.reshape(-1, 64, 64) # reshape 
test_x = test_x.reshape(-1, 64, 64)

print("length train x:", len(train_x))
print("length train y:", len(train_y))
print("length test x:", len(test_x))


length train x: 50000
length train y: 50000
length test x: 10000


In [155]:
x = train_x.reshape(-1, 4096) 
y = np.zeros((50000, 10))
for i in range(len(y)):
    y[i][int(train_y[i])] = 1

test = test_x.reshape(-1, 4096) 

x[x < 235] = 0 
test[test < 235] = 0 

x /= 255.0
test /= 255.0


In [180]:
# sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-1 * x))

# derivative of our sigmoid function, in terms of the output (i.e. y)
def dsigmoid(x):
    return 1.0 - x**2

# Make a matrix 
def matrix(m, n, fill=0.0):
    return np.zeros(shape=(m,n)) + fill

# Make a random matrix
def rand_matrix(m, n, a=0, b=1):
	return np.random.rand(m, n) * (b - a) + a

# use logistic regression loss function 
def loss_fn(predict, truth):
    n = len(truth)
    loss = (- 1 / n) * np.sum(truth * np.log(predict) + (1 - truth) * (np.log(1 - predict)))
    loss = np.squeeze(loss)

    return loss

In [239]:
class NN:
    def __init__(self, ni, nh, no):
        # number of input, hidden, and output nodes
        self.ni = ni
        self.nh = nh
        self.no = no
        
        # bias vectors 
#         self.bh = np.zeros((1, self.nh))
#         self.bo = np.zeros((1, self.no))
        self.bh = np.ones(self.nh)
        self.bo = np.ones(self.no)
    

        # create weights
        # default to range (-0.5, 0.5)
        self.wh = rand_matrix(self.ni, self.nh, -0.5, 0.5)
        self.wo = rand_matrix(self.nh, self.no, -0.5, 0.5)

    def propagate(self, inputs):
        self.ai = inputs

        # hidden layers activations
        #bh is bias of hidden layers
        self.ah = np.dot(self.ai, self.wh) + self.bh

        # hidden output 
        self.oh = np.tanh(self.ah)

        # output layers activations
        self.ao = np.dot(self.ah, self.wo) + self.bo
        
        #h output layers output 
        self.oo = sigmoid(self.ao)

    def backPropagate(self, x, y, eta):
        n = x.shape[0]
                 
        self.dao = self.oo - y
        self.dwo = np.dot(self.oh.T, self.dao) / n
        self.dbo = np.sum(self.dao) / n
        
        self.dah = np.dot(self.dao, self.wo.T)*(1-np.tanh(self.ah))
        self.dwh = np.dot(x.T, self.dah) / n
        self.dbh = np.sum(self.dah) / n
        
        #update weights using gradient descent method. learning rate = eta
        self.wo = self.wo - eta * self.dwo
        self.wh = self.wh - eta * self.dwh
        self.bo = self.bo - eta * self.dbo
        self.bh = self.bh - eta * self.dbh
        
    def train(self, x, y, iterations = 1000, eta=0.5):
        for i in range(iterations):
            output = self.propagate(x)
            loss = loss_fn(self.oo, y)
            
            pred = np.argmax(self.oo, axis=1)
            diff = train_y - pred
            acc = (diff == 0).sum() / len(y)
            
            self.backPropagate(x, y, eta)
            if i % 100 == 0:
                print('iteration ', i, ":     loss: ", loss, ",    acc: ", acc)


In [240]:
nn = NN(ni=4096, nh=6, no=10)

In [241]:
nn.propagate(x)
print("output: ", nn.ao)
print("output shape: ", nn.ao.shape)

output:  [[1.01282876 0.8258037  2.10472678 ... 1.85213159 2.08352894 1.53097282]
 [1.01282876 0.8258037  2.10472678 ... 1.85213159 2.08352894 1.53097282]
 [1.01282876 0.8258037  2.10472678 ... 1.85213159 2.08352894 1.53097282]
 ...
 [1.01282876 0.8258037  2.10472678 ... 1.85213159 2.08352894 1.53097282]
 [1.01282876 0.8258037  2.10472678 ... 1.85213159 2.08352894 1.53097282]
 [1.01282876 0.8258037  2.10472678 ... 1.85213159 2.08352894 1.53097282]]
output shape:  (50000, 10)


In [None]:
nn.train(x, y, 2000)

iteration  0 :     loss:  14.2004261213919 ,    acc:  0.09818
iteration  100 :     loss:  3.2496876433363244 ,    acc:  0.1114
iteration  200 :     loss:  3.2496187490525235 ,    acc:  0.1114
iteration  300 :     loss:  3.249617556227717 ,    acc:  0.1114
iteration  400 :     loss:  3.2496175348931975 ,    acc:  0.1114
iteration  500 :     loss:  3.2496175344976774 ,    acc:  0.1114
iteration  600 :     loss:  3.2496175344902456 ,    acc:  0.1114
iteration  700 :     loss:  3.2496175344901053 ,    acc:  0.1114
iteration  800 :     loss:  3.249617534490102 ,    acc:  0.1114
iteration  900 :     loss:  3.2496175344901013 ,    acc:  0.1114
iteration  1000 :     loss:  3.249617534490102 ,    acc:  0.1114
iteration  1100 :     loss:  3.2496175344901017 ,    acc:  0.1114
iteration  1200 :     loss:  3.2496175344901026 ,    acc:  0.1114
iteration  1300 :     loss:  3.249617534490102 ,    acc:  0.1114
