In [1]:
from keras.datasets import mnist
import numpy as np

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# Load MNIST through Keras and unpack the two (images, labels) pairs it returns.
# The shapes printed in later cells show x_train as (60000, 28, 28) and y_train as (60000,).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [3]:
print(x_train.shape)
print(x_train[0,:,:])

# Rescale pixel intensities by the maximum value seen in the printed sample (255).
# This is min-max style scaling, not standardization to zero mean / unit variance.
new_x_train = x_train / 255

print(new_x_train.shape)

# Flatten every image into a single row vector. The number of samples and the
# number of pixels per image are taken from the array itself instead of being
# hard-coded, so the cell also works on a subset or a differently sized dataset.
reshaped_x_train = np.reshape(new_x_train, (new_x_train.shape[0], -1))

(60000, 28, 28)
[[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   3  18  18  18 126 136
  175  26 166 255 247 127   0   0   0   0]
 [  0   0   0   0   0   0   0   0  30  36  94 154 170 253 253 253 253 253
  225 172 253 242 195  64   0   0   0   0]
 [  0   0   0   0   0   0   0  49 238 253 253 253 253 253 253 253 253 251
   93  82  82  56  39   0   0   0   0   0]
 [  0   0   0   0   0   0   0  18 219 253 253 25

In [24]:
print(y_train.shape)

# One-hot encode the labels: row i gets a 1 in column y_train[i] (10 columns, one
# per class). The original loop wrote into `new_y_train`, a name this cell never
# defines, so `reshaped_y_train` was left as all zeros (or the cell raised
# NameError on a fresh kernel). The row count now comes from y_train itself.
reshaped_y_train = np.zeros((y_train.shape[0], 10))
reshaped_y_train[np.arange(y_train.shape[0]), y_train] = 1

(60000,)


In [5]:
# weights_1 = np.random.normal(size=(28*28, 128), scale=1/np.sqrt(28*28))
# biases_1 = np.zeros((1, 128))

In [6]:
#def sigmoid(z):
#    return 1/(1+np.exp(-z))

In [7]:
#def pre_activations(X, W, b):
#    return np.dot(X,W) + b

In [8]:
#def activations(X, W, b):
#    return sigmoid(pre_activations(X, W, b))

In [9]:
#weights_2 = np.random.normal(size=(128, 10), scale=1/np.sqrt(128))
#biases_2 = np.zeros((1, 10))

In [10]:
#def soft_max(X, W, b):
#    z = pre_activations(X, W, b)
#    expz = np.exp(z)
#    sums = np.sum(expz, axis=0)
#    return expz / sums

In [11]:
#def forward_propogate(X):
#    hidden_1 = activations(X, weights_1, biases_1)
#    return soft_max(hidden_1, weights_2, biases_2)

In [22]:
class DenseLayer:
    """A fully connected layer with a weight matrix and a bias row.

    Weights are drawn from a normal distribution with standard deviation
    ``1 / sqrt(input_length)``; biases start at zero.
    """

    def __init__(self, input_length, output_length):
        """Create the layer.

        Args:
            input_length: number of input features (rows of the weight matrix).
            output_length: number of units (columns of the weight matrix).
        """
        self.in_len = input_length
        self.out_len = output_length
        self.weights = np.random.normal(
            size=(self.in_len, self.out_len),
            scale=1/np.sqrt(self.in_len))
        self.biases = np.zeros((1, self.out_len))

    def soft_max(self, X):
        """Return the row-wise softmax of this layer's pre-activations for ``X``.

        The row maximum is subtracted before exponentiating. Softmax is
        invariant to that shift, and it keeps ``np.exp`` from overflowing to
        ``inf`` (which previously produced ``inf / inf = nan`` rows).
        """
        pre = self.pre_activations(X)
        shifted = pre - np.max(pre, axis=1, keepdims=True)
        exp_shifted = np.exp(shifted)
        return exp_shifted / np.sum(exp_shifted, axis=1, keepdims=True)

    def activations(self, X):
        """Return the sigmoid of this layer's pre-activations for ``X``."""
        return self.sigmoid(self.pre_activations(X))

    def pre_activations(self, X):
        """Return the affine transform ``X . weights + biases``."""
        return np.dot(X, self.weights) + self.biases

    @staticmethod
    def sigmoid(z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        Computed as ``exp(-log(1 + exp(-z)))`` through ``np.logaddexp`` so that
        large negative ``z`` does not overflow inside ``np.exp``.
        """
        return np.exp(-np.logaddexp(0, -z))


In [13]:
class Network:
    """A feed-forward stack of dense layers whose last layer applies softmax.

    Every layer except the last is evaluated with its ``activations`` method;
    the last layer is evaluated with ``soft_max``.
    """

    def __init__(self, size):
        """Create an empty network.

        Args:
            size: number of input features. It is used as the input length of
                the first layer added, so it must be an integer, not the
                training data itself.
        """
        self.layers = []
        self.size = size
        # TODO: add line to reshape if it isn't correct

    def add_layer(self, output_length):
        """Append a ``DenseLayer`` with ``output_length`` units.

        Its input length is the previous layer's output length, or
        ``self.size`` when this is the first layer.
        """
        in_len = self.layers[-1].out_len if self.layers else self.size
        self.layers.append(DenseLayer(in_len, output_length))

    def forward_propogate(self, x_train):
        """Run ``x_train`` through every layer and return the softmax output.

        Raises:
            ValueError: if no layer has been added (the original fell through
                the loop and returned ``None`` without any explanation).
        """
        if not self.layers:
            raise ValueError("cannot forward propagate through a network with no layers")
        result = x_train
        for layer in self.layers[:-1]:
            result = layer.activations(result)
        return self.layers[-1].soft_max(result)

    def calculate_errors(self, results, y_train):
        """Return the summed cross-entropy ``sum(-log(results) * y_train)``.

        Probabilities are clipped to the smallest positive normal float before
        taking the log, so an exact zero paired with a zero target contributes
        0 instead of ``inf * 0 = nan``.
        """
        # The original read the undefined name `result` and raised NameError.
        probabilities = np.clip(
            np.asarray(results, dtype=float), np.finfo(float).tiny, None)
        return np.sum(-np.log(probabilities) * y_train)

    def accuracy(self, results, y_train):
        """Return the NUMBER of rows whose argmax matches the target's argmax.

        Note this is a count, not a fraction; divide by the number of rows to
        obtain a rate.
        """
        predictions = np.argmax(results, axis=1)
        correct_answers = np.argmax(y_train, axis=1)
        return np.sum(predictions == correct_answers)

    def derivative_of_sm_cross_entropy(self, hidden, y_train, predictions):
        """Return the sum over samples of ``outer(hidden[i], predictions[i] - y_train[i])``.

        The result has shape ``(hidden.shape[1], y_train.shape[1])``. It is
        computed as one matrix product rather than by materialising two
        ``(samples, hidden units, classes)`` arrays and summing them.
        """
        return np.dot(hidden.T, predictions - y_train)

    def backprop(self, layer_inputs, hidden_output_values, ce_deriv_with_r_h):
        """Return the outer product of the inputs with the pre-activation gradient.

        The pre-activation gradient is ``ce_deriv_with_r_h * h * (1 - h)`` with
        ``h = hidden_output_values`` (the sigmoid derivative expressed through
        its output). The number of columns follows from the arguments instead
        of being fixed at 128.
        """
        # TODO: generalize for batches of inputs
        deriv_h_with_r_z = hidden_output_values * (1 - hidden_output_values)
        ce_deriv_ce_with_respect_to_z = (ce_deriv_with_r_h * deriv_h_with_r_z).reshape((1, -1))
        # A column of inputs times a row of gradients broadcasts to the full
        # (inputs, outputs) matrix without an explicit np.repeat.
        layer_inputs_column = np.reshape(layer_inputs, (-1, 1))
        return layer_inputs_column * ce_deriv_ce_with_respect_to_z

In [23]:
# Network takes the number of input features, which becomes the input length of
# the first DenseLayer (a weight-matrix dimension). Passing the data array
# itself, as before, breaks the first add_layer call.
net = Network(reshaped_x_train.shape[1])
net.add_layer(128)  # hidden layer
net.add_layer(10)   # output layer, one unit per one-hot column

[[0.10682457 0.12497473 0.05442231 ... 0.05517881 0.17525571 0.13171869]
 [0.10624418 0.12453039 0.05428742 ... 0.05507768 0.17608975 0.13129448]
 [0.10611709 0.12526361 0.05423544 ... 0.05473271 0.17616214 0.13172078]
 ...
 [0.10694583 0.12493339 0.05392322 ... 0.05508292 0.17515664 0.13183203]
 [0.10617037 0.12481319 0.05401659 ... 0.05519563 0.17494856 0.13116307]
 [0.10587407 0.12462835 0.05431938 ... 0.05498042 0.17520167 0.13194164]]
1.0


In [27]:
# Scratch check: summing a (3, 4, 5) array over axis 0 collapses the leading axis
# and leaves a (4, 5) result.
np.arange(60).reshape((3, 4, 5)).sum(axis=0)

array([[ 60,  63,  66,  69,  72],
       [ 75,  78,  81,  84,  87],
       [ 90,  93,  96,  99, 102],
       [105, 108, 111, 114, 117]])