In [1]:
"""
Activation Functions: 
1. Must be monotonic (increasing without mapping a value to more than one output)
2. Cannot change direction
3. Continuous and infinite in domain (cannot output missing values or NAN values)
4. Good activation functions are non-linear (they provide some sort of non-linearity so as to increase correlation)
5. Want the activation function to be effectively computable (must be fast and simple for CPU optimization)

"""

# Selective correlation - want one incoming signal to increase or decrease how correlated the neuron
# is to all other signals. 

"""
Example of Activation Functions: 
1. relu
2. sigmoid
3. tanh
4. softmax (attenuation)
5. raw dot product value (w/o limiting output values to 0-1 ~ think temperature data)
"""



'\nExample of Activation Functions: \n1. relu\n2. sigmoid\n3. tanh\n4. softmax (attenuation)\n5. raw dot product value (w/o limiting output values to 0-1 ~ think temperature data)\n'

In [20]:
# Start New Network

import numpy as np, sys, gzip, hashlib
from keras.datasets import mnist

(x_test, y_test), (x_train, y_train) = mnist.load_data()
print(len(x_test)) # 60000
print(len(x_train)) # 10000

60000
10000


In [38]:
train_filename = hashlib.md5("mnist_train".encode("utf-8")).hexdigest()
test_filename = hashlib.md5("mnist_test".encode("utf-8")).hexdigest()
def getdata():
    import os
    x_train = None
    y_train = None
    x_test = None
    y_test = None
    if not train_filename in os.listdir() or test_filename in os.listdir():
        (x_test, y_test), (x_train, y_train) = mnist.load_data()
        with open(train_filename, 'wb') as f:
            f.write(x_test)
            f.write(y_test)
        with open(test_filename, 'wb') as f: 
            f.write(x_train)
            f.write(y_train)
        return (x_test, y_test), (x_train, y_train)
    else: 
        with gzip.open(train_filename, 'rb') as f: 
            x_test = f.read(0, 60000)
            y_test = f.read(60001, 120000)
        with gzip.open(test_filename, 'rb') as f: 
            x_train = f.read(0, 10000)
            y_train = f.read(10001, 20000)
        return (x_test, y_test), (x_train, y_train)
            
(x_train, y_train), (x_test, y_test) = getdata()

In [42]:
images, labels = (x_train[0:1000].reshape(1000, 28*28) / 255, y_train[0:1000])

one_hot_labels = np.zeros((len(labels), 10))
for i, l in enumerate(labels): 
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))
for i, l in enumerate(y_test): 
    test_labels[i][l] = 1

# activation functions
def tanh(x):
    return np.tanh(x)
def tanh2deriv(output): 
    return 1 - (output ** 2) 
def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

alpha, iterations, hidden_size = (2, 300, 100)
pixels_per_image, num_labels = (784, 10)
batch_size = 100

weights_0_1 = .02 * np.random.random((pixels_per_image, hidden_size)) - 0.01
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iterations): 
    correct_cnt = 0
    for i in range(int(len(images) / batch_size)): 
        batch_start, batch_end=((i * batch_size), ((i+1)*batch_size))
        layer_0 = images[batch_start:batch_end]
        layer_1 = tanh(np.dot(layer_0, weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1*= dropout_mask * 2
        layer_2 = softmax(np.dot(layer_1, weights_1_2))
        
        for k in range(batch_size): 
            correct_cnt += int(np.argmax(layer_2[k:k+1]) == \
                              np.argmax(labels[batch_start+k:batch_start+k+1]))
            layer_2_delta = (labels[batch_start:batch_end]-layer_2) / (np.argmax(labels[batch_start+k:batch_start+k+1]))
            layer_1_delta = layer_2_delta.dot(weights_1_2.T) * tanh2deriv(layer_1)
            layer_1_delta *= dropout_mask
            
            weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
            weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
        test_correct_cnt = 0
        
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = tanh(np.dot(layer_0, weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)
            test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
            
        if(j%10 == 0):
            sys.stdout.write("\n"+ "I: " + str(j) + \
                            " Test-Acc: " + str(test_correct_cnt/float(len(test_images))) +\
                            " Train-Acc: " + str(correct_cnt/float(len(images))))

  layer_2_delta = (labels[batch_start:batch_end]-layer_2) / (np.argmax(labels[batch_start+k:batch_start+k+1]))



I: 0 Test-Acc: 0.098 Train-Acc: 0.009
I: 0 Test-Acc: 0.098 Train-Acc: 0.017
I: 0 Test-Acc: 0.098 Train-Acc: 0.03
I: 0 Test-Acc: 0.098 Train-Acc: 0.035
I: 0 Test-Acc: 0.098 Train-Acc: 0.046
I: 0 Test-Acc: 0.098 Train-Acc: 0.054
I: 0 Test-Acc: 0.098 Train-Acc: 0.065
I: 0 Test-Acc: 0.098 Train-Acc: 0.075
I: 0 Test-Acc: 0.098 Train-Acc: 0.083
I: 0 Test-Acc: 0.098 Train-Acc: 0.093
I: 10 Test-Acc: 0.098 Train-Acc: 0.013
I: 10 Test-Acc: 0.098 Train-Acc: 0.021
I: 10 Test-Acc: 0.098 Train-Acc: 0.034
I: 10 Test-Acc: 0.098 Train-Acc: 0.039
I: 10 Test-Acc: 0.098 Train-Acc: 0.05
I: 10 Test-Acc: 0.098 Train-Acc: 0.058
I: 10 Test-Acc: 0.098 Train-Acc: 0.069
I: 10 Test-Acc: 0.098 Train-Acc: 0.079
I: 10 Test-Acc: 0.098 Train-Acc: 0.087
I: 10 Test-Acc: 0.098 Train-Acc: 0.097
I: 20 Test-Acc: 0.098 Train-Acc: 0.013
I: 20 Test-Acc: 0.098 Train-Acc: 0.021
I: 20 Test-Acc: 0.098 Train-Acc: 0.034
I: 20 Test-Acc: 0.098 Train-Acc: 0.039
I: 20 Test-Acc: 0.098 Train-Acc: 0.05
I: 20 Test-Acc: 0.098 Train-Acc: 0.05

KeyboardInterrupt: 