In [2]:
import pandas as pd
import numpy as np
import random

# Load the optdigits training and testing splits.
# The files are read with header=None, so pandas labels the columns 0, 1, 2, ...;
# columns 0-63 become the X_* frames and column 64 becomes the y_* series.
training_df = pd.read_csv('data/optdigits.tra', header=None)
X_training = training_df.iloc[:, :64]
y_training = training_df[64]
# NOTE: this is not the last expression of the cell, so its result is discarded.
X_training.head()

testing_df = pd.read_csv('data/optdigits.tes', header=None)
X_testing = testing_df.iloc[:, :64]
y_testing = testing_df[64]

In [7]:
def _sigmoid(x):
    """Logistic function ``1 / (1 + e^-x)``; accepts scalars and NumPy arrays."""
    return 1 / (1 + np.exp(-x))


def _sigmoid_derivative(x):
    """Derivative of the logistic function evaluated at the pre-activation ``x``."""
    s = _sigmoid(x)
    return s * (1 - s)


class Layer():
    """A fully connected layer with per-neuron weights, biases and an activation.

    The layer caches the values of its most recent forward pass so that
    ``accumulate_error`` can back-propagate through it, and it accumulates
    weight/bias gradients until ``clear_errors`` is called.

    Attributes set by ``__init__``:
        no_of_neurons: number of neurons (outputs) of this layer.
        activation_func: key into ``ACTIVATION_FUNCS``.
        cached_inputs: pre-activation values of the last forward pass.
        cached_outputs: activated outputs of the last forward pass.

    Attributes set by ``init_weights_array``:
        weights: array of shape (no_of_neurons, no_of_previous_layer_neurons).
        biases: array of shape (no_of_neurons,).
        gradients: accumulated weight gradients, same shape as ``weights``.
        derivatives: accumulated bias gradients, shape (no_of_neurons,).
    """

    # Activation functions, keyed by name. Every entry works element-wise on
    # NumPy arrays as well as on scalars.
    ACTIVATION_FUNCS = {
        'none': lambda x: x,
        'relu': lambda x: np.maximum(0, x),
        'sigmoid': _sigmoid,
        'round': np.around
    }
    # Derivatives of the activations with respect to the PRE-activation input.
    # The helpers live at module level because names defined in a class body
    # are not visible from functions/lambdas created inside that body.
    ACTIVATION_FUNCS_DERIVATIVES = {
        'none': lambda x: 1,
        'relu': lambda x: np.where(np.asarray(x) <= 0, 0, 1),
        'sigmoid': _sigmoid_derivative,
        # Rounding is treated like the identity in the backward pass.
        'round': lambda x: 1
    }

    def __init__(self, no_of_neurons, activation_function):
        """Create a layer.

        Args:
            no_of_neurons: number of neurons in the layer.
            activation_function: key of ``ACTIVATION_FUNCS`` to apply.
        """
        self.no_of_neurons = no_of_neurons
        self.activation_func = activation_function
        self.cached_inputs = np.zeros((self.no_of_neurons,))
        self.cached_outputs = np.zeros((self.no_of_neurons,))

    def init_weights_array(self, no_of_previous_layer_neurons):
        """Draw weights and biases uniformly from [-1, 1) and reset the gradients.

        Args:
            no_of_previous_layer_neurons: number of inputs each neuron receives.
        """
        self.weights = 2 * np.random.rand(self.no_of_neurons, no_of_previous_layer_neurons) - 1
        self.biases = 2 * np.random.rand(self.no_of_neurons) - 1
        self.clear_errors()

    def clear_errors(self):
        """Zero the accumulated weight gradients and bias gradients."""
        self.gradients = np.zeros((self.no_of_neurons, self.weights.shape[1]))
        self.derivatives = np.zeros((self.no_of_neurons,))

    def get_outputs(self, previous_layer_outputs):
        """Run the forward pass for one sample and cache the result.

        Args:
            previous_layer_outputs: 1-D array with one value per input.

        Returns:
            1-D float array with the activated output of every neuron.
        """
        activation_inputs = self.weights @ np.asarray(previous_layer_outputs) + self.biases
        activation = Layer.ACTIVATION_FUNCS[self.activation_func]
        outputs = np.asarray(activation(activation_inputs), dtype=float)
        # The pre-activations are needed by accumulate_error: the activation
        # derivative must be evaluated at the input of the activation.
        self.cached_inputs = activation_inputs
        self.cached_outputs = outputs
        return outputs

    def accumulate_error(self, neuron_derivatives, previous_layer_outputs):
        """Back-propagate one sample through the layer and accumulate gradients.

        Uses the values cached by the most recent ``get_outputs`` call.

        Args:
            neuron_derivatives: derivative of the error with respect to each
                output of this layer, shape (no_of_neurons,).
            previous_layer_outputs: the inputs this layer received in the
                forward pass, shape (no_of_previous_layer_neurons,).

        Returns:
            Derivative of the error with respect to each output of the
            previous layer, shape (no_of_previous_layer_neurons,).
        """
        previous_layer_outputs = np.asarray(previous_layer_outputs, dtype=float)
        activation_derivative = Layer.ACTIVATION_FUNCS_DERIVATIVES[self.activation_func]
        # Error derivative with respect to each neuron's pre-activation input.
        deltas = np.asarray(neuron_derivatives, dtype=float) * activation_derivative(self.cached_inputs)

        self.gradients += np.outer(deltas, previous_layer_outputs)
        # The bias gradient is the delta itself, added exactly once per sample
        # (not once per incoming connection).
        self.derivatives += deltas

        return self.weights.T @ deltas
            
class InputLayer(Layer):
    """Pass-through first layer of a network.

    It uses the "none" activation and holds one unit weight and one zero bias
    per input instead of a weight matrix.
    """

    def __init__(self, input_size):
        """Create an input layer with ``input_size`` neurons."""
        super().__init__(no_of_neurons=input_size, activation_function="none")

    def init_weights_array(self):
        """Set every weight to 1 and every bias to 0 (takes no fan-in argument)."""
        neuron_count = self.no_of_neurons
        self.weights = np.ones(neuron_count)
        self.biases = np.zeros(neuron_count)
            
class NeuralNetwork:
    """A feed-forward network built from a list of layer objects.

    ``layers[0]`` is the input layer; every later layer must provide
    ``no_of_neurons``, ``init_weights_array``, ``get_outputs``,
    ``accumulate_error``, ``clear_errors`` and the ``weights``, ``biases``,
    ``gradients``, ``derivatives`` and ``cached_outputs`` attributes.
    """

    # Per-output error, keyed by name (y is the target, yhat the prediction).
    ERROR_FUNCTIONS = {
        "mse": lambda y, yhat: 0.5 * (y - yhat) ** 2
    }
    # Derivative of the error with respect to the prediction yhat.
    ERROR_FUNCTIONS_DERIVATIVES = {
        "mse": lambda y, yhat: yhat - y
    }

    def __init__(self, layers, error_function="mse"):
        """Store the layers and initialise their weights.

        Args:
            layers: list of layer objects, input layer first.
            error_function: key of ``ERROR_FUNCTIONS_DERIVATIVES``.
        """
        self.layers = layers
        self.error_function = error_function
        # The input layer takes no fan-in argument; every other layer is sized
        # by the neuron count of the layer before it.
        self.layers[0].init_weights_array()
        for i in range(1, len(layers)):
            self.layers[i].init_weights_array(self.layers[i - 1].no_of_neurons)

    def predict(self, X):
        """Run one sample through the network.

        Args:
            X: a single input sample; it is stored as the input layer's
                cached output and fed to the first non-input layer.

        Returns:
            The output of the last layer.
        """
        last_outputs = X
        self.layers[0].cached_outputs = X
        for i in range(1, len(self.layers)):
            last_outputs = self.layers[i].get_outputs(last_outputs)
        return last_outputs

    def _apply_accumulated_gradients(self, learning_rate):
        """Take one gradient-descent step on every non-input layer and reset it."""
        for i in range(len(self.layers) - 1, 0, -1):
            layer = self.layers[i]
            layer.weights -= learning_rate * layer.gradients
            layer.biases -= learning_rate * layer.derivatives
            layer.clear_errors()

    def train(self, X, y, learning_rate = 1e-06, batch_size=1):
        """Run one pass of mini-batch gradient descent over the samples.

        Args:
            X: sequence of input samples (anything ``np.asarray`` accepts).
            y: sequence of targets, one per sample.
            learning_rate: step size applied to the accumulated gradients.
            batch_size: number of samples accumulated before each update.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        X = np.asarray(X)
        y = np.asarray(y)
        error_derivative = NeuralNetwork.ERROR_FUNCTIONS_DERIVATIVES[self.error_function]

        samples_in_batch = 0
        for idx in range(len(X)):
            samples_in_batch += 1

            yhat = self.predict(X[idx])
            derivatives = error_derivative(y[idx], yhat)
            # Walk backwards through the layers; each one accumulates its own
            # gradients and returns the derivatives for the layer before it.
            for i in range(len(self.layers) - 1, 0, -1):
                derivatives = self.layers[i].accumulate_error(derivatives, self.layers[i - 1].cached_outputs)

            if samples_in_batch == batch_size:
                self._apply_accumulated_gradients(learning_rate)
                samples_in_batch = 0

        # Apply the trailing partial batch so its gradients are neither lost
        # nor carried over into a later train() call.
        if samples_in_batch > 0:
            self._apply_accumulated_gradients(learning_rate)

In [10]:
# Number of full passes over the training set.
EPOCH_COUNT = 1

# 64 -> 16 -> 16 -> 64 network; the last layer rounds its outputs.
autoencoder = NeuralNetwork(
    [
        InputLayer(64),
        Layer(16, "none"),
        Layer(16, "none"),
        Layer(64, "round"),
    ]
)
# The same array is passed as input and as target: the network is trained
# to reproduce its own input.
for i in range(EPOCH_COUNT):
    autoencoder.train(X=X_training.values, y=X_training.values)

In [11]:
# Element-wise difference between training sample 1 and its reconstruction.
sample = X_training.values[1]
print(sample - autoencoder.predict(sample))

[ 3.  0.  3.  4. -4.  5.  5. -1. -5.  4. 13. -7.  8. -2.  1. -3.  0.  7.
  7. -5.  5.  7.  4.  6.  3. 10.  1.  3. -1.  8.  9.  6. -1.  9. 10.  3.
  2.  7. 13. -2. -1.  6.  8. -4. -5. 12.  6.  0.  1.  3. 10.  0.  2.  6.
  4. -4. -4.  4.  7.  3.  7. -5. -2. -2.]
