In [2]:
# Neural Network Components

import numpy as np
import random as rd
import pickle as pk

class NeuralNet:
    def __init__(self, size_in, size_out, hidden, rate = 0.001, w_decay = 0, av = None, loss = None):
        self.x = np.zeros((size_in, 1), dtype=np.float64)
        self.y = np.zeros((size_out, 1), dtype=np.float64)
        self.weight = []
        self.weight_ = []
        self.bias = []
        self.bias_ = []
        self.z = []
        self.activations = []
        
        idx = 0
        self.layer = len(hidden) + 1
        n = self.layer - 1
        self.weight.append(np.random.rand(hidden[idx], size_in) * np.sqrt(2 / size_in))
        self.weight_.append(np.zeros((hidden[idx], size_in), dtype=np.float64))
        self.bias.append(np.random.rand(hidden[idx], 1))
        self.bias_.append(np.zeros((hidden[idx], 1), dtype=np.float64))
        self.activations.append(np.zeros((hidden[idx], 1), dtype=np.float64))
        self.z.append(np.zeros((hidden[idx], 1), dtype=np.float64))
        idx += 1
        
        while idx < n:
            self.weight.append(np.random.rand(hidden[idx], hidden[idx - 1]) * np.sqrt(2 / hidden[idx - 1]))
            self.weight_.append(np.zeros((hidden[idx], hidden[idx - 1]), dtype=np.float64))
            self.bias.append(np.random.rand(hidden[idx], 1))
            self.bias_.append(np.zeros((hidden[idx], 1), dtype=np.float64))
            self.z.append(np.zeros((hidden[idx], 1), dtype=np.float64))
            self.activations.append(np.zeros((hidden[idx], 1), dtype=np.float64))
            idx += 1
        
        self.weight.append(np.random.rand(size_out, hidden[idx - 1]) * np.sqrt(2 / hidden[idx - 1]))
        self.weight_.append(np.zeros((size_out, hidden[idx - 1]), dtype=np.float64))
        self.bias.append(np.random.rand(size_out, 1))
        self.bias_.append(np.zeros((size_out, 1), dtype=np.float64))
        self.activations.append(np.zeros((size_out, 1), dtype=np.float64))
        self.z.append(np.zeros((size_out, 1), dtype=np.float64))
        
        self.rate = rate
        self.w_decay = w_decay
        
        if av != None:
            if(hasattr(av[0], '__call__') and  hasattr(av[1], '__call__')):
                self.activate = av[0]
                self.activate_ = av[1]
                
        if loss != None:
            if(hasattr(loss[0], '__call__') and  hasattr(loss[1], '__call__')):
                self.cost = loss[0]
                self.cost_ = loss[1]
    
    def activate(self, x):
        return (1 - np.exp(-(x * 2))) / (1 + np.exp(-(x * 2)))
    
    def activate_(self, x):
        return 1 - np.square(self.activate(x))   
    
    def cost(self, y):
        return (self.y - y) ** 2
    
    def cost_(self, y):
        return (self.y - y) * 2
    
    def feed(self, x):
        self.x[:] = x.reshape((x.shape[0], 1))
        idx = 0
        n = self.layer - 1
        self.z[idx] = self.weight[idx].dot(self.x) + self.bias[idx]
        self.activations[idx] = self.activate(self.z[idx])
        idx += 1
        
        while idx < n:
            self.z[idx] = self.weight[idx].dot(self.activations[idx - 1]) + self.bias[idx]
            self.activations[idx] = self.activate(self.z[idx])
            idx += 1
        
        self.z[idx] = self.weight[idx].dot(self.activations[idx - 1]) + self.bias[idx]
        self.y = self.activate(self.z[idx])
        
    def propagate(self, y):
        y = y.reshape((y.shape[0], 1))
        idx = self.layer - 1
        i_ = self.activate_(self.z[idx]) * self.cost_(y)
        self.weight_[idx] = i_.dot(self.activations[idx - 1].T)
        self.bias_[idx] = i_
        c_ = self.weight[idx].T.dot(i_)
        idx -= 1
        
        while idx > 0:
            i_ = self.activate_(self.z[idx]) * c_
            self.weight_[idx] = i_.dot(self.activations[idx - 1].T)
            self.bias_[idx] = i_
            c_ = self.weight[idx].T.dot(i_)
            idx -= 1
        
        i_ = self.activate_(self.z[idx]) * c_
        self.weight_[idx] = i_.dot(self.x.T)
        self.bias_[idx] = i_
        
        
        while idx < self.layer:
            self.weight_[idx] += self.weight[idx] * self.w_decay
            w, b = self.update(self.weight_[idx], self.bias_[idx], idx)
            self.weight[idx] -= w
            self.bias[idx] -= b
            idx += 1
    
    def update(self, w_, b_, idx):
        return self.rate * w_, self.rate * b_
    
    def heetal_w(self, cur, prev, com):
        return np.random.randn(com, cur) * np.sqrt(2 / prev)
           
    def result(self):
        return self.y
    
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_(x):
    return sigmoid(x) * (1 - sigmoid(x))

def tanh(x):
    return (2 / (1 + np.exp(-2 * x))) - 1

def tanh_(x):
    return 1 - np.square(tanh(x))

def relu(x, a = 0.01):
    return x * (x > 0)

def relu_(x, a = 0.01):
    return 1 * (x > 0)
    
def soe(dif):
    return np.square(dif)

def soe_(dif):
    return 2 * dif

class AdaDelta:
    def __init__(self, ann, arbitary = 1e-25, moment = 0.9):
        self.epsilon = arbitary
        self.moment = moment
        self.rate = ann.rate
        self.E = [[x for x in self.genE(ann)], [x for x in self.genE(ann, is_weight=False)]]
        self.delta = [[x for x in self.genE(ann)], [x for x in self.genE(ann, is_weight=False)]]
        ann.update = self
        
    def evaluate(self, w_, b_, idx):
        self.E[0][idx] = self.moment * self.E[0][idx] + (1 - self.moment) * (w_ ** 2)
        delta = self.rate * w_
        delta /= np.sqrt(self.E[0][idx] + self.epsilon)
        old_delta = self.delta[0][idx]
        self.delta[0][idx] = self.moment * self.delta[0][idx] + (1 - self.moment) * (delta ** 2)
        
        self.E[1][idx] = self.moment * self.E[1][idx] + (1 - self.moment) * (b_ ** 2)
        delta = self.rate * b_
        delta /= np.sqrt(self.E[1][idx] + self.epsilon)
        old_delta_b = self.delta[1][idx]
        self.delta[1][idx] = self.moment * self.delta[1][idx] + (1 - self.moment) * (delta ** 2)
        
        w_rate = np.sqrt(old_delta + self.epsilon)
        w_rate /= np.sqrt(self.E[0][idx] + self.epsilon)
        
        b_rate = np.sqrt(old_delta_b + self.epsilon)
        b_rate /= np.sqrt(self.E[1][idx] + self.epsilon)
        
        return w_rate, b_rate
        
    def __call__(self, w_, b_, idx):
        w_rate, b_rate = self.evaluate(w_, b_, idx)
        return w_rate * w_, b_rate * b_
            
    def genE(self, ann, is_weight = True):
        l = ann.layer
        idx = 0
        while idx < l:
            yield np.zeros(ann.weight_[idx].shape if is_weight else ann.bias_[idx].shape, dtype=np.float64)
            idx += 1

In [3]:
# Load Mnist Handwritten digits

import pickle
import gzip

with gzip.open('tmp/mnist.pkl.gz', 'rb') as file:
    train_set, valid_set, test_set = pickle.load(file, encoding='iso-8859-1')
train_x, train_y = train_set
valid_x, valid_y = valid_set
test_x, test_y = test_set

In [4]:
def train(ann, x, y, n = 1):
    i = 0
    epoch = 1
    out = np.zeros(10)
    plot_size = (x.shape[0] * n) + 1
    costs = np.zeros(plot_size)
    w_ = np.zeros(plot_size)
    b_ = np.zeros(plot_size)
    while i < n:
        idx = x.shape[0] - 1
        while idx > -1:
            ann.feed(x[idx])
            out[y[idx]] = 1
            costs[epoch] = ann.cost(out).sum()
            ann.propagate(out)
            w_[epoch] = ann.weight_[2][0][0]
            b_[epoch] = ann.bias_[2][0][0]
            out[y[idx]] = 0
            idx -= 1
            epoch += 1
        i += 1
    return (w_, b_, costs)

def validate(ann, x, y, y_con = lambda x: True):
    cost = 0
    idx = 0
    count = 0
    out = np.zeros(10)
    
    while idx < y.size - 1:
        if y_con(y[idx]):
            ann.feed(x[idx])
            out[y[idx]] = 1
            cost += ann.cost(out.reshape((out.shape[0], 1))).sum()
            out[y[idx]] = 0
            count += 1
        idx += 1
    
    return None if count == 0 else 1 - cost / count

In [24]:
ann_sigmoid = NeuralNet(784, 10, [25, 25], rate = 0.001, av = (sigmoid, sigmoid_))
AdaDelta(ann_sigmoid, arbitary=1e-20)
dW, dB, costs_sigmoid = train(ann_sigmoid, train_x, train_y)

In [53]:
train(ann_sigmoid, valid_x, valid_y)
validate(ann_sigmoid, test_x, test_y)

0.8381517601524926

In [224]:
ann_sigmoid = NeuralNet(784, 10, [25, 25], rate=0.1, av = sigmoid, av_ = sigmoid_)
dW, dB, costs_sigmoid = train(ann_sigmoid, train_x, train_y)

  "matplotlib is currently using a non-GUI backend, "


In [11]:
import matplotlib
matplotlib.use("TkAgg")
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2TkAgg
from matplotlib.figure import Figure
import matplotlib.pyplot as plot
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import ScalarFormatter, FormatStrFormatter

class FixedOrderFormatter(ScalarFormatter):
    """Formats axis ticks using scientific notation with a constant order of 
    magnitude"""
    def __init__(self, order_of_mag=0, useOffset=True, useMathText=False):
        self._order_of_mag = order_of_mag
        ScalarFormatter.__init__(self, useOffset=useOffset, 
                                 useMathText=useMathText)
    def _set_orderOfMagnitude(self, range):
        """Over-riding this to avoid having orderOfMagnitude reset elsewhere"""
        self.orderOfMagnitude = self._order_of_mag


import tkinter as tk
from tkinter import ttk


fig = plot.figure()
fig.tight_layout(True)
ax = fig.add_subplot(111, projection='3d')

rpr_dW = dW[:60]
rpr_dB = dB[:60]
rpr_costs = costs_sigmoid[:60]

for k in range(0, rpr_dW.size, 1):
    ax.plot(rpr_dW[:k], rpr_dB[:k], rpr_costs[:k])
    
    #ax.set_xlim3d(-0.4, 0.4)
    #ax.set_ylim3d(-0.4, 0.4)
    #ax.set_zlim3d(0, 100)
    ax.xaxis.set_label_text('dW')
    ax.yaxis.set_label_text('dB')
    ax.zaxis.set_label_text('Costs')
    ax.xaxis.label.set_fontsize(12)
    ax.yaxis.label.set_fontsize(12)
    ax.zaxis.label.set_fontsize(12)
    ax.xaxis.set_major_formatter(FixedOrderFormatter(6))
    ax.yaxis.set_major_formatter(FixedOrderFormatter(3))
    ax.zaxis.set_major_formatter(FixedOrderFormatter(6))
    plot.draw()
    plot.pause(0.05)
    if k != rpr_dW.size - 1:
        ax.cla()

In [14]:
# Q1 sum = 88
# roll 0.21590909
# year 0.10227273
# attendance 0.14772727
# exam 0.15909091
# external 0.17045455
# record 0.20454545

# Q2 
# Act 0.4
# Int 0.111111111
# Ext 0.48888889

ann = NeuralNet(6, 3, [30, 30], av=relu, av_=relu_, rate=0.00000001, w_decay=1)
idx = 0
epoch = idx + 1
n = 1000000
dB = np.zeros(n + 1)
dW = np.zeros(n + 1)
costs = np.zeros(n + 1)
while idx < n:
    roll = rd.randint(0, 101)
    year = rd.randint(0, 101)
    attendance = rd.randint(0, 101)
    exam = rd.randint(0, 101)
    external = rd.randint(0, 101)
    record = rd.randint(0, 101)
    
    activities = 0.22 * roll + 0.10 * year + 0.15 * attendance + 0.16 * exam + 0.17 * external + 0.20 * record
    
    x = np.array([roll, year, attendance, exam, external, record])
    y = np.array([activities, exam, external])

    ann.feed(x)
    ann.propagate(y)
    
    dB[epoch] = ann.bias_[2][0][0]
    dW[epoch] = ann.weight_[2][0][0]
    costs[epoch] = ann.cost(y.reshape((y.shape[0], 1))).sum()
    
    idx += 1
    epoch += 1

In [5]:
a = np.zeros(100)
a.mean()

In [6]:
with open('neuro.pickle', 'wb') as f:
    pk.dump(ann, f)

In [7]:
with open('neuro.pickle', 'rb') as f:
    ann = pk.load(f)
