In [2]:
import numpy as np
import random
import matplotlib
import matplotlib.pyplot as plot
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
matplotlib.rcParams.update({'font.size': 14})

In [1]:
import numpy as np
# hidden : [neurons of each specific layer]
class NeuralNet:
    def __init__(self, size_in, size_out, hidden, rate = 0.001, w_decay = 0, av = None, loss = None):
        self.x = np.zeros((size_in, 1), dtype=np.float64)
        self.y = np.zeros((size_out, 1), dtype=np.float64)
        self.weight = []
        self.weight_ = []
        self.bias = []
        self.bias_ = []
        self.z = []
        self.activations = []
        
        idx = 0
        self.layer = len(hidden) + 1
        n = self.layer - 1
        self.weight.append(np.random.rand(hidden[idx], size_in) * np.sqrt(2 / size_in))
        self.weight_.append(np.zeros((hidden[idx], size_in), dtype=np.float64))
        self.bias.append(np.random.rand(hidden[idx], 1))
        self.bias_.append(np.zeros((hidden[idx], 1), dtype=np.float64))
        self.activations.append(np.zeros((hidden[idx], 1), dtype=np.float64))
        self.z.append(np.zeros((hidden[idx], 1), dtype=np.float64))
        idx += 1
        
        while idx < n:
            self.weight.append(np.random.rand(hidden[idx], hidden[idx - 1]) * np.sqrt(2 / hidden[idx - 1]))
            self.weight_.append(np.zeros((hidden[idx], hidden[idx - 1]), dtype=np.float64))
            self.bias.append(np.random.rand(hidden[idx], 1))
            self.bias_.append(np.zeros((hidden[idx], 1), dtype=np.float64))
            self.z.append(np.zeros((hidden[idx], 1), dtype=np.float64))
            self.activations.append(np.zeros((hidden[idx], 1), dtype=np.float64))
            idx += 1
        
        self.weight.append(np.random.rand(size_out, hidden[idx - 1]) * np.sqrt(2 / hidden[idx - 1]))
        self.weight_.append(np.zeros((size_out, hidden[idx - 1]), dtype=np.float64))
        self.bias.append(np.random.rand(size_out, 1))
        self.bias_.append(np.zeros((size_out, 1), dtype=np.float64))
        self.activations.append(np.zeros((size_out, 1), dtype=np.float64))
        self.z.append(np.zeros((size_out, 1), dtype=np.float64))
        
        self.rate = rate
        self.w_decay = w_decay
        
        if av != None:
            if(hasattr(av[0], '__call__') and  hasattr(av[1], '__call__')):
                self.activate = av[0]
                self.activate_ = av[1]
                
        if loss != None:
            if(hasattr(loss[0], '__call__') and  hasattr(loss[1], '__call__')):
                self.cost = loss[0]
                self.cost_ = loss[1]
    
    def activate(self, x):
        return (1 - np.exp(-(x * 2))) / (1 + np.exp(-(x * 2)))
    
    def activate_(self, x):
        return 1 - np.square(self.activate(x))   
    
    def cost(self, y):
        return (self.y - y) ** 2
    
    def cost_(self, y):
        return (self.y - y) * 2
    
    def feed(self, x):
        self.x[:] = x.reshape((x.shape[0], 1))
        idx = 0
        n = self.layer - 1
        self.z[idx] = self.weight[idx].dot(self.x) + self.bias[idx]
        self.activations[idx] = self.activate(self.z[idx])
        idx += 1
        
        while idx < n:
            self.z[idx] = self.weight[idx].dot(self.activations[idx - 1]) + self.bias[idx]
            self.activations[idx] = self.activate(self.z[idx])
            idx += 1
        
        self.z[idx] = self.weight[idx].dot(self.activations[idx - 1]) + self.bias[idx]
        self.y = self.activate(self.z[idx])
        
    def propagate(self, y):
        y = y.reshape((y.shape[0], 1))
        idx = self.layer - 1
        i_ = self.activate_(self.z[idx]) * self.cost_(y)
        self.weight_[idx] = i_.dot(self.activations[idx - 1].T)
        self.bias_[idx] = i_
        c_ = self.weight[idx].T.dot(i_)
        idx -= 1
        
        while idx > 0:
            i_ = self.activate_(self.z[idx]) * c_
            self.weight_[idx] = i_.dot(self.activations[idx - 1].T)
            self.bias_[idx] = i_
            c_ = self.weight[idx].T.dot(i_)
            idx -= 1
        
        i_ = self.activate_(self.z[idx]) * c_
        self.weight_[idx] = i_.dot(self.x.T)
        self.bias_[idx] = i_
        
        
        while idx < self.layer:
            self.weight_[idx] += self.weight[idx] * self.w_decay
            w, b = self.update(self.weight_[idx], self.bias_[idx], idx)
            self.weight[idx] -= w
            self.bias[idx] -= b
            idx += 1
    
    def update(self, w_, b_, idx):
        return self.rate * w_, self.rate * b_
    
    def heetal_w(self, cur, prev, com):
        return np.random.randn(com, cur) * np.sqrt(2 / prev)
           
    def result(self):
        return self.y
    
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_(x):
    return sigmoid(x) * (1 - sigmoid(x))

def tanh(x):
    return (2 / (1 + np.exp(-2 * x))) - 1

def tanh_(x):
    return 1 - np.square(tanh(x))

def relu(x, a = 0.01):
    return x * (x > 0)

def relu_(x, a = 0.01):
    return 1 * (x > 0)
    
def soe(dif):
    return np.square(dif)

def soe_(dif):
    return 2 * dif

class AdaDelta:
    def __init__(self, ann, arbitary = 1e-25, moment = 0.9):
        self.epsilon = arbitary
        self.moment = moment
        self.rate = ann.rate
        self.E = [[x for x in self.genParams(ann)], [x for x in self.genParams(ann, is_weight=False)]]
        self.delta = [[x for x in self.genParams(ann)], [x for x in self.genParams(ann, is_weight=False)]]
        ann.update = self
        
    def evaluate(self, w_, b_, idx):
        self.E[0][idx] = self.moment * self.E[0][idx] + (1 - self.moment) * (w_ ** 2)
        delta = self.rate * w_
        delta /= np.sqrt(self.E[0][idx] + self.epsilon)
        old_delta = self.delta[0][idx]
        self.delta[0][idx] = self.moment * self.delta[0][idx] + (1 - self.moment) * (delta ** 2)
        
        self.E[1][idx] = self.moment * self.E[1][idx] + (1 - self.moment) * (b_ ** 2)
        delta = self.rate * b_
        delta /= np.sqrt(self.E[1][idx] + self.epsilon)
        old_delta_b = self.delta[1][idx]
        self.delta[1][idx] = self.moment * self.delta[1][idx] + (1 - self.moment) * (delta ** 2)
        
        w_rate = np.sqrt(old_delta + self.epsilon)
        w_rate /= np.sqrt(self.E[0][idx] + self.epsilon)
        
        b_rate = np.sqrt(old_delta_b + self.epsilon)
        b_rate /= np.sqrt(self.E[1][idx] + self.epsilon)
        
        return w_rate, b_rate
        
    def __call__(self, w_, b_, idx):
        w_rate, b_rate = self.evaluate(w_, b_, idx)
        return w_rate * w_, b_rate * b_
            
    def genParams(self, ann, is_weight = True):
        l = ann.layer
        idx = 0
        while idx < l:
            yield np.zeros(ann.weight_[idx].shape if is_weight else ann.bias_[idx].shape, dtype=np.float64)
            idx += 1
            
class Adam:
    def __init__(self, ann, arbitary = 1e-8, beta1 = 0.9, beta2 = 0.999):
        self.epsilon = arbitary
        self.beta1 = beta1
        self.beta2 = beta2
        self.rate = ann.rate
        self.M = [[x for x in self.genParams(ann)], [x for x in self.genParams(ann, is_weight=False)]]
        self.V = [[x for x in self.genParams(ann)], [x for x in self.genParams(ann, is_weight=False)]]
        ann.update = self
    
    def __call__(self, w_, b_, idx):
        self.M[0][idx] = self.beta1 * self.M[0][idx] + (1 - self.beta1) * w_
        self.V[0][idx] = self.beta2 * self.V[0][idx] + (1 - self.beta2) * (w_ ** 2)
        M_cup = self.M[0][idx] / (1 - self.beta1)
        V_cup = self.V[0][idx] / (1 - self.beta2)
        weight = self.rate * (M_cup / (np.sqrt(V_cup) + self.epsilon))
        
        self.M[1][idx] = self.beta1 * self.M[1][idx] + (1 - self.beta1) * b_
        self.V[1][idx] = self.beta2 * self.V[1][idx] + (1 - self.beta2) * (b_ ** 2)
        M_cup = self.M[1][idx] / (1 - self.beta1)
        V_cup = self.V[1][idx] / (1 - self.beta2)
        bias = self.rate * (M_cup / (np.sqrt(V_cup) + self.epsilon))
        
        return weight, bias
            
    def genParams(self, ann, is_weight = True):
        l = ann.layer
        idx = 0
        while idx < l:
            yield np.zeros(ann.weight_[idx].shape if is_weight else ann.bias_[idx].shape, dtype=np.float64)
            idx += 1

In [None]:
#https://medium.com/data-science-group-iitr/logistic-regression-simplified-9b4efe801389
#https://stackoverflow.com/questions/3985619/how-to-calculate-a-logistic-sigmoid-function-in-python

In [None]:
fig, (ax0, ax1) = plot.subplots(nrows=2, figsize=(12,9))

ax0.plot(x, sg, linewidth=3)
ax0.spines['bottom'].set_position('zero')
ax0.set_title('Sigmoid')

ax1.plot(x, sg_, linewidth=3)
ax1.spines['bottom'].set_position('zero')
ax1.set_title('dSigmoid')

for ax in (ax0, ax1):
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.get_xaxis().tick_bottom()
    ax.spines['left'].set_position('zero')
    ax.spines['left'].set_smart_bounds(True)
    ax.spines['bottom'].set_smart_bounds(True)
    ax.get_yaxis().tick_left()


In [None]:
fig, (ax0, ax1) = plot.subplots(nrows=2, figsize=(12,9))

ax0.plot(x, th, linewidth=3)
ax0.spines['bottom'].set_position('center')
ax0.set_title('Tanh')

ax1.plot(x, th_, linewidth=3)
ax1.spines['bottom'].set_position('zero')
ax1.set_title('dTanh')

for ax in (ax0, ax1):
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.get_xaxis().tick_bottom()
    ax.spines['left'].set_position('zero')
    ax.spines['left'].set_smart_bounds(True)
    ax.spines['bottom'].set_smart_bounds(True)
    ax.get_yaxis().tick_left()

In [None]:
fig, (ax0, ax1) = plot.subplots(nrows=2, figsize=(12,9))

ax0.plot(x, square, linewidth=3)
ax0.spines['bottom'].set_position('zero')
ax0.set_title('SoE')

ax1.plot(x, square_, linewidth=3)
ax1.spines['bottom'].set_position('zero')
ax1.set_title('dSoE')

for ax in (ax0, ax1):
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.get_xaxis().tick_bottom()
    ax.spines['left'].set_position('zero')
    ax.spines['left'].set_smart_bounds(True)
    ax.spines['bottom'].set_smart_bounds(True)
    ax.get_yaxis().tick_left()

In [None]:
fig, (ax0, ax1) = plot.subplots(nrows=2, figsize=(12,9))

ax0.plot(x, r, linewidth=3)
ax0.spines['bottom'].set_position('zero')
ax0.set_title('ReLU')

ax1.plot(x, r_, linewidth=3)
ax1.spines['bottom'].set_position('zero')
ax1.set_title('dReLU')

for ax in (ax0, ax1):
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.get_xaxis().tick_bottom()
    ax.spines['left'].set_position('zero')
    ax.spines['left'].set_smart_bounds(True)
    ax.spines['bottom'].set_smart_bounds(True)
    ax.get_yaxis().tick_left()

In [2]:
import pickle
import gzip

In [3]:
with gzip.open('tmp/mnist.pkl.gz', 'rb') as file:
    train_set, valid_set, test_set = pickle.load(file, encoding='iso-8859-1')
train_x, train_y = train_set
valid_x, valid_y = valid_set
test_x, test_y = test_set

In [6]:
out = np.zeros((10, 1))
n = 1 # Only one training? If not change it
plot_size = (train_x.shape[0] * n) + 1
xaxis = np.arange(0, plot_size, 1)

In [120]:
ann_sigmoid = NeuralNet(784, 10, [25, 25], av = sigmoid, av_ = sigmoid_)
AdaDelta(ann_sigmoid)
cost_sinh, w_sinh, b_sinh = train(ann_sigmoid, train_x, train_y)
validate(ann_sigmoid, valid_x, valid_y)
validate(ann_sigmoid, test_x, test_y)

0.8314049564319982

In [6]:
ann_tanh = NeuralNet(784, 10, [25, 25])
Adam(ann_tanh)
cost_tanh, w_tanh, b_tanh = train(ann_tanh, train_x, train_y)
validate(ann_tanh, valid_x, valid_y)
validate(ann_tanh, test_x, test_y)

NameError: name 'validate' is not defined

In [116]:
ann_relu = NeuralNet(784, 10, [25, 25], w_decay=0.0001, av = relu, av_ = relu_)
AdaDelta(ann_relu)
cost_relu, w_relu, b_relu = train(ann_relu, train_x, train_y)

In [8]:
# 68
#train(ann_relu, valid_x, valid_y)
validate(ann_tanh, test_x, test_y)

0.8629670169697969

In [5]:
def train(ann, x, y, n = 1):
    i = 0
    epoch = 1
    out = np.zeros(10)
    plot_size = (x.shape[0] * n) + 1
    costs = np.zeros(plot_size)
    w_ = np.zeros(plot_size)
    b_ = np.zeros(plot_size)
    while i < n:
        idx = x.shape[0] - 1
        while idx > -1:
            ann.feed(x[idx])
            out[y[idx]] = 1
            costs[epoch] = ann.cost(out).sum()
            ann.propagate(out)
            w_[epoch] = ann.weight_[2][0][0]
            b_[epoch] = ann.bias_[2][0][0]
            out[y[idx]] = 0
            idx -= 1
            epoch += 1
        i += 1
    return (w_, b_, costs)

In [7]:
def validate(ann, x, y):
    idx = x.shape[0] - 1
    sum = 0.0
    out = np.zeros((10, 1))
    while idx > -1:
        ann.feed(x[idx])
        out[y[idx]] = 1
        sum += ann.cost(out.reshape((out.shape[0], 1))).sum()
        out[y[idx]] = 0
        idx -=  1
    sum /= x.shape[0]
    return 1 - sum

In [41]:
idx = test_x.shape[0] - 1
sum = 0.0
while idx > -1:
    ann_sigmoid.feed(test_x[idx])
    y[test_y[idx]] = 1
    sum = sum + ann.cost(y).sum()
    y[test_y[idx]][0] = 0
    idx = idx - 1
sum = sum / test_x.shape[0]
print(1 - sum)

SyntaxError: invalid syntax (<ipython-input-41-463a7c81299a>, line 5)

In [None]:
fig, ax0 = plot.subplots(nrows=1, figsize=(12, 9))
ax0.plot(xaxis[::100], cost_sinh[::100], linewidth=1, label='sinh with 0.1 Learning Rate')
ax0.plot(xaxis[::100], cost_tanh[::100], linewidth=1, label='tanh with 0.1 Learning Rate')
ax0.plot(xaxis[::100], cost_relu[::100], linewidth=1, label='relu with 0.0001 Learning Rate')
ax0.spines['bottom'].set_position('zero')
ax0.set_title('Traning Costs')
ax0.legend()

ax0.spines['top'].set_visible(False)
ax0.spines['right'].set_visible(False)
ax0.get_xaxis().tick_bottom()
ax0.spines['left'].set_smart_bounds(True)
ax0.spines['left'].set_position('zero')
ax0.spines['bottom'].set_smart_bounds(True)
ax0.get_yaxis().tick_left()

In [118]:
import numpy as np
import matplotlib
matplotlib.use("TkAgg")
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2TkAgg
from matplotlib.figure import Figure
import matplotlib.pyplot as plot
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import ScalarFormatter, FormatStrFormatter

class FixedOrderFormatter(ScalarFormatter):
    """Formats axis ticks using scientific notation with a constant order of 
    magnitude"""
    def __init__(self, order_of_mag=0, useOffset=True, useMathText=False):
        self._order_of_mag = order_of_mag
        ScalarFormatter.__init__(self, useOffset=useOffset, 
                                 useMathText=useMathText)
    def _set_orderOfMagnitude(self, range):
        """Over-riding this to avoid having orderOfMagnitude reset elsewhere"""
        self.orderOfMagnitude = self._order_of_mag


import tkinter as tk
from tkinter import ttk



fig = plot.figure()
ax = fig.add_subplot(111, projection='3d')

rpr_dW = w_sinh[:60]
rpr_dB = b_sinh[:60]
rpr_costs = cost_sinh[:60]

for k in range(0, rpr_dW.size, 1):
    ax.plot(rpr_dW[:k], rpr_dB[:k], rpr_costs[:k])
    
    #ax.set_xlim3d(-0.4, 0.4)
    #ax.set_ylim3d(-0.4, 0.4)
    #ax.set_zlim3d(0, 100)
    ax.xaxis.set_label_text('dW')
    ax.yaxis.set_label_text('dB')
    ax.zaxis.set_label_text('Costs')
    ax.xaxis.label.set_fontsize(12)
    ax.yaxis.label.set_fontsize(12)
    ax.zaxis.label.set_fontsize(12)
    ax.xaxis.set_major_formatter(FixedOrderFormatter(0))
    ax.yaxis.set_major_formatter(FixedOrderFormatter(0))
    ax.zaxis.set_major_formatter(FixedOrderFormatter(0))
    plot.draw()
    plot.pause(0.05)
    if k != rpr_dW.size - 1:
        ax.cla()

In [None]:
fig = plot.figure(figsize=(12, 9))
ax = fig.gca(projection = '3d')
ax.set_title('Gradient Plane')
#ax.plot(w_tanh[::100], b_tanh[::100], np.vstack((cost_relu[::100], xaxis[::100])), label = 'Tanh')
#ax.plot(w_sinh[::100], b_sinh[::100], np.vstack((cost_relu[::100], xaxis[::100])), label = 'Sinh')
ax.plot_wireframe(w_relu[::100], b_relu[::100], np.vstack((cost_relu[::100], xaxis[::100])), label = 'Relu')
ax.legend()

In [None]:
# roll, year, rc, semester, external, prev_performance
# activities, internal, external

activities = roll + academic_progress + attendance + semester + external + prev_performance
internal = roll + acdaemic_progress + attendance + semester + external + prev_performance
external = roll + academic_progress + attendance + semester + external + prev_performance

In [17]:
nero = NeuralNet(6, 3, [25, 25], rate=0.01, w_decay= 0.1, av= relu, av_=relu_)
nero.weight_[0].shape

(25, 6)