In [411]:
import pickle
import gzip
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
np.random.seed(666)

In [2]:
filename = '../mnist.pkl.gz'
f = gzip.open(filename, 'rb')
training_data, validation_data, test_data = pickle.load(f, encoding='latin1')
f.close()

In [494]:
# Functions
def one_hot_vect(tuple_data,classes):
    one_hot_encoded=np.zeros((len(tuple_data[1]),len(classes)))
    identity = np.identity(len(classes))
    for i in range(len(tuple_data[1])):
        one_hot_encoded[i] = np.add(one_hot_encoded[i],identity[target_list[i]])
    return one_hot_encoded

def softmax(weights,train_data):
    bias = np.ones((np.shape(training_data[0])[0],1))
    train_withBias = np.hstack((train_data,bias))
    num = np.dot(weights,train_withBias.T)
    # High value Fix
    # https://houxianxu.github.io/2015/04/23/logistic-softmax-regression/
    num = np.subtract(num,np.max(num,axis=0))
    num = np.exp(num)
    deno = np.sum(num,axis=1)
    deno = deno.reshape((10,1))
    return np.divide(num,deno)

def cross_entropy_error(predicted,target):
    right = np.log(predicted)
    loss = np.multiply(np.matrix(target),np.transpose(np.matrix(right)))
    return -np.sum(np.sum(loss,axis=1),axis=0)

def batch_gradient(predicted,target):
    leftleft = np.transpose(predicted)
    left = np.subtract(leftleft,target)
    bias = np.ones((np.shape(training_data[0])[0],1))
    train_withBias = np.hstack((train_data,bias))
    gradient = np.dot(left.T,train_withBias)
    return np.matrix(gradient,dtype="float64")

def sgd_gradient(predicted,target):
    leftleft = np.transpose(predicted)
    left = np.subtract(leftleft,target)
    bias = np.ones((np.shape(training_data[0])[0],1))
    train_withBias = np.hstack((train_data,bias))
    gradient = np.dot(left.T,train_withBias)
    return np.matrix(gradient,dtype="float64")

def accuracy(predicted,target):
    correct = 0
    left = np.array(np.argmax(predicted,axis=0)).flatten()
    right = np.argmax(target,axis=1)
    confusion_mat = np.zeros((len(np.unique(right)),len(np.unique(left))))
    for i in range(len(target)):
        if(left[i] == right[i]):
            correct+=1
        confusion_mat[left[i]][right[i]] =confusion_mat[left[i]][right[i]] +1
    return correct/len(target),pd.DataFrame(np.matrix(confusion_mat,dtype="int32"))

def more_metrics(conf_mat):
    true_positives = 0
    precision = []
    recall = []
    for i in range(len(conf_mat)):
        true_positives += conf_mat.iloc[i,i]
    conf_mat = np.matrix(conf_mat)
    tp_fp = np.array(np.sum(conf_mat,axis=1)).ravel()
    relevant_elements = np.array(np.sum(conf_mat,axis=0)).ravel()
    for i in range(len(conf_mat)):
        precision.append(conf_mat[i,i]/tp_fp[i])
        recall.append(conf_mat[i,i]/relevant_elements[i])
    return true_positives,precision,recall

In [567]:
# Fixed Var
classes = np.unique(training_data[1])
weights = np.random.randn(len(classes), np.shape(training_data[0])[1]+1) * 0.001

In [483]:
target = one_hot_vect(training_data,classes)
scaler = StandardScaler()
scaler.fit(train_data)
processed_train_data = scaler.transform(train_data)

# -540929.479912691 Initial-

540976.0224725967

In [568]:
while(True):
    new_weights = batch_gradient(predicted,target)
    weights = weights - 0.0000003*new_weights
    predicted = softmax(weights,processed_train_data)
    cost = np.asscalar(cross_entropy_error(predicted,target))
    print(cost)

538953.8469144147
536918.9606629665
534938.2319579476
533009.0592003565
531129.6796080701
529298.4962249042
527514.1891057533
525775.4442822334
524081.02618871175
522429.7674005875
520820.57040731644
519252.39689343044
517724.2154190202
516235.04346925934
514783.92840347404
513369.9473793323
511992.21467241406
510649.871839129
509342.0906079751
508068.06015401654
506826.993138233
505618.1135976995
504440.68235187454
503293.94081014115
502177.20372857753
501089.78687093745
500031.00152124686
499000.1978125571
497996.7239492451
497019.93550446414
496069.2214215338
495143.9690727727
494243.5866933285
493367.513862431
492515.1821477375
491686.03862437856
490879.5353145892
490095.1373946265
489332.330797081
488590.61463392916
487869.4894716072
487168.4651932763
486487.07501842163
485824.8505043809
485181.33732829586
484556.0941462795
483948.6858806375
483358.69354577287
482785.6977306854
482229.29607132537
481689.09381056984
481164.70712756185
480655.7579304982
480161.8805740652
479682.7168

KeyboardInterrupt: 

In [676]:
acc,conf_mat  = accuracy(predicted,target)

In [680]:
true_positives,precision,recall = more_metrics(conf_mat)

In [681]:
true_positives

38512

In [682]:
precision

[0.700718993409227,
 0.633575647432985,
 0.8794258373205741,
 0.7223564393235458,
 0.8376198779424586,
 0.8367807446408424,
 0.8551165146909828,
 0.7990696010019681,
 0.8604579207920792,
 0.7982810920121335]

In [683]:
recall

[0.948499594484996,
 0.982388164846777,
 0.7399355877616747,
 0.762007449519702,
 0.7909034780819099,
 0.493786063027075,
 0.8523530599878812,
 0.8629951690821256,
 0.5743494423791822,
 0.6331194867682438]