In [107]:
import pickle
import gzip
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot as plt
from PIL import Image
import os
import numpy as np
np.random.seed(666)

In [108]:
filename = '../mnist.pkl.gz'
f = gzip.open(filename, 'rb')
training_data, validation_data, test_data = pickle.load(f, encoding='latin1')
f.close()

In [109]:
# Functions
def one_hot_vect(tuple_data,classes):
    one_hot_encoded=np.zeros((len(tuple_data[1]),len(classes)))
    identity = np.identity(len(classes))
    for i in range(len(tuple_data[1])):
        one_hot_encoded[i] = np.add(one_hot_encoded[i],identity[tuple_data[1][i]])
    return one_hot_encoded

def softmax(weights,train_data):
    bias = np.ones((np.shape(train_data)[0],1))
    train_withBias = np.hstack((train_data,bias))
    num = np.dot(weights,train_withBias.T)
    # High value Fix
    # https://houxianxu.github.io/2015/04/23/logistic-softmax-regression/
    num = np.subtract(num,np.max(num,axis=0))
    num = np.exp(num)
    # Fix softmax when using batch size 1 the dimension of deno changes
    if(len(train_data) == 1):
        deno = np.sum(num,axis=0)
    else:
        deno = np.sum(num,axis=1)
        deno = deno.reshape((10,1))
    return np.divide(num,deno)

def cross_entropy_error(predicted,target):
    right = np.log(predicted+ 1/10**30)
    loss = np.multiply(np.matrix(target),np.transpose(np.matrix(right)))
    return -np.sum(np.sum(loss,axis=1),axis=0)

def batch_gradient(predicted,train_data,target):
    leftleft = np.transpose(predicted)
    left = np.subtract(leftleft,target)
    bias = np.ones((np.shape(train_data)[0],1))
    train_withBias = np.hstack((train_data,bias))
    gradient = np.dot(left.T,train_withBias)
    gradient = gradient/len(train_data)
    return np.matrix(gradient,dtype="float64")

def accuracy(predicted,target):
    correct = 0
    left = np.array(np.argmax(predicted,axis=0)).flatten()
    right = np.argmax(target,axis=1)
    confusion_mat = np.zeros((len(np.unique(right)),len(np.unique(left))))
    for i in range(len(target)):
        if(left[i] == right[i]):
            correct+=1
        confusion_mat[left[i]][right[i]] =confusion_mat[left[i]][right[i]] +1
    return correct/len(target),pd.DataFrame(np.matrix(confusion_mat,dtype="int32"))

def more_metrics(conf_mat):
    true_positives = 0
    precision = []
    recall = []
    for i in range(len(conf_mat)):
        true_positives += conf_mat.iloc[i,i]
    conf_mat = np.matrix(conf_mat)
    tp_fp = np.array(np.sum(conf_mat,axis=1)).ravel()
    relevant_elements = np.array(np.sum(conf_mat,axis=0)).ravel()
    for i in range(len(conf_mat)):
        precision.append(conf_mat[i,i]/tp_fp[i])
        recall.append(conf_mat[i,i]/relevant_elements[i])
    return true_positives,precision,recall

def epoch_shuffle(processed_train_data,target):
    # Merge
    trainDF = pd.DataFrame(processed_train_data)
    targetDF = pd.DataFrame(target)
    result = pd.concat([trainDF.reset_index(),targetDF.reset_index()],join='inner',axis=1)
    result = result.sample(frac=1)
    
    return np.asarray(result.iloc[:,1:np.shape(trainDF)[1]+1]),np.asarray(result.iloc[:,np.shape(trainDF)[1]+2:])

In [110]:
# Fixed Var
classes = np.unique(training_data[1])
weights = np.random.randn(len(classes), np.shape(training_data[0])[1]+1) * 0.001
train_data = training_data[0]

In [117]:
target = one_hot_vect(training_data,classes)
target_val = one_hot_vect(validation_data,classes)
target_test = one_hot_vect(test_data,classes)
scaler = StandardScaler()
scaler.fit(train_data)
scaler.fit(validation_data[0])
scaler.fit(test_data[0])
processed_train_data = scaler.transform(train_data)
processed_val_data = scaler.transform(validation_data[0])
processed_test_data = scaler.transform(test_data[0])
predicted = softmax(weights,processed_train_data)

In [141]:
batchSize = 512
start = 0
end = batchSize
batches = len(processed_train_data)/batchSize
weights = np.random.randn(len(classes), np.shape(training_data[0])[1]+1) * 0.001
predicted = softmax(weights,processed_train_data[start:end,:])
cost_initial = np.asscalar(cross_entropy_error(softmax(weights,processed_train_data),target))
cost_new = cost_initial*10
training_loss =[]
val_loss=[]
i=1
logging=False
while(i<batches):
        print("Percentage Done: "+str(i/batches))
        new_weights = batch_gradient(predicted,processed_train_data[start:end,:],target[start:end,:])
        weights = weights - 0.0003*new_weights
        if(logging and i%1==0):
            # Predict Train & Validation for these weights
            predict_val = softmax(weights,processed_val_data)
            # Calculate cost of predicting Validation and Training, To see if the hyper param are working
            acc,_  = accuracy(predict_val,target_val)
            val_loss.append(acc)
            predict_train = softmax(weights,processed_train_data)
            acc,_  = accuracy(predict_train,target)
            training_loss.append(acc)
        #print("Training Loss: "+str(cost))
        start = start + batchSize
        end = end + batchSize
        # New Prediction for the data
        predicted = softmax(weights,processed_train_data[start:end,:])
        if(not logging):
            cost = np.asscalar(cross_entropy_error(predicted,target[start:end,:]))
            print("Cost of Batch: "+str(cost))
        i = i+1

Percentage Done: 0.01024
Cost of Batch: 3191.8758388915717
Percentage Done: 0.02048
Cost of Batch: 3189.7329937908717
Percentage Done: 0.03072
Cost of Batch: 3216.730936190191
Percentage Done: 0.04096
Cost of Batch: 3187.905868602589
Percentage Done: 0.0512
Cost of Batch: 3193.2544285841504
Percentage Done: 0.06144
Cost of Batch: 3207.8128106787976
Percentage Done: 0.07168
Cost of Batch: 3183.6146998499603
Percentage Done: 0.08192
Cost of Batch: 3184.2066259615417
Percentage Done: 0.09216
Cost of Batch: 3181.3453743648624
Percentage Done: 0.1024
Cost of Batch: 3181.297754155782
Percentage Done: 0.11264
Cost of Batch: 3178.1537108474968
Percentage Done: 0.12288
Cost of Batch: 3178.116291463516
Percentage Done: 0.13312
Cost of Batch: 3175.696013113741
Percentage Done: 0.14336
Cost of Batch: 3174.9520248395506
Percentage Done: 0.1536
Cost of Batch: 3172.84614041246
Percentage Done: 0.16384
Cost of Batch: 3175.2320345425405
Percentage Done: 0.17408
Cost of Batch: 3172.3612846517926
Percent

In [186]:
i = 0
batchSize = 1
start = 0
end = batchSize
batches = len(processed_train_data)/batchSize
weights = np.random.randn(len(classes), np.shape(training_data[0])[1]+1) * 0.001
predicted = softmax(weights,processed_train_data[start:end,:])
cost_initial = np.asscalar(cross_entropy_error(softmax(weights,processed_train_data),target))
cost_new = cost_initial*0.1
train_acc = 0
while(cost_initial-cost_new > 0.001):
    i = 0
    start = 0
    end = batchSize
    while(i<batches-1):
        #print("Percentage Done: "+str(i/batches))
        new_weights = batch_gradient(predicted,processed_train_data[start:end,:],target[start:end,:])
        weights = weights - 0.0003*new_weights
        if(logging and i%1==0):
            # Predict Train & Validation for these weights
            predict_val = softmax(weights,processed_val_data)
            # Calculate cost of predicting Validation and Training, To see if the hyper param are working
            acc,_  = accuracy(predict_val,target_val)
            val_loss.append(acc)
            predict_train = softmax(weights,processed_train_data)
            acc,_  = accuracy(predict_train,target)
            training_loss.append(acc)
        #print("Training Loss: "+str(cost))
        start = start + batchSize
        end = end + batchSize
        # New Prediction for the data
        predicted = softmax(weights,processed_train_data[start:end,:])
        i = i+1
    cost_initital = cost_new
    pred_train = softmax(weights,processed_train_data)
    cost_new = np.asscalar(cross_entropy_error(pred_train,target))
    predict_val = softmax(weights,processed_val_data)
    val_acc,_ = accuracy(predict_val,target_val)
    train_acc_old = train_acc
    train_acc,_ = accuracy(pred_train,target)
    if(float(train_acc) - float(train_acc_old) < 0.000001 ):
        # Early stopping where just cost is changing but accuracy is stuck
        break
    print("Cost of Batch: "+str(cost_new))
    print("Val Acc: "+str(val_acc))
    print("Train Acc: "+str(train_acc))
    processed_train_data,target = epoch_shuffle(processed_train_data,target)

Cost of Batch: 447774.5214433932
Val Acc: 0.9161
Train Acc: 0.90656
Cost of Batch: 444569.00375248585
Val Acc: 0.9192
Train Acc: 0.9149
Cost of Batch: 443695.60562662286
Val Acc: 0.9223
Train Acc: 0.91916
Cost of Batch: 443898.7765061805
Val Acc: 0.9242
Train Acc: 0.92132
Cost of Batch: 442905.73542048794
Val Acc: 0.9249
Train Acc: 0.92288
Cost of Batch: 442416.99567592907
Val Acc: 0.9252
Train Acc: 0.92366
Cost of Batch: 442133.4571315744
Val Acc: 0.9264
Train Acc: 0.925
Cost of Batch: 442312.30546869314
Val Acc: 0.9251
Train Acc: 0.9265
Cost of Batch: 441564.2105875264
Val Acc: 0.9268
Train Acc: 0.9269
Cost of Batch: 442136.2830677732
Val Acc: 0.9266
Train Acc: 0.92836
Cost of Batch: 441555.1654576626
Val Acc: 0.9266
Train Acc: 0.9287
Cost of Batch: 442098.0657845792
Val Acc: 0.9271
Train Acc: 0.92906
Cost of Batch: 442150.7130962244
Val Acc: 0.9264
Train Acc: 0.92926
Cost of Batch: 441485.9173708781
Val Acc: 0.9279
Train Acc: 0.9299
Cost of Batch: 441034.51911005325
Val Acc: 0.9288


In [8]:
df = pd.DataFrame(training_loss)
ax = df.plot(figsize=(10,15))
ax.ticklabel_format(useOffset=False)
plt.savefig('./train_loss.png',bbox_inches='tight')
plt.show(ax)

TypeError: Empty 'DataFrame': no numeric data to plot

In [9]:
df = pd.DataFrame(val_loss)
ax = df.plot(figsize=(10,15))
ax.ticklabel_format(useOffset=False)
plt.savefig('./val_loss.png',bbox_inches='tight')
plt.show(ax)

TypeError: Empty 'DataFrame': no numeric data to plot

In [167]:
predict_val = softmax(weights,processed_val_data)
accuracy(predict_val,target_val)

(0.9276,      0     1    2    3    4    5    6     7    8    9
 0  965     0    4    7    2   12    3     6    4    4
 1    0  1043   13    2   10    4    3     5   31    5
 2    6     4  901   16    2   11    8     6    8    3
 3    1     2   14  918    2   30    0     5   22    9
 4    3     1   10    1  930    6    6     7    2   28
 5    4     5    5   46    0  795    5     0   22    6
 6    6     0    7    4    6   26  939     0    7    1
 7    4     1   14    5    4    6    1  1033    9   31
 8    2     7   18   23    2   20    2     1  884    6
 9    0     1    4    8   25    5    0    27   20  868)

In [168]:
predict_test = softmax(weights,processed_test_data)
accuracy(predict_test,target_test)

(0.9244,      0     1    2    3    4    5    6    7    8    9
 0  960     0    9    3    2    7   11    2    6   11
 1    0  1106    8    1    4    3    4    9   11    6
 2    0     3  930   20    7    2    6   22    8    1
 3    1     2   12  916    1   31    1    6   17   11
 4    1     1    8    3  918    8    7    6   10   29
 5    5     1    4   26    0  783   14    1   27    6
 6    8     4   13    3    8   17  911    0   13    0
 7    2     2   10   11    4    7    2  950   14   25
 8    3    16   34   19    6   30    2    2  857    7
 9    0     0    4    8   32    4    0   30   11  913)

In [133]:
true_positives,precision,recall = more_metrics(conf_mat)

NameError: name 'conf_mat' is not defined

# USPS Data Test

In [135]:
USPSMat  = []
USPSTar  = []
curPath  = '../USPSdata/Numerals'
savedImg = []

for j in range(0,10):
    curFolderPath = curPath + '/' + str(j)
    imgs =  os.listdir(curFolderPath)
    for img in imgs:
        curImg = curFolderPath + '/' + img
        if curImg[-3:] == 'png':
            img = Image.open(curImg,'r')
            img = img.resize((28, 28))
            savedImg = img
            imgdata = (255-np.array(img.getdata()))/255
            USPSMat.append(imgdata)
            USPSTar.append(j)

In [136]:
target_USPS = one_hot_vect((1,USPSTar),classes)
scaler.fit(USPSMat)
processed_USPSDat = scaler.transform(USPSMat)


In [170]:
predict_USPS = softmax(weights,processed_USPSDat)
accuracy(predict_USPS,target_USPS)

(0.3375668783439172,      0    1     2    3    4    5    6    7    8    9
 0  523   74   105   81   34  143  118  137  222   33
 1   13  350    92   75   96   67   24  236   77  140
 2  146  149  1104  140   31  144  366   58   79   79
 3   57  238    97  911   23  129   77  385  252  402
 4  153  338    35   16  994   43   62   61  135  121
 5  135   99   127  399   56  894  299  105  339   31
 6  220   46   196   46   69  244  727   36  203   18
 7  231  220    29   42  170   74   50  353   84  381
 8  129  346   152  204  304  210   61  442  453  353
 9  393  140    62   86  223   52  216  187  156  442)

In [37]:
predict_USPS

matrix([[5.37503408e-06, 3.69834962e-07, 2.79342750e-06, ...,
         1.70547558e-05, 1.76974197e-07, 1.87009545e-04],
        [3.37966925e-06, 1.07965933e-07, 3.25091099e-07, ...,
         9.24967841e-05, 3.95829284e-07, 7.08793064e-06],
        [7.26833205e-06, 5.04313666e-07, 1.78261529e-06, ...,
         8.34814537e-05, 4.30276090e-07, 7.55579221e-06],
        ...,
        [2.62365956e-05, 2.03227730e-06, 2.22502579e-06, ...,
         2.48036457e-04, 1.45435313e-04, 1.30467584e-04],
        [3.92569968e-06, 1.89821420e-06, 8.30071770e-07, ...,
         2.59259738e-04, 1.39723249e-07, 2.70461446e-05],
        [7.28843744e-06, 3.17793972e-07, 5.65148371e-07, ...,
         1.14518824e-05, 3.49620059e-06, 5.05272703e-06]])