### Softmax implementation (formulas from https://www.ics.uci.edu/~pjsadows/notes.pdf)

This implementation uses a bias node (an input that is always on) instead of a separate `+ b` term in the linear equation.

In [1]:
import numpy as np
import pandas as pd

### Downloading Datasets

In [2]:
from keras.datasets import mnist

# Load MNIST as (images, labels) pairs for the train and test splits.  The raw
# arrays keep the *_original suffix; later cells derive the flattened images
# and one-hot labels from them.
(train_images_original, train_labels_original), (test_images_original, test_labels_original) = mnist.load_data()

Using TensorFlow backend.


In [3]:
train_images_original.shape

(60000, 28, 28)

In [4]:
# Sample counts per split and the flattened length of one image,
# all read from the shapes of the raw arrays.
m_train = len(train_images_original)
m_test = len(test_images_original)
pixels = int(np.prod(train_images_original.shape[1:3]))

#### Flattening Image 

In [5]:
# Flatten every image into one row and scale the pixel values to [0, 1].
# The arithmetic runs before the bias column is written because it produces new
# arrays: reshape() alone may return a view, in which case assigning to column 0
# would also overwrite the top-left pixel of the *_original arrays.
train_images = train_images_original.reshape(len(train_images_original), -1).astype('float32') / 255
test_images = test_images_original.reshape(len(test_images_original), -1).astype('float32') / 255

# Pixel 0 (the top-left corner) is forced to 1.0 (= 255 / 255) in every sample
# so that it always activates and acts as the bias node.
train_images[:, 0] = 1.0
test_images[:, 0] = 1.0

In [6]:
# Shape of one flattened training sample.
train_images[0].shape

(784,)

In [7]:
import matplotlib.pyplot as plt
# Show training sample 5 reshaped back to 28x28; the top-left pixel is the
# always-on bias input.  The trailing ';' hides the AxesImage repr (a trailing
# ',' here would instead wrap it in a one-element tuple and echo that).
plt.imshow(train_images[5].reshape(28, 28));


(<matplotlib.image.AxesImage at 0x7faf836189b0>,)

#### One hot encoding

In [8]:
from keras.utils import to_categorical

# One-hot encode the digit labels into (n_samples, 10) arrays.  num_classes is
# passed explicitly so both splits always get 10 columns, independent of which
# digits happen to occur in a split; no reshape to hard-coded sizes is needed.
train_labels = to_categorical(train_labels_original, num_classes=10)
test_labels = to_categorical(test_labels_original, num_classes=10)


In [9]:
train_labels.shape

(60000, 10)

In [10]:
train_labels[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)

In [11]:
def generateWeights(k, n):
    """Return a (k, n) array of zeros; the training loop uses it as the initial weight matrix."""
    return np.zeros((k, n))

def generateRandomVector(k,n):
    """Return a (k, n) array of samples drawn uniformly from [0, 1) via NumPy's global RNG."""
    return np.random.rand(k, n)

In [12]:
generateWeights(10, 784).shape, generateRandomVector(10,784)[0].shape

((10, 784), (784,))

In [13]:
def backPropSM(X,y,W,target,batch_size):
    """Return the weight gradient averaged over the samples in X.

    Computes (y - target).T . X / m, where m is the number of rows of X.
    Presumably this is the softmax / cross-entropy gradient from the notes
    linked at the top of the notebook -- confirm against that reference.

    X          : array with one sample per row.
    y          : predicted outputs, one row per sample (a list of arrays works).
    W          : unused; kept so existing callers keep working.
    target     : array of target outputs, same layout as y.
    batch_size : unused; the sample count is taken from X itself.
    """
    n_samples = X.shape[0]
    residual = y - target
    return np.dot(residual.T, X) / n_samples

In [14]:
def softmax(s):
    """Numerically stable softmax over the last axis of ``s``.

    s is normally a vector of class scores (length 10 here: one node per digit
    0-9), in which case the result is the usual softmax of that vector.  A 2-D
    array is treated as one score vector per row, and every row is normalized
    independently.

    The maximum score is subtracted before exponentiating.  This leaves the
    result unchanged mathematically but keeps np.exp from overflowing to inf
    (and the quotient from becoming NaN) when the scores are large.
    """
    s = np.asarray(s)
    if s.size == 0:
        # Nothing to normalize; return an empty float array of the same shape.
        return np.exp(s)
    # A 0-d input has no axis to reduce over, so reduce over everything.
    axis = -1 if s.ndim > 0 else None
    shifted = s - np.max(s, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

In [15]:
def forwardPropSM(X, W, batch_size):
    """Run the softmax layer on every row of X, one sample at a time.

    X          : 2-D array with one sample per row.
    W          : weight matrix; np.dot(W, X[i]) must yield the class scores of
                 sample i (W is (10, 784) as used in this notebook).
    batch_size : unused; the number of samples is taken from X.shape[0].

    Returns a list with one softmax output per row of X, in row order.
    """
    return [softmax(np.dot(W, X[i])) for i in range(X.shape[0])]

In [16]:
#y = forwardPropSM2(test_images,trainedWeights, test_images.shape[0])
#y[0]

In [17]:
def forwardPropSM2(X, W, batch_size):
    """Vectorized forward pass: softmax outputs for all rows of X at once.

    X          : 2-D array with one sample per row.
    W          : weight matrix with one row of weights per class.
    batch_size : unused; kept for signature compatibility with forwardPropSM.

    Returns an array with one row per sample and one column per class.  The
    normalization is done per row (axis=1) so that each sample's outputs sum to
    1; normalizing over the whole score matrix would instead make all samples
    share a single distribution.
    """
    scores = np.dot(X, W.T)
    # Subtracting each row's maximum does not change the softmax but keeps
    # np.exp from overflowing on large scores.
    scores = scores - scores.max(axis=1, keepdims=True)
    exp_scores = np.exp(scores)
    return exp_scores / exp_scores.sum(axis=1, keepdims=True)

In [18]:
def train_mini_batch_sm(epochs, lr, batch_size, X, t):
    """Train the softmax weights with mini-batch gradient descent.

    epochs     : number of full passes over X.
    lr         : learning rate applied to every batch gradient.
    batch_size : samples per mini-batch (the last batch of an epoch may be smaller).
    X          : 2-D array of inputs, one sample per row.
    t          : 2-D array of one-hot targets, one row per sample.

    Returns the trained weight matrix of shape (t.shape[1], X.shape[1]).

    All sizes are taken from the X and t arguments rather than from notebook
    globals, so the function also works on data other than the full training set.
    """
    m, n = X.shape
    k = t.shape[1]
    weights = generateWeights(k, n)
    for epoch in range(epochs):
        # Reshuffle once per epoch; inputs and targets share the same permutation.
        shuffled_indices = np.random.permutation(m)
        X_shuffled = X[shuffled_indices]
        t_shuffled = t[shuffled_indices]
        for start in range(0, m, batch_size):
            xi = X_shuffled[start:start + batch_size]
            ti = t_shuffled[start:start + batch_size]

            y = forwardPropSM(xi, weights, batch_size)
            dw = backPropSM(xi, y, weights, ti, batch_size)
            weights = weights - lr * dw
        print("Finished Epoch {}".format(epoch))

    return weights

In [27]:
%%time
epochs = 5
lr = .5
batch_size = 500
trainedWeights = train_mini_batch_sm(epochs,lr,batch_size,train_images,train_labels)

Finished Epoch 0
Finished Epoch 1
Finished Epoch 2
Finished Epoch 3
Finished Epoch 4
CPU times: user 8.35 s, sys: 8.92 s, total: 17.3 s
Wall time: 4.64 s


In [28]:
trainedWeights.shape

(10, 784)

In [29]:
import pandas as pd

In [30]:
# Score the test split: the predicted digit is the class with the largest output.
predicted = forwardPropSM(test_images, trainedWeights, test_images.shape[0])
df = pd.DataFrame(predicted).T   # transposed: one column per sample, one row per class
pred_labels = df.idxmax()        # per column, the row label holding the maximum
score = np.sum(pred_labels.values == test_labels_original)
print("Test Score {}/{} : Accuracy {}%".format(score, len(test_labels_original), 100 * score / len(test_labels_original)))


Test Score 9174/10000 : Accuracy 91.74%


In [31]:
# Score the training split the same way; `df` is left holding these outputs.
predicted = forwardPropSM(train_images, trainedWeights, train_images.shape[0])
df = pd.DataFrame(predicted).T   # transposed: one column per sample, one row per class
pred_labels = df.idxmax()        # per column, the row label holding the maximum
score = np.sum(pred_labels.values == train_labels_original)
print("Train Score {}/{} : Accuracy {}%".format(score, len(train_labels_original), 100 * score / len(train_labels_original)))


Train Score 54917/60000 : Accuracy 91.52833333333334%


In [32]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,59990,59991,59992,59993,59994,59995,59996,59997,59998,59999
0,0.004448,0.9993364,0.000788,1.8e-05,1e-05,0.000579,7.319772e-09,9.207924e-05,2e-06,1e-05,...,0.000118,0.002113,0.000827,0.022238,0.000219,4e-05,0.0001298837,0.000168,0.038036,0.061263
1,5.2e-05,4.944697e-09,0.000221,0.961175,0.000292,2e-06,0.9955506,1.313064e-07,0.985461,1e-06,...,1.7e-05,4.8e-05,0.000317,0.000438,0.913071,0.000333,1.325476e-05,4.5e-05,3e-05,0.000119
2,0.004651,3.058382e-05,0.020079,0.014383,3.4e-05,0.978692,0.00022122,0.0008668177,0.000441,0.000152,...,4.4e-05,0.963386,0.00041,0.006254,0.027442,0.001232,0.0006016804,5e-06,0.00962,0.014074
3,0.32102,4.144978e-05,0.076153,0.002722,0.000105,0.00188,0.003372199,0.9937346,0.00555,7e-06,...,0.000321,5.8e-05,0.005912,0.012329,0.01076,0.016261,0.9872609,0.005741,0.00172,0.001917
4,6e-06,2.574033e-08,0.856416,0.000116,0.089956,2.3e-05,5.852591e-06,1.966798e-06,7e-05,0.997429,...,0.006779,0.003138,0.007376,0.003809,0.003588,4.5e-05,4.54163e-06,0.001193,0.011439,0.000201
5,0.654395,0.000498117,0.001005,0.00025,0.000369,0.000727,4.363409e-05,0.0002465957,0.001371,0.000518,...,0.000891,0.000537,0.010477,0.931789,0.004962,0.002592,0.000860886,0.971741,0.039328,0.027765
6,0.000895,8.384048e-06,0.004836,0.000175,0.000107,4.5e-05,2.551157e-05,8.13947e-08,0.00029,0.001039,...,0.000101,0.023125,0.000168,0.019818,0.001076,8e-06,3.138084e-07,3e-05,0.896493,0.000973
7,0.006455,1.310902e-05,0.006454,0.000581,0.028644,0.000321,1.598086e-05,3.819584e-06,0.000631,2.7e-05,...,0.021462,8e-06,0.18099,0.000319,0.005733,4e-06,8.40199e-07,0.000418,0.001155,0.002204
8,0.006833,6.64404e-05,0.005087,0.020472,0.012618,0.01009,0.0006948281,0.004766916,0.004814,0.000633,...,0.003733,0.007538,0.010793,0.002446,0.031894,0.979336,0.01105796,0.017239,0.000801,0.813045
9,0.001244,5.517397e-06,0.028962,0.000108,0.867866,0.00764,7.012063e-05,0.0002869941,0.00137,0.000183,...,0.966535,4.9e-05,0.782731,0.000561,0.001256,0.000147,6.969083e-05,0.003421,0.001377,0.07844
