### Softmax regression implementation (formulas from https://www.ics.uci.edu/~pjsadows/notes.pdf)

In [1]:
import numpy as np
import pandas as pd

### Downloading Datasets

In [2]:
from keras.datasets import mnist

# Unpack the (images, labels) pairs for the training and test splits returned by Keras.
(train_images_original, train_labels_original), (test_images_original, test_labels_original) = mnist.load_data()

Using TensorFlow backend.


In [3]:
train_images_original.shape

(60000, 28, 28)

In [4]:
# Number of samples in each split, and the number of pixels in one image
# (second dimension times third dimension of the raw image array).
m_train, m_test = (images.shape[0] for images in (train_images_original, test_images_original))
pixels = int(np.prod(train_images_original.shape[1:3]))

#### Flattening Image 

In [5]:
# Flatten every image into one row and scale the pixel values to [0, 1].
# astype() makes a copy, so the bias assignment below does not write through
# to the *_original arrays (reshape alone returns a view onto them).
train_images = train_images_original.reshape((len(train_images_original), -1)).astype('float32') / 255
# I made the top left pixel always activate so it acts as a bias node
# (1.0 here is the old 255 after scaling).
train_images[:, 0] = 1.0

test_images = test_images_original.reshape((len(test_images_original), -1)).astype('float32') / 255
test_images[:, 0] = 1.0

train_images.shape, test_images.shape

((60000, 784), (10000, 784))

In [6]:
# Shape of a single flattened image.
train_images[0].shape

(784,)

In [7]:
import matplotlib.pyplot as plt
# Show one sample as a 28x28 image; the trailing ';' keeps the AxesImage repr out of the cell output.
plt.imshow(train_images[5].reshape(28, 28));


(<matplotlib.image.AxesImage at 0x7f42b6dd72e8>,)

#### One hot encoding

In [8]:
from keras.utils import to_categorical

# One-hot encode the digit labels; the reshape pins the expected
# (samples, 10) layout and fails loudly if the encoding has another size.
train_labels = to_categorical(train_labels_original).reshape(60000, 10)
test_labels = to_categorical(test_labels_original).reshape(10000, 10)


In [9]:
train_labels.shape

(60000, 10)

In [10]:
train_labels[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)

In [11]:
def generateWeights(k, n):
    """Return a (k, n) float array of zeros to use as the initial weights."""
    return np.zeros((k, n))

def generateRandomVector(k,n):
    """Return a (k, n) array of uniform samples from [0, 1), drawn from NumPy's global RNG."""
    return np.random.random_sample((k, n))

In [12]:
generateWeights(10, 784).shape, generateRandomVector(10,784)[0].shape

((10, 784), (784,))

In [13]:
def backPropSM(X,y,W,target,batch_size):
    """Return the batch-averaged weight gradient (y - target).T . X / m.

    X           -- inputs, one sample per row (m rows).
    y           -- predicted outputs for the same m samples (array or list of rows).
    target      -- expected outputs, same layout as y.
    W, batch_size -- accepted for signature symmetry; not used here.
    """
    sample_count = X.shape[0]
    residual = y - target
    return np.dot(residual.T, X) / sample_count

In [14]:
# s holds the raw scores; in this notebook it is a vector of size 10
# (one entry per output node, representing the digits 0-9).
def softmax(s, axis=None):
    """Numerically stable softmax.

    s    -- array-like of raw scores.
    axis -- axis to normalise over. The default (None) normalises over every
            element, which is the original behaviour and is what a single
            score vector needs; pass an axis to normalise each row/column of
            a score matrix independently.

    Returns an array with the shape of `s` whose entries along `axis` sum to 1.
    """
    s = np.asarray(s)
    if s.size == 0:
        # Nothing to normalise; also avoids taking max() of an empty array.
        return np.exp(s)
    # Subtracting the max leaves the result mathematically unchanged but keeps
    # exp() from overflowing to inf (which would turn the ratio into NaN).
    shifted = s - np.max(s, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

In [15]:
def forwardPropSM(X, W, batch_size):
    """Forward pass, one sample at a time.

    X          -- inputs, one sample per row.
    W          -- weight matrix; each row scores one output class.
    batch_size -- accepted but not used.

    Returns a Python list with one entry per row of X, each entry being
    softmax(W . x) for that row.
    """
    sample_count, _feature_count = X.shape[0], X.shape[1]
    return [softmax(np.dot(W, X[idx])) for idx in range(sample_count)]

# NOTE(review): scratch checks. They read trainedWeights and forwardPropSM2, which
# are only defined further down (cells In [18] and In [16]), so they can only
# run after those cells have been executed.
y = forwardPropSM(test_images,trainedWeights, test_images.shape[0])
y[0]

y = forwardPropSM2(test_images,trainedWeights, test_images.shape[0])
#test_images.shape, trainedWeights.shape
y[0].sum()

In [16]:
# Vectorised forward pass: scores every sample with a single matrix product.
def forwardPropSM2(X, W, batch_size):
    """Forward pass for all samples at once.

    X          -- inputs, one sample per row, shape (m, n).
    W          -- weight matrix, shape (K, n).
    batch_size -- accepted but not used.

    Returns a (K, m) array; column j is the softmax of W . X[j], so every
    column sums to 1.
    """
    scores = np.dot(W, X.T)
    # Normalise each column (one sample) on its own. Normalising over the
    # whole matrix, as a plain softmax(scores) call does, mixes all samples
    # into one distribution. Subtracting the column max keeps exp() from
    # overflowing.
    scores = scores - scores.max(axis=0, keepdims=True)
    exps = np.exp(scores)
    return exps / exps.sum(axis=0, keepdims=True)

In [17]:
def train_mini_batch_sm(epochs, lr, batch_size, X, t):
    """Train softmax-regression weights with mini-batch gradient descent.

    epochs     -- number of full passes over the data.
    lr         -- learning rate applied to each batch gradient.
    batch_size -- number of samples per mini-batch (the last batch of an
                  epoch may be smaller).
    X          -- inputs, one sample per row, shape (m, n).
    t          -- one-hot targets, shape (m, k).

    Returns the trained weight matrix of shape (k, n).
    Raises ValueError if X and t do not have the same number of rows.
    """
    # Take every dimension from the arguments rather than from the notebook's
    # global train_images, so the function works for any dataset passed in.
    m = X.shape[0]
    n = X.shape[1]
    k = t.shape[1]
    if t.shape[0] != m:
        raise ValueError(
            "X and t must have the same number of rows, got {} and {}".format(m, t.shape[0]))

    weights = generateWeights(k, n)
    for epoch in range(epochs):
        # Reshuffle once per epoch so the batches differ between passes.
        shuffled_indices = np.random.permutation(m)
        X_shuffled = X[shuffled_indices]
        t_shuffled = t[shuffled_indices]
        for start in range(0, m, batch_size):
            xi = X_shuffled[start:start + batch_size]
            ti = t_shuffled[start:start + batch_size]

            y = forwardPropSM(xi, weights, batch_size)
            dw = backPropSM(xi, y, weights, ti, batch_size)
            weights = weights - lr * dw
        print("Finished Epoch {}".format(epoch))

    return weights

In [18]:
%%time
# Hyperparameters for the mini-batch gradient descent run.
epochs = 5
lr = .5
batch_size = 1000
# Seed NumPy's global RNG so the per-epoch shuffles, and therefore the trained
# weights and the accuracies reported below, are the same on every run.
np.random.seed(0)
trainedWeights = train_mini_batch_sm(epochs,lr,batch_size,train_images,train_labels)

Finished Epoch 0
Finished Epoch 1
Finished Epoch 2
Finished Epoch 3
Finished Epoch 4
CPU times: user 8.16 s, sys: 8.39 s, total: 16.6 s
Wall time: 4.67 s


In [19]:
trainedWeights.shape

(10, 784)

In [20]:
import pandas as pd

In [21]:
# Score the test split: the predicted digit is the class with the highest probability.
predicted = forwardPropSM(test_images, trainedWeights, test_images.shape[0])
# After the transpose each column is one sample and each row one class,
# so idxmax() returns the winning class per sample.
df = pd.DataFrame(predicted).T
pred_labels = df.idxmax()
score = np.sum(pred_labels.values == test_labels_original)
print("Test Score {}/{} : Accuracy {}%".format(score, test_labels_original.shape[0], 100*score/test_labels_original.shape[0]))


Test Score 9142/10000 : Accuracy 91.42%


In [22]:
# Score the training split the same way: highest-probability class per sample.
predicted = forwardPropSM(train_images, trainedWeights, train_images.shape[0])
# After the transpose each column is one sample and each row one class,
# so idxmax() returns the winning class per sample.
df = pd.DataFrame(predicted).T
pred_labels = df.idxmax()
score = np.sum(pred_labels.values == train_labels_original)
print("Train Score {}/{} : Accuracy {}%".format(score, train_labels_original.shape[0], 100*score/train_labels_original.shape[0]))


Train Score 54571/60000 : Accuracy 90.95166666666667%


In [23]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,59990,59991,59992,59993,59994,59995,59996,59997,59998,59999
0,0.009365,0.9986837,0.001781,5.7e-05,3e-05,0.001855,2.32731e-07,0.0002983655,1.3e-05,3.4e-05,...,0.000408,0.003934,0.002068,0.030015,0.000544,0.00014,0.000396,0.000553,0.065346,0.084899
1,0.000106,2.200571e-08,0.000293,0.948561,0.000315,1.1e-05,0.9895645,5.155393e-07,0.976348,5e-06,...,5.2e-05,0.000169,0.0006,0.000751,0.884079,0.000554,2.7e-05,7.4e-05,6.1e-05,0.000316
2,0.006853,3.223959e-05,0.015409,0.017244,5.7e-05,0.938432,0.0009606558,0.001269143,0.000952,0.000259,...,0.000111,0.941714,0.000915,0.006601,0.029512,0.002342,0.000973,1.2e-05,0.010126,0.016478
3,0.322166,6.61456e-05,0.065642,0.004064,0.000144,0.003469,0.006886528,0.9907447,0.007617,2.3e-05,...,0.000691,0.000106,0.008532,0.016247,0.015261,0.015983,0.983441,0.01003,0.002351,0.003436
4,2.3e-05,1.077014e-07,0.846312,0.00033,0.084022,0.000201,4.489413e-05,1.393986e-05,0.000227,0.994972,...,0.009667,0.006845,0.009802,0.006898,0.006224,0.000216,2.1e-05,0.001931,0.014315,0.000673
5,0.636064,0.001093768,0.002023,0.000618,0.000515,0.001846,0.0002458788,0.0007911696,0.002611,0.001366,...,0.002065,0.000898,0.015525,0.910141,0.007852,0.004398,0.001734,0.948163,0.048442,0.027639
6,0.00189,1.846577e-05,0.006889,0.000338,0.000136,0.000258,0.0001387129,5.352259e-07,0.000676,0.001457,...,0.000288,0.036579,0.000425,0.023578,0.00186,2.8e-05,1e-06,8.7e-05,0.853958,0.002972
7,0.011257,2.128714e-05,0.005159,0.001085,0.020541,0.00144,9.834479e-05,1.251188e-05,0.001368,9.9e-05,...,0.021626,3.9e-05,0.164053,0.000646,0.007186,1.8e-05,4e-06,0.001083,0.001484,0.004078
8,0.009521,7.323498e-05,0.006072,0.027463,0.007388,0.013362,0.001699655,0.006198518,0.007336,0.001031,...,0.004816,0.00951,0.014392,0.003526,0.045269,0.976004,0.01327,0.02852,0.000844,0.72866
9,0.002756,1.104471e-05,0.05042,0.000239,0.886853,0.039125,0.0003606362,0.000670602,0.002852,0.000754,...,0.960277,0.000206,0.783689,0.001597,0.002212,0.000316,0.000132,0.009547,0.003073,0.13085
