### Softmax implementation (formulas from https://www.ics.uci.edu/~pjsadows/notes.pdf)

In [1]:
import numpy as np
import pandas as pd

### Downloading Datasets

In [2]:
# MNIST is pulled through the dataset loader that ships with Keras.
from keras.datasets import mnist

# load_data() is unpacked into two (images, labels) pairs: one for training, one for testing.
(train_images_original, train_labels_original), (test_images_original, test_labels_original) = mnist.load_data()

Using TensorFlow backend.


In [3]:
# Inspect the raw training image array; the recorded output is (60000, 28, 28).
train_images_original.shape

(60000, 28, 28)

In [4]:
# Sample counts are read from the leading axis of the raw image arrays.
m_train = train_images_original.shape[0]
m_test = test_images_original.shape[0]
# Number of pixels per image = product of the two trailing image dimensions.
pixels = train_images_original.shape[1] * train_images_original.shape[2]

#### Flattening Image 

In [5]:
# Flatten every image into a single row and divide by 255.
# The float32 cast happens BEFORE the bias write on purpose: reshape() can return a view
# onto the source array, so writing into the reshaped uint8 array would also overwrite
# pixel (0, 0) of train_images_original / test_images_original. astype() allocates a new
# array, which keeps the raw data intact and makes this cell safe to re-run.
train_images = train_images_original.reshape((train_images_original.shape[0], -1)).astype('float32') / 255
test_images = test_images_original.reshape((test_images_original.shape[0], -1)).astype('float32') / 255

# The top-left pixel is forced fully on (255 / 255 == 1.0) so it acts as a bias node.
train_images[:, 0] = 1.0
test_images[:, 0] = 1.0

train_images.shape, test_images.shape

((60000, 784), (10000, 784))

In [6]:
# Shape of a single flattened training sample.
train_images[0].shape

(784,)

In [7]:
import matplotlib.pyplot as plt

# Render training sample 5 as a 28x28 image. The trailing ';' suppresses the cell's
# echoed value; the previous trailing ',' wrapped the AxesImage in a 1-tuple, which the
# notebook then printed as output.
plt.imshow(train_images[5].reshape(28, 28));


(<matplotlib.image.AxesImage at 0x7fb8e8b2c240>,)

#### One hot encoding

In [8]:
from keras.utils import to_categorical

# One-hot encode the integer labels. The reshape pins the expected (samples, 10) layout
# and raises if the encoder ever returns a different number of elements.
train_labels = to_categorical(train_labels_original).reshape(60000, 10)
test_labels = to_categorical(test_labels_original).reshape(10000, 10)


In [9]:
# Confirm the one-hot label matrix layout; the recorded output is (60000, 10).
train_labels.shape

(60000, 10)

In [10]:
# One-hot row for the first training label (the recorded output has its 1 at index 5).
train_labels[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)

In [11]:
def generateWeights(k, n):
    """Return a (k, n) array of zeros, used as the initial weight matrix."""
    return np.zeros((k, n))

def generateRandomVector(k,n):
    """Return a (k, n) array of samples drawn by np.random.rand from NumPy's global RNG."""
    return np.random.rand(k, n)

In [12]:
# Sanity-check the helpers: full weight-matrix shape, and the shape of one row of the random matrix.
generateWeights(10, 784).shape, generateRandomVector(10,784)[0].shape

((10, 784), (784,))

In [13]:
def backPropSM(X,y,W,target,batch_size):
    """Return the batch-averaged weight gradient (y - target).T . X / X.shape[0].

    X          -- input batch, indexed by sample along its first axis.
    y          -- model outputs for the batch (anything that broadcasts against target).
    W          -- accepted for signature symmetry; not read here.
    target     -- expected outputs, same layout as y.
    batch_size -- not read here; the divisor is taken from X itself.

    Presumably this is the softmax / cross-entropy gradient from the notes linked in the
    notebook title -- confirm against that reference.
    """
    num_samples = X.shape[0]
    error = y - target
    return np.dot(error.T, X) / num_samples

In [14]:
#s holds pre-activation scores: either a single vector (n = 10 here, one score per digit 0-9)
#or an array whose last axis holds the per-class scores, e.g. (samples, classes)
def softmax(s):
    """Numerically stable softmax along the last axis of ``s``.

    Parameters
    ----------
    s : array_like
        Class scores. A 1-D vector is normalised as a whole; for higher-rank input each
        slice along the last axis is normalised independently, so every row of a
        (samples, classes) batch sums to 1.

    Returns
    -------
    numpy.ndarray
        Probabilities with the same shape as ``s``.
    """
    s = np.asarray(s)
    if s.size == 0:
        # Nothing to normalise; keep the (empty) shape instead of failing inside max().
        return np.exp(s)

    # 0-d input has no axis to reduce over, so fall back to a full reduction.
    axis = -1 if s.ndim > 0 else None

    # Subtracting the maximum leaves the result unchanged mathematically but keeps
    # exp() from overflowing to inf (and the ratio from becoming NaN) for large scores.
    shifted = s - np.max(s, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

In [15]:
def forwardPropSM(X, W, batch_size):
    """Softmax forward pass, evaluated one sample at a time.

    X          -- input batch; row i (X[i]) is one sample.
    W          -- weight matrix; np.dot(W, X[i]) yields the scores for sample i.
    batch_size -- accepted but not read; the sample count comes from X.shape[0].

    Returns a Python list holding one softmax output per row of X.
    """
    # Both dimensions are read up front (as before); only the sample count is used below.
    num_samples, num_features = X.shape[0], X.shape[1]
    return [softmax(np.dot(W, X[row])) for row in range(num_samples)]

In [27]:
# NOTE: this cell's execution count (27) is higher than that of the training cell further
# down; it needs `trainedWeights` from that cell, so it fails on a fresh top-to-bottom run.
y = forwardPropSM(test_images,trainedWeights, test_images.shape[0])
# Softmax output for the first test image.
y[0]

array([1.18034029e-04, 2.16724433e-07, 1.37261743e-04, 1.64065114e-03,
       1.60910086e-05, 4.65887056e-05, 6.66842905e-07, 9.96620387e-01,
       8.45614925e-05, 1.33554109e-03])

In [28]:
# NOTE: executed out of order (count 28) -- both `forwardPropSM2` and `trainedWeights`
# are defined in cells that appear later in the notebook.
y = forwardPropSM2(test_images,trainedWeights, test_images.shape[0])
# Shape of the batched output; the recorded value is (10000, 10).
y.shape

(10000, 10)

In [17]:
#Vectorised forward pass: one matrix product for the whole batch
def forwardPropSM2(X, W, batch_size):
    """Batched softmax forward pass.

    X          -- input batch of shape (m, n), one sample per row.
    W          -- weight matrix of shape (k, n).
    batch_size -- accepted for signature parity with forwardPropSM; not read.

    Returns an (m, k) array in which row i is the softmax of np.dot(W, X[i]).
    """
    scores = np.dot(X, W.T)  # (m, k): one row of class scores per sample

    # The normalisation must run per row (axis=1). Dividing by the sum over the whole
    # score matrix makes the entire batch sum to 1 instead of each sample.
    # Subtracting each row's maximum keeps exp() from overflowing.
    scores = scores - scores.max(axis=1, keepdims=True)
    exp_scores = np.exp(scores)
    return exp_scores / exp_scores.sum(axis=1, keepdims=True)

In [18]:
def train_mini_batch_sm(epochs, lr, batch_size, X, t):
    """Train softmax-regression weights with mini-batch gradient descent.

    epochs     -- number of full passes over X.
    lr         -- learning rate applied to every batch gradient.
    batch_size -- number of samples per update; the last batch of an epoch may be smaller.
    X          -- training inputs, shape (m, n), one sample per row.
    t          -- targets, shape (m, k), rows aligned with X.

    Returns the trained (k, n) weight matrix. Progress is printed once per epoch.
    """
    # Dimensions come from the arguments, not from the notebook-level `train_images`,
    # so the function trains correctly on any dataset it is handed.
    m, n = X.shape[0], X.shape[1]
    num_classes = t.shape[1]
    weights = generateWeights(num_classes, n)

    for epoch in range(epochs):
        # Reshuffle inputs and targets with the same permutation every epoch.
        shuffled_indices = np.random.permutation(m)
        X_shuffled = X[shuffled_indices]
        t_shuffled = t[shuffled_indices]

        for start in range(0, m, batch_size):
            xi = X_shuffled[start:start + batch_size]
            ti = t_shuffled[start:start + batch_size]

            y = forwardPropSM(xi, weights, batch_size)
            dw = backPropSM(xi, y, weights, ti, batch_size)
            weights = weights - lr * dw
        print("Finished Epoch {}".format(epoch))

    return weights

In [19]:
%%time
# Hyperparameters for the mini-batch run below.
epochs = 5
lr = .5
batch_size = 1000
# Train on the full training set; the returned weights are reused by the evaluation cells.
trainedWeights = train_mini_batch_sm(epochs,lr,batch_size,train_images,train_labels)

Finished Epoch 0
Finished Epoch 1
Finished Epoch 2
Finished Epoch 3
Finished Epoch 4
CPU times: user 8.55 s, sys: 9.59 s, total: 18.1 s
Wall time: 4.95 s


In [20]:
# Shape of the learned weight matrix; the recorded output is (10, 784).
trainedWeights.shape

(10, 784)

In [21]:
import pandas as pd

In [22]:
# Softmax outputs for every test image (forwardPropSM returns one vector per sample).
predicted = forwardPropSM(test_images,trainedWeights, test_images.shape[0])
# Build a frame with one row per sample, then transpose so each sample is a column:
# idxmax() works column-wise, so it returns the row label of the largest value per sample.
df = pd.DataFrame(predicted)
df = df.T
pred_labels = df.idxmax()
# Count predictions that match the integer ground-truth labels.
score = (pred_labels.values == test_labels_original).sum()
print("Test Score {}/{} : Accuracy {}%".format(score, test_labels_original.shape[0], 100*score/test_labels_original.shape[0]))


Test Score 9132/10000 : Accuracy 91.32%


In [23]:
# Same evaluation as the test-set cell, applied to the training images.
# NOTE: this rebinds `predicted`, `df`, `pred_labels` and `score` from the previous cell.
predicted = forwardPropSM(train_images,trainedWeights, train_images.shape[0])
# One column per sample after the transpose, so idxmax() picks the top class per sample.
df = pd.DataFrame(predicted)
df = df.T
pred_labels = df.idxmax()
# Count predictions that match the integer ground-truth labels.
score = (pred_labels.values == train_labels_original).sum()
print("Train Score {}/{} : Accuracy {}%".format(score, train_labels_original.shape[0], 100*score/train_labels_original.shape[0]))


Train Score 54501/60000 : Accuracy 90.835%


In [24]:
# Display the transposed output frame built in the previous cell
# (the recorded output shows rows 0-9 and columns 0-59999, i.e. one column per training sample).
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,59990,59991,59992,59993,59994,59995,59996,59997,59998,59999
0,0.010014,0.9988999,0.001779,4.5e-05,3.2e-05,0.001898,1.943534e-07,0.0002741466,1.2e-05,3.3e-05,...,0.000447,0.003881,0.002182,0.032598,0.000477,0.000198,0.000373,0.000576,0.060565,0.103369
1,0.000142,2.618404e-08,0.000308,0.965264,0.000396,1.4e-05,0.991673,6.368489e-07,0.980208,6e-06,...,6.7e-05,0.000196,0.000751,0.000914,0.904961,0.000977,3.3e-05,9.1e-05,6.3e-05,0.000468
2,0.00649,3.171954e-05,0.015358,0.013459,5.9e-05,0.945987,0.0007061545,0.001164669,0.000789,0.000247,...,0.000122,0.940578,0.00098,0.006958,0.025761,0.003492,0.000939,1.2e-05,0.008964,0.021367
3,0.364404,6.583809e-05,0.066808,0.003127,0.000144,0.003537,0.00576568,0.9937401,0.006634,2.3e-05,...,0.00075,0.000111,0.008924,0.017688,0.013183,0.023104,0.988126,0.010583,0.002208,0.004164
4,2.5e-05,1.052311e-07,0.849866,0.000259,0.089523,0.000199,3.724713e-05,1.361137e-05,0.000202,0.995381,...,0.011085,0.006672,0.010639,0.007624,0.005487,0.000309,2e-05,0.002171,0.013101,0.000827
5,0.591585,0.0008923738,0.00185,0.000427,0.000468,0.001603,0.0001841746,0.0005755171,0.002182,0.001167,...,0.002045,0.000794,0.014925,0.901494,0.006353,0.005337,0.001338,0.951846,0.040878,0.030804
6,0.002281,1.940262e-05,0.007329,0.000277,0.000156,0.000295,0.0001243403,5.658792e-07,0.000621,0.001586,...,0.000355,0.039821,0.000489,0.026984,0.001679,4.4e-05,1e-06,0.000103,0.869403,0.003868
7,0.014655,2.39157e-05,0.005375,0.000949,0.024836,0.001716,9.044583e-05,1.448506e-05,0.00129,0.000109,...,0.027058,4.1e-05,0.19642,0.000741,0.006639,2.9e-05,4e-06,0.001313,0.001473,0.00557
8,0.007873,5.763014e-05,0.005284,0.016016,0.005624,0.009617,0.001130937,0.003687259,0.005597,0.000788,...,0.004301,0.007718,0.012302,0.003374,0.033564,0.966088,0.009057,0.024024,0.000686,0.683846
9,0.00253,9.052882e-06,0.046044,0.000178,0.878762,0.035135,0.0002878214,0.0005290552,0.002465,0.00066,...,0.95377,0.000188,0.752388,0.001624,0.001897,0.000421,0.00011,0.009282,0.002659,0.145716
