### Softmax implementation (formulas from https://www.ics.uci.edu/~pjsadows/notes.pdf)

In [1]:
import numpy as np
import pandas as pd

### Downloading Datasets

In [2]:
from keras.datasets import mnist

# Load MNIST through Keras. The call returns a (train, test) pair, each of which
# is an (images, labels) tuple; the "_original" names hold the raw, unprocessed arrays.
(train_images_original, train_labels_original), (test_images_original, test_labels_original) = mnist.load_data()

Using TensorFlow backend.


In [3]:
# Inspect the dimensions of the raw training image array.
train_images_original.shape

(60000, 28, 28)

In [4]:
# Number of samples in each split (first axis of the raw image arrays).
m_train = train_images_original.shape[0]
m_test = test_images_original.shape[0]
# Length of one image once flattened: product of the second and third axes.
pixels = train_images_original.shape[1] * train_images_original.shape[2]

#### Flattening Image 

In [5]:
# Flatten every image to one row and scale pixel values from [0, 255] to [0, 1].
# The top-left pixel (column 0 after flattening) is forced fully on so that it
# always activates and acts as a bias node.
#
# reshape() can return a view of its input, so writing into the reshaped array
# directly would overwrite the raw *_original arrays. astype() makes a copy, so
# the bias column is written only after the conversion. The sample count is
# read from the data instead of being hard-coded.
train_images = train_images_original.reshape(train_images_original.shape[0], -1)
train_images = train_images.astype('float32') / 255
train_images[:, 0] = 1.0  # 255 / 255: same value the original produced

test_images = test_images_original.reshape(test_images_original.shape[0], -1)
test_images = test_images.astype('float32') / 255
test_images[:, 0] = 1.0

train_images.shape, test_images.shape

((60000, 784), (10000, 784))

In [6]:
# Shape of the first flattened training image (row 0 of train_images).
train_images[:,][0].shape

(784,)

In [7]:
import matplotlib.pyplot as plt
# Display training image 5, reshaped from the flat row back to 28x28.
# NOTE(review): the trailing comma turns the cell value into a 1-tuple, which is
# why the cell output shows a tuple repr; a trailing ';' would suppress it.
plt.imshow(train_images[:,][5].reshape(28,28)), 


(<matplotlib.image.AxesImage at 0x7f9e9196b278>,)

#### One hot encoding

In [8]:
from keras.utils import to_categorical

# One-hot encode the digit labels: each label becomes a row with a single 1.
# num_classes is passed explicitly so both splits always get 10 columns, even
# if some digit happened to be absent from one of them.
train_labels = to_categorical(train_labels_original, num_classes=10)
test_labels = to_categorical(test_labels_original, num_classes=10)

# Shape checks replace the former hard-coded reshape(60000, 10) / reshape(10000, 10),
# which were no-ops tied to one specific dataset size.
assert train_labels.shape == (len(train_labels_original), 10)
assert test_labels.shape == (len(test_labels_original), 10)


In [9]:
# Confirm the one-hot label matrix has one row per sample and one column per class.
train_labels.shape

(60000, 10)

In [10]:
# Look at the one-hot row for the first training label.
train_labels[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)

In [11]:
def generateWeights(k, n):
    """Return a zero-filled float array of shape (k, n).

    k -- number of rows
    n -- number of columns
    """
    return np.zeros((k, n))

def generateRandomVector(k,n):
    """Return a (k, n) array of uniform samples from [0, 1).

    Values are drawn from NumPy's global random state.
    """
    return np.random.rand(k, n)

In [12]:
# Quick shape check of both generators: the full weight matrix and one of its rows.
generateWeights(10, 784).shape, generateRandomVector(10,784)[0].shape

((10, 784), (784,))

In [13]:
def backPropSM(X,y,W,target,batch_size):
    """Return the batch-averaged weight gradient dot((y - target).T, X) / m.

    X          -- input batch; m is taken from its first axis
    y          -- predicted outputs for the batch (one entry per row of X)
    W          -- current weights (not used in the computation)
    target     -- expected outputs, same layout as y
    batch_size -- not used; the divisor comes from X itself
    """
    residual = y - target
    summed_gradient = np.dot(residual.T, X)
    return summed_gradient / X.shape[0]

In [14]:
#s is a vector of n size = 10 (number of nodes in the last layer represents (0-9 activation nodes))
def softmax(s):
    """Return exp(s) / sum(exp(s)), normalised over every element of s.

    The largest score is subtracted before exponentiating. This leaves the
    result mathematically unchanged but stops np.exp from overflowing to inf
    (and the quotient from becoming NaN) when the scores are large.

    s -- array-like of scores; an empty input yields an empty array.
    """
    s = np.asarray(s)
    if s.size == 0:
        # np.max would raise on an empty array; there is nothing to normalise.
        return np.exp(s)

    exps = np.exp(s - np.max(s))
    return exps / np.sum(exps)

In [15]:
def forwardPropSM(X, W, batch_size):
    """Run the softmax forward pass one sample at a time.

    X          -- 2-D input batch, one sample per row
    W          -- weight matrix applied as dot(W, sample)
    batch_size -- not used; the sample count is read from X

    Returns a Python list holding softmax(dot(W, X[i])) for every row i of X.
    """
    sample_count, _feature_count = X.shape[0], X.shape[1]
    return [softmax(np.dot(W, X[row])) for row in range(sample_count)]

In [16]:
#y = forwardPropSM2(test_images,trainedWeights, test_images.shape[0])
#y[0]

In [17]:
def forwardPropSM2(X, W, batch_size):
    """Vectorised forward pass: softmax probabilities for every row of X at once.

    The previous version passed the whole score matrix to a softmax that
    normalises over all elements, so the entire batch summed to 1 instead of
    each sample. Here the normalisation is done per row.

    X          -- 2-D input batch, one sample per row
    W          -- weight matrix; scores are computed as dot(X, W.T)
    batch_size -- kept for interface compatibility; not used

    Returns an array with one row per sample, each row summing to 1.
    """
    scores = np.dot(X, W.T)
    # Subtract each row's maximum so np.exp cannot overflow; the ratio is unchanged.
    scores = scores - np.max(scores, axis=1, keepdims=True)
    exps = np.exp(scores)
    return exps / np.sum(exps, axis=1, keepdims=True)

In [18]:
def train_mini_batch_sm(epochs, lr, batch_size, X, t):
    """Train softmax weights with mini-batch gradient descent.

    epochs     -- number of full passes over the data
    lr         -- learning rate applied to each batch gradient
    batch_size -- number of samples per update (the last batch may be smaller)
    X          -- 2-D inputs, one sample per row
    t          -- 2-D targets, one row per sample (one-hot in this notebook)

    Returns the trained weight matrix of shape (t.shape[1], X.shape[1]).

    All sizes are taken from the X and t arguments. The earlier version read
    them from the global train_images and hard-coded a 10x784 weight matrix,
    so it only worked when called with that exact dataset.
    """
    m, n = X.shape
    k = t.shape[1]
    weights = generateWeights(k, n)

    for epoch in range(epochs):
        # Reshuffle every epoch so batches differ between passes.
        shuffled_indices = np.random.permutation(m)
        X_shuffled = X[shuffled_indices]
        t_shuffled = t[shuffled_indices]

        for start in range(0, m, batch_size):
            xi = X_shuffled[start:start + batch_size]
            ti = t_shuffled[start:start + batch_size]

            # Pass the real batch length; the final slice can be shorter.
            y = forwardPropSM(xi, weights, xi.shape[0])
            dw = backPropSM(xi, y, weights, ti, xi.shape[0])
            weights = weights - lr * dw
        print("Finished Epoch {}".format(epoch))

    return weights

In [25]:
%%time
# Hyperparameters for this training run.
epochs = 5
lr = .5
batch_size = 1000
# NOTE(review): the per-epoch shuffle inside train_mini_batch_sm uses NumPy's
# global RNG and no seed is set in this notebook, so results vary between runs.
trainedWeights = train_mini_batch_sm(epochs,lr,batch_size,train_images,train_labels)

Finished Epoch 0
Finished Epoch 1
Finished Epoch 2
Finished Epoch 3
Finished Epoch 4
CPU times: user 8.39 s, sys: 8.79 s, total: 17.2 s
Wall time: 4.74 s


In [26]:
# Check the shape of the learned weight matrix.
trainedWeights.shape

(10, 784)

In [27]:
import pandas as pd

In [28]:
# Score the trained weights on the test split.
predicted = forwardPropSM(test_images, trainedWeights, len(test_images))
# After the transpose each column is one sample and each row one class,
# so idxmax over rows gives the predicted class per sample.
df = pd.DataFrame(predicted).transpose()
pred_labels = df.idxmax(axis=0)
score = (pred_labels.values == test_labels_original).sum()
print(
    "Test Score {}/{} : Accuracy {}%".format(
        score,
        len(test_labels_original),
        100*score/len(test_labels_original),
    )
)


Test Score 9134/10000 : Accuracy 91.34%


In [29]:
# Score the trained weights on the training split.
predicted = forwardPropSM(train_images, trainedWeights, len(train_images))
# After the transpose each column is one sample and each row one class,
# so idxmax over rows gives the predicted class per sample.
df = pd.DataFrame(predicted).transpose()
pred_labels = df.idxmax(axis=0)
score = (pred_labels.values == train_labels_original).sum()
print(
    "Train Score {}/{} : Accuracy {}%".format(
        score,
        len(train_labels_original),
        100*score/len(train_labels_original),
    )
)


Train Score 54545/60000 : Accuracy 90.90833333333333%


In [30]:
# Display the per-class probabilities from the last evaluation
# (rows are classes, columns are samples).
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,59990,59991,59992,59993,59994,59995,59996,59997,59998,59999
0,0.00873,0.9986898,0.001785,5.7e-05,3.2e-05,0.002092,2.32176e-07,0.0003338088,1.3e-05,3.5e-05,...,0.000438,0.004447,0.002154,0.029413,0.000544,0.00018,0.000433,0.000478,0.062785,0.095876
1,0.000102,2.272894e-08,0.000294,0.955608,0.000342,1.3e-05,0.9899067,6.120227e-07,0.977362,5e-06,...,5.5e-05,0.000195,0.000633,0.000754,0.892461,0.000751,3.1e-05,6.6e-05,5.9e-05,0.000368
2,0.005758,2.877895e-05,0.013992,0.016804,6.1e-05,0.937359,0.0009196881,0.001291724,0.000951,0.000239,...,0.000111,0.934518,0.000904,0.005859,0.029176,0.002984,0.00096,1e-05,0.008674,0.018153
3,0.278689,5.956469e-05,0.061808,0.00372,0.000147,0.003648,0.006877198,0.9915969,0.007551,2.3e-05,...,0.000666,0.000118,0.008115,0.015096,0.014552,0.019429,0.984571,0.007681,0.002157,0.003533
4,2e-05,1.027733e-07,0.853724,0.0003,0.085347,0.000205,4.23053e-05,1.490356e-05,0.000216,0.994921,...,0.009913,0.007521,0.009777,0.006559,0.005929,0.000269,2.2e-05,0.001636,0.013033,0.0007
5,0.684332,0.001101584,0.002065,0.000602,0.000537,0.002076,0.0002528087,0.001006384,0.002707,0.001408,...,0.00228,0.000992,0.016688,0.913064,0.007648,0.005936,0.002082,0.960638,0.046577,0.030989
6,0.001924,1.978341e-05,0.007075,0.000335,0.000154,0.000302,0.0001480202,6.570255e-07,0.000697,0.001566,...,0.000321,0.042141,0.000452,0.02379,0.001879,3.9e-05,2e-06,8.2e-05,0.861628,0.003361
7,0.010802,2.285043e-05,0.005357,0.001031,0.023195,0.001658,0.0001010974,1.44512e-05,0.001376,0.000106,...,0.02366,4.5e-05,0.171202,0.000639,0.006963,2.5e-05,4e-06,0.000998,0.001471,0.004592
8,0.007267,6.732273e-05,0.005721,0.021316,0.006317,0.011969,0.001411439,0.00504751,0.006354,0.000958,...,0.004611,0.009804,0.013039,0.003309,0.038705,0.96999,0.011761,0.020504,0.000789,0.699875
9,0.002375,1.018333e-05,0.048179,0.000227,0.883868,0.040678,0.0003405305,0.0006930477,0.002773,0.000739,...,0.957944,0.000219,0.777034,0.001517,0.002144,0.000398,0.000135,0.007906,0.002825,0.142553
