In [50]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from keras.callbacks import Callback
from keras.datasets import mnist
import numpy as np
import keras

import matplotlib.pyplot as plt
%matplotlib inline

In [51]:
# Load MNIST via keras.datasets and unpack the train and test splits into
# (inputs, labels) pairs. These lowercase names hold the raw arrays; later
# cells derive the preprocessed X_*/Y_* arrays from them.
(x_train,y_train),(x_test,y_test) = mnist.load_data()

In [52]:
# Rescale the raw inputs to float32 by dividing by 255 (min-max scaling to
# [0, 1], assuming 8-bit 0-255 pixel values -- this is not z-score
# standardization).
X_train = x_train.astype(np.float32) / 255
X_test = x_test.astype(np.float32) / 255

In [53]:
# One-hot encode the integer class labels into 10-column categorical targets.
Y_train, Y_test = (
    to_categorical(labels, num_classes=10) for labels in (y_train, y_test)
)

In [54]:
# Flatten every sample into a single feature vector (784 values per MNIST image).
# The number of rows is read from the array itself and the feature axis is
# inferred with -1, so this cell keeps working if the data is subsampled or
# replaced by a dataset with a different number of samples (the sizes used to
# be hard-coded as 60000 and 10000).
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

In [55]:
# Fully connected classifier with sigmoid activations in every hidden layer.
# Layout: 784 inputs -> 5 x 700 -> 350 -> 100 -> 10-way softmax.
nn_sigmoid = Sequential(
    [Dense(700, input_dim=784, activation='sigmoid')]
    + [Dense(width, activation='sigmoid') for width in (700, 700, 700, 700, 350, 100)]
    + [Dense(10, activation='softmax')]  # 10 output neurons, one per class
)

In [56]:
# Compile the sigmoid network: SGD optimizer, categorical cross-entropy loss,
# accuracy reported as the metric.
nn_sigmoid.compile(
    optimizer='sgd',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [57]:
# Fully connected classifier with ReLU activations in every hidden layer.
# Layout: 784 inputs -> 5 x 700 -> 350 -> 100 -> 10-way softmax.
nn_relu = Sequential(
    [Dense(700, input_dim=784, activation='relu')]
    + [Dense(width, activation='relu') for width in (700, 700, 700, 700, 350, 100)]
    + [Dense(10, activation='softmax')]  # 10 output neurons, one per class
)

In [58]:
# Compile the ReLU network: SGD optimizer, categorical cross-entropy loss,
# accuracy reported as the metric.
nn_relu.compile(
    optimizer='sgd',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [59]:
# Fully connected classifier with ELU activations in every hidden layer.
# Layout: 784 inputs -> 5 x 700 -> 350 -> 100 -> 10-way softmax.
nn_elu = Sequential(
    [Dense(700, input_dim=784, activation='elu')]
    + [Dense(width, activation='elu') for width in (700, 700, 700, 700, 350, 100)]
    + [Dense(10, activation='softmax')]  # 10 output neurons, one per class
)

In [60]:
# Compile the ELU network: SGD optimizer, categorical cross-entropy loss,
# accuracy reported as the metric.
nn_elu.compile(
    optimizer='sgd',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [61]:
class history_loss(keras.callbacks.Callback):
    """Keras callback that records the loss reported at the end of every batch.

    After training, the recorded values are available in order on the
    ``losses`` attribute (one entry per batch across all epochs).
    """

    def on_train_begin(self, logs=None):
        """Reset the history so each training run starts with an empty list.

        Args:
            logs: Unused; accepted to match the Keras callback signature.
        """
        self.losses = []

    def on_batch_end(self, batch, logs=None):
        """Append this batch's ``'loss'`` entry from ``logs`` to the history.

        Args:
            batch: Index of the batch that just finished (unused).
            logs: Metrics mapping supplied by Keras; may be None.

        ``None`` is appended when ``logs`` has no ``'loss'`` key.
        """
        # Use None as the default instead of a shared mutable dict, and
        # tolerate a caller passing logs=None explicitly.
        logs = logs or {}
        self.losses.append(logs.get('loss'))

In [62]:
# Hyper-parameters shared by the sigmoid, relu and elu training runs.
n_epochs = 20
batch_size = 256
validation_split = 0.2 # passed to fit(): 20% of the training data is held out for validation (not the test set), 80% is used for fitting

In [None]:
# Train the sigmoid network; the callback collects the loss after every batch.
history_sigmoid = history_loss()
nn_sigmoid.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    verbose=2,
    callbacks=[history_sigmoid],
    validation_split=validation_split,
)

Train on 48000 samples, validate on 12000 samples
Epoch 1/20
 - 17s - loss: 2.3183 - acc: 0.1102 - val_loss: 2.3022 - val_acc: 0.1060
Epoch 2/20
 - 16s - loss: 2.3013 - acc: 0.1140 - val_loss: 2.3025 - val_acc: 0.1060
Epoch 3/20
 - 15s - loss: 2.3014 - acc: 0.1140 - val_loss: 2.3022 - val_acc: 0.1060
Epoch 4/20
 - 16s - loss: 2.3015 - acc: 0.1140 - val_loss: 2.3021 - val_acc: 0.1060
Epoch 5/20
 - 14s - loss: 2.3013 - acc: 0.1140 - val_loss: 2.3019 - val_acc: 0.1060
Epoch 6/20
 - 15s - loss: 2.3014 - acc: 0.1140 - val_loss: 2.3024 - val_acc: 0.1060
Epoch 7/20
 - 14s - loss: 2.3013 - acc: 0.1140 - val_loss: 2.3022 - val_acc: 0.1060
Epoch 8/20


In [None]:
# Train the ReLU network; the callback collects the loss after every batch.
history_relu = history_loss()
nn_relu.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    verbose=2,
    callbacks=[history_relu],
    validation_split=validation_split,
)

In [None]:
# Train the ELU network; the callback collects the loss after every batch.
history_elu = history_loss()
nn_elu.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    verbose=2,
    callbacks=[history_elu],
    validation_split=validation_split,
)

In [None]:
# Overlay the per-batch training loss recorded for each activation function.
# One loop replaces three copy-pasted plot calls; the curves are drawn in the
# same order (sigmoid, relu, elu) with the same legend labels.
for activation_name, loss_history in (
    ('sigmoid', history_sigmoid),
    ('relu', history_relu),
    ('elu', history_elu),
):
    batch_losses = loss_history.losses
    plt.plot(np.arange(len(batch_losses)), batch_losses, label=activation_name)
plt.title('Losses of different activation function')
plt.xlabel('number of batches')
plt.ylabel('loss')  # the y-axis was previously unlabeled
plt.legend(loc='best')
plt.show()