In [1]:
import os

import keras
from keras.datasets import mnist

from keras.models import Sequential
from keras.layers import *
#utilities help us transform our data later
from keras.utils import *
from keras.utils import to_categorical
import matplotlib.pyplot as plt
from keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger, TensorBoard
import tensorflow as tf

Using TensorFlow backend.


# Training a Neural Network: 

Training a neural network (NN) with Keras involves the following components: 

i. **Layers, that are combined in a network (or model)**

ii. **Input data and corresponding targets**

iii. **Loss function that defines the feedback signal used for learning**

iv. **Optimizer, that determines how learning proceeds** 


## i. Layers in Keras 

A layer is a data-processing module that takes one or more tensors as input and outputs one or more tensors. 

### Different Layers 

Different layers are appropriate for different tensor formats and different types of data processing. 

For example, simple vector data, stored in 2D tensors of shape (samples, features), is often processed by **densely connected layers**, also called **fully connected** or **dense layers** (for such a model, `model.fit` expects the input data in the form (samples, features)). 

Sequence data stored in 3D tensors of shape (samples, timesteps, features) is typically processed by **recurrent layers** such as an **LSTM** layer.  

Image data, stored in 4D tensors, is usually processed by **2D convolution layers (Conv2D)** (for such a model, `model.fit` expects the input data in the form (samples, height, width, color depth) when Keras' default `channels_last` data format is used).

## Loss function (Objective function)

It is the quantity that will be minimized during training. It represents a measure of success for the task at hand.

A neural network that has multiple outputs may have multiple loss functions (one per output). But the gradient-descent process must be based on a single scalar loss value; so, for multiloss networks, all losses are combined (via averaging) into a single scalar quantity.

Choosing the right loss function for the particular problem is extremely important. However, when it comes to common problems such as classification, regression, and sequence prediction, there are simple guidelines that can be followed, such as: 

**For a two-class classification problem, the binary crossentropy loss function is used**

**For a multi-class (many-class) classification problem, the categorical crossentropy loss function is used**

**Mean squared error (MSE) for a regression problem**

**Connectionist temporal classification (CTC) for a sequence-learning problem**

and so on. 

## Optimizer

It determines how the network will be updated based on the loss function. 

The **most commonly used optimizer is "stochastic gradient descent" (SGD)**.

In [43]:

class TrainMnistDataSetDense:
    """Prepare image data for, and build, a fully connected (dense) classifier.

    The image geometry is stored on the instance and is the single source of
    truth used by ``preProcess`` when flattening images.
    """

    def __init__(self):
        # Default geometry: 28 x 28 pixels with a single channel.
        self.image_height = 28
        self.image_width = 28
        self.image_depth = 1

    def _flatten_and_scale(self, images):
        """Flatten each image to a 1-D float32 vector scaled from [0, 255] to [0, 1]."""
        flat_size = self.image_height * self.image_width * self.image_depth
        flat = images.reshape(images.shape[0], flat_size)
        # astype() returns a new array, so the caller's raw data is left untouched.
        return flat.astype('float32') / 255

    def preProcess(self, x_train, y_train, x_test, y_test, num_classes=10):
        """Flatten and normalise the images and one-hot encode the labels.

        Args:
            x_train: NumPy array of training images; the first axis indexes
                samples and the remaining axes must hold
                ``image_height * image_width * image_depth`` values per sample.
            y_train: Integer class labels for ``x_train``.
            x_test: NumPy array of test images, same layout as ``x_train``.
            y_test: Integer class labels for ``x_test``.
            num_classes: Width of the one-hot label vectors (defaults to 10,
                the value that was previously hard-coded).

        Returns:
            Tuple ``(x_train, y_train, x_test, y_test)`` where the images are
            float32 arrays of shape ``(samples, features)`` and the labels are
            the result of ``to_categorical(labels, num_classes)``.
        """
        x_train = self._flatten_and_scale(x_train)
        x_test = self._flatten_and_scale(x_test)

        # Convert 1-dimensional class arrays to num_classes-dimensional class matrices.
        # The public `to_categorical` helper is used instead of reaching into the
        # internal `np_utils` module.
        y_train = to_categorical(y_train, num_classes)
        y_test = to_categorical(y_test, num_classes)

        return x_train, y_train, x_test, y_test

    def ModelDense(self, input_shape, num_classes):
        """Build (but do not compile) the dense classification network.

        Architecture: Dense(512) + ReLU -> Dense(512) + ReLU ->
        Dense(num_classes) + softmax.

        Args:
            input_shape: Number of features in one flattened input sample.
            num_classes: Number of units in the final softmax layer.

        Returns:
            An uncompiled ``Sequential`` model.
        """
        model = Sequential()

        # First hidden layer; it also declares the shape of a single input sample.
        model.add(Dense(512, input_shape=(input_shape,)))
        model.add(Activation('relu'))

        # Second hidden layer.
        model.add(Dense(512))
        model.add(Activation('relu'))

        # Output layer: one unit per class, normalised by softmax.
        model.add(Dense(num_classes))
        model.add(Activation('softmax'))

        return model

 


In [44]:

# Load MNIST and convert it to flat float32 feature vectors and one-hot labels.
trainMnist = TrainMnistDataSetDense()
(x_train, y_train), (x_test, y_test) = mnist.load_data()
X_train, Y_train, X_test, Y_test = trainMnist.preProcess(x_train, y_train, x_test, y_test)

# Number of features per flattened image, taken from the data instead of a
# separately hard-coded 28*28.
input_shape = X_train.shape[1]
num_classes = 10
nb_epoch = 1
batch_size = 32

model = trainMnist.ModelDense(input_shape, num_classes)

model_save_path = 'TrainedModelMnist.hdf5'

model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])

# summary() prints the table itself and returns None; wrapping it in print()
# only adds a stray "None" line to the cell output.
model.summary()

csv_logger = CSVLogger('training.log')

# NOTE(review): patience=200 cannot trigger while nb_epoch is 1; it only
# becomes effective if the number of epochs is raised well above 200.
early_stop = EarlyStopping(monitor='val_acc', patience=200, verbose=1)
model_checkpoint = ModelCheckpoint(model_save_path,
                                   monitor='val_acc',
                                   verbose=0,
                                   save_best_only=True)

model_callbacks = [early_stop, model_checkpoint, csv_logger]

# NOTE(review): the test split doubles as validation data here, so the
# checkpoint is selected on the test set; consider a separate validation split.
# Keyword arguments make it explicit which value is the batch size and which
# is the epoch count; the returned History is kept for later inspection.
history = model.fit(X_train, Y_train,
                    batch_size=batch_size,
                    epochs=nb_epoch,
                    verbose=1,
                    validation_data=(X_test, Y_test),
                    callbacks=model_callbacks)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_10 (Dense)             (None, 512)               401920    
_________________________________________________________________
activation_10 (Activation)   (None, 512)               0         
_________________________________________________________________
dense_11 (Dense)             (None, 512)               262656    
_________________________________________________________________
activation_11 (Activation)   (None, 512)               0         
_________________________________________________________________
dense_12 (Dense)             (None, 10)                5130      
_________________________________________________________________
activation_12 (Activation)   (None, 10)                0         
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________
None

<keras.callbacks.History at 0x7fc2249ace90>