# Keras Basics
We will learn about
* Dense layers
* Categorical cross-entropy

A toy example to show how to train a classifier with Keras and use it. The data comes from three gaussian distributions.

In [0]:
## DATA GENERATION
import numpy as np

def generateX(cls):
    '''
    Inputs:
        cls: class {0, 1, 2}
    Outputs:
        x: a sample from cls; a np array of shape (2,)
    '''
    assert cls in [0,1,2]
    if cls==0:
        x = np.random.normal(np.array([0,0]),100)
#         100 is std deviation. std deviation is zero when x=xmean
    elif cls==1:
        x = np.random.normal(np.array([200,200]),100)
    elif cls==2:
        x = np.random.normal(np.array([-200,200]),100)
    return x
Nx = 2 # shape of a sample is (2,)
Ny = 3 # 3 classes

In [2]:
print(generateX(1))

[269.2475539 267.0045024]


Could you write a function to generate N samples from class 0 and N samples from class 1?

In [0]:
def generateXY(N):
    '''
    Inputs:
        N: no. of samples of each class
    Outputs:
        X: np array of samples; shape = (3*N, 2)
        Y: np array of samples; shape = (3*N, 1)
    '''
    # YOUR CODE HERE
    X=[]
    Y=[]
    for i in range(0,3):
      for j in range(0,N):
        X+=[generateX(i)]
        Y+=[[i]]
    
#     print(Y.shape)
#     print(X,Y)
    X=np.array(X).reshape(3*N,2)
    Y=np.array(Y).reshape(3*N,1)
#     print(X," ",Y)
    return X, Y

In [0]:

# def generateXY(N):
#     '''
#     Inputs:
#         N: no. of samples of each class
#     Outputs:
#         X: np array of samples; shape = (3*N, 2)
#         Y: np array of samples; shape = (3*N, 1)
#     '''
#     # YOUR CODE HERE
#     return X, Y

In [5]:
def test_generateXY():
    X_train, Y_train = generateXY(50)
    assert X_train.shape==(150,2)
    assert Y_train.shape==(150,1)
    print('Test passed', '\U0001F44D')
test_generateXY()

Test passed 👍


### One-hot encoding

Now our Y is in the form [0], [1] and [2]. We want to convert them to [1,0,0], [0,1,0] and [0,0,1], respectively. 
Could you write a code to convert Y (with one column) into one-hot encoded Y (with 3 columns)?

In [0]:
def oneHot(y, Ny):
    '''
    Input:
        y: an int in {0, 1, 2}
        Ny: Number of classes, e.g., 3 here.
    Output:
        Y: a vector of Ny (=3) tuples
    '''
    # YOUR CODE HERE
    Y=np.eye(Ny)
#     print(Y.shape)
    return Y[y]


In [7]:
def test_oneHot():
    assert np.all(oneHot(0,3)==np.array([1,0,0]))
    assert np.all(oneHot(1,3)==np.array([0,1,0]))
    assert np.all(oneHot(2,3)==np.array([0,0,1]))
    print('Test passed', '\U0001F44D')
test_oneHot()

Test passed 👍


### Input Normalization
X can lie in any unbounded range. We need to curtail to a narrow range close to zero. This helps in enhancing the stability of training and hyper-parameter tuning.
This is normally achieved by scaling the X to have zero mean and unit standard deviation (std).

$X \leftarrow \frac{X-mean(X)}{std(X)}$, where this is element wise division

Could you use training samples to find mean and std, and normalize your X_train with that?

In [0]:
def findMeanStddev(X):
    '''
    Input: 
        X: a matrix of size (no. of samples, dimension of each sample)
    Output:
        mean: mean of samples in X; shape is (dimension of each sample,)
        stddev: element-wise std dev of sample in X; shape is (dimension of each sample,)
    '''
    # YOUR CODE HERE
    mean=[]
    stddev=[]
    X=X.T
    for i in range(0,X.shape[0]):
      mean+=[np.mean(X[i])]
      stddev+=[np.std(X[i])]
    return mean, stddev

In [9]:
def test_findMeanStddev():
    X = np.array([[3,2,6],[7,4,2],[3,5,1]])
    mean, stddev = findMeanStddev(X)
    assert np.isclose(mean, np.array([4.33, 3.66, 3.]), atol=0.1).all()
    assert np.isclose(stddev, np.array([1.88, 1.24, 2.16]), atol=0.1).all()
    print('Test passed', '\U0001F44D')
test_findMeanStddev()

Test passed 👍


In [0]:
def normalizeX(X, mean, stddev):
    '''
    Input:
        X: a matrix of size (no. of samples, dimension of each sample)
        mean: mean of samples in X (same size as X)
        stddev: element-wise std dev of sample in X (same size as X) 
    Output:
        Xn: X modified to have 0 mean and 1 std dev
    '''
    # YOUR CODE HERE
#     mean=np.array(mean).reshape(X.shape[0],X.shape[1])
#     stddev=stddev.reshape(X.shape[0],X.shape[1])
    X=X.T
    for i in range(0,X.shape[0]):
      X[i]-=mean[i]
      if(not(stddev[i]==0)):
        X[i]/=stddev[i]
    
#     X=X/stddev
#     for i in range(0,X.shape[0]):
#       X[i]-=mean
#       X[i]/=stddev
    return X.T

In [11]:
def test_normalizeX():
    X = np.ones((3,3))
    m,s = findMeanStddev(X)
    assert np.all(m==np.ones(3))
    assert np.all(s==np.zeros(3))
    assert np.all(normalizeX(X,m,s)==0*X)
    # test on random X
    X = np.random.random((5,3))
    m,s = findMeanStddev(X)
    Xn = normalizeX(X,m,s)
    mn, sn = findMeanStddev(Xn)
    assert np.allclose(mn, np.zeros(3))
    assert np.allclose(sn, np.ones(3))
    print('Test passed', '\U0001F44D')
test_normalizeX()

Test passed 👍


### Plotting
Could you plot all the samples in X_train with different colors for different classes?

In [0]:
import matplotlib.pyplot as plt
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
def plotXY(X, Y):
    '''
    Inputs:
        X: a matrix of size (no. of samples, dimension of each sample)
        Y: a matrix of size (no. of samples, no. of classes) - these are one-hot vectors
    Action:
        Plots the samples in X, their color depends on Y
    '''
    Ny = Y.shape[1]
    for cls in range(Ny):
        idx = np.where(Y[:,cls]==1)[0]
        plt.plot(X[idx,0], X[idx,1], colors[cls]+'.')


## Creating the Network
We now create the network with dense layers: 
$y = f(Wx)$

ReLU activation: 
$f(h) = h, h>0; 0, h\le 0$

Softmax activation: 
$f(h_i) = \frac{\exp(h_i)}{\sum_j \exp(h_j)}$

Categorical cross-entropy loss:
$\mathcal{L} = -\sum_t y^d_t \log y_t$

Stochastic Gradient Descent:
$w_{ij} \leftarrow w_{ij} - \eta \frac{\partial \mathcal{L}}{\partial w_{ij}}$

In [13]:
import keras
from keras.layers import Input, Dense
from keras.models import Model
from keras import optimizers

def makeNN(Nx, Nh, Ny):
    '''
    Input:
        Nx: int; no. of input nodes; shape of each sample; i.e., X.shape[1:] 
        Nh: int; no. of hidden neurons
        Ny: int; no. of output nodes; shape of output; i.e., Y.shape[1]
    Output:
        model: keras NN model with Input layer, Dense layer with Nh neurons, 
                and Dense output layer with softmax non-linearity, loss function
                categorical-crossentropy, optimizer SGD.
    '''
    # YOUR CODE HERE
    input_layer=Input(shape=(Nx,))
#     print(input_layer.shape)
    hidden_layer=Dense(Nh, activation="softmax")(input_layer)
#     print(hidden_layer.shape)
    output_layer=Dense(Ny, activation="softmax")(hidden_layer)
#     print(output_layer.shape)
    
    model=Model(inputs=[input_layer],outputs=[output_layer])
    model.compile(optimizer=optimizers.Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

Using TensorFlow backend.


### Plotting the model

In [0]:
def plotModel(model):
    from keras.utils import plot_model
    plot_model(model, show_shapes=True, show_layer_names=True, to_file='model.png')
    from IPython.display import Image
    Image(retina=True, filename='model.png')

### Training


In [0]:
def trainNN(model, X_train, Y_train, Nepochs):
    '''
    Action:
        Train model with model.fit
    '''
    # YOUR CODE HERE
    history=    model.fit(X_train, Y_train, validation_split = 0.1, epochs=Nepochs)
# Use 10% of samples for validation, validation_split is the relevant parameter


In [0]:
def trainModel(N, Nh, Nepochs):
    '''
    generateXY, normalizeX, oneHot, makeNN, trainNN
    Input:
        N: int; no. of training samples per class
        Nh: int; no. of neurons in hidden layer
    Output:
        model: keras NN model trained with the training data
        mean_train, stddev_train: mean and stddev of training data - you will 
                            need this for normalizing your test data
    '''
    # YOUR CODE HERE
    X, Y=generateXY(N)
    mean ,stddev=findMeanStddev(X)
    X=normalizeX(X,mean,stddev)
#     print(np.unique(Y).shape[0])
    Y=oneHot(Y,np.unique(Y).shape[0])
    Y=Y.reshape(Y.shape[0],Y.shape[-1])
#     print(X.shape,Y.shape,Y)
    index=np.arange(3*N)
    np.random.shuffle(index)
    X=X[index]
    Y=Y[index] 
    model=makeNN(X.shape[1],Nh,Y.shape[1])
    trainNN(model,X,Y,Nepochs)
    mean_train=mean
    stddev_train=stddev
    return model, mean_train, stddev_train

### Evaluation
Could you:
- Generate 20 samples from each class
- Normalize them with mean_train and stddev_train
- Get Y_test as one hot encoded labels

In [0]:
def testModel(model, Ntest, mean_train, stddev_train):
    '''
    generateXY for test, normalize, onehot, evaluate the model
    Inputs:
        model: trained Keras NN model
        Ntest: int; number of test samples per class
    Output:
        accuracy: float; accuracy on the test data
        CM: confusion matrix on the test data
    '''
    # YOUR CODE HERE
    Xtest,Ytest=generateXY(Ntest)
#     mean,stddev=findMeanStddev(Xtest)
    Xtest=normalizeX(Xtest,mean_train,stddev_train)
    Ytest=oneHot(Ytest,np.unique(Ytest).shape[0])    
    Ytest=Ytest.reshape(Ytest.shape[0],Ytest.shape[-1])
#     trainNN(model,Xtest,Ytest,50)
    from sklearn.metrics import confusion_matrix
    loss, accuracy = model.evaluate(Xtest, Ytest,verbose=0)  # Evaluate the model
    print('Accuracy :%0.3f'%accuracy)

    pred_Ytest = model.predict(Xtest)
    cm = confusion_matrix(Ytest.argmax(axis=1), pred_Ytest.argmax(axis=1))
    print(cm )

    return accuracy, cm


In [18]:
model, mean_train, stddev_train = trainModel(50, 20, 500)
accuracy, CM = testModel(model, 10, mean_train, stddev_train)

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 2)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 20)                60        
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 63        
Total params: 123
Trainable params: 123
Non-trainable params: 0
_________________________________________________________________
Instructions for updating:
Use tf.cast instead.
Train on 135 samples, validate on 15 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch

# ADVANCED QUESTIONS



### Effect of changing Nh
### Effect of changing Nepochs
### Effect of changing N, no. of training samples

Can you observe overfitting? 

Can you do hyperparameter tuning here? 

To normalize test data, why do we use the mean and stddev of training data?


In [19]:
Nh=[50,100]
Nepochs=[100,200]
N=[100,200]
accuracy=[]
k=0
for nh in Nh:
  for ne in Nepochs:
    for n in N:
      print("NH =",nh)
      print("Nepochs =",ne)
      print("N =",n)
      model, mean_train, stddev_train = trainModel(n, nh, ne)
      acc, CM = testModel(model, 10, mean_train, stddev_train)
      accuracy+=[acc]
      k+=1

      print("---------------------------------------------------------------------------------------------------------------------------")

NH = 50
Nepochs = 100
N = 100
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 2)                 0         
_________________________________________________________________
dense_3 (Dense)              (None, 50)                150       
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 153       
Total params: 303
Trainable params: 303
Non-trainable params: 0
_________________________________________________________________
Train on 270 samples, validate on 30 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoc

In [20]:
print(k)
i=0
for nh in Nh:
  for ne in Nepochs:
    for n in N:
      if(i<=k):
        print("NH =",nh)
        print("Nepochs =",ne)
        print("N =",n)
        print("Accuracy =",accuracy[i])
        i+=1
        print("=========================================================================================================================================")

8
NH = 50
Nepochs = 100
N = 100
Accuracy = 0.9333333373069763
NH = 50
Nepochs = 100
N = 200
Accuracy = 0.8999999761581421
NH = 50
Nepochs = 200
N = 100
Accuracy = 0.8999999761581421
NH = 50
Nepochs = 200
N = 200
Accuracy = 0.9666666388511658
NH = 100
Nepochs = 100
N = 100
Accuracy = 0.8999999761581421
NH = 100
Nepochs = 100
N = 200
Accuracy = 0.9666666388511658
NH = 100
Nepochs = 200
N = 100
Accuracy = 0.8333333134651184
NH = 100
Nepochs = 200
N = 200
Accuracy = 0.8333333134651184
