## Build a model using Keras

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests


In [0]:
### Download data from google drive. You need not mess with this code.

import requests

def download_file_from_google_drive(id, destination):
    """Download the Google Drive file identified by ``id`` into ``destination``.

    The file is requested once; if that response carries a
    ``download_warning`` cookie (see ``get_confirm_token``), the request is
    repeated with the cookie's value passed as the ``confirm`` parameter.
    Whichever response is obtained last is streamed to disk.

    Args:
        id: Google Drive file id.
        destination: path of the local file to write.
    """
    endpoint = "https://docs.google.com/uc?export=download"
    http = requests.Session()

    query = {'id': id}
    reply = http.get(endpoint, params=query, stream=True)

    confirm_token = get_confirm_token(reply)
    if confirm_token:
        # Same session (and therefore same cookies), now with the confirmation value attached.
        query = {'id': id, 'confirm': confirm_token}
        reply = http.get(endpoint, params=query, stream=True)

    save_response_content(reply, destination)

def get_confirm_token(response):
    """Return the value of the first ``download_warning*`` cookie, if any.

    Args:
        response: object exposing a ``cookies`` mapping (name -> value).

    Returns:
        The value of the first cookie whose name starts with
        ``'download_warning'``, or ``None`` when no such cookie exists.
    """
    matching_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    return next(matching_values, None)

def save_response_content(response, destination):
    """Stream the body of ``response`` into the file at ``destination``.

    Args:
        response: object whose ``iter_content(chunk_size)`` yields byte chunks.
        destination: path of the file to (over)write in binary mode.
    """
    block_bytes = 32768

    with open(destination, "wb") as sink:
        # filter(None, ...) skips empty chunks (e.g. keep-alive chunks).
        for payload in filter(None, response.iter_content(block_bytes)):
            sink.write(payload)
                
# Runs when this code is executed directly (as a notebook cell or a script):
# fetches the dataset and stores it as data.csv in the working directory.
if __name__ == "__main__":
    # Google Drive id of the dataset file.
    file_id = '1DHF4b0sBB_KLQ4oxNEMp0sGrViu0gpeG'
    # Local filename; the loading cell reads this same path.
    destination = 'data.csv'
    download_file_from_google_drive(file_id, destination)

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests


# Load the downloaded CSV into a pandas DataFrame.
data = pd.read_csv('data.csv')

## Input features: every column except the first and the last.
## (The first column is skipped -- presumably a row id; TODO confirm against the CSV header.)
X = data.iloc[:, 1:-1].values

# Output labels: the last column.
y = data.iloc[:, -1].values

In [8]:
# Sanity check: largest and smallest label value, then the shapes of the
# label vector and of the feature matrix.
print(np.amax(y), np.amin(y))
print(y.shape)
print(X.shape)


7 1
(15120,)
(15120, 54)


Convert the labels to an intermediate representation in which each label is replaced by an integer from 0 to Ny-1
(Ny is the number of classes).

In [0]:
def conv_labels(y_raw):
    """
    Map arbitrary class labels onto consecutive integer codes.

    Inputs:
        y_raw: numpy array (or array-like) of labels; flattened if not 1-D
    Outputs:
        y: 1-D numpy array of ints, each label is replaced by its position
           among the sorted distinct labels, i.e. an int from 0 to Ny-1
        Ny: number of classes (number of distinct labels)
    """
    # np.unique sorts the distinct labels; return_inverse=True additionally
    # yields, for every element, its index into that sorted list. This is the
    # same code a per-element list.index() lookup produces, but computed in a
    # single vectorised pass and with an integer dtype even for empty input.
    uniques, codes = np.unique(np.asarray(y_raw).ravel(), return_inverse=True)
    y = codes.reshape(-1)
    Ny = len(uniques)
    return y, Ny

In [13]:
# Show the encoded labels next to the raw ones, followed by the class count.
y1, ny = conv_labels(y)
print(y1)
print(y)
print(ny)


[4 4 1 ... 2 2 2]
[5 5 2 ... 3 3 3]
7


In [0]:
### One-hot encode Y_v
def oneHot(y, Ny):
    '''
    One-hot encode integer class indices.

    Input:
        y: an int in {0, ..., Ny-1}, or an integer array of such indices
        Ny: Number of classes, e.g. oneHot(0, 3) -> [1, 0, 0]
    Output:
        Y: float32 array; a vector of shape (Ny,) for a scalar y, or one
           one-hot row per index (shape y.shape + (Ny,)) for array input
    '''
    labels = np.asarray(y, dtype=int)
    # Same convention as keras.utils.to_categorical: a trailing axis of
    # length 1 (column vector of labels) is dropped before encoding.
    if labels.ndim > 1 and labels.shape[-1] == 1:
        labels = labels.reshape(labels.shape[:-1])
    # Row k of the identity matrix is exactly the one-hot vector of class k,
    # so indexing the identity with the labels encodes all of them at once.
    Y = np.eye(Ny, dtype=np.float32)[labels]
    return Y
    

In [0]:
"""Test for one-hot"""
# Class 0 out of 3 classes must encode to [1, 0, 0].
assert np.all(oneHot(0,3)==np.array([1,0,0]))



#### Create Y_o, the one-hot encoding of the labels y, using the functions above

In [0]:
def create_Y_o(y):
    """
    One-hot encode a vector of class labels.

    Inputs:
        y: numpy array of class labels
    Outputs:
        Y_o: float64 numpy array of shape(samples, Ny) with one-hot encodings of y
        Ny: number of unique classes
    """
    labels, Ny = conv_labels(y)
    # Encode the whole label vector with a single oneHot call (it returns one
    # row per index when handed an array) instead of one Python-level call per
    # sample. The cast keeps the float64 dtype of the original zero-initialised
    # result, and the reshape pins the documented (samples, Ny) layout.
    encoded = np.asarray(oneHot(labels, Ny), dtype=np.float64)
    Y_o = encoded.reshape(labels.shape[0], Ny)
    return Y_o, Ny

# Notebook-level results: Y_o is what gets split into train/test targets, and
# Ny is read as a global by create_model() to size the output layer.
Y_o, Ny = create_Y_o(y)


In [0]:
### Split data into train and test. Keep 10% of samples for testing
## Divide the data into these variables - X_train, X_test, y_train, y_test
from sklearn.model_selection import train_test_split

# A fixed random_state makes the shuffle -- and therefore every metric computed
# further down -- repeatable across kernel restarts.
# Note: y_train / y_test hold the one-hot rows of Y_o, not the raw labels.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y_o, test_size=0.1, random_state=42
)

In [0]:
"""test for splitting"""
# With 15120 samples and a 10% test split, 13608 rows must remain for training.
assert(X_train.shape[0] == 13608)

In [0]:
## Normalize the Data
def findMeanStddev(X):
    '''
    Per-feature mean and standard deviation of a sample matrix.

    Input:
        X: a matrix of size (no. of samples, dimension of each sample)
    Output:
        mean: column-wise mean of X (one value per feature)
        stddev: column-wise std dev of X as computed by np.std
                (one value per feature)
    '''
    n_samples = X.shape[0]
    column_totals = np.sum(X, axis=0)
    return column_totals / n_samples, np.std(X, axis=0)

In [0]:
# Debug check: does the per-feature training mean contain NaNs?
# The column totals get their own name -- binding them to `sum` would shadow
# Python's built-in sum() for every cell executed afterwards. Any NaN present
# in X_train propagates into its column total, so checking the totals is enough.
feature_sums = np.sum(X_train, axis = 0)
np.any(np.isnan(feature_sums/X_train.shape[0]))

False

In [0]:
def normalizeX(X, mean, stddev):
    '''
    Standardise features with precomputed statistics.

    Input:
        X: a matrix of size (no. of samples, dimension of each sample)
        mean: per-feature mean, broadcast against the rows of X
        stddev: per-feature std dev, broadcast against the rows of X
    Output:
        Xn: (X - mean) / (stddev + 1e-8); has ~0 mean and ~1 std dev when the
            statistics were computed from X itself
    '''
    # The small offset keeps the division finite for features whose std dev is 0.
    scale = stddev + 10 ** (-8)
    centred = X - mean
    return centred / scale

# Statistics are computed on the training split only and reused for the test split.
# NOTE: X_train / X_test are rebound to their normalised versions here, so
# executing this cell a second time normalises already-normalised data.
mean_train, stddev_train = findMeanStddev(X_train)
X_train = normalizeX(X_train, mean_train, stddev_train)
X_test = normalizeX(X_test, mean_train, stddev_train)

In [0]:
# Confirm that normalisation left no NaNs in the training features.
np.any(np.isnan(X_train))

False

In [0]:
# Inspect the training-split statistics, then confirm the means contain no NaNs.
print(mean_train, stddev_train)
np.any(np.isnan(mean_train))

[2.74983333e+03 1.56787551e+02 1.65130071e+01 2.26619709e+02
 5.09782481e+01 1.71267578e+03 2.12642710e+02 2.18939888e+02
 1.35125073e+02 1.51123273e+03 2.39932393e-01 3.29218107e-02
 4.18797766e-01 3.08348031e-01 2.26337449e-02 4.12257496e-02
 6.40064668e-02 5.54820694e-02 1.04350382e-02 4.29159318e-02
 0.00000000e+00 7.34861846e-05 7.34861846e-04 1.42122281e-01
 2.71164021e-02 1.51381540e-02 3.09376837e-02 1.05820106e-02
 0.00000000e+00 7.71604938e-03 4.08583186e-02 3.74779541e-03
 3.08641975e-03 9.18577307e-03 1.10229277e-03 2.26337449e-02
 5.07054674e-02 1.71222810e-02 7.34861846e-05 3.82128160e-03
 1.02880658e-03 5.14403292e-04 8.59053498e-02 4.90887713e-02
 2.05026455e-02 4.54144621e-02 4.07113463e-02 1.46972369e-03
 6.90770135e-03 7.34861846e-04 2.27807172e-03 4.90887713e-02
 4.24015285e-02 3.04967666e-02] [4.16994909e+02 1.10202725e+02 8.45316704e+00 2.09767132e+02
 6.13326597e+01 1.32283512e+03 3.05555061e+01 2.28322500e+01
 4.59334334e+01 1.09564856e+03 4.27041965e-01 1.78431

False

#### Create model. 
- Choose the number of hidden layers, neurons, activations, loss function, learning rate and optimizers on your own.
- Report accuracy metric
- Use no more than 100 epochs
- Use validation_split = 0.1

In [0]:
# Per-sample feature shape; create_model() derives its input shape the same way from X_train.
print(X.shape[1:])

(54,)


In [0]:
import keras
def create_model():
    """
    Build and compile a classifier with one hidden layer.

    Reads two notebook-level globals: X_train (per-sample input shape) and
    Ny (number of output units).

    Inputs:
        None
    Outputs:
        model: keras model after compiling (categorical cross-entropy loss,
               'sgd' optimizer, accuracy metric)
    """
    from keras.layers import Input, Dense
    from keras.models import Model

    # Input -> 10 sigmoid units -> Ny-way softmax.
    features_in = Input(shape=X_train.shape[1:])
    hidden = Dense(10, activation='sigmoid')(features_in)
    class_probs = Dense(Ny, activation='softmax')(hidden)

    net = Model(inputs=[features_in], outputs=[class_probs])
    net.compile(
        loss='categorical_crossentropy',
        optimizer='sgd',
        metrics=['accuracy'],
    )
    return net

In [0]:

# Train for 100 epochs in mini-batches of 100, holding out 10% of the training
# split for validation; `history` keeps the per-epoch metrics.
model = create_model()
history = model.fit(X_train, y_train, epochs=100, batch_size = 100, validation_split = 0.1)

In [24]:
from matplotlib import pyplot as plt

# Requires the training cell to have been run in this kernel (it defines `history`).
# Older Keras releases record the validation accuracy as 'val_acc', newer ones
# as 'val_accuracy'; use whichever key this run produced.
val_acc_key = 'val_accuracy' if 'val_accuracy' in history.history else 'val_acc'
plt.plot(history.history[val_acc_key])
plt.xlabel('epoch')
plt.ylabel('validation accuracy')
plt.title('Validation accuracy per epoch');

NameError: ignored

#### Evaluation


In [0]:
"""Test for model"""


In [0]:
"""Test for model"""


In [0]:
"""Test for model"""


#### Confusion Matrix

In [0]:
def create_cm(Y_test, Y_pred):
    """
    Confusion matrix from one-hot targets and per-class scores.

    Inputs:
        Y_test: numpy array of shape (samples, Ny) with one-hot true labels
        Y_pred: numpy array of shape (samples, Ny) with predicted per-class
                scores; the highest-scoring class is taken as the prediction
    Outputs:
        CM: numpy array (ndim=2) of shape (Ny, Ny) containing confusion
            matrix; CM[i, j] counts samples of true class i predicted as j
    """
    from sklearn.metrics import confusion_matrix

    true_idx = Y_test.argmax(axis=1)
    pred_idx = Y_pred.argmax(axis=1)
    # List every class index explicitly. Without `labels`, a class that occurs
    # in neither the targets nor the predictions is dropped from the matrix,
    # which shrinks it and shifts the row/column of every later class.
    class_ids = np.arange(Y_test.shape[1])
    CM = confusion_matrix(true_idx, pred_idx, labels=class_ids)
    return CM

In [0]:
"""Test for confusion matrix"""

In [0]:
def precision(cm, cls):
    """
    Precision of a single class, read off a confusion matrix.

    Inputs:
        cm: numpy array of shape (Ny, Ny) containing confusion matrix
            (column `cls` holds every sample predicted as `cls`)
        cls: class for which precision has to be computed
    Outputs:
        prec: precision for class cls, i.e. cm[cls, cls] divided by the sum of
              column cls; 0.0 when the class was never predicted
    """
    cm = np.asarray(cm)
    predicted_as_cls = np.sum(cm[:, cls])
    # An empty column would otherwise give 0/0 (NaN plus a RuntimeWarning).
    if predicted_as_cls == 0:
        return 0.0
    prec = cm[cls, cls] / predicted_as_cls
    return prec
    

In [0]:
# Confusion matrix on the held-out test split; create_cm takes the argmax of
# both the one-hot targets and the model's predicted scores.
cm = create_cm(y_test, model.predict(X_test))

In [0]:
# Display the matrix: rows follow the true class index, columns the predicted class index.
cm

array([[141,  41,   0,   0,   8,   1,  22],
       [ 64, 100,   4,   0,  39,  10,   1],
       [  0,   1, 102,  53,   7,  68,   0],
       [  0,   0,  22, 193,   0,  14,   0],
       [  9,  21,  14,   0, 157,  11,   0],
       [  3,   2,  32,  22,  15, 123,   0],
       [ 33,   1,   2,   0,   0,   0, 176]])

In [0]:
# Precision for class index 0 (the first row/column of cm).
precision(cm, 0)

0.564

In [0]:
# Hand check of the value above: cm[0, 0] over the sum of column 0 of the displayed matrix (zero entries omitted).
141/(141+64+9+3+33)

0.564