In [1]:
import datetime

import numpy as np
import pandas as pd
import pydot
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras.layers import Dense, Flatten

In [2]:
# Load the labelled training frame and the test frame from CSV files that are
# expected to sit in the notebook's working directory.
digit_data = pd.read_csv('train.csv')
X_test = pd.read_csv('test.csv')

In [3]:
# Hold out 10% of the labelled rows for validation. The fixed seed keeps the
# split (and every metric computed on it) reproducible across kernel restarts.
train_df, val_df = train_test_split(digit_data, test_size=.1, random_state=42)

# Targets: the 'label' column of each partition.
y_train = train_df['label']
y_val = val_df['label']

# Features: every remaining column, divided by 255.0
# (presumably 8-bit pixel intensities scaled to [0, 1] -- TODO confirm).
X_train = train_df.drop('label', axis=1) / 255.0
X_val = val_df.drop('label', axis=1) / 255.0

# NOTE(review): this rebinds X_test in place, so re-running this cell without
# re-running the load cell divides the test features by 255 a second time.
X_test = X_test / 255.0

In [4]:
# Sanity check: (rows, feature columns) of the training matrix after the split.
X_train.shape

(37800, 784)

In [5]:
# The ten class labels, 0 through 9.
class_names = list(range(10))

In [6]:
def _build_mlp(hidden_units):
    """Return an uncompiled fully connected classifier.

    The network takes a flat 784-feature input, applies one ReLU Dense layer
    per entry of ``hidden_units`` (in order), and ends in a 10-unit softmax.

    Args:
        hidden_units: iterable of ints, the width of each hidden layer.

    Returns:
        A ``keras.models.Sequential`` instance (not compiled).
    """
    layers = [Flatten(input_shape=[784])]
    layers.extend(Dense(units, activation='relu') for units in hidden_units)
    layers.append(Dense(10, activation='softmax'))
    return keras.models.Sequential(layers)


# Candidate architectures to compare. They are created in the same order as
# before so the public names model ... model4 refer to the same topologies.
model = _build_mlp([300, 100, 100, 100])
model1 = _build_mlp([500, 1000, 300, 100])
model2 = _build_mlp([500, 100])
model3 = _build_mlp([300, 100])
model4 = _build_mlp([1000, 100, 100, 100])

models = [model, model1, model2, model3, model4]

In [7]:
# Print the layer-by-layer output shapes and parameter counts of the first candidate.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 300)               235500    
_________________________________________________________________
dense_1 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_4 (Dense)              (None, 10)                1010      
Total params: 286,810
Trainable params: 286,810
Non-trainable params: 0
__________________________________________________

In [8]:
# List the layer objects of the first candidate, in forward order.
model.layers

[<tensorflow.python.keras.layers.core.Flatten at 0x2d9f8d53580>,
 <tensorflow.python.keras.layers.core.Dense at 0x2d9f8b70730>,
 <tensorflow.python.keras.layers.core.Dense at 0x2d9f8b70a60>,
 <tensorflow.python.keras.layers.core.Dense at 0x2d9f8b70df0>,
 <tensorflow.python.keras.layers.core.Dense at 0x2d9f8c0f130>,
 <tensorflow.python.keras.layers.core.Dense at 0x2d9f8c0f490>]

In [9]:
# Name Keras assigned to the first Dense layer (index 0 is the Flatten layer).
model.layers[1].name

'dense'

In [10]:
# Unpack the two parameter arrays of the first Dense layer: kernel matrix and bias vector.
weights, biases = model.layers[1].get_weights()

In [11]:
# Show the kernel matrix of the first Dense layer.
weights

array([[ 0.05213445, -0.0523987 , -0.05810249, ..., -0.06837106,
         0.068583  , -0.05298483],
       [ 0.0650496 , -0.04828077,  0.0430701 , ...,  0.04284758,
         0.04887644, -0.01396381],
       [-0.02699849, -0.04587036, -0.06631325, ..., -0.00626767,
        -0.02063423,  0.05234554],
       ...,
       [ 0.06495905, -0.0165869 , -0.03519252, ..., -0.05269681,
         0.05798168, -0.03291587],
       [ 0.03256438,  0.00622503, -0.04819008, ..., -0.02715836,
         0.05988452,  0.06614803],
       [ 0.02775332,  0.00335781, -0.04486591, ...,  0.02074573,
         0.05773409, -0.00578506]], dtype=float32)

In [12]:
# Show the bias vector of the first Dense layer.
biases

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.

In [13]:
# Kernel shape: (inputs to the layer, units in the layer).
weights.shape

(784, 300)

In [14]:
# Bias shape: one entry per unit in the layer.
biases.shape

(300,)

In [15]:
# Compile every candidate with the same settings so their results are comparable:
# integer class labels (sparse cross-entropy), plain SGD, accuracy as the metric.
for candidate in models:
    candidate.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='sgd',
        metrics=['accuracy'],
    )

In [16]:
#for m in models:
#    m.fit(X_train,y_train,
#        epochs=40,
#        validation_split=0.2)

In [17]:
# Inspect the History object attached to the first candidate.
# NOTE(review): the training cell above is commented out, so this has nothing
# to show unless the model was fitted elsewhere -- confirm this cell is still needed.
models[0].history

In [18]:
#for m in models:
#    print(f'{m} Graph')
#    pd.DataFrame(m.history.history).plot(figsize=(8,5))
#    plt.grid(True)
#    plt.gca().set_ylim(0,1)
#    plt.show()

In [19]:
#checkpoint_cb = keras.callbacks.ModelCheckpoint('my_keras_model.h5', save_best_only=True)
#history = models[4].fit(X_train,y_train, epochs=40, 
#    validation_split=0.2, callbacks=[checkpoint_cb])
#best_model = keras.models.load_model('my_keras_model.h5')

In [20]:
import matplotlib.pyplot as plt

#pd.DataFrame(digits.history).plot(figsize=(15,5))
#plt.grid(True)
#plt.gca().set_ylim(0,1)
#plt.gca().set_xlim(0,40)
#plt.show()

In [21]:
#digits = model.fit(X_train, y_train,
#    epochs=40,
#    validation_split=0.2)
#    
#pd.DataFrame(digits.history).plot(figsize=(8,5))
#plt.grid(True)
#plt.gca().set_ylim(0,1)
#plt.show()

In [22]:
#test_predictions = best_model.predict(test_data)

In [23]:
#test_predictions

In [24]:
#test_predictions_classes = np.argmax(best_model.predict(test_data), axis=-1)

In [25]:
#results = pd.DataFrame(test_predictions_classes)
#results.index = np.arange(1, len(results) + 1)
#results.to_csv('results.csv')

# Fine-tuning NN Hyperparameters

## Function to build & compile Keras model, given a set of hyperparameters

## The function creates a simple Sequential model for univariate regression using an SGD optimizer

In [26]:
def build_model(n_hidden=1, n_neurons=30, learning_rate=3e-3, input_shape=[784], activation='relu'):
    """Build and compile a small fully connected regression network.

    Args:
        n_hidden: number of hidden Dense layers (0 yields a purely linear model).
        n_neurons: number of units in each hidden layer.
        learning_rate: step size passed to the SGD optimizer.
        input_shape: shape of one input sample, without the batch dimension.
        activation: activation used by every hidden layer.

    Returns:
        A compiled ``keras.models.Sequential`` with a single linear output
        unit, trained with mean squared error and SGD.
    """
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=input_shape))
    for _ in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons, activation=activation))
    # NOTE(review): a single linear unit with MSE treats the target as a
    # continuous value. This notebook fits it on the digit 'label' column, so
    # predictions come out as floats rather than class ids -- confirm this is
    # intended and not a leftover from a regression example.
    model.add(keras.layers.Dense(1))
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
    model.compile(loss="mse", optimizer=optimizer)
    return model

## Create a KerasRegressor based on this build_model() function

In [27]:
# Wrap build_model in a scikit-learn style estimator so it exposes fit/score/predict
# and can be handed to scikit-learn model-selection tools.
# NOTE(review): keras.wrappers.scikit_learn is deprecated and absent from recent
# TensorFlow releases -- confirm the TensorFlow version this notebook is pinned to.
keras_reg = keras.wrappers.scikit_learn.KerasRegressor(build_model)

### Train, Evaluate, Predict

In [28]:
# Train with the default hyperparameters for at most 100 epochs, monitoring the
# validation split and stopping once it has not improved for 10 epochs.
keras_reg.fit(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[keras.callbacks.EarlyStopping(patience=10)],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100

KeyboardInterrupt: 

In [27]:
# Score the fitted regressor on the held-out validation split.
# NOTE(review): despite the name, this is computed on X_val/y_val, not on the test
# set. The legacy KerasRegressor.score is also documented as returning the negated
# loss -- confirm the sign before reporting this value as an MSE.
mse_test = keras_reg.score(X_val, y_val)
#y_pred = keras_reg.predict(X_new)



### With this many hyperparameters, prefer a randomized search over a grid search. Explore the number of hidden layers, the number of neurons per layer, and the learning rate

In [29]:
from scipy.stats import reciprocal
from sklearn.model_selection import RandomizedSearchCV

In [31]:
# Search space: depth and width are sampled from discrete lists, while the
# learning rate is drawn from a reciprocal distribution over [3e-4, 3e-2].
param_distribs = {
    "n_hidden": [0, 1, 2, 3],
    "n_neurons": np.arange(1,100),
    "learning_rate": reciprocal(3e-4, 3e-2),
}

# 10 sampled configurations x 3-fold cross-validation. The fixed random_state
# makes the sampled configurations and the fold assignment reproducible.
rnd_search_cv = RandomizedSearchCV(keras_reg, param_distribs, n_iter=10, cv=3, random_state=42)
# Backward-compatible alias for the original (misspelled) variable name.
rnd_serach_cv = rnd_search_cv

# Extra keyword arguments are forwarded to the estimator's fit() for every candidate.
rnd_search_cv.fit(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[keras.callbacks.EarlyStopping(patience=10)],
)

45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100


KeyboardInterrupt: 

In [60]:
import os
import time
# Parent directory under which each training run gets its own log folder.
root_logdir = os.path.join(os.curdir, 'my_logs')


def get_run_logdir():
    """Return a log directory path under root_logdir named after the current local time.

    The final path component has the form ``run_YYYY_MM_DD-HH_MM_SS``.
    """
    timestamp = time.strftime('%Y_%m_%d-%H_%M_%S')
    return os.path.join(root_logdir, 'run_' + timestamp)


# Directory for the run started from this kernel session.
run_logdir = get_run_logdir()

In [62]:
# Write training curves for this run to its own TensorBoard log directory.
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
# Stop once the monitored validation metric has not improved for 10 epochs and
# roll back to the best weights seen. Without restore_best_weights the model
# used for prediction would keep the weights of the last epoch, which can be
# up to `patience` epochs past the best one.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
#TODO add best param model here
history = keras_reg.fit(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[tensorboard_cb, early_stopping_cb],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100


In [65]:
# Predict on the scaled test features.
# NOTE(review): the regressor returns one float per row (see the output below),
# not a digit class -- confirm how these are meant to be mapped to labels.
keras_reg.predict(X_test)

array([2.6644855, 0.2920136, 8.155593 , ..., 2.8528655, 9.187014 ,
       1.3953745], dtype=float32)