In [3]:
from numpy.random import seed
seed(123)
from tensorflow import set_random_seed
set_random_seed(123)

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import RandomizedSearchCV

In [5]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD
from keras.wrappers.scikit_learn import KerasClassifier 
from keras.utils import np_utils

import keras

Using TensorFlow backend.


In [7]:
training = pd.read_csv("../src/Data/train.csv")
testing = pd.read_csv("../src/Data/test.csv")

##Separating into Training, and Validation data

In [30]:
X = training.values[:, 1:]
Y = training.values[:, 0]

test = testing.values

In [10]:
X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, test_size = 0.25, random_state = 123)

In [11]:
training.head(5)

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#Neural Network Implementation

In [12]:
encoder = LabelEncoder()
encoder.fit(Y_train)
encoded_Y = encoder.transform(Y_train)
dummy_Y_train = np_utils.to_categorical(encoded_Y)

In [13]:
encoder = LabelEncoder()
encoder.fit(Y_valid)
encoded_Y = encoder.transform(Y_valid)
dummy_Y_valid = np_utils.to_categorical(encoded_Y)

In [34]:
dummy_Y_valid.categories

AttributeError: 'numpy.ndarray' object has no attribute 'categories'

In [14]:
def deep_forward_model(nodes = 32,
                       rate = 0.5):
    
    #Building Model
    nn_model = Sequential()
    nn_model.add(Dense(nodes, activation = 'relu', input_dim = 784))
    nn_model.add(Dense(nodes, activation = 'relu'))
    nn_model.add(Dropout(rate = rate, seed = 123))
    nn_model.add(Dense(nodes, activation = 'relu'))
    nn_model.add(Dense(10, activation = 'softmax'))
    
    #Compliling Model
    nn_model.compile(loss = 'categorical_crossentropy', 
                    optimizer = 'adam',
                    metrics = ['accuracy'])
    
    return nn_model

In [15]:
def nn_tune(model, params, n = 5, times = 10):
    nn_search = GridSearchCV(estimator=model, param_grid=params, cv = n)
    #nn_search = RandomizedSearchCV(estimator=model, param_distributions = params, n_iter = times)
    nn_result = nn_search.fit(X_train, dummy_Y_train)
    return nn_result.best_params_

In [16]:
model = KerasClassifier(build_fn = deep_forward_model, epochs = 100, batch_size = 5, verbose = 0)

## Baseline Score

In [28]:
model.fit(X_train, dummy_Y_train)
model.score(X_train, dummy_Y_train)

0.5747936634364582

## Tuning Parameters

### Batch Size and Epochs

In [None]:
param_grid = {'batch_size' : [10, 20, 30, 40, 50, 60, 80, 100],             
              'epochs' : [10, 25, 40, 65, 100]}
nn_tune(model, param_grid)

### Nodes and Rate

In [None]:
param_grid = {'nodes' : [8, 16, 24, 32, 64, 128],
              'rate' : [0.1, 0.3, 0.5, 0.7, 0.9]}

model = KerasClassifier(build_fn = deep_forward_model, batch_size = 60, epochs = 10, verbose = 0)
nn_tune(model, param_grid)

### Loss Function, Optimizer, and Metrics

In [None]:
param_grid = {'' : [],
             '' : []}

## Evaluating Validation Set Score

In [22]:
final_model = deep_forward_model(nodes = 8, rate = 0.1)
final_model.fit(X_train, dummy_Y_train, batch_size = 60, epochs = 5, validation_split = 0.25, verbose = 0)
score = final_model.evaluate(X_valid, dummy_Y_valid, batch_size = 60, verbose = 0)

In [23]:
final_model.metrics_names

['loss', 'acc']

In [24]:
score

[1.81469952583313, 0.29209524116345814]

In [18]:
print("Baseline: {}% ({}%)").format(cv_results.mean()*100, cv_results.std()*100)

NameError: name 'cv_results' is not defined

## Producing Testing Target Values for Kaggle

In [31]:
final_model.predict(test)

array([[4.20947444e-11, 1.21757395e-07, 9.99997854e-01, ...,
        5.76430722e-13, 3.62614192e-12, 1.97446443e-12],
       [1.13145106e-01, 1.30594388e-01, 3.30133811e-02, ...,
        1.25803739e-01, 1.13927491e-01, 1.15861602e-01],
       [1.13145106e-01, 1.30594388e-01, 3.30133811e-02, ...,
        1.25803739e-01, 1.13927491e-01, 1.15861602e-01],
       ...,
       [1.13145106e-01, 1.30594388e-01, 3.30133811e-02, ...,
        1.25803739e-01, 1.13927491e-01, 1.15861602e-01],
       [1.13145106e-01, 1.30594388e-01, 3.30133811e-02, ...,
        1.25803739e-01, 1.13927491e-01, 1.15861602e-01],
       [1.60667639e-08, 7.48086222e-06, 9.99934316e-01, ...,
        6.29888031e-10, 2.50388399e-09, 1.58440538e-09]], dtype=float32)