# Imports

In [45]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from keras import regularizers
from keras.utils import np_utils
from keras.models import Sequential
from keras.callbacks import EarlyStopping
from keras.layers import Conv2D, MaxPooling2D
from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers import Dense, Dropout, Activation, Flatten

# Fix NumPy's global RNG seed so NumPy-driven randomness repeats between runs.
# NOTE(review): this seeds NumPy only; whether the Keras backend draws weights
# and dropout masks from a separate RNG depends on the backend -- confirm if
# exactly repeatable training is required.
np.random.seed(3)
# IPython magic: draw matplotlib figures inside the notebook output.
%matplotlib inline

# Data Preprocessing

In [20]:
# Read the labelled training set: one row per image, holding a `label` column
# followed by the flattened pixel columns (pixel0 ... pixel783).
df_train = pd.read_csv(filepath_or_buffer='data/train.csv')

# Preview the first five rows to confirm the column layout.
df_train.head(n=5)

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
# Separate the target vector (the `label` column) from the feature matrix
# (every remaining column), both as plain NumPy arrays.
y = df_train['label'].values
X = df_train.drop(labels='label', axis='columns').values

# Hold out a test partition. Stratifying on y keeps the class proportions the
# same in both partitions; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=3,
)

In [39]:
# Sanity check: for square images, the side length is the square root of the
# number of flattened pixel columns.
n_pixels = X_train.shape[1]
np.sqrt(n_pixels)

28.0

In [40]:
# Give every flattened row an explicit image layout of
# (n_images, height, width, channels). One channel means greyscale, matching
# the input_shape=(28, 28, 1) declared on the first Conv2D layer.
X_train = X_train.reshape((len(X_train), 28, 28, 1))
X_test = X_test.reshape((len(X_test), 28, 28, 1))

# Cast to float32 so the pixels can hold fractional values, then rescale the
# intensities from the 0-255 range down to 0-1.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

In [41]:
# One-hot encode the integer class labels into 10-column indicator rows, the
# target format used with the categorical cross-entropy loss.
y_train, y_test = (
    np_utils.to_categorical(split_labels, num_classes=10)
    for split_labels in (y_train, y_test)
)

# 1. Keras Sequential

In [42]:
# Small CNN for 28x28 single-channel images, declared as an ordered layer list:
#   conv -> pool -> conv -> pool -> dropout -> flatten -> dense -> dropout -> softmax
model = Sequential([
    # First convolution: six 3x3 filters with ReLU. input_shape describes a
    # single training image as (height, width, channels).
    Conv2D(filters=6, kernel_size=3, activation='relu', input_shape=(28, 28, 1)),

    # 2x2 max pooling. With no explicit stride, the stride defaults to the
    # pool size, so the pooled windows do not overlap.
    MaxPooling2D(pool_size=(2, 2)),

    # Second convolution: sixteen 3x3 filters with ReLU.
    Conv2D(filters=16, kernel_size=3, activation='relu'),

    # Second 2x2 max pooling.
    MaxPooling2D(pool_size=(2, 2)),

    # Dropout with rate 0.5 on the pooled feature maps.
    Dropout(.5),

    # Collapse the feature maps into one vector per image so that the
    # fully connected layers can consume them.
    Flatten(),

    # Fully connected hidden layer of 128 ReLU units.
    Dense(128, activation='relu'),

    # Dropout with rate 0.5 on the hidden layer as well.
    Dropout(.5),

    # Output layer: one unit per class. Softmax makes the 10 outputs
    # non-negative and sum to 1, so they read as class probabilities.
    Dense(10, activation='softmax'),
])

# Compile the model: categorical cross-entropy pairs with the one-hot targets
# and the softmax output; accuracy is tracked as an additional metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training split for 3 epochs in mini-batches of 32, printing
# progress. Whatever fit() returns is kept in `results`.
results = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=3,
    verbose=1,
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [43]:
# Score the trained model on the held-out test split. The values in `score`
# are matched by position with the names in `labels` when they are printed.
score = model.evaluate(x=X_test, y=y_test, verbose=1)
labels = model.metrics_names



In [44]:
# Report the first two metrics as "<name>: <value>" (loss, then accuracy).
for idx in (0, 1):
    print(str(labels[idx]) + ": " + str(score[idx]))

loss: 0.09933096527414663
acc: 0.9685714285714285
