In [1]:
# Fix the random seeds first, before the Keras import further down, so that
# runs are as repeatable as possible.
from numpy.random import seed
seed(123)  # seeds NumPy's global random generator
# NOTE(review): the top-level `set_random_seed` only exists in TensorFlow 1.x;
# TensorFlow 2 exposes the equivalent as `tf.random.set_seed`.
from tensorflow import set_random_seed
set_random_seed(123)  # same seed value for TensorFlow

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

import seaborn as sns


In [3]:
# Read the training and test CSV files in one pass; the paths are relative
# to the notebook's working directory.
training, testing = (
    pd.read_csv(csv_path)
    for csv_path in ("../src/Data/train.csv", "../src/Data/test.csv")
)

In [4]:
# Show the distinct values in the first column, which is used as the label
# column when y_train is built below.
training.iloc[:,0].astype('category').unique()

[1, 0, 4, 7, 3, 5, 8, 9, 2, 6]
Categories (10, int64): [1, 0, 4, 7, ..., 8, 9, 2, 6]

In [5]:
# Column 0 of the training frame holds the label; every other column is a feature.
label_column = training.iloc[:, 0]
feature_columns = training.iloc[:, 1:]

# Convert to NumPy arrays: float32 features, int32 labels.
x_train = feature_columns.values.astype(np.float32)
y_train = label_column.values.astype(np.int32)
# The test frame is converted whole: every column is treated as a feature.
x_test = testing.values.astype(np.float32)

## Feature Scaling

In [6]:
# Largest raw feature value across BOTH splits; this is the divisor used for
# scaling in the next cell.
# The previous form, np.array(a, b), passed the test maximum as the `dtype`
# argument of np.array, so only the training maximum was ever inspected.
max(x_train.max(), x_test.max())

255.0

In [7]:
# Divide both feature arrays by 255.0, the maximum reported by the check above.
# NOTE: this rebinds x_train / x_test, so running the cell twice scales twice.
x_train, x_test = (features / 255.0 for features in (x_train, x_test))

## Reshaping Data

In [8]:
# Give every row a 28 x 28 x 1 layout, matching the input_shape declared on
# the first Conv2D layer of the model below.
sample_shape = (28, 28, 1)
X_train = x_train.reshape((len(x_train),) + sample_shape)
X_test = x_test.reshape((len(x_test),) + sample_shape)

In [9]:
# Sanity check: the reshaped training array should be (n_samples, 28, 28, 1).
X_train.shape

(42000, 28, 28, 1)

In [10]:
# Sanity check: the reshaped test array should be (n_samples, 28, 28, 1).
X_test.shape

(28000, 28, 28, 1)

## Keras Libraries

In [11]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D
from keras.utils import to_categorical

from sklearn.model_selection import train_test_split

Using TensorFlow backend.


In [12]:
# Model / training configuration.
input_shape = (28, 28, 1)   # shape of a single sample fed to the network
num_classes = 10            # number of distinct labels
batch_size, epochs = 64, 20
# NOTE(review): the model.fit call further down passes `epochs = 5` as a
# literal rather than this `epochs` value -- confirm which one is intended.

### Converting Y to categorical

In [13]:
# One-hot encode the integer labels using the class count from the config
# cell, then hold out 25% of the rows as a validation set (fixed random_state
# so the split is repeatable).
# NOTE: this cell rebinds y_train and X_train, so it is not safe to run twice
# in the same kernel session -- restart and run from the top instead.
y_train = to_categorical(y_train, num_classes = num_classes)
X_train, X_valid, Y_train, Y_valid = train_test_split(
    X_train, y_train, test_size = 0.25, random_state = 123
)

## CNN Model

In [14]:
# Small CNN: two 3x3 convolutions with ReLU, flattened into a softmax layer
# with one unit per class. The input shape and class count come from the
# config cell instead of being repeated here as literals.
model = Sequential([
    Conv2D(64, kernel_size = 3, activation = 'relu', input_shape = input_shape),
    Conv2D(32, kernel_size = 3, activation = 'relu'),
    Flatten(),
    Dense(num_classes, activation = 'softmax'),
])

# Categorical cross-entropy matches the one-hot encoded targets.
model.compile(loss = 'categorical_crossentropy',
              optimizer = 'adam',
              metrics = ['accuracy'])

In [16]:
# Train on the training split and report metrics on the held-out validation
# split after every epoch. The batch size comes from the config cell.
# NOTE(review): the config cell declares `epochs = 20`, but this run trains for
# 5 epochs; the literal is kept so the recorded output stays valid -- confirm
# which value is intended.
model.fit(X_train, Y_train,
          validation_data = (X_valid, Y_valid),
          epochs = 5,
          batch_size = batch_size)

Train on 31500 samples, validate on 10500 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x10dcda510>

In [20]:
# Predicted label = index of the largest softmax probability for each sample.
# This is what Sequential.predict_classes computed for a multi-class output;
# that helper was deprecated and later removed from Keras, whereas
# predict + argmax works on both old and new releases.
testing_preds = np.argmax(model.predict(X_test), axis = -1)

In [22]:
# Pair each prediction with a 1-based image id. The id range is derived from
# the number of predictions instead of a hard-coded 28001, so the cell keeps
# working if the test set size changes.
data = {'ImageID' : list(range(1, len(testing_preds) + 1)), 'Label' : testing_preds}
preds = pd.DataFrame(data)
preds.head()

Unnamed: 0,ImageID,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3


In [23]:
# Write only the ImageID and Label columns. Without index = False pandas also
# writes the 0-based row index as an unnamed first column, which duplicates
# the role of ImageID.
preds.to_csv('Predictions.csv', index = False)