In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from keras.models import Sequential
from keras.layers import Conv2D, Lambda, MaxPooling2D # convolution layers
from keras.layers import Dense, Dropout, Flatten # core layers

from keras.layers.normalization import BatchNormalization

from keras.preprocessing.image import ImageDataGenerator

from keras.utils.np_utils import to_categorical

#from keras.datasets import mnist

In [9]:
#loading data
test_data = pd.read_csv(r'C:\Users\Admin\Desktop\test.csv')
train_data = pd.read_csv(r'C:\Users\Admin\Desktop\train.csv')

sub = pd.read_csv(r'C:\Users\Admin\Desktop\sample.csv')

In [3]:
test.shape

(28000, 784)

In [5]:
from keras.optimizers import RMSprop
from keras.callbacks import ReduceLROnPlateau
from keras.datasets import mnist

In [6]:
(x_train1, y_train1), (x_test1, y_test1) = mnist.load_data()
x_train1 = np.concatenate((x_test1, x_train1))
y_train1 = np.concatenate((y_test1, y_train1))

x_train1 = x_train1.reshape((x_train1.shape[0], 28, 28, 1))
print(x_train1.shape, y_train1.shape)

(70000, 28, 28, 1) (70000,)


In [10]:
x = np.array(train_data.drop(['label'], axis = 1))
y = np.array(train_data['label'])
test_data = np.array(test_data)

# building the input vector from the 28x28 pixels
x = x.reshape((x.shape[0], 28, 28, 1))
test_data = test_data.reshape(test_data.shape[0], 28, 28, 1)

x = np.concatenate((x, x_train1))
y = np.concatenate((y, y_train1))


# normalizing the data to help with the training
x = x/255
test_data = test_data/255

# one-hot encoding using keras numpy-related utilities
y = to_categorical(y, num_classes = 10)

print(x.shape, y.shape)

(112000, 28, 28, 1) (112000, 10)


##### TRAIN-TEST SPLIT

In [11]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.10, shuffle = True)
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(100800, 28, 28, 1) (100800, 10) (11200, 28, 28, 1) (11200, 10)


In [12]:
# building a linear stack of layers with the sequential model
model = Sequential()
# convolutional layer
model.add(Conv2D(filters = 32, kernel_size = (3,3), activation ='relu', input_shape = (28,28,1)))
model.add(Conv2D(filters = 32, kernel_size = (3,3), activation ='relu'))
model.add(MaxPool2D((2,2)))

model.add(BatchNormalization())

# convolutional layer
model.add(Conv2D(filters = 64, kernel_size = (3,3), activation ='relu'))
model.add(Conv2D(filters = 64, kernel_size = (3,3), activation ='relu'))

model.add(BatchNormalization())

# convolutional layer
model.add(Conv2D(filters = 64, kernel_size = (3,3), activation ='relu'))
model.add(Conv2D(filters = 64, kernel_size = (3,3), activation ='relu'))

model.add(BatchNormalization())

# flatten output of conv
model.add(Flatten())

# hidden layers
model.add(Dense(128, activation = "relu"))
model.add(Dropout(0.30))
model.add(Dense(10, activation = "softmax"))


optimizer = RMSprop(lr = 0.01, rho = 0.9, epsilon = 1e-08, decay = 0.0)

# compiling the sequential model
model.compile(optimizer = optimizer, loss = 'categorical_crossentropy', metrics = ['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 32)        9248      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 32)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 12, 12, 32)        128       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 10, 10, 64)        18496     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 8, 8, 64)          36928     
_________________________________________________________________
batch_normalization_1 (Batch (None, 8, 8, 64)          2

##### IMAGE AUGMENTATION

In [13]:
datagen = ImageDataGenerator(
        rotation_range = 10,
        zoom_range = 0.1,
        width_shift_range = 0.1,
        height_shift_range = 0.1,)

train_batch = datagen.flow(x, y, batch_size = 64)
val_batch = datagen.flow(x_test, y_test, batch_size = 64)

#### LEARNING RATE REDUCTION

In [14]:
learning_rate_reduction = ReduceLROnPlateau(monitor = 'val_loss', 
                                            patience = 3, 
                                            verbose = 1, 
                                            factor = 0.1, 
                                            min_lr = 0.00001)

## backpropagation

In [17]:
# training the model for 20 epochs
history = model.fit_generator(generator = train_batch,
                              epochs = 20, 
                              steps_per_epoch = len(train_batch),
                              validation_data = val_batch,
                              validation_steps = len(val_batch),
                              verbose = 1,
                             callbacks = [learning_rate_reduction])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 00004: ReduceLROnPlateau reducing learning rate to 9.999999310821295e-05.
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 00012: ReduceLROnPlateau reducing learning rate to 1e-05.
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


##### OUTPUT

In [18]:
res = model.predict_classes(test_data, batch_size = 64)
result = pd.Series(res, name = 'Label')
submission = pd.concat([pd.Series(range(1, 28001), name = 'ImageId'), result], axis = 1)
submission.to_csv('digit01.csv', index = False)

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


In [20]:
res = model.evaluate(x, y, batch_size = 1024)
print(res[1]*100)

99.33571219444275
