In [None]:
# Import
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.utils.np_utils import to_categorical 
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Read the three digit-recognizer CSV files in one pass: the training frame
# (which carries the 'label' column), the frame to predict on, and the sample
# submission that is later filled in and written back out.
train, test, subs = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/digit-recognizer/train.csv',
        '../input/digit-recognizer/test.csv',
        '../input/digit-recognizer/sample_submission.csv',
    )
)

In [None]:
# Find null-values
# Count every missing cell: isna() flags each cell, the first sum() totals the
# flags per column and the second sum() adds the columns together.
# (Chaining .any().sum() instead counts the *columns* that contain a null,
# which does not match the "Null values" label printed below.)
print('Null values in train data: ' + str(train.isna().sum().sum()))
print('Null values in test data: ' + str(test.isna().sum().sum()))

In [None]:
# Separate the training frame into a feature matrix (every column except
# 'label') and a label vector, and take the test frame as the matrix to
# predict on. Both feature matrices are divided by 255
# (presumably 0-255 pixel intensities, giving values in [0, 1] -- confirm).
X = train.drop(columns='label').to_numpy() / 255
y = train['label'].to_numpy()
X_pred = test.to_numpy() / 255

In [None]:
# Show 5 random samples from train data
nn = 5
# random.randrange(n) draws from 0..n-1, so every row (including row 0) can be
# chosen and the index can never run past the end of X. random.randint(1, n)
# is inclusive on both ends: it never picked row 0 and could return n itself,
# which raises IndexError on X[num].
sample_rows = [random.randrange(X.shape[0]) for _ in range(nn)]
for i, num in enumerate(sample_rows):
    plt.subplot(1, nn, i + 1)
    plt.xlabel(y[num])                    # true label shown under the image
    plt.imshow(X[num].reshape(28, 28))    # flat 784-pixel row -> 28x28 image

In [None]:
# Print the dimensions of the training features, the training labels and the
# prediction features, one shape per line.
for array in (X, y, X_pred):
    print(array.shape)

In [None]:
# Convert data
# Reshape the flat pixel rows into 28x28 single-channel images, the layout the
# Conv2D input layer expects. Row counts are read from the arrays instead of
# being hard-coded (42000 / 28000), so the cell also works on a subsample or a
# differently sized dataset.
n_train = X.shape[0]
X = X.reshape(n_train, 28, 28, 1)
# Using n_train (rather than -1) makes this raise if X and y disagree on the
# number of rows, or if the cell is re-run on an already one-hot-encoded y.
y = y.reshape(n_train, 1)
X_pred = X_pred.reshape(X_pred.shape[0], 28, 28, 1)
# One-hot encode the digit labels into 10 classes.
y = to_categorical(y, num_classes=10)

In [None]:
# Split X and y data for train and test datasets
# Hold out 10% of the labelled data. random_state pins the shuffle so the same
# rows land in each split on every run; without it the split (and every
# downstream metric) changes each time the notebook is executed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

In [None]:
# Make model
# Four blocks of 3x3 'same'-padded ReLU convolutions (32, 64, 128, 128
# filters), each followed by 2x2 max-pooling, then dropout, a 256-unit dense
# layer, dropout again, and a 10-way softmax output.
# The layers are passed to Sequential as a list; the original model.add(...)
# calls carried stray trailing commas that built and discarded one-element
# tuples.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu',
                           input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(10, activation='softmax'),
])
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

In [None]:
# Compile model
# Adam optimiser with categorical cross-entropy loss (the labels are one-hot
# encoded and the output layer is a softmax); accuracy is the tracked metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Add Data Generator
# Augment training images on the fly with random rotations of up to 20
# degrees and random zoom with zoom_range=0.2.
# datagen.fit(...) is intentionally not called: per the Keras documentation
# it only computes dataset statistics needed by featurewise_center,
# featurewise_std_normalization or zca_whitening, none of which is enabled
# here. Add the call back if one of those options is switched on.
datagen = ImageDataGenerator(rotation_range=20, zoom_range=0.2)

In [None]:
# Fit model
BATCH_SIZE = 32
# Iterator that yields augmented (images, labels) batches from the train split.
train_batches = datagen.flow(X_train, y_train, batch_size=BATCH_SIZE)
# Model.fit accepts generators directly; Model.fit_generator is deprecated.
# steps_per_epoch must be a whole number of batches: len(train_batches) is the
# batch count that covers X_train once, whereas len(X_train) / 32 was a float.
# validation_data reports loss/accuracy on the un-augmented hold-out split
# after each epoch (monitoring only; it does not influence the weights).
model.fit(train_batches,
          steps_per_epoch=len(train_batches),
          validation_data=(X_test, y_test),
          epochs=10)

In [None]:
# Score the trained model on the held-out split. The call is the last
# expression in the cell, so its return value (the loss followed by the
# metrics set at compile time) is displayed as the cell output.
model.evaluate(x=X_test, y=y_test)

In [None]:
# Run the network on the prediction images, then reduce each row of class
# scores to the index of its largest entry, i.e. the predicted digit.
predictions = model.predict(X_pred)
y_pred = predictions.argmax(axis=1)

In [None]:
# Show 5 random prediction
nn = 5
# random.randrange(n) draws from 0..n-1. random.randint(1, n) is inclusive on
# both ends, so it never showed row 0 and could return n itself, which raises
# IndexError on y_pred[num] / X_pred[num].
sample_rows = [random.randrange(X_pred.shape[0]) for _ in range(nn)]
for i, num in enumerate(sample_rows):
    plt.subplot(1, nn, i + 1)
    plt.xlabel(y_pred[num])                    # predicted digit under the image
    plt.imshow(X_pred[num].reshape(28, 28))    # drop the channel axis for display

In [None]:
# Save data
# Assign the prediction array directly. Wrapping it in pd.DataFrame made the
# assignment depend on index alignment, which silently fills NaN when the
# indexes differ; a plain array must match the frame's length or pandas raises.
subs['Label'] = y_pred
# index=False keeps the DataFrame's row index out of the CSV file.
subs.to_csv('./DRSubmissions.csv', index=False)