In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
input_paths = (
    os.path.join(folder, fname)
    for folder, _subdirs, fnames in os.walk('/kaggle/input')
    for fname in fnames
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Importing the libraries

In [2]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.utils import np_utils
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.utils import to_categorical

# Loading the data

In [3]:
# Read the two competition CSV files into DataFrames.
pd_train, pd_test = (
    pd.read_csv(csv_path)
    for csv_path in ('../input/digit-recognizer/train.csv',
                     '../input/digit-recognizer/test.csv')
)

# Separate the 'label' column from the remaining columns of the training frame;
# the test frame is used as-is.
y_train = pd_train['label']
x_train = pd_train.drop(columns=['label'])
x_test = pd_test

# Preprocessing 

In [4]:
# Scale every feature value by 1/255 (raw pixels presumably span 0-255 — confirm).
# Both frames are rebuilt from the untouched source frames rather than divided
# in place, so re-running this cell can never scale the data a second time.
PIXEL_SCALE = 255.0
x_train = pd_train.drop(columns=['label']) / PIXEL_SCALE
x_test = pd_test / PIXEL_SCALE

## Linear dimensionality reduction using Singular Value Decomposition of the data

In [5]:
from sklearn.decomposition import PCA

def run_PCA(X, num_components):
    """Fit a PCA model on ``X`` and project ``X`` with the fitted model.

    Parameters
    ----------
    X
        Data handed unchanged to the model's ``fit`` and ``transform``.
    num_components
        Forwarded to ``PCA`` as ``n_components``.

    Returns
    -------
    tuple
        ``(fitted_model, projected)``: the ``PCA`` object after fitting and
        the result of calling its ``transform`` on ``X``.
    """
    reducer = PCA(n_components=num_components)
    # Fitting and projecting are kept as two explicit calls on the same data.
    reducer.fit(X)
    return reducer, reducer.transform(X)

## Split data set into random train and test subsets

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled rows for validation. The split is shuffled,
# stratified on the labels, and seeded so that it is repeatable.
split_options = dict(test_size=0.2, shuffle=True, stratify=y_train, random_state=34)
X_train, X_val, Y_train, Y_val = train_test_split(x_train, y_train, **split_options)

# Building neural network

In [7]:
NUM_CLASSES = 10  # must equal the unit count of the model's softmax output layer


def _as_images(pixels):
    """Return ``pixels`` as an array reshaped to (n, 28, 28, 1)."""
    return np.array(pixels).reshape(-1, 28, 28, 1)


def _one_hot(labels):
    """One-hot encode integer class labels into ``NUM_CLASSES`` columns.

    The class count is passed explicitly instead of being inferred from the
    largest label present, so the width always matches the output layer.
    A 2-D input is treated as already encoded and returned unchanged, which
    keeps this cell safe to re-run.
    """
    labels = np.array(labels)
    if labels.ndim == 2:
        return labels
    # to_categorical comes from tensorflow.keras.utils; the np_utils module
    # used previously is not available in recent Keras releases.
    return to_categorical(labels, num_classes=NUM_CLASSES)


X_train, X_val = _as_images(X_train), _as_images(X_val)
Y_train, Y_val = _one_hot(Y_train), _one_hot(Y_val)

In [None]:
# CNN classifier: two convolutional stages (32 filters, then 64), each followed
# by max-pooling, batch normalisation and dropout, then a dense head ending in
# a 10-unit softmax layer.
model = models.Sequential([
    # Stage 1: two 5x5 convolutions, 32 filters each.
    layers.Conv2D(32, (5, 5), padding='Same', activation='relu',
                  input_shape=(28, 28, 1)),
    layers.Conv2D(32, (5, 5), padding='Same', activation='relu'),
    layers.MaxPool2D(pool_size=(2, 2)),
    layers.BatchNormalization(),
    layers.Dropout(0.25),
    # Stage 2: two 3x3 convolutions, 64 filters each.
    layers.Conv2D(64, (3, 3), padding='Same', activation='relu'),
    layers.Conv2D(64, (3, 3), padding='Same', activation='relu'),
    layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    layers.BatchNormalization(),
    layers.Dropout(0.25),
    # Classifier head.
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.BatchNormalization(),
    layers.Dense(10, activation='softmax'),
])

model.summary()

In [9]:
# Training configuration: Adam optimiser, categorical cross-entropy loss,
# accuracy reported as the metric.
compile_options = {
    'optimizer': 'adam',
    'loss': 'categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

In [None]:
EPOCHS = 50       # upper bound; early stopping may end training sooner
BATCH_SIZE = 128

# Stop once the validation loss has not improved for `patience` epochs and
# restore the best weights seen, so later predictions do not come from an
# over-trained final epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=5,
                               restore_best_weights=True)

history = model.fit(
    X_train, Y_train,
    validation_data=(X_val, Y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[early_stopping],
)

# Predicting 

In [11]:
# Reshape the test rows to (n, 28, 28, 1), matching the model's input_shape.
X_test = np.reshape(np.array(x_test), (-1, 28, 28, 1))

In [12]:
from keras.models import load_model
# Per-class scores from the softmax output -> index of the highest-scoring class.
class_scores = model.predict(X_test)
y_pred = np.argmax(class_scores, axis=1)

# Overwrite the sample submission's 'Label' column with the predictions and
# write the result to submission.csv (header row kept, index column omitted).
pd_submit = pd.read_csv('../input/digit-recognizer/sample_submission.csv')
pd_submit['Label'] = y_pred
pd_submit.to_csv('submission.csv', header=True, index=False)

In [13]:
# Display the submission frame as this cell's output.
pd_submit