# Libraries

In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import callbacks

# Data

In [2]:
# Read the competition CSVs. The test frame is divided by 255 right at load time;
# the training frame is kept raw here because it still carries the `label` column.
train_data = pd.read_csv('../input/digit-recognizer/train.csv')
test_data = pd.read_csv('../input/digit-recognizer/test.csv').div(255)

# Report (rows, columns) of the training frame, then preview its first rows.
print(train_data.shape)
train_data.head()

**Check for null values**

In [3]:
# Count missing cells per frame: the first .sum() totals each column,
# the second collapses those column totals into one number.
train_null_count = train_data.isna().sum().sum()
test_null_count = test_data.isna().sum().sum()

print('Number of null values in training set:', train_null_count)
print('')
print('Number of null values in test set:', test_null_count)

**Explore label distribution**

In [4]:
# Bar chart of how many training rows carry each label value.
label_ax = sns.countplot(data=train_data, x='label')
label_ax.set_title('Distribution of labels in training set')

**Labels and features**

In [5]:
# Targets: one-hot encode the `label` column (one indicator column per distinct label).
# The dtype is pinned because pd.get_dummies' default output dtype depends on the
# pandas version (uint8 before 2.0, bool from 2.0 on); float32 gives the loss a
# numeric target regardless of the installed version.
y = pd.get_dummies(train_data['label'], dtype='float32')

# Features: every column except `label`, divided by 255 -- the same scaling that is
# applied to the test frame at load time (maps values to [0, 1], assuming the raw
# pixel intensities are 0-255).
X = train_data.drop(columns='label') / 255

**Break off validation set**

In [6]:
# Hold out 20% of the rows for validation; random_state pins the shuffle so the
# same rows land in each partition on every run.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y,
    train_size=0.8,
    test_size=0.2,
    random_state=0,
)

# Model

In [7]:
# Seed the Python, NumPy and TensorFlow RNGs before any weights are created so that
# weight initialisation, dropout masks and batch shuffling are repeatable across
# "Restart & Run All" (the train/validation split is already pinned with
# random_state=0). NOTE: keras.utils.set_random_seed requires TensorFlow >= 2.7.
keras.utils.set_random_seed(0)

# Fully connected classifier: 784 inputs -> 256 -> 256 -> 10 class probabilities.
model = keras.Sequential([

    # hidden layer 1 (dropout after it regularises the wide first layer)
    layers.Dense(units=256, activation='relu', input_shape=[784]),
    layers.Dropout(rate=0.4),

    # hidden layer 2
    layers.Dense(units=256, activation='relu'),
    layers.Dropout(rate=0.2),

    # output layer
    layers.Dense(units=10, activation='softmax')  # softmax returns a probability distribution
])

# Categorical cross-entropy / categorical accuracy match the one-hot encoded targets.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])

# Stop once the validation loss has failed to improve by at least `min_delta` for
# `patience` consecutive epochs, and roll the weights back to the best epoch seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',          # the documented default, spelled out for the reader
    patience=10,
    min_delta=0.001,
    restore_best_weights=True,
)

# Train model

In [8]:
# Size each mini-batch as 1/50 of the training set, i.e. roughly 50 gradient
# steps per epoch.
batch_size = len(X_train) // 50

# Fit silently for at most 100 epochs; the early-stopping callback may end the run
# sooner. The returned History object feeds the learning-curve plots.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=100,
    verbose=0,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
)

**Learning curves**

In [None]:
# One row per epoch, one column per tracked quantity.
history_df = pd.DataFrame(history.history)
history_df.loc[:, ['loss', 'val_loss']].plot(title="Cross-entropy")
history_df.loc[:, ['categorical_accuracy', 'val_categorical_accuracy']].plot(title="Accuracy")

# The early-stopping callback was created with restore_best_weights=True, so the
# model may hold the weights of an earlier (best) epoch rather than the last one.
# The last history row therefore need not describe the model that will make the
# predictions; score the model as it stands now instead.
# evaluate() returns [loss, categorical_accuracy] for the compiled metrics.
final_val_loss, final_val_accuracy = model.evaluate(X_valid, y_valid, verbose=0)
print('Final accuracy on validation set:', final_val_accuracy)

# Make predictions

In [None]:
# test_data was already divided by 255 when it was loaded, so it is fed to the
# network as-is. (The former pd.get_dummies(test_data) call was dropped: it leaves
# numeric columns untouched, so it only rebound the name without changing the data.)
preds = model.predict(test_data)

# Each row of `preds` holds 10 class probabilities; pick the most likely class.
pred_classes = np.argmax(preds, axis=1)

# Assemble the submission table. ImageId is the 1-based row position in the test set.
output = pd.DataFrame({'ImageId': test_data.index + 1,
                       'Label': pred_classes})

# Check format
output.head()

In [None]:
# Write the submission table to disk without the DataFrame index column.
output.to_csv(path_or_buf='submission.csv', index=False)