## Loading data

In [None]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# Load the competition's training and test tables from the Kaggle input directory.
train_df, test_df = (
    pd.read_csv('/kaggle/input/digit-recognizer/train.csv'),
    pd.read_csv('/kaggle/input/digit-recognizer/test.csv'),
)

In [None]:
# Preview the first five rows of the training table.
train_df.head(n=5)

In [None]:
# Separate the 'label' column (the prediction target) from the remaining feature columns.
train_features = train_df.drop(columns=['label'])
train_target = train_df['label']

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

## Plot the number of samples per digit class

In [None]:
# Bar chart of how many training samples carry each label.
plt.figure(figsize=(15, 7))
class_count_ax = sns.countplot(x=train_target)
class_count_ax.set_title("The number of digit classes")

In [None]:
# Render the first training sample as a 28x28 grayscale image, titled with its label.
img = train_features.iloc[0].to_numpy().reshape(28, 28)
plt.imshow(img, cmap='gray')
plt.axis('off')
plt.title(train_target[0])
plt.show()

In [None]:
# Rescale every feature value by 1/255 (raw values presumably span 0-255 — TODO confirm).
train_features = train_features.div(255.0)

In [None]:
# Shape and type before the conversion (a pandas DataFrame at this point).
print(train_features.shape, type(train_features))

# Convert to a NumPy array shaped (samples, 28, 28, 1): each row becomes a 28x28 image
# with one trailing channel axis.
train_features = train_features.to_numpy().reshape((-1, 28, 28, 1))
print(train_features.shape, type(train_features))

## One-hot Encoding

In [None]:
# `keras.utils.np_utils` is a private module that newer Keras releases no longer ship;
# import from the public `tensorflow.keras.utils` path instead, which is also the Keras
# distribution the model-building cells of this notebook use.
from tensorflow.keras.utils import to_categorical

# Turn each integer label into a one-hot row of length 10 (one column per digit class).
train_target = to_categorical(train_target, num_classes=10)

In [None]:
# Hold out 10% of the labelled data as a validation set to monitor while fitting.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    train_features,
    train_target,
    test_size=0.1,
    random_state=156,
)
# Report the shape of each of the four resulting arrays, one per line.
print(*(split.shape for split in (X_train, X_test, y_train, y_test)), sep='\n')

In [None]:
from tensorflow import keras
import tensorflow as tf

In [None]:
# Fully connected classifier: flatten each image, pass it through three SELU hidden
# layers (70 -> 50 -> 30 units), apply AlphaDropout, then a 10-way softmax output.
# SELU is paired with the lecun_normal initializer for its self-normalizing property.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=X_train.shape[1:]),
    *(
        keras.layers.Dense(n_units, activation='selu', kernel_initializer='lecun_normal')
        for n_units in (70, 50, 30)
    ),
    keras.layers.AlphaDropout(rate=0.5),
    keras.layers.Dense(10, activation='softmax', kernel_initializer='glorot_uniform'),
])

## Learning rate schedule: exponential sweep to find a good rate

In [None]:
K = keras.backend

class ExponentialLearningRate(keras.callbacks.Callback):
    """Callback that grows the optimizer's learning rate geometrically, once per batch.

    After every training batch the current learning rate and the reported loss are
    recorded, then the learning rate is multiplied by ``factor``.

    Attributes:
        factor: Multiplier applied to the learning rate after each batch.
        rates: Learning rate in effect for each batch, in order.
        losses: ``logs["loss"]`` reported at the end of each batch, in order.
    """

    def __init__(self, factor):
        # Let the base Callback initialise its own bookkeeping attributes.
        super().__init__()
        self.factor = factor
        self.rates = []
        self.losses = []

    def on_batch_end(self, batch, logs=None):
        """Record the current rate and loss, then scale the rate by ``factor``.

        Args:
            batch: Index of the batch that just finished (unused).
            logs: Metrics dict supplied by Keras; must contain ``"loss"``.
        """
        optimizer = self.model.optimizer
        # `learning_rate` is the canonical attribute; `lr` is only a legacy alias.
        self.rates.append(K.get_value(optimizer.learning_rate))
        # NOTE(review): recent tf.keras versions report the running epoch mean here,
        # not the loss of this single batch — confirm if per-batch values are needed.
        self.losses.append(logs["loss"])
        K.set_value(optimizer.learning_rate, optimizer.learning_rate * self.factor)

def find_learning_rate(model, X, y, epochs=1, batch_size=32, min_rate=10**-5, max_rate=10):
    """Sweep the learning rate from ``min_rate`` towards ``max_rate`` and log the loss.

    The model is trained for ``epochs`` epochs while an ``ExponentialLearningRate``
    callback multiplies the learning rate by a constant factor after every batch.
    The factor is chosen so that ``factor ** iterations == max_rate / min_rate``.
    The model's weights and the optimizer's learning rate are put back to their
    starting values before returning, even if training raises.

    Args:
        model: Compiled Keras model; its optimizer's learning rate is changed
            during the sweep.
        X: Training inputs.
        y: Training targets.
        epochs: Number of epochs to sweep over.
        batch_size: Batch size passed to ``model.fit``.
        min_rate: Learning rate at the start of the sweep.
        max_rate: Learning rate the sweep is scaled to reach at its end.

    Returns:
        Tuple ``(rates, losses)``: the per-batch learning rates and losses
        recorded by the callback.
    """
    init_weights = model.get_weights()
    # np.ceil replaces `np.math.ceil`: `np.math` was an unintended alias of the
    # stdlib `math` module and no longer exists in NumPy 2.0.
    iterations = int(np.ceil(len(X) / batch_size)) * epochs
    factor = np.exp(np.log(max_rate / min_rate) / iterations)
    init_lr = K.get_value(model.optimizer.learning_rate)
    K.set_value(model.optimizer.learning_rate, min_rate)
    exp_lr = ExponentialLearningRate(factor)
    try:
        model.fit(X, y, epochs=epochs, batch_size=batch_size, callbacks=[exp_lr])
    finally:
        # Always undo the sweep so an interrupted run does not leave the model
        # with a huge learning rate or diverged weights.
        # NOTE(review): only weights and learning rate are restored; any internal
        # optimizer state accumulated during the sweep is left as is.
        K.set_value(model.optimizer.learning_rate, init_lr)
        model.set_weights(init_weights)
    return exp_lr.rates, exp_lr.losses

def plot_lr_vs_loss(rates, losses):
    """Plot loss against learning rate (log x-axis) and print the best rate found.

    A horizontal line marks the lowest loss. The y-axis is clipped to the range
    from the lowest loss to halfway between the first and the lowest loss, so the
    high losses at the far end of the sweep do not dominate the plot.

    Args:
        rates: Learning rates, one per recorded step.
        losses: Losses recorded at those rates (same length as ``rates``).
    """
    lowest_loss = min(losses)
    plt.plot(rates, losses)
    plt.gca().set_xscale('log')
    plt.hlines(lowest_loss, min(rates), max(rates))
    plt.axis([min(rates), max(rates), lowest_loss, (losses[0] + lowest_loss) / 2])
    plt.xlabel("Learning rate")
    plt.ylabel("Loss")
    # Report the rate at which the lowest loss occurred. Printing min(rates) would
    # only echo the sweep's starting rate, whatever the losses were.
    # NOTE(review): a common rule of thumb is to train with a rate somewhat below
    # this value — confirm the intended choice.
    best_index = min(range(len(losses)), key=losses.__getitem__)
    print('proper_rate:', rates[best_index])

In [None]:
# batch_size: samples per gradient step; n_epochs: upper bound on training epochs.
batch_size, n_epochs = 32, 30

In [None]:
# The targets are one-hot encoded, so the matching loss is categorical_crossentropy.
# `learning_rate=` replaces the deprecated `lr=` keyword, which newer Keras
# releases no longer accept.
model.compile(
    optimizer=keras.optimizers.Nadam(learning_rate=1e-3),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Sweep the learning rate over one epoch of the training split, then plot loss vs. rate.
rates, losses = find_learning_rate(
    model,
    X_train,
    y_train,
    epochs=1,
    batch_size=batch_size,
)
plot_lr_vs_loss(rates, losses)

## Model Fit

In [None]:
# Train with early stopping: halt once validation accuracy has not improved for
# 5 consecutive epochs. restore_best_weights=True rolls the model back to the epoch
# with the best val_accuracy; without it the model keeps the weights from the last
# (non-improving) epoch, which is what would be used for the submission.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=batch_size,
    epochs=n_epochs,
    callbacks=[
        keras.callbacks.EarlyStopping(
            monitor='val_accuracy',
            patience=5,
            restore_best_weights=True,
        ),
    ],
)

## Plot the accuracy curves

In [None]:
# Compare per-epoch accuracy on the training split with the held-out validation split.
# NOTE(review): the title says 'Test accuracy' although the curves are training and
# validation accuracy — confirm whether the title should be reworded.
epoch_metrics = history.history
plt.plot(epoch_metrics['val_accuracy'], label='validation accuracy')
plt.plot(epoch_metrics['accuracy'], label='accuracy')
plt.xlabel("Number of Epochs")
plt.ylabel('accuracy')
plt.title('Test accuracy')
plt.legend()
plt.show()

## Submit the answer

In [None]:
# Apply the same preprocessing as the training features: scale by 1/255, then
# reshape to (samples, 28, 28, 1). After this, test_df is a NumPy array.
test_df = test_df.div(255.0).to_numpy().reshape((-1, 28, 28, 1))

In [None]:
# Predict the softmax outputs for every test image and keep, per row, the index of
# the class with the highest probability.
results = np.argmax(model.predict(test_df), axis=1)

# ImageId is 1-based. Size it from the predictions instead of hard-coding 28000 rows,
# so the frame is built correctly for any number of test images.
my_submission = pd.DataFrame({
    'ImageId': range(1, len(results) + 1),
    'Label': results,
})
my_submission.to_csv('/kaggle/working/submission.csv', index=False)