In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
#import seaborn as sn
from sklearn.model_selection import train_test_split

import random

# Seed Python's built-in RNG through the module, so the seeding function is not
# shadowed by the integer constant defined right below.
random.seed(1)

# Integer reused as `random_state` by the scikit-learn calls later in the notebook.
seed = 43

import tensorflow as tf
from tensorflow import keras
# Report the installed TensorFlow / Keras versions for reproducibility of the run.
print("Tensorflow Version: ", tf.__version__)
print("Keras Version: ",keras.__version__)


# Environment switch: 1 selects the Kaggle input directory, any other value the local path.
kaggle = 1

# Adjust the second (local) path below when running outside of Kaggle.
MNIST_PATH = (
    '../input/digit-recognizer'
    if kaggle == 1
    else '../Digit_Recognition_with_a_Deep_Neural_Network/data/input/digit-recognizer'
)



import os
# Print the full path of every file found below the dataset directory.
found_paths = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk(MNIST_PATH)
    for file_name in file_names
)
for found_path in found_paths:
    print(found_path)
        

# Introduction - MNIST Training Competition
Link to the topic: https://www.kaggle.com/c/digit-recognizer/data

This is another notebook that takes a look at another algorithm. Here I want to give a deep neural network built with the Keras framework a try. As already mentioned in other notebooks, I will skip some explanations about the data set here. Moreover, I will use the knowledge already gained about the data and transform/prepare the data right away.

If you are interested in a more detailed analysis of the dataset, take a look at my other notebooks about the MNIST dataset:
- Another MNIST Try: https://www.kaggle.com/skiplik/another-mnist-try
- First NN by Detecting Handwritten Characters: https://www.kaggle.com/skiplik/first-nn-by-detecting-handwritten-characters
...




# Get Data

In [None]:
# File names of the competition's training and test CSVs (relative to MNIST_PATH).
CSV_FILE_TRAIN, CSV_FILE_TEST = 'train.csv', 'test.csv'
 
def load_mnist_data(minist_path, csv_file):
    """Load a CSV file from the dataset directory into a pandas DataFrame.

    Args:
        minist_path: Directory that contains the CSV file.
        csv_file: Name of the CSV file inside ``minist_path``.

    Returns:
        pandas.DataFrame: The parsed content of the file.
    """
    return pd.read_csv(os.path.join(minist_path, csv_file))

def load_mnist_data_manuel(minist_path, csv_file):
    """Read a CSV file as raw text lines, without going through pandas.

    Args:
        minist_path: Directory that contains the file.
        csv_file: Name of the file inside ``minist_path``.

    Returns:
        list[str]: Every line of the file, trailing newline characters included.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    csv_path = os.path.join(minist_path, csv_file)
    # The context manager closes the handle even when reading raises.
    with open(csv_path, 'r') as csv_handle:
        return csv_handle.readlines()

def split_train_val(data, val_ratio):
    """Placeholder for a train/validation split; not implemented.

    Both arguments are ignored and the function always returns ``None``.
    """
    return None
    

# Read the training and the test CSV from the dataset directory, in that order.
train, test = (
    load_mnist_data(MNIST_PATH, csv_name)
    for csv_name in (CSV_FILE_TRAIN, CSV_FILE_TEST)
)

In [None]:
# Features: every training column except the target column 'label'.
X = train.drop(columns=['label'])
# Target: an independent copy of the 'label' column.
y = train['label'].copy()

# The test frame is copied unchanged (it is only used as feature input).
X_test = test.copy()

## Train / Val Split

In [None]:
# Sanity check: dimensions of the feature frame and the label series before splitting.
print("Shape of the Features: ",X.shape)
print("Shape of the Labels: ", y.shape)

### Label Value Count
Visualizing the label distribution of the full train dataset.

In [None]:
# Number of training rows per distinct value of the 'label' column.
train.value_counts('label')

In [None]:
# Hold out 15% of the training data for validation; stratifying on y keeps the
# label proportions of both parts aligned, and `seed` fixes the shuffle.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.15,
    stratify=y,
    random_state=seed,
)

Comparing the label distributions of the train and validation sets, which were split in a stratified way based on the given label y.

In [None]:
# Relative label frequencies of the training part.
print("Train - Set Distribution")
train_label_counts = y_train.value_counts()
print(train_label_counts / train_label_counts.sum())

# Visual separator between the two reports.
for _ in range(3):
    print('--------------------------------------------------------------')

# Relative label frequencies of the validation part.
print("Val - Set Distribution")
val_label_counts = y_val.value_counts()
print(val_label_counts / val_label_counts.sum())


In [None]:
# Report the dimensions of the full feature frame and of every split part.
shape_report = (
    ("X: ", X),
    ("X_train: ", X_train),
    ("X_val: ", X_val),
    ("y_train: ", y_train),
    ("y_val: ", y_val),
)
for caption, part in shape_report:
    print(caption, part.shape)

## Building Transformation Pipelines

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import StandardScaler

# Preprocessing pipeline with a single active step, the Normalizer.
# NOTE(review): scikit-learn's Normalizer rescales each sample (row) individually — confirm
# this is the intended scaling rather than a per-feature scaler.
pipeline = Pipeline(steps=[
    ('normalizer', Normalizer()),
    # ('std_scalar', StandardScaler()),   # alternative step, currently disabled
])

In [None]:
# The pipeline is fitted on the training part only; the validation part is
# transformed with that already fitted pipeline.
X_train_prep = pipeline.fit_transform(X_train)      # fit on X_train, then transform X_train
X_val_prep = pipeline.transform(X_val)              # transform only, no refit

# Building a Deep Neural Network based on RandomizedSearch

## Preparing Model Visualization with Tensorboard (not for Kaggle)

In [None]:
# Parent directory (relative to the notebook) that collects all TensorBoard runs.
root_logdir = "../../tensorboard-logs"

print("Relative root_logdir: ", root_logdir)


def get_run_logdir():
    """Return a log directory path below ``root_logdir`` named after the current local time.

    The directory name has the form ``run_YYYY_MM_DD-HH_MM_SS``; the path is only
    built here, nothing is created on disk.
    """
    from time import strftime

    timestamped_name = strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(root_logdir, timestamped_name)

In [None]:
# Timestamped TensorBoard log directory used by the callback of the hyperparameter search.
run_logdir = get_run_logdir()
print("Current run logdir for Tensorboard: ", run_logdir)

In [None]:
# Display the generated log directory path.
run_logdir

### Keras Callbacks for Tensorboard
Keras provides a TensorBoard callback that writes log files during training, so that TensorBoard can visualize the different graphs (e.g. the training and validation loss curves).


In [None]:
# Callback that writes TensorBoard logs into run_logdir while a model is being fitted.
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)

## Building Model Architecture

### Architecture for Hyperparameter Optimization
- Amount of Layers
- Amount of Neurons
- Learning rate
- Checkpoints
- Early Stopping 

In [None]:
def build_model(n_hidden=1, n_neurons=30, learning_rate=3e-3, input_shape=[784]):
    """Assemble and compile a fully connected softmax classifier with 10 output units.

    Args:
        n_hidden: Number of hidden Dense layers.
        n_neurons: Number of units in every hidden layer.
        learning_rate: Learning rate handed to the SGD optimizer.
        input_shape: Shape of a single input sample (default: 784 features).

    Returns:
        The compiled ``keras.models.Sequential`` model.
    """
    network = keras.models.Sequential()
    network.add(keras.layers.InputLayer(input_shape=input_shape))

    # Stack of identically sized hidden layers with ReLU activation.
    for _ in range(n_hidden):
        hidden_layer = keras.layers.Dense(n_neurons, activation="relu")
        network.add(hidden_layer)

    # Output layer: one softmax unit per class.
    network.add(keras.layers.Dense(10, activation="softmax"))

    # Plain SGD; the learning rate is exposed so it can be tuned by the search.
    sgd = keras.optimizers.SGD(learning_rate=learning_rate)
    network.compile(
        optimizer=sgd,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network


In [None]:
# Wrap build_model in Keras' scikit-learn adapter so it can be handed to
# scikit-learn's search utilities like any other estimator.
# NOTE(review): keras.wrappers.scikit_learn is not shipped with recent TensorFlow
# releases — confirm the TensorFlow version this notebook is pinned to.
keras_cl = keras.wrappers.scikit_learn.KerasClassifier(build_model)

### Hyperparameter Space

In [None]:
from scipy.stats import reciprocal

# Broad search space: many layer widths and depths, two learning rates.
param_dist = dict(
    n_neurons=range(20, 500, 20),
    n_hidden=range(10, 100, 10),
    learning_rate=[1e-3, 2e-3],
)

# Coarser grid that additionally varies the learning rate over three values.
param_dist_lr = dict(
    n_neurons=[10, 50, 100, 150, 300],
    n_hidden=[10, 50, 100, 150],
    learning_rate=[1e-3, 3e-4, 3e-2],
)

# Single fixed configurations kept under "bestrun" names, each with exactly one
# value per hyperparameter.
param_dist_bestrun_1 = dict(
    n_neurons=[150],
    n_hidden=[30],
    learning_rate=[2e-3],
)

param_dist_bestrun_2 = dict(
    n_neurons=[100],
    n_hidden=[10],
    learning_rate=[2e-3],
)

## Model Checkpoints

In [None]:
# Checkpoint callback: stores the complete model (not just the weights) in
# my_cl_model.h5 and, with save_best_only=True, keeps only the best model it has seen.
checkpoint_cb = keras.callbacks.ModelCheckpoint("my_cl_model.h5", save_best_only=True, save_weights_only=False)

## Model Training

### Randomized Search
Finding best hyperparameters with Randomized search

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Randomized search over param_dist_lr: 10 sampled candidates, 3-fold cross-validation,
# up to 5 parallel jobs, reproducible sampling through random_state=seed.
ran_ker_cl = RandomizedSearchCV(keras_cl, param_dist_lr, n_iter=10, n_jobs=5, cv=3, random_state=seed, return_train_score=True)
# epochs, validation_data and callbacks are passed through fit() as extra keyword arguments.
# NOTE(review): all candidates and folds share one checkpoint file and one TensorBoard
# log directory — confirm this is intended when fits run in parallel (n_jobs=5).
# NOTE(review): scikit-learn estimators return themselves from fit(), so history_ker_cl is
# presumably the fitted search object rather than a Keras History — verify.
history_ker_cl = ran_ker_cl.fit(X_train_prep, y_train, epochs=50, validation_data=(X_val_prep, y_val), callbacks=[checkpoint_cb, keras.callbacks.EarlyStopping(patience=5), tensorboard_cb])

In [None]:
# Hyperparameter combination selected by the randomized search.
history_ker_cl.best_params_

In [None]:
# Display the object returned by the search's fit() call.
history_ker_cl

### Model Training with Full Dataset 
In this part I will train the model with the full dataset. This time I will use the discovered hyperparameters from the randomized search from the previous part.

Based on the hyperparameter search the following parameters were found:
- n_neurons = 150
- n_hidden = 10
- learning_rate = 0.03

In [None]:
# Build a fresh, untrained classification model with the hyperparameters listed above.
# build_fn is presumably the build_model function given to the wrapper, so the result is
# what build_model returns (a compiled Keras model), not a scikit-learn wrapper.
keras_cl_model = keras_cl.build_fn(n_neurons= 150, n_hidden= 10, learning_rate=0.03)

In [None]:
# Print the layer-by-layer overview of the model that will be trained on the full dataset.
keras_cl_model.summary()

In [None]:
# Separate TensorBoard log directory and checkpoint file for the full-dataset training run.
tensorboard_cb_f = keras.callbacks.TensorBoard(get_run_logdir())
# save_best_only=False: the checkpoint is written on every save, not only for the best model.
checkpoint_cb_f = keras.callbacks.ModelCheckpoint("my_keras_cl_model.h5", save_best_only=False, save_weights_only=False)

In [None]:
# Transform the complete training feature frame X with the pipeline that was fitted on X_train.
X_prep_all = pipeline.transform(X)

In [None]:
# Train on the complete prepared training data (all rows of X with labels y) for 100 epochs.
# No validation data and no early stopping are passed here, so all 100 epochs are run.
keras_cl_model.fit(X_prep_all, y, epochs=100, callbacks=[tensorboard_cb_f, checkpoint_cb_f])

# Image Prediction of Unknown Data (Test Data)

## Preparing Test Data
The test data for the competition needs to be prepared in the same way as the training data set. Therefore the fitted pipeline (fitted only on the training dataset) will be used.

In [None]:
# Apply the already fitted preprocessing pipeline to the competition test features (no refit).
X_test_prep = pipeline.transform(X_test)

In [None]:
# Inspect the transformed test data.
X_test_prep

## Creating Competition File

In [None]:
# Start the competition submission frame with its two columns, ImageId and Label.
mnist_competition_file = pd.DataFrame(columns=['ImageId','Label'])

## Prediction of Testdata

In [None]:
# Render test sample 43 as a 28x28 image (its feature vector reshaped) in grey tones.
plt.imshow(X_test_prep[43].reshape(28,28), cmap='Greys')

In [None]:
# Model output for test sample 43; the sample is reshaped to a batch containing one row.
print("Propability of all lables for given pixels: ", keras_cl_model.predict(X_test_prep[43].reshape(1,-1)))

In [None]:
# Predicted digit for test sample 43 = index of the largest model output.
print("Predicted Digit: ",np.argmax(keras_cl_model.predict(X_test_prep[43].reshape(1,-1))))

In [None]:
# Predict the whole test set with one batched call instead of one predict() call per row.
test_probabilities = keras_cl_model.predict(X_test_prep)

# Build the submission frame in a single step: ImageId counts the rows starting at 1,
# Label is the index of the highest predicted probability of each row.
# Creating the frame here (instead of appending row by row) avoids the quadratic
# DataFrame.append pattern, which newer pandas versions no longer provide, and keeps the
# cell idempotent: re-running it no longer duplicates rows.
mnist_competition_file = pd.DataFrame({
    'ImageId': np.arange(1, len(X_test_prep) + 1),
    'Label': np.argmax(test_probabilities, axis=1),
})

In [None]:
# Inspect the assembled submission frame.
mnist_competition_file

In [None]:
# Make sure both submission columns are stored as integers before writing the CSV.
mnist_competition_file.ImageId = mnist_competition_file.ImageId.astype(int)
mnist_competition_file.Label = mnist_competition_file.Label.astype(int)

In [None]:
# Write the submission file; index=False keeps the DataFrame index out of the CSV.
mnist_competition_file.to_csv('mnist_submission.csv', index=False)