<div style="background-color:rgba(128, 0, 128, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">MNIST: DNN</h1>
</div>

Problem Type: Multi-classification

## Evaluation Metric

In [1]:
# Black formatter https://black.readthedocs.io/en/stable/

! pip install nb-black > /dev/null

%load_ext lab_black

[0m

<div style="background-color:rgba(128, 0, 128, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Configuration</h1>
</div>

In [2]:
TARGET = "label"

<div style="background-color:rgba(128, 0, 128, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Import Libraries</h1>
</div>

A best practise is to include all libraries here.  However, I will put a few imports farther down where they are first used so beginners can learn with an "as needed" approach.

In [3]:
import os
import time
from pathlib import Path

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

from sklearn.metrics import f1_score

<div style="background-color:rgba(128, 0, 128, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Library</h1>
</div>

Creating a few functions that we will reuse in each project.

In [4]:
def read_data(path):
    data_dir = Path(path)

    train = pd.read_csv(data_dir / "train.csv")
    test = pd.read_csv(data_dir / "test.csv")
    submission_df = pd.read_csv(data_dir / "sample_submission.csv")

    print(f"train data: Rows={train.shape[0]}, Columns={train.shape[1]}")
    print(f"test data : Rows={test.shape[0]}, Columns={test.shape[1]}")
    return train, test, submission_df

In [5]:
def create_submission(model_name, target, preds, is_log_target=False):
    if is_log_target:
        #         preds = np.exp(preds)
        preds = np.expm1(preds)

    sample_submission[target] = preds
    if len(model_name) > 0:
        sample_submission.to_csv(f"submission_{model_name}.csv", index=False)
    else:
        sample_submission.to_csv(f"submission.csv", index=False)

    return sample_submission[:5]

In [6]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, f1_score


def show_scores(gt, yhat):
    f1 = f1_score(gt, yhat, average="macro")
    mse = mean_squared_error(gt, yhat)
    mae = mean_absolute_error(gt, yhat)
    rmse = np.sqrt(mse)
    #     r2_square = r2_score(gt, yhat)

    print(f"f1: {f1:.4f}")

<div style="background-color:rgba(128, 0, 128, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Load Train/Test Data</h1>
</div>

- train.csv - Data used to build our machine learning model
- test.csv - Data used to build our machine learning model. Does not contain the target variable
- sample_submission.csv - A file in the proper format to submit test predictions

In [7]:
train, test, sample_submission = read_data("../input/digit-recognizer")

train data: Rows=42000, Columns=785
test data : Rows=28000, Columns=784


In [8]:
train.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [9]:
FEATURES = [
    col
    for col in train.columns
    if col
    not in [
        "Id",
        TARGET,
        #         TARGET_ENC,
    ]
]

In [10]:
y = train[TARGET]
X = train[FEATURES].copy()

X_test = test[FEATURES].copy()

In [11]:
from keras.utils.np_utils import to_categorical

y = to_categorical(y)

In [12]:
y[:5]

array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)

In [13]:
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)
X_train.shape, y_train.shape, X_valid.shape, y_valid.shape

((37800, 784), (37800, 10), (4200, 784), (4200, 10))

<div style="background-color:rgba(128, 0, 128, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Create Models</h1>
</div>

In [14]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


def get_tf_model01(x_shape):

    inputs = keras.Input(shape=x_shape)

    x = keras.layers.Dense(128, activation="relu")(inputs)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Dense(128, activation="relu")(x)
    x = keras.layers.BatchNormalization()(x)
    #     x = keras.layers.Dropout(0.1)(x)

    outputs = keras.layers.Dense(10, activation="softmax")(x)

    model = keras.Model(inputs, outputs)

    model.compile(
        loss="categorical_crossentropy", optimizer="rmsprop", metrics=["accuracy"]
    )

    #     model.compile(
    #         optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    #         loss="mean_absolute_error",
    #     )

    return model

In [15]:
X_train.shape[1]

784

In [16]:
X.shape[1]

784

In [17]:
# Normalize the data
# X_train = X_train / 255.0
# test = test / 255.0

In [18]:
%%time
model = get_tf_model01(x_shape=(X.shape[1]))

history = model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=10,
    # Suppress logging.
    verbose=1,
    # Calculate validation results on 20% of the training data.
    #     validation_split = 0.2
)

2022-07-15 00:56:13.275901: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-15 00:56:13.386762: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-15 00:56:13.387681: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-15 00:56:13.389511: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
CPU times: user 29.8 s, sys: 4.03 s, total: 33.8 s
Wall time: 28.7 s


## Predict and Score

In [19]:
valid_preds = model.predict(X_valid)
valid_preds[:5]

array([[3.5294960e-08, 7.6129481e-07, 4.0059689e-07, 2.9409034e-07,
        1.9413487e-07, 7.9267597e-09, 3.5857906e-07, 6.3959689e-08,
        9.9999738e-01, 5.3223732e-07],
       [6.4648241e-08, 9.9987519e-01, 1.7589974e-06, 1.5814105e-07,
        3.5688012e-05, 4.2598111e-07, 3.2369947e-08, 1.9513927e-05,
        6.6943809e-05, 1.9999025e-07],
       [8.4657431e-10, 5.0637459e-09, 2.1418360e-09, 4.4683759e-08,
        1.0365173e-06, 1.4410257e-06, 1.2898773e-09, 6.3459120e-05,
        1.0226434e-08, 9.9993396e-01],
       [6.2426647e-10, 1.7996646e-10, 3.7765419e-11, 7.5864946e-08,
        1.8739626e-06, 9.5306127e-09, 5.1494390e-12, 8.1076105e-05,
        3.2984588e-08, 9.9991691e-01],
       [1.3001706e-07, 3.6772665e-08, 2.5288587e-09, 5.1354370e-09,
        2.1925255e-06, 4.8164422e-10, 1.1838229e-06, 4.9353157e-11,
        9.9999630e-01, 1.7198674e-07]], dtype=float32)

In [20]:
valid_preds = np.argmax(valid_preds, axis=1)

In [21]:
# show_scores(y_valid, valid_preds)



In [22]:
preds = model.predict(X_test)
preds[:5]

array([[1.1266848e-06, 3.4412952e-08, 9.9998927e-01, 2.7214505e-06,
        2.3976713e-06, 1.4856602e-08, 1.4521358e-07, 1.0043669e-07,
        7.3772486e-07, 3.4218824e-06],
       [9.9965560e-01, 5.7126005e-08, 1.8620793e-07, 5.3711693e-07,
        4.0611430e-09, 3.4031938e-04, 1.9647530e-07, 2.9371004e-06,
        1.0388989e-07, 1.4124012e-07],
       [1.8710606e-06, 1.1740385e-06, 2.9326889e-06, 6.9562766e-06,
        2.2163674e-02, 8.1027438e-06, 8.4065950e-06, 2.5110625e-05,
        8.6222007e-04, 9.7691953e-01],
       [4.2592897e-04, 1.2184404e-08, 2.2432692e-05, 2.4536046e-06,
        2.0652291e-05, 1.0581384e-09, 1.7538533e-06, 2.1053406e-06,
        3.9830013e-07, 9.9952435e-01],
       [3.5166337e-07, 4.7124679e-05, 2.0887093e-05, 9.9964285e-01,
        3.7093815e-07, 1.0267143e-04, 2.1928932e-07, 8.1121465e-07,
        1.5182435e-04, 3.2960568e-05]], dtype=float32)

In [23]:
preds = np.argmax(preds, axis=1)

## Submission

In [24]:
# Different TARGET name used in test.csv
ss = create_submission("", "Label", preds)
ss

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3
