<div style="background-color:rgba(128, 0, 128, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">MNIST: CNN</h1>
</div>

Problem Type: Multi-class classification

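## Evaluation Metric

Kaggle scores Digit Recognizer submissions by categorization accuracy (the fraction of test images labelled correctly). Accuracy is tracked during training, and macro-averaged F1 is available through `show_scores` as a secondary check.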

In [1]:
# Black formatter https://black.readthedocs.io/en/stable/

! pip install nb-black > /dev/null

%load_ext lab_black

[0m

<div style="background-color:rgba(128, 0, 128, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Configuration</h1>
</div>

In [2]:
# Name of the label column in train.csv.
TARGET = "label"

<div style="background-color:rgba(128, 0, 128, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Import Libraries</h1>
</div>

A best practice is to include all libraries here.  However, I will put a few imports farther down, where they are first used, so beginners can learn with an "as needed" approach.

In [3]:
import os
import time
from pathlib import Path

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

from sklearn.metrics import f1_score

<div style="background-color:rgba(128, 0, 128, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Library</h1>
</div>

Creating a few functions that we will reuse in each project.

In [4]:
def read_data(path):
    """Load the competition CSV files found in ``path``.

    Reads ``train.csv``, ``test.csv`` and ``sample_submission.csv`` from the
    given directory, prints the row/column counts of the train and test
    frames, and returns the three DataFrames as
    ``(train, test, submission_df)``.
    """
    root = Path(path)
    train, test, submission_df = (
        pd.read_csv(root / filename)
        for filename in ("train.csv", "test.csv", "sample_submission.csv")
    )

    n_rows, n_cols = train.shape
    print(f"train data: Rows={n_rows}, Columns={n_cols}")
    n_rows, n_cols = test.shape
    print(f"test data : Rows={n_rows}, Columns={n_cols}")
    return train, test, submission_df

In [5]:
def create_submission(
    model_name, target, preds, is_log_target=False, submission_df=None
):
    """Write predictions to a submission CSV and return a short preview.

    Args:
        model_name: Suffix for the output file. An empty string writes
            ``submission.csv``; anything else writes
            ``submission_<model_name>.csv``.
        target: Name of the prediction column in the submission file.
        preds: Predictions to store in the ``target`` column.
        is_log_target: If True, ``preds`` are mapped back with ``np.expm1``
            before being written.
        submission_df: Submission template to fill in. When omitted, the
            notebook-level ``sample_submission`` frame is used.

    Returns:
        The first five rows of the filled-in submission.
    """
    if is_log_target:
        preds = np.expm1(preds)

    if submission_df is None:
        # Fall back to the template loaded by the notebook.
        submission_df = sample_submission

    # Fill a copy so the template itself is never modified by this call.
    submission = submission_df.copy()
    submission[target] = preds

    if len(model_name) > 0:
        filename = f"submission_{model_name}.csv"
    else:
        filename = "submission.csv"
    submission.to_csv(filename, index=False)

    return submission[:5]

In [6]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, f1_score


def show_scores(gt, yhat):
    """Print the macro-averaged F1 score of ``yhat`` against ``gt``.

    Args:
        gt: Ground-truth targets.
        yhat: Predicted targets.

    If exactly one of the two arguments is a 2-D matrix with more than one
    column (for example one-hot targets or per-class probabilities) while the
    other is not, the matrix is reduced to class indices with ``argmax`` so
    both sides use the same label format. All other inputs are passed to
    ``f1_score`` unchanged.
    """

    def _is_class_matrix(values):
        shape = np.shape(values)
        return len(shape) == 2 and shape[1] > 1

    gt_is_matrix = _is_class_matrix(gt)
    yhat_is_matrix = _is_class_matrix(yhat)
    if gt_is_matrix and not yhat_is_matrix:
        gt = np.argmax(gt, axis=1)
    elif yhat_is_matrix and not gt_is_matrix:
        yhat = np.argmax(yhat, axis=1)

    f1 = f1_score(gt, yhat, average="macro")

    print(f"f1: {f1:.4f}")

<div style="background-color:rgba(128, 0, 128, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Load Train/Test Data</h1>
</div>

- train.csv - Data used to build our machine learning model
- test.csv - Data we generate predictions for. Does not contain the target variable
- sample_submission.csv - A file in the proper format to submit test predictions

In [7]:
train, test, sample_submission = read_data("../input/digit-recognizer")

train data: Rows=42000, Columns=785
test data : Rows=28000, Columns=784


In [8]:
train.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [9]:
# Model inputs: every training column except the identifier and the target.
FEATURES = [name for name in train.columns if name not in ("Id", TARGET)]

In [10]:
# Separate inputs from the target; copies keep later edits away from the
# frames loaded from disk.
X = train.loc[:, FEATURES].copy()
X_test = test.loc[:, FEATURES].copy()
y = train.loc[:, TARGET]

In [11]:
# Normalize the data by dividing every pixel value by 255. Always start from
# the loaded frames so re-running this cell cannot scale the data twice.
X = train[FEATURES] / 255.0
X_test = test[FEATURES] / 255.0

In [12]:
# Reshape each row into a 28x28 single-channel image. np.asarray accepts both
# a DataFrame and an ndarray, so this cell can be re-run without failing.
X = np.asarray(X).reshape(-1, 28, 28, 1)
X_test = np.asarray(X_test).reshape(-1, 28, 28, 1)

In [13]:
# X[:3]

In [14]:
# Import from the public Keras utils path instead of the private
# `keras.utils.np_utils` module.
from tensorflow.keras.utils import to_categorical

# One-hot encode the digit labels into 10 columns. Encoding straight from the
# label column keeps the cell re-runnable (it never encodes an already
# encoded `y`).
y = to_categorical(train[TARGET], num_classes=10)

In [15]:
y[:5]

array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)

In [16]:
# Hold out 10% of the labelled images for validation (fixed seed).
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.1, random_state=42)
tuple(part.shape for part in (X_train, y_train, X_valid, y_valid))

((37800, 28, 28, 1), (37800, 10), (4200, 28, 28, 1), (4200, 10))

<div style="background-color:rgba(128, 0, 128, 0.6);border-radius:5px;display:fill"><h1 style="text-align: center;padding: 12px 0px 12px 0px;">Create Models</h1>
</div>

In [17]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


def get_tf_model01(x_shape):
    """Build and compile a small CNN classifier with a 10-way softmax output.

    Architecture: Conv2D(128 filters, 2x2 kernel, same padding) -> BatchNorm
    -> Flatten -> Dense(64) -> BatchNorm -> Dense(32) -> BatchNorm ->
    Dense(10, softmax).

    Args:
        x_shape: Shape of one input image, either ``(height, width)`` for a
            single-channel image or ``(height, width, channels)``.

    Returns:
        A ``keras.Model`` compiled with categorical cross-entropy loss, the
        ``rmsprop`` optimizer and an accuracy metric.

    Raises:
        ValueError: If ``x_shape`` does not have two or three dimensions.
    """
    input_shape = tuple(x_shape)
    if len(input_shape) == 2:
        # Conv2D needs an explicit channel axis; assume one channel.
        input_shape = (*input_shape, 1)
    elif len(input_shape) != 3:
        raise ValueError(
            f"x_shape must be (height, width) or (height, width, channels), got {x_shape!r}"
        )
    print(f"x_shape={input_shape}")

    inputs = keras.Input(shape=input_shape)

    x = keras.layers.Conv2D(
        128,
        kernel_size=2,
        activation="relu",
        padding="same",
        kernel_initializer="he_normal",
    )(inputs)
    x = keras.layers.BatchNormalization()(x)

    x = keras.layers.Flatten()(x)

    x = keras.layers.Dense(64, activation="relu")(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Dense(32, activation="relu")(x)
    x = keras.layers.BatchNormalization()(x)

    outputs = keras.layers.Dense(10, activation="softmax")(x)

    model = keras.Model(inputs, outputs)

    model.compile(
        loss="categorical_crossentropy", optimizer="rmsprop", metrics=["accuracy"]
    )

    return model

In [18]:
# Size of axis 1 (the first image dimension after the reshape to (-1, 28, 28, 1)).
X_train.shape[1]

28

In [19]:
# Number of labelled images before the train/validation split.
X.shape[0]

42000

In [20]:
# Build the CNN for 28x28 inputs, then list its layers and parameter counts.
model = get_tf_model01((28, 28))
model.summary()

x_shape=
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 28, 28, 128)       640       
_________________________________________________________________
batch_normalization (BatchNo (None, 28, 28, 128)       512       
_________________________________________________________________
flatten (Flatten)            (None, 100352)            0         
_________________________________________________________________
dense (Dense)                (None, 64)                6422592   
_________________________________________________________________
batch_normalization_1 (Batch (None, 64)                256       
_________________________________________________________________
dense_1 (Dense)              (None, 32)             

2022-07-15 22:04:23.713616: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [21]:
%%time

history = model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=10,
    # Suppress logging.
    verbose=1,
    # Calculate validation results on 20% of the training data.
    #     validation_split = 0.2
)

2022-07-15 22:04:24.194341: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
CPU times: user 25min 55s, sys: 18.2 s, total: 26min 13s
Wall time: 7min 35s


## Predict and Score

In [22]:
# Per-class probabilities for the validation images (one row per image).
valid_preds = model.predict(x=X_valid)
valid_preds[:5]

array([[1.69630438e-11, 1.37111911e-09, 2.90043030e-08, 1.67827682e-10,
        1.53796442e-08, 3.92691568e-09, 4.34656000e-12, 9.09796977e-12,
        1.00000000e+00, 4.94911889e-10],
       [1.38435274e-09, 1.00000000e+00, 4.79696949e-10, 2.64218884e-14,
        5.30585815e-08, 3.18331884e-12, 3.21631077e-09, 1.68878342e-10,
        4.45019820e-11, 6.73642947e-11],
       [6.72432350e-08, 4.15579393e-08, 1.36591805e-08, 1.32039006e-07,
        1.58085225e-08, 1.72120096e-08, 5.89978111e-09, 8.31810553e-07,
        4.37081894e-07, 9.99998450e-01],
       [3.30365197e-07, 4.67462002e-07, 2.53389913e-07, 2.18519745e-05,
        1.40253107e-08, 1.61280320e-06, 7.74594469e-08, 8.08203622e-05,
        4.38610869e-07, 9.99894142e-01],
       [1.57371102e-10, 8.67365138e-11, 1.73250494e-11, 3.08587270e-12,
        8.55125623e-11, 3.78603926e-10, 1.50718275e-11, 4.70434130e-13,
        1.00000000e+00, 1.92575109e-10]], dtype=float32)

In [23]:
# Keep the most probable class for each validation image.
valid_preds = valid_preds.argmax(axis=1)

In [24]:
# y_valid is one-hot encoded, so collapse it back to digit labels before
# comparing it with the predicted labels.
show_scores(np.argmax(y_valid, axis=1), valid_preds)



In [25]:
# Per-class probabilities for the test images (one row per image).
preds = model.predict(x=X_test)
preds[:5]

array([[1.5586227e-10, 2.9588956e-11, 1.0000000e+00, 4.1281905e-11,
        6.4169205e-11, 4.5531279e-10, 4.1125513e-11, 2.2663273e-12,
        7.4930695e-12, 5.8308386e-11],
       [9.9954849e-01, 4.7405545e-05, 5.8210167e-06, 3.0203789e-06,
        3.7298655e-08, 3.7959695e-04, 1.7506418e-08, 1.4346149e-06,
        1.4226627e-05, 2.5906475e-09],
       [2.6832194e-09, 1.5041230e-08, 5.2734968e-09, 2.5591009e-09,
        1.9717612e-07, 7.7745895e-08, 1.0790237e-08, 1.3952309e-09,
        1.2788139e-04, 9.9987173e-01],
       [2.8816348e-02, 1.3915295e-04, 1.0586179e-01, 1.3859327e-04,
        2.8885523e-04, 7.7502962e-05, 5.2745761e-03, 2.8271940e-02,
        7.1850959e-06, 8.3112401e-01],
       [4.7463009e-11, 2.0004650e-07, 1.1857518e-07, 9.9999809e-01,
        2.6072064e-10, 1.5873356e-06, 8.5184221e-11, 2.3007155e-10,
        5.8234896e-08, 8.8400065e-10]], dtype=float32)

In [26]:
# Keep the most probable class for each test image.
preds = preds.argmax(axis=1)

## Submission

In [27]:
# The submission file names its prediction column "Label", not TARGET ("label").
ss = create_submission(model_name="", target="Label", preds=preds)
ss

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3
