In [None]:
import numpy as np 
import pandas as pd 

# Import Tensorflow modules
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.models import Sequential

import os
import re
from matplotlib import pyplot as plt

In [None]:
# List every file found below the input directory (nothing is read here).
# NOTE(review): this walks '/content/kaggle/input' while the CSV paths defined
# further down start at '/kaggle/input' -- confirm which root is intended.
input_files = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/content/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

In [None]:
# Number of samples per batch for every tf.data pipeline in this notebook.
BATCH_SIZE = 128

In [None]:
def read_input_data(fullpath):
    """Load a CSV file into a pandas DataFrame.

    Args:
        fullpath: Path (or any object accepted by ``pd.read_csv``) of the CSV.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with default options.
    """
    return pd.read_csv(fullpath)

In [None]:
# Locations of the labelled training CSV and the unlabelled test CSV.
train_path, test_path = (
    "/kaggle/input/digit-recognizer/train.csv",
    "/kaggle/input/digit-recognizer/test.csv",
)

In [None]:
# Load the labelled CSV; the first column is used as the label and the
# remaining columns as pixel features.
raw_data = read_input_data(train_path)
dataset_size = raw_data.shape[0]

# The first 80% of the rows are used for training; every remaining row is used
# for validation, so val_size always matches the real size of the hold-out set.
train_size = int(0.8 * dataset_size)
val_size = dataset_size - train_size

# Features: every column except the first.
train_features = raw_data.iloc[:train_size, 1:]
val_features = raw_data.iloc[train_size:, 1:]

# Labels: the first column, selected as a 1-D Series for BOTH splits so the
# training and validation labels have the same rank.
train_labels = raw_data.iloc[:train_size, 0]
val_labels = raw_data.iloc[train_size:, 0]

In [None]:
# Shapes of the training features and labels, one per line.
print(train_features.shape, train_labels.shape, sep="\n")

In [None]:
# Shapes of the validation features and labels, one per line.
print(val_features.shape, val_labels.shape, sep="\n")

In [None]:
# Convert the pixel table to a float32 tensor and view each row as a 28x28 image.
training_ds = tf.convert_to_tensor(train_features, dtype=tf.float32)
training_ds = tf.reshape(training_ds, [train_features.shape[0], 28, 28])
train_dataset = tf.data.Dataset.from_tensor_slices((training_ds, train_labels))
# Shuffle BEFORE repeat so every pass over the data is shuffled on its own and
# samples from different epochs are not mixed. The buffer spans the whole
# training set, which gives a uniform shuffle (the data is already in memory).
train_dataset = train_dataset.shuffle(train_features.shape[0])
# Repeat indefinitely; the number of batches per epoch is bounded by
# steps_per_epoch in model.fit.
train_dataset = train_dataset.repeat()
train_dataset = train_dataset.batch(BATCH_SIZE, drop_remainder=True)

In [None]:
# Convert the validation pixels to a float32 tensor of 28x28 images.
validation_ds = tf.convert_to_tensor(val_features, dtype=tf.float32)
validation_ds = tf.reshape(validation_ds, [val_features.shape[0], 28, 28])
val_dataset = tf.data.Dataset.from_tensor_slices((validation_ds, val_labels))
# No shuffling here: order does not affect evaluation metrics, and shuffling a
# repeated stream made the evaluated subset random (some samples twice, others
# never). Without it the same samples are evaluated every epoch.
# repeat() is kept because model.fit bounds evaluation with validation_steps.
val_dataset = val_dataset.repeat()
val_dataset = val_dataset.batch(BATCH_SIZE, drop_remainder=True)

In [None]:
# Load the test CSV with the shared reader and keep it as a NumPy array.
test_ds = read_input_data(test_path).to_numpy()

In [None]:
def reshape_test_data(data=None, batch_size=None):
    """Turn flat test pixels into a batched ``tf.data.Dataset`` of 28x28 images.

    Args:
        data: 2-D array-like whose rows each hold one flattened 28x28 image.
            Defaults to the notebook-level ``test_ds``.
        batch_size: Number of images per batch. Defaults to ``BATCH_SIZE``.

    Returns:
        A dataset yielding float32 batches of shape ``(batch, 28, 28)``.
        The order of the rows is preserved and the last, possibly smaller,
        batch is kept.
    """
    if data is None:
        data = test_ds
    if batch_size is None:
        batch_size = BATCH_SIZE

    # Number of test samples, printed as a quick sanity check.
    len_test_ds = data.shape[0]
    print(len_test_ds)

    # Convert to a tensor and reshape from (N, 784) to (N, 28, 28).
    test_data = tf.convert_to_tensor(data, dtype=tf.float32)
    test_data = tf.reshape(test_data, [len_test_ds, 28, 28])
    test_data = tf.data.Dataset.from_tensor_slices(test_data)
    return test_data.batch(batch_size)

In [None]:
# Build the batched test dataset (the helper also prints the number of test rows).
test_data = reshape_test_data()

In [None]:
# Peek at one training batch: the first three labels, then the first two images.
for features, label in train_dataset.take(1):
    print(label[:3])
    for feature in features[:2]:
        print(f" {feature}")

In [None]:
def make_features(features, labels):
    """Add a trailing channel axis to a batch of 28x28 images.

    Args:
        features: Tensor of images that can be reshaped to ``(-1, 28, 28, 1)``.
        labels: Labels belonging to ``features``; returned unchanged.

    Returns:
        A ``(image, labels)`` tuple where ``image`` has shape
        ``(batch, 28, 28, 1)``.
    """
    # The reshape alone produces the target layout; stacking a single tensor
    # along axis 0 beforehand was a no-op and has been dropped.
    image = tf.reshape(features, [-1, 28, 28, 1])
    return image, labels

In [None]:
# Add the channel axis expected by the convolutional model.
# No .cache() here: both source datasets are repeated indefinitely (and the
# tensors already live in memory), so a cache placed after them could never
# finish filling and would only keep buffering batches.
train_data = train_dataset.map(make_features)

val_data = val_dataset.map(make_features)

In [None]:
# cardinality is a method: call it to get the element count. A repeated
# (infinite) dataset reports tf.data.INFINITE_CARDINALITY, i.e. -1.
print(train_data.cardinality().numpy())

In [None]:
# Show the first pixel row and the label of the first sample in one batch.
# take(1) yields a single batch, so no explicit break is needed.
for features, labels in train_dataset.take(1):
    print(features[0][0])
    # Use this loop's own `labels`, not a `label` left over from an earlier cell.
    print(labels[0])

In [None]:
def show_images(img_batch, label_batch, n_images=4):
    """Plot the first images of a batch in a single row, titled by label.

    Args:
        img_batch: Indexable batch of images accepted by ``plt.imshow``.
        label_batch: Indexable batch of labels aligned with ``img_batch``.
        n_images: Maximum number of images to draw (default 4). Fewer are
            drawn when the batch is smaller.
    """
    # Never index past the end of a short batch.
    n_images = min(n_images, len(img_batch))
    plt.figure(figsize=(4,4), dpi=120)

    for n in range(n_images):
        plt.subplot(1, n_images, n+1)
        plt.imshow(img_batch[n])
        # Works for tensors, NumPy values and plain ints; squeezing turns a
        # length-1 label such as [5] into 5 so the title reads "5".
        plt.title(str(np.squeeze(np.asarray(label_batch[n]))))
        plt.axis('off')

In [None]:
# Pull a single batch from the training pipeline and plot its first images.
for img_batch, label_batch in train_dataset.take(1):
    show_images(img_batch, label_batch)

In [None]:
# Small CNN: rescale pixels to [0, 1], three conv/pool/dropout stages with
# 16, 32 and 64 filters, then a dense head with a 10-way softmax output.
model = Sequential()
model.add(layers.experimental.preprocessing.Rescaling(scale=1./255, input_shape=(28,28,1), name='Rescale_layer'))

model.add(layers.Conv2D(filters=16, kernel_size=3, padding='same', activation='relu', name='conv2d_1'))
model.add(layers.MaxPooling2D())
model.add(layers.Dropout(rate=0.3))

model.add(layers.Conv2D(filters=32, kernel_size=3, padding='same', activation='relu', name='conv2d_2'))
model.add(layers.MaxPooling2D())
model.add(layers.Dropout(rate=0.3))

model.add(layers.Conv2D(filters=64, kernel_size=3, padding='same', activation='relu', name='conv2d_3'))
model.add(layers.MaxPooling2D())
model.add(layers.Dropout(rate=0.3))

model.add(layers.Flatten(name='flatten'))
model.add(layers.Dense(units=128, activation='relu'))
model.add(layers.Dense(units=10, activation='softmax'))

In [None]:
model.compile(
    optimizer='adam',
    # The model's last Dense layer already applies softmax, so its outputs are
    # probabilities, not logits. from_logits must therefore be False;
    # otherwise softmax would effectively be applied twice.
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Number of training epochs passed to model.fit.
epochs = 30

In [None]:
# Train the model. Step counts are the number of whole batches that fit in the
# training and validation splits.
history = model.fit(
    x=train_data,
    validation_data=val_data,
    epochs=epochs,
    steps_per_epoch=train_size // BATCH_SIZE,
    validation_steps=val_size // BATCH_SIZE,
)

In [None]:
# Run the model over the batched test dataset.
predictions = model.predict(test_data)

In [None]:
# Predicted digit for each test image: index of the highest score along axis 1.
preds = predictions.argmax(axis=1)

In [None]:
# Take the first batch of test images for the visual check in the next cell.
test_batch = next(iter(test_data))

In [None]:
# Draw the first four test images in one row, each titled with its prediction.
plt.figure(figsize=(4, 4), dpi=120)

first_four = test_batch[:4]
for col in range(1, 5):
    plt.subplot(1, 4, col)
    plt.imshow(first_four[col - 1])
    plt.title(f"Pred:{preds[col - 1]}")
    plt.axis('off')

In [None]:
# Number of predictions, i.e. the number of rows for the submission file.
test_dataset_size = len(preds)
test_dataset_size

In [None]:
# Image ids for the submission start at 1 and run to test_dataset_size inclusive.
ids = np.arange(test_dataset_size) + 1
ids.size

In [None]:
# Assemble the submission table: one row per test image with its predicted label.
pred_df = pd.DataFrame(dict(ImageId=ids, Label=preds))
pred_df.head()

In [None]:
# Write the predictions to a CSV file, omitting the DataFrame index column.
pred_df.to_csv('lu_submission.csv', index=False)