[![mnist.gif](https://i.postimg.cc/W1cwV9JR/mnist.gif)](https://postimg.cc/vcXVLtzX)

In [None]:
%%capture
!pip install --user torch==1.9.0 torchvision==0.10.0

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from tqdm import tqdm # for progress bar
from PIL import Image # for create images based on array

from fastai.vision.all import *
from fastai.metrics import *
from fastai.imports import *

# render matplotlib figures inline in the cell output
%matplotlib inline
# ignore warnings: silence every Python warning from here on
import warnings
warnings.simplefilter('ignore')

In [None]:
# Select the compute device: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Echo the choice as the cell output.
device

In [None]:
# Directory the train/test CSVs are read from (relative to the notebook's working directory).
INPUT_PATH = Path("../input/digit-recognizer")
# Show the directory's entries as the cell output.
# NOTE(review): `.ls()` is not a standard pathlib method — presumably the helper fastai adds to Path; confirm.
INPUT_PATH.ls()

In [None]:
# Load the training and test tables from INPUT_PATH, in that order.
train_df, test_df = (
    pd.read_csv(INPUT_PATH/csv_name) for csv_name in ("train.csv", "test.csv")
)
# Display the first two training rows as a quick look at the layout.
train_df.head(2)

In [None]:
# fastai dataloaders read image files, so the dataframe rows have to be written
# out as images first. Prepare one sub-folder per digit (0-9) under each root.
TRAINING_DATA_PATH = Path('./train')
TESTING_DATA_PATH = Path('./test')
for root in (TRAINING_DATA_PATH, TESTING_DATA_PATH):
    for digit in map(str, range(10)):
        # parents/exist_ok make this safe to re-run on an existing tree
        (root / digit).mkdir(parents=True, exist_ok=True)

In [None]:
def save_img(filepath, img_array):
    """Write a flat array of 784 pixel values to `filepath` as a 28x28 image.

    Args:
        filepath: Destination path handed to `Image.save`.
        img_array: NumPy array with exactly 28*28 elements. Values are cast
            to uint8, so they are assumed to already lie in 0-255.
    """
    # Pass the dtype class itself; the original `np.uint8()` built a scalar
    # instance and relied on NumPy coercing it back into a dtype.
    pixels = img_array.reshape(28, 28).astype(np.uint8)
    Image.fromarray(pixels).save(filepath)
    
# save train images: the first column is the digit label, the rest are pixels
# (total= lets tqdm draw a real progress bar; iterrows() has no length)
for i, row in tqdm(train_df.iterrows(), total=len(train_df)):
    # .iloc is explicitly positional; row[0] is a label lookup that only works
    # here through a deprecated positional fallback in pandas.
    label, pixels = int(row.iloc[0]), row.iloc[1:]
    # one folder per digit, file named after the dataframe index
    img_path = TRAINING_DATA_PATH/str(label)/f"{i}.png"
    save_img(img_path, pixels.values)
    
# save test images
# Test rows have no label column, so the images are written directly under
# TESTING_DATA_PATH. The previous path used `label`, a variable left over from
# the training loop, which sent every test image to whichever digit folder the
# last training row happened to have.
TESTING_DATA_PATH.mkdir(parents=True, exist_ok=True)
for i, row in tqdm(test_df.iterrows(), total=len(test_df)):
    # file named after the dataframe index
    img_path = TESTING_DATA_PATH/f"{i}.png"
    save_img(img_path, row.values)

In [None]:
# Recipe for building the dataset: image inputs with categorical targets,
# labels taken from each file's parent folder name, a seeded random split that
# holds out 20% for validation, and batch-level augmentation with flips
# disabled (presumably because a mirrored digit is no longer a valid digit).
dblock = DataBlock(blocks=(ImageBlock, CategoryBlock),
                   get_items=get_image_files,
                   splitter=RandomSplitter(valid_pct=0.2, seed=42),
                   get_y=parent_label,
                   batch_tfms=aug_transforms(do_flip=False, mult=2))

# Read from the same directory constant the images were written to, rather than
# repeating the "train" literal, so the two cannot drift apart.
dls = dblock.dataloaders(TRAINING_DATA_PATH, batch_size=64, num_workers=2, device=device)
# Preview ten augmented samples in one row as a sanity check on images and labels.
dls.show_batch(max_n=10, nrows=1, figsize=(12,6))

In [None]:
# Build a CNN learner over `dls` with the resnet34 architecture, reporting accuracy as the metric.
learner = cnn_learner(dls, resnet34, metrics=accuracy)
# Run the learning-rate finder; its output is only displayed, not stored.
learner.lr_find()

In [None]:
# List the working directory as the cell output.
# NOTE(review): looks like a leftover inspection step — nothing below uses the result.
os.listdir('.')

In [None]:
# Fine-tune the learner: 30 is the epoch count, 0.01 the base learning rate.
# NOTE(review): the rate is hard-coded rather than taken from the lr_find() run above — confirm it matches what the finder suggested.
learner.fine_tune(30, base_lr=0.01)

In [None]:
# Inspect where the trained learner goes wrong.
interp = ClassificationInterpretation.from_learner(learner)
# Confusion matrix across the digit classes.
interp.plot_confusion_matrix(figsize=(8,8), dpi=60)
# Class pairs that were confused with each other at least 5 times (cell output).
interp.most_confused(min_val=5)

In [None]:
# Show up to 10 sample predictions laid out in a single row.
learner.show_results(max_n=10, nrows=1, figsize=(12,6))

### **Demo**

In [None]:
# %%capture

# !pip install gradio
# import gradio as gr

In [None]:
# labels = learner.dls.vocab

# def classify(img):
#     pred, pred_idx, probs = learner.predict(img)
#     return {labels[i]: float(probs[i]) for i in range(len(labels))}


# interface = gr.Interface(fn=classify, inputs="sketchpad", outputs=gr.outputs.Label(num_top_classes=5), live=True)
# interface.launch(debug=True, share=True)

### **Submission**

In [None]:
# # get predictions on test data
# test_images = get_image_files('test')
# test_dl = learner.dls.test_dl(test_images, with_label=True)
# pred_probas, _, pred_classes = learner.get_preds(dl=test_dl, with_decoded=True) 

In [None]:
# # create submission dataframe with ids and pred classes
# ids = [int(img.stem) + 1 for img in test_images]
# submission = pd.DataFrame(ids, columns=['ImageId'])
# submission['Label'] = pred_classes

In [None]:
# # save submission
# submission.to_csv("submission.csv",index=False)

### **Save model**

In [None]:
# Export the trained learner; no file name is given, so fastai's default export location is used.
learner.export()

In [None]:
# clean temporary folder
# Delete the image folders generated from the CSVs. After this cell runs, the
# image-writing cells must be re-run before the dataloaders can be rebuilt.
!rm -r "./train"
!rm -r "./test"