In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
input_files = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_files:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# [Kaggle Link](https://www.kaggle.com/competitions/digit-recognizer)

# The Blueprint 🟦
1. Fetch the dataset
2. Feed the dataset to dataloaders
3. HyperParams
4. Let's Find the optimal learning rate
5. Getting a baseline result
6. Interpretation
7. Sanity check: can we get predictions?
8. Final training
9. Squeezing The Last 99.x% With Ensembling And Test Time Augmentations
10. Kaggle submission

# 1. Fetch The Dataset

In [None]:
dependencies = [
    "kagtool",
    "kaggle",
    "fastai",
    "timm"
]

!pip install -U {" ".join(dependencies)}

import pandas as pd
import gc
from fastai.imports import *
from fastai.vision.all import *

In [None]:
# This works regardless of whether it runs inside or outside Kaggle.
import json
import os

from kagtool.datasets.kaggle_downloader import KaggleDownloader

dataset_name = 'digit-recognizer'
# Outside Kaggle (e.g. on a cloud machine), export KAGGLE_USERNAME and
# KAGGLE_KEY instead of pasting secrets into the notebook. When neither
# variable is set this produces the same empty-credentials JSON string
# ('{"username":"","key":""}') that was hard-coded before.
creds = json.dumps(
    {
        "username": os.environ.get("KAGGLE_USERNAME", ""),
        "key": os.environ.get("KAGGLE_KEY", ""),
    },
    separators=(",", ":"),
)

# `path` is the dataset location returned by the downloader; train.csv is read from it.
path = KaggleDownloader(dataset_name, creds).load_or_fetch_kaggle_dataset()
df = pd.read_csv(path/'train.csv')
df.head()


# 2. Baking DataLoaders

In [None]:
from PIL import Image

def get_image_from_pixels(row, img_size=(28, 28)):
    """
    Convert a flat row of pixel data to a grayscale PIL Image.
    :param row: Pandas Series, numpy array or plain sequence with pixel values.
                Its length must equal the product of `img_size`.
    :param img_size: Tuple passed to `reshape`, i.e. the 2-D shape of the image.
    :return: PIL Image in 'L' (8-bit grayscale) mode.
    """
    # np.asarray accepts a Series as well as a bare array/list, whereas the
    # previous `row.values` only worked for pandas objects.
    pixel_data = np.asarray(row).reshape(img_size)
    # Cast to uint8 because 'L' mode stores one byte per pixel.
    return Image.fromarray(pixel_data.astype('uint8'), 'L')

Apply data augmentations if needed (for MNIST they are not necessary).

In [None]:
def get_x(row):
    """
    Build the input image for one dataframe row.
    :param row: Pandas Series of pixel values, optionally with a 'label' entry.
    :return: whatever `get_image_from_pixels` returns for the pixel values.
    """
    # Remove the label by name rather than by position, so the result does
    # not depend on 'label' being the first column.
    pixels = row.drop(labels='label') if 'label' in row else row
    return get_image_from_pixels(pixels)

def get_y(row):
    """Return the target stored under the 'label' key of `row`."""
    target = row['label']
    return target
    
dblock = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_x=get_x,
    get_y=get_y,
    # Fixed seed so the 80/20 train/validation split is the same on every run.
    splitter=RandomSplitter(valid_pct=0.2, seed=42),
    # Resizing is needed before data augmentations can be applied.
    item_tfms=Resize(128),
    # Only normalisation is applied: this dataset performed better without
    # augmentations. To enable them, use instead:
    #   batch_tfms=[*aug_transforms(), Normalize.from_stats(*imagenet_stats)]
    batch_tfms=Normalize.from_stats(*imagenet_stats),
)
dls = dblock.dataloaders(df, bs=64)
dls.show_batch(max_n=9, figsize=(6, 7))


In [None]:
# Architecture name handed to `vision_learner` in the cells below
# (presumably resolved through timm, which is installed above; verify).
arch = 'convnext_small_in22k'

# 3. HyperParam

In [None]:
from types import SimpleNamespace

# Single container for the tunable hyper-parameters of this notebook.
# bs: batch size passed to `dblock.dataloaders`.
cfg = SimpleNamespace(bs=128)

It's not obvious right now, but this little namespace will save lives once you start modifying global variables here and there.

# 4. Let's Find The Optimal Learning Rate

In [None]:
# Discard the learner left over from a previous run of this cell (if any)
# and release its memory before a new one is created.
try:
    del learn
except NameError:
    # First run on a fresh kernel: there is no learner to discard yet.
    # Only this case is ignored; any other error is allowed to surface.
    pass
else:
    gc.collect()
    torch.cuda.empty_cache()

dls = dblock.dataloaders(df, bs=cfg.bs)
learn = vision_learner(dls, arch, metrics=accuracy).to_fp16()
# Run the learning-rate finder with the `slide` and `valley` suggestion functions.
learn.lr_find(suggest_funcs=(slide, valley))

In [None]:
# Base learning rate, passed as `base_lr` to every `fine_tune` call below.
cfg.lr = 1e-3

In [None]:
# Discard the learner left over from the learning-rate search (if any) and
# release its memory before building the one used for the baseline.
try:
    del learn
except NameError:
    # No learner exists yet; only this case is ignored, other errors surface.
    pass
else:
    gc.collect()
    torch.cuda.empty_cache()

dls = dblock.dataloaders(df, bs=cfg.bs)
learn = vision_learner(dls, arch, metrics=accuracy).to_fp16()

# 5. Getting a Baseline Result

> Note the use of `fine_tune`: we are doing transfer learning.

In [None]:
# Quick baseline: fine_tune with epochs=1 at the configured base learning rate.
learn.fine_tune(1, base_lr=cfg.lr)

# 6. Interpretation Of The First Results

In [None]:
# Visual spot check of the baseline learner's results.
learn.show_results()

In [None]:
# Build an interpretation object from the trained learner, then plot the
# confusion matrix to see which classes the model mixes up
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix(figsize=(10,10))

# Plot the 5 images with the highest loss, laid out on a single row
interp.plot_top_losses(5, nrows=1)

# 7. Sanity Check: Can We Get Predictions?

At this point the predictions might be garbage, as we have only trained for one epoch, but let's check that we can produce them at all.

In [None]:
# Load the test split from the same dataset folder as train.csv.
tst_df = pd.read_csv(path/'test.csv')
# Test dataloader built from the learner's DataLoaders; it is reused below
# for the sanity-check predictions and for the ensemble's TTA.
tst_dl = learn.dls.test_dl(tst_df)

In [None]:
# Keep only the first element returned by get_preds; the next cell takes
# its argmax per row to obtain the predicted class.
preds = learn.get_preds(dl=tst_dl)[0]

In [None]:
# Show the first three test images together with the class the model picked.
first_batch = next(iter(tst_dl))
images = first_batch[0]
for idx in range(3):
    images[idx].show()
    predicted_class = preds[idx].argmax().item()
    print("it should be a ", predicted_class)

# 8. Training (For Real) Now

In [None]:
# Fresh DataLoaders and learner, so this run does not reuse the baseline learner.
dls = dblock.dataloaders(df, bs=cfg.bs)
learn = vision_learner(dls, arch, metrics=accuracy).to_fp16()
# fine_tune with epochs=5 and freeze_epochs=3 at the configured base learning rate.
learn.fine_tune(5, freeze_epochs=3, base_lr=cfg.lr)

In [None]:
# Plot the losses recorded during the training run above.
learn.recorder.plot_loss()

# 9. Squeezing The Last 99.x% With Ensembling And Test Time Augmentations

> Note the use of `learn.tta` for test time augmentations

In [None]:
def ensemble(epochs=10, freeze_epochs=3):
    """
    Train one fresh model and return its test-time-augmentation predictions.

    Relies on the notebook globals `dblock`, `df`, `cfg`, `arch` and `tst_dl`.
    :param epochs: `epochs` argument forwarded to `fine_tune`.
    :param freeze_epochs: `freeze_epochs` argument forwarded to `fine_tune`.
    :return: First element of the tuple returned by `learn.tta(dl=tst_dl)`.
    """
    dls = dblock.dataloaders(df, bs=cfg.bs)
    learn = vision_learner(dls, arch, metrics=accuracy).to_fp16()
    learn.fine_tune(epochs, freeze_epochs=freeze_epochs, base_lr=cfg.lr)
    preds = learn.tta(dl=tst_dl)[0]
    # Release this member's learner before the next one is trained, using the
    # same del / gc / empty_cache cleanup the earlier cells apply.
    del learn, dls
    gc.collect()
    torch.cuda.empty_cache()
    return preds


# NOTE: despite its name, `learns` holds what each ensemble() call returns
# (test-set predictions), not learner objects.
learns = [ensemble() for _ in range(3)]
# Average the three stacked prediction tensors element-wise.
ens_preds = torch.stack(learns).mean(0)

# 10. Kaggle Submission

In [None]:
assert ens_preds.shape == (len(tst_df), dls.c) # expected shape: (number of test samples, number of classes)
ens_preds

In [None]:
# Build the submission in its own frame instead of adding columns to `tst_df`.
# `tst_dl` was created from `tst_df`, and `get_x` reshapes every column of a
# label-less row into the image, so extra columns there could break a re-run
# of the prediction cells (assumes the dataloader reads `tst_df` lazily).
sub_df = pd.DataFrame({
    # Image ids are 1-based, in the same order as the rows of the test set.
    'ImageId': range(1, len(tst_df) + 1),
    # Predicted class = index of the highest averaged score for each row.
    'Label': torch.argmax(ens_preds, dim=1).cpu().numpy(),
})
sub_df.to_csv('submission.csv', index=False)

# !head submission.csv

In [None]:
# hit the submit button or submit the file yourself! lol

# 🎉 Bam

I hope you've found this blueprint helpful for improving your results in this competition - and on Kaggle more generally! If you like it, please give it an upvote ☝️🙏