## Set up

In [None]:
# install fastkaggle if not available
try: import fastkaggle
except ModuleNotFoundError:
    !pip install -q fastkaggle

from fastkaggle import *

In [None]:
# Kaggle competition slug, passed to fastkaggle's setup_comp below
comp = 'digit-recognizer'

# `path` is the directory the competition CSV files are read from later on.
# `install` lists packages for setup_comp to install (presumably only when running on Kaggle — TODO confirm)
path = setup_comp(comp, install='fastai fastcore timm nbdev')

In [None]:
# Show the data path returned by setup_comp
path

In [None]:
# The star import is relied on for names used unqualified later in this notebook
# (pd, np, plt, Path, Image, random, torch — presumably all re-exported by fastai.vision.all; verify)
from fastai.vision.all import *
import os
import seaborn as sns
# Seeding is left disabled, so random choices are not pinned between runs
#set_seed(42)

# List the files available in the competition data directory
path.ls()

## Looking at the data

In [None]:
# Load the data: both CSV files are read from the competition directory `path`
df_train = pd.read_csv(path/"train.csv")
df_test = pd.read_csv(path/"test.csv")

In [None]:
# Preview the first rows of the training table
df_train.head()

In [None]:
# Preview the first rows of the test table
df_test.head()

In [None]:
# Target column: the digit label of every training row
Y_train = df_train["label"]

# Drop 'label' column, leaving only the remaining (feature) columns
X_train = df_train.drop(labels = ["label"],axis = 1) 

# free some space (disabled; note that no variable named `train` is defined at this point)
#del train 

# Bar chart of how many training rows exist per label
Y_train.value_counts().plot(kind='bar')
plt.show()

## Get data into the right format

This part I have adapted from https://www.kaggle.com/code/christianwallenwein/beginners-guide-to-mnist-with-fast-ai

In [None]:
! mkdir /kaggle/working/train
! mkdir /kaggle/working/test

In [None]:
# Root folders that the exported digit images are written to
TRAIN = Path("/kaggle/working/train")
TEST = Path("/kaggle/working/test")

# Create training directory: one sub-folder per digit class (0-9).
# exist_ok=True tolerates re-runs without hiding real failures
# (e.g. permission errors) the way a bare `except: pass` would.
for index in range(10):
    os.makedirs(TRAIN/str(index), exist_ok=True)

# Create test directory
os.makedirs(TEST, exist_ok=True)

# Test whether creating the training directory was successful.
# Kept as the last expression of the cell so the notebook actually displays it.
sorted(os.listdir(TRAIN))

In [None]:
def saveDigit(digit, filepath):
    """Write one digit to disk as an image file.

    Args:
        digit: array that is reshaped to 28x28 and cast to uint8.
        filepath: destination passed to the image's `save` method.
    """
    pixels = digit.reshape(28, 28).astype(np.uint8)
    Image.fromarray(pixels).save(filepath)
    
# save training images
# The label column is addressed by name instead of `row[0]`: positional integer
# indexing on a Series with string labels is deprecated in recent pandas releases.
# Converting to NumPy once also avoids building a Series per row via iterrows().
train_labels = df_train["label"].to_numpy()
train_pixels = df_train.drop(columns="label").to_numpy()
for index, label, digit in zip(df_train.index, train_labels, train_pixels):
    # Each image goes into the sub-folder named after its label
    saveDigit(digit, TRAIN/str(label)/f"{index}.jpg")

# save testing images
# The test table has no label column, so every row is pure pixel data
for index, digit in zip(df_test.index, df_test.to_numpy()):
    saveDigit(digit, TEST/f"{index}.jpg")

## Check images

In [None]:
# import matplotlib to arrange the images properly
import matplotlib.pyplot as plt

def displayTrainingData():
    """Show five randomly chosen training images for every digit class.

    Draws a 10x5 grid in which row r holds samples read from TRAIN/<r>.
    """
    fig = plt.figure(figsize=(5,10))

    # range(10) covers every class folder 0-9. With sampleIndex in 1..5 the
    # subplot position 5*rowIndex + sampleIndex then runs from 1 to 50, so the
    # first grid row is used and digit 0 is no longer skipped.
    for rowIndex in range(10):
        # Named class_dir so the notebook-level `path` variable is not shadowed
        class_dir = TRAIN/str(rowIndex)
        images = os.listdir(class_dir)
        for sampleIndex in range(1, 6):
            randomNumber = random.randint(0, len(images)-1)
            image = Image.open(class_dir/images[randomNumber])
            ax = fig.add_subplot(10, 5, 5*rowIndex + sampleIndex)
            ax.axis("off")

            # Draw on the axes just created instead of relying on pyplot's current axes
            ax.imshow(image, cmap='gray')

    plt.show()
    
def displayTestingData():
    """Plot 50 randomly picked images from the TEST folder in a 10x5 grid."""
    figure = plt.figure(figsize=(5, 10))

    test_files = os.listdir(TEST)
    last_index = len(test_files) - 1

    for position in range(1, 51):
        # Draw a random file index (duplicates are possible)
        pick = random.randint(0, last_index)
        sample = Image.open(TEST/test_files[pick])

        axes = figure.add_subplot(10, 5, position)
        axes.axis("off")

        plt.imshow(sample, cmap='gray')

    plt.show()
# Only the training preview is rendered here; displayTestingData is defined above but not called
print('samples of training data')
displayTrainingData()

## Resize images to 48x48

I upscale the images because the model crashes when it is given the original 28x28 images.

In [None]:
!mkdir train_lrg

In [None]:
# Destination folder for the resized training images
trn_path = Path("/kaggle/working/train_lrg")
# recurse=True so the per-digit sub-folders under TRAIN are processed; max_size=48 is the target size
# (presumably the sub-folder layout is mirrored under dest — TODO confirm)
resize_images(TRAIN, dest=trn_path, max_size=48, recurse=True)

In [None]:
!mkdir test_lrg

In [None]:
# Destination folder for the resized test images
test_path = Path("/kaggle/working/test_lrg")
# Use the same max_size as the training images (48) so that test images go through
# exactly the same preprocessing as the data the models are trained on
resize_images(TEST, dest=test_path, max_size=48, recurse=True)

In [None]:
# Sorted list of the resized test images; read as a global inside train() when it builds the test loader
tst_files = get_image_files(test_path).sorted()

## Create data loader

In [None]:
# Data loaders built from the resized training folder, holding out 1% for validation.
# Augmentation allows rotations up to 20 degrees and disables flipping.
dls = ImageDataLoaders.from_folder(trn_path, valid_pct=0.01,
                                   #item_tfms=Resize(48, method='squish'),
                                   batch_tfms=aug_transforms(max_rotate=20.0, do_flip=False),
                                   bs=1024)

# Visual sanity check of six augmented samples
dls.show_batch(max_n=6)

## Define training function

In [None]:
def train(arch, size, item=Resize(48, method='squish'), accum=1, finetune=True, epochs=12, lr=0.01):
    """Build data loaders from `trn_path`, train `arch` and return the outcome.

    Args:
        arch: architecture passed to `vision_learner`.
        size: `size` argument for `aug_transforms`.
        item: item transform passed as `item_tfms`.
        accum: divisor applied to the batch size of 128; when truthy, a
            `GradientAccumulation(128)` callback is attached.
        finetune: selects `fine_tune` (True) or unfreeze + `fit_one_cycle` (False).
        epochs: number of epochs for the chosen training call.
        lr: learning rate passed to the chosen training call.

    Returns:
        With finetune=True, the result of `learn.tta` on a test loader built
        from the notebook-level `tst_files`; otherwise the trained learner.
    """
    batch_augs = aug_transforms(size=size, max_rotate=20.0, do_flip=False)
    loaders = ImageDataLoaders.from_folder(
        trn_path,
        valid_pct=0.05,
        item_tfms=item,
        batch_tfms=batch_augs,
        bs=128//accum,
    )

    if accum:
        callbacks = GradientAccumulation(128)
    else:
        callbacks = []
    learner = vision_learner(loaders, arch, metrics=[error_rate, accuracy], cbs=callbacks).to_fp16()

    # Full training path: unfreeze everything and hand back the learner itself
    if not finetune:
        learner.unfreeze()
        learner.fit_one_cycle(epochs, lr)
        return learner

    # Fine-tuning path: return test-time-augmentation predictions instead
    learner.fine_tune(epochs, lr)
    return learner.tta(dl=loaders.test_dl(tst_files))
        
        
import gc
def report_gpu():
    """Print the GPU process listing, then collect garbage and empty the CUDA cache."""
    gpu_processes = torch.cuda.list_gpu_processes()
    print(gpu_processes)
    # Collect unreachable Python objects first, then clear torch's CUDA cache
    gc.collect()
    torch.cuda.empty_cache()

## First model

In [None]:
# Print GPU usage and clear cached memory before building the first model
report_gpu()

In [None]:
# Data loaders for the first model: 5% validation split, batches augmented at size 224
dls = ImageDataLoaders.from_folder(trn_path, valid_pct=0.05,
                                   item_tfms=None,
                                   batch_tfms=aug_transforms(size=224, max_rotate=20.0, do_flip=False),
                                   bs=128)
# Create the learner here: the next cell calls learn.lr_find, which raises a
# NameError on a fresh kernel if `learn` has not been defined yet
learn = vision_learner(dls, 'vit_base_patch16_224_miil', metrics=[error_rate, accuracy]).to_fp16()

In [None]:
# Run the learning-rate finder with the `valley` and `slide` suggestion functions.
# Requires a `learn` object to exist at this point in the notebook.
learn.lr_find(suggest_funcs=(valley, slide))

In [None]:
# One-epoch trial runs of several architectures. With finetune=False, train()
# returns the learner, which is discarded here; report_gpu() then prints GPU
# usage and clears cached memory before the next trial.
train('convnext_small.fb_in22k', size=None, item=None, epochs=1, accum=1, finetune=False)
report_gpu()

In [None]:
# accum=2 halves the batch size inside train()
train('convnext_large_in22k', size=None, item=None, epochs=1, accum=2, finetune=False)
report_gpu()

In [None]:
train('regnetx_120', size=None, item=None, epochs=1, accum=1, finetune=False)
report_gpu()

In [None]:
# size=224 is forwarded to aug_transforms inside train()
train('vit_base_patch16_224_miil', size=224, item=None, epochs=1, accum=1, finetune=False)
report_gpu()

In [None]:
train('regnety_320.swag_ft_in1k', size=None, item=None, epochs=1, accum=1, finetune=False)
report_gpu()

In [None]:
# accum=16 gives a batch size of 128//16 = 8 inside train()
train('beit_large_patch16_224.in22k_ft_in22k_in1k', size=224, item=None, epochs=1, accum=16, finetune=False)
report_gpu()

In [None]:
# Longer run (15 epochs, lr=0.001); finetune=False makes train() return the learner, kept as `learn`
learn = train('vit_base_patch16_224_miil', size=224, item=None, epochs=15, lr=0.001, accum=1, finetune=False)

In [None]:
# Clear GPU memory before and after training the BEiT model for 5 epochs
report_gpu()
learn_beit = train('beit_large_patch16_224.in22k_ft_in22k_in1k', size=224, item=None, epochs=5, accum=16, finetune=False)
report_gpu()

In [None]:
# save results: pickle the learner to the working directory
save_pickle('model_vit_base_patch16_224_miil.pkl', learn)

In [None]:
# Replace `learn` with a learner stored in an attached Kaggle input dataset.
# NOTE(review): load_pickle presumably unpickles the file, which can execute arbitrary code —
# only load files you trust; confirm this dataset is your own upload.
learn = load_pickle(Path('/kaggle/input/model-vit-base-patch16-224-miil-digit-recognizer/model_vit_base_patch16_224_miil.pkl'))

In [None]:
# One further epoch at base_lr=0.001, with the frozen phase set to 0 epochs
learn.fine_tune(epochs=1, base_lr=0.001, freeze_epochs=0)

## Ensemble models

In [None]:
# Architectures of the ensemble. Each architecture maps to a list of
# (size, accum) tuples, unpacked as `for size, accum in details` by the training
# loop. Plain tuples replace the invalid `(size=..., accum=...)` syntax, and
# lists keep the training order deterministic.
models = {
    'beit_large_patch16_224.in22k_ft_in22k_in1k': [
        (224, 4),
    ],
    'vit_base_patch16_224_miil': [
        (224, 2),
    ],
    'regnetx_120': [
        (None, 1),
    ],
}

In [None]:
# train and predict
# Collects one train() result per (architecture, size, accum) combination.
# With finetune=True, train() returns the learn.tta output for the test files.
tta_res = []

for arch,details in models.items():
    for size, accum in details:
        print('---',arch)
        print(size)
        print(accum)
        tta_res.append(train(arch, size=size, accum=accum, item=None, epochs=12, finetune=True))
        # Free Python garbage and cached GPU memory before the next model
        gc.collect()
        torch.cuda.empty_cache()

In [None]:
# save results: pickle the collected test-time-augmentation outputs
save_pickle('tta_res.pkl', tta_res)

## Test time augmentation

To make the predictions even better, we can try test time augmentation (TTA), which our book defines as:

> During inference or validation, creating multiple versions of each image, using data augmentation, and then taking the average or maximum of the predictions for each augmented version of the image.

Before trying that out, we'll first see how to check the predictions and error rate of our model without TTA:

In [None]:
# Plain predictions and targets on the learner's own validation loader
valid = learn.dls.valid
preds,targs = learn.get_preds(dl=valid)


In [None]:
# Error rate without test time augmentation
error_rate(preds, targs)

In [None]:
# Show six training samples (unique=True presumably repeats one image under different augmentations — verify)
learn.dls.train.show_batch(max_n=6, unique=True)


In [None]:
# Predictions on the same validation loader, this time with test time augmentation
tta_preds,_ = learn.tta(dl=valid)

In [None]:
# Error rate with TTA, measured against the targets obtained from get_preds above
error_rate(tta_preds, targs)

## Submitting to Kaggle

In [None]:
# Load the sample submission; it is filled in and written out as the submission file further down
ss = pd.read_csv(path/'sample_submission.csv')
ss

In [None]:
# Sorted so that predictions can be matched back to file names later
tst_files = get_image_files(test_path).sorted()
# Build the test loader from the learner's own data loaders, so the prediction
# pipeline belongs to the model being used rather than to whichever global
# `dls` happened to be created last in the session
tst_dl = learn.dls.test_dl(tst_files)


In [None]:
# with tta: predict on the test loader and take the highest-scoring class index per image
preds,_ = learn.tta(dl=tst_dl)
idxs = preds.argmax(dim=1)

# without tta (disabled alternative)
#probs,_,idxs = learn.get_preds(dl=tst_dl, with_decoded=True)
#idxs

## Have a look at some predictions

In [None]:
%matplotlib inline
# Show the first ten files of the original (not resized) TEST folder, in sorted order
files = get_image_files(TEST).sorted()
for i in range(10):
    img = PILImage.create(files[i])
    # NOTE(review): the result of to_thumb is not used; if it returns a copy, this line has no effect — confirm
    img.to_thumb(28)
    img.show()

In [None]:
# File stems are 0-based row indices; adding 1 gives the ImageId value for each file
order = [int(image_file.stem) + 1 for image_file in files]

In [None]:
# Labels and ids are written in the same (file) order, so each row stays paired;
# sorting by ImageId afterwards arranges the rows numerically
ss['Label'] = list(idxs.numpy())
ss['ImageId'] = order 
ss.sort_values(by="ImageId", inplace=True)
ss

In [None]:
# Write the submission without the DataFrame index and print its first lines
ss.to_csv('subm.csv', index=False)
!head subm.csv

In [None]:
# Kaggle API credentials must never be written into the notebook: anything
# typed here is saved with the file and shared with it. Any key that was
# previously stored in this cell should be treated as exposed and regenerated
# in the Kaggle account settings.
import getpass
import os

# Take each credential from the environment when it is already set; otherwise
# prompt for it without echoing the input. Shell commands started with `!`
# inherit os.environ, so the kaggle CLI picks the values up.
for _name in ("KAGGLE_USERNAME", "KAGGLE_KEY"):
    if not os.environ.get(_name):
        os.environ[_name] = getpass.getpass(f"{_name}: ")

# Verify that both values are present without printing the secret key
print("KAGGLE_USERNAME =", os.environ["KAGGLE_USERNAME"])
print("KAGGLE_KEY is set:", bool(os.environ["KAGGLE_KEY"]))

In [None]:
# Submit subm.csv through the kaggle CLI; the -m text is a free-form description
# and is not derived from the training cells, so keep it in sync by hand
!kaggle competitions submit -c digit-recognizer -f subm.csv -m "vit_base_patch16_224_miil for 20 epochs"

# View results
!kaggle competitions submissions -c digit-recognizer

In [None]:
# WARNING: deletes everything under /kaggle/working, including the exported images and
# any files written there by earlier cells — run only when those are no longer needed
! rm -r /kaggle/working/*