In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
for dirname, _, filenames in os.walk('./digitalpathology'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

./digitalpathology/X_test.npy
./digitalpathology/X.npy
./digitalpathology/Y.npy


In [2]:
from fastai.vision.all import *

## Load data

In [3]:
path = Path('./digitalpathology')

In [4]:
trn_x = np.load(path/'X.npy')
trn_y = np.load(path/'Y.npy')
print(f'{trn_x.shape}, {trn_y.shape}')

(660, 168, 308), (660,)


In [5]:
trn_y_df = pd.DataFrame({'idx': np.arange(len(trn_y)), 'label': trn_y})
trn_y_df.head()

Unnamed: 0,idx,label
0,0,7
1,1,5
2,2,17
3,3,16
4,4,11


In [6]:
def get_x(row):
    idx = row['idx']
    img_pix = trn_x[idx]
    img_pix = np.stack([img_pix]*3, axis=-1)
    return PILImage.create(img_pix.astype(np.uint8))

## Experimenting with Preprocessing

I want to try different methods of resizing to see if performance increases. I'm going to create a function to create the model and train it to speed this trial-and-error process up.

In [7]:
def train(arch,item,accum=1,epochs=5,finetune=False):
    dblock = DataBlock(
        blocks=(ImageBlock, CategoryBlock),
        get_x=get_x,
        get_y=ColReader('label'),
        item_tfms=item)
    cbs = GradientAccumulation(64) if accum else []
    dls = dblock.dataloaders(trn_y_df, bs=64//accum)
    learn = vision_learner(dls, arch, metrics=accuracy, cbs=cbs).to_fp16()
    if finetune:
        learn.fine_tune(epochs)
    else:
        learn.unfreeze()
        learn.fit_one_cycle(epochs)

Also making a function to use to clear out GPU memory after every train run.

In [8]:
import gc
def clear_gpu():
    print(torch.cuda.list_gpu_processes())
    gc.collect()
    torch.cuda.empty_cache()

Going to retry ```padding``` for a baseline:

In [9]:
train(arch='convnext_tiny_in22k',
     item=Resize(224, method=ResizeMethod.Pad, pad_mode=PadMode.Zeros))
clear_gpu()

  model = create_fn(


model.safetensors:   0%|          | 0.00/178M [00:00<?, ?B/s]

  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()


epoch,train_loss,valid_loss,accuracy,time
0,4.167726,2.687897,0.204545,00:13
1,2.572411,0.815804,0.772727,00:12
2,1.722406,0.292323,0.916667,00:12
3,1.248955,0.238293,0.916667,00:12
4,0.934781,0.165983,0.954545,00:12


GPU:0
process       6728 uses     4810.000 MB GPU memory


Let's try ```method='squish'``` first:

In [10]:
train(arch='convnext_tiny_in22k', 
      item=Resize(224, method='squish'))
clear_gpu()

  model = create_fn(
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()


epoch,train_loss,valid_loss,accuracy,time
0,4.122965,3.203338,0.242424,00:12
1,2.740054,1.676455,0.575758,00:12
2,1.864566,0.377005,0.893939,00:12
3,1.354755,0.26341,0.939394,00:12
4,1.02774,0.233163,0.939394,00:12


GPU:0
process       6728 uses     4830.000 MB GPU memory


Alright, looks better than padding! Let's try crop next, I hypothesize this won't do as well as the other 2 since there will definitely be loss of some potentially important information.

In [11]:
train(arch='convnext_tiny_in22k', 
      item=Resize(224, method='crop'))
clear_gpu()

  model = create_fn(
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()


epoch,train_loss,valid_loss,accuracy,time
0,4.258249,2.394589,0.310606,00:12
1,2.834418,2.037032,0.5,00:12
2,1.982937,0.529543,0.840909,00:12
3,1.458999,0.248052,0.909091,00:12
4,1.116735,0.211637,0.954545,00:12


GPU:0
process       6728 uses     4830.000 MB GPU memory


### Trying image augmentation

In [12]:
def train(arch,item,accum=1,epochs=5,finetune=False):
    dblock = DataBlock(
        blocks=(ImageBlock, CategoryBlock),
        get_x=get_x,
        get_y=ColReader('label'),
        item_tfms=item,
        batch_tfms=aug_transforms(size=224, min_scale=0.75))
    cbs = GradientAccumulation(64) if accum else []
    dls = dblock.dataloaders(trn_y_df, bs=64//accum)
    learn = vision_learner(dls, arch, metrics=accuracy, cbs=cbs).to_fp16()
    if finetune:
        learn.fine_tune(epochs)
        return learn
    else:
        learn.unfreeze()
        learn.fit_one_cycle(epochs)

In [13]:
train(arch='convnext_tiny_in22k',
      item=Resize(224, method='squish'),
      accum=4)
clear_gpu()

  model = create_fn(
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()


epoch,train_loss,valid_loss,accuracy,time
0,4.183647,3.325473,0.121212,00:27
1,2.847507,1.275459,0.659091,00:27
2,1.931995,0.691297,0.80303,00:27
3,1.282322,0.278111,0.931818,00:27
4,0.941951,0.253633,0.954545,00:27


GPU:0
process       6728 uses     1932.000 MB GPU memory


## Trying different models

Okay, let's carry on with ```method='squish'```. Let's try some different models now, I'll start with a ViT model:

In [14]:
train(arch='vit_small_r26_s32_224',
     item=Resize(224, method='squish'),
     accum=4)
clear_gpu()

model.safetensors:   0%|          | 0.00/146M [00:00<?, ?B/s]

  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()


epoch,train_loss,valid_loss,accuracy,time
0,2.891594,1.498537,0.507576,00:05
1,1.704231,0.847576,0.712121,00:04
2,1.145172,0.565103,0.810606,00:04
3,0.763625,0.303434,0.893939,00:04
4,0.538583,0.238627,0.916667,00:04


GPU:0
process       6728 uses     2288.000 MB GPU memory


In [15]:
train(arch='convnext_large_in22k',
     item=Resize(224, method='squish'),
     accum=4)
clear_gpu()

  model = create_fn(


model.safetensors:   0%|          | 0.00/919M [00:00<?, ?B/s]

  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()


epoch,train_loss,valid_loss,accuracy,time
0,4.325794,2.35068,0.287879,01:40
1,3.254385,4.152997,0.151515,01:41
2,2.478571,0.907315,0.734848,01:41
3,1.670405,0.483979,0.840909,01:41
4,1.180059,0.271072,0.924242,01:41


GPU:0
process       6728 uses     7330.000 MB GPU memory


## Final model

The final model is the ```vit_small_r26_s32_224``` with ```method='squish'``` and batch image augmentation since it shows the highest accuracy score from our experiments. For a final boost in performance, we'll use test time augmentation (TTA).

In [16]:
learn = train(arch='vit_small_r26_s32_224',
     item=Resize(224, method='squish'),
     accum=4, epochs=10, finetune=True)
clear_gpu()

  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()


epoch,train_loss,valid_loss,accuracy,time
0,3.926334,1.581833,0.545455,00:02


epoch,train_loss,valid_loss,accuracy,time
0,2.284302,1.193364,0.704545,00:04
1,1.878806,0.709234,0.818182,00:04
2,1.47343,0.401274,0.886364,00:04
3,1.075063,0.257678,0.931818,00:04
4,0.791528,0.179757,0.962121,00:04
5,0.577034,0.164639,0.962121,00:04
6,0.435062,0.153967,0.962121,00:04
7,0.380733,0.143904,0.954545,00:04
8,0.341373,0.130304,0.962121,00:04
9,0.324916,0.131209,0.962121,00:04


GPU:0
process       6728 uses     2292.000 MB GPU memory


## Making the submission

In [17]:
def sub(n):
    tst_x = np.load(path/'X_test.npy')
    tst_dl = learn.dls.test_dl(tst_x)
    probs,_ = learn.tta(dl=tst_dl)
    idxs = np.argmax(probs, axis=1)
    preds = pd.Series(learn.dls.vocab[idxs], name='idxs')
    subm = pd.DataFrame({'Id': np.arange(300), 'Class': preds})
    subm.to_csv(f'sub{n}.csv', index=False)

In [18]:
sub(2)

In [19]:
!head sub2.csv

Id,Class
0,4
1,16
2,2
3,18
4,11
5,5
6,6
7,4
8,8
