In [1]:
! pip install -Uq fastai fastcore fastkaggle kaggle pynvml timm==0.6.2.dev0

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.1/53.1 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m497.9/497.9 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m44.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
!mkdir ~/.kaggle
!cp drive/MyDrive/Colab\ Notebooks/kaggle.json ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json

In [3]:
from fastkaggle import *
from fastcore.parallel import *
from fastai.vision.all import *

In [4]:
comp = 'paddy-disease-classification'
path = setup_comp(comp, install='fastai "timm==0.6.2.dev0"')


# trn_path = path / 'train_images'

Downloading paddy-disease-classification.zip to /content


100%|██████████| 1.02G/1.02G [00:30<00:00, 35.9MB/s]





In [17]:
trn_path = path/'sml'

In [18]:
resize_images(path/'train_images', dest=trn_path, max_size=256, recurse=True)

In [None]:
df = pd.read_csv(path / 'train.csv', index_col='image_id')
df.head()

In [6]:
df.loc['100330.jpg', 'variety']

'ADT45'

In [7]:
def get_variety(p): return df.loc[p.name, 'variety']

In [8]:
dls = DataBlock(
    blocks=(ImageBlock, CategoryBlock, CategoryBlock),
    n_inp=1,
    get_items=get_image_files,
    get_y=[parent_label, get_variety],
    splitter=RandomSplitter(0.2),
    item_tfms=Resize(192, method=ResizeMethod.Squish),
    batch_tfms=aug_transforms(size=128, min_scale=0.75),
).dataloaders(trn_path)

In [None]:
dls.show_batch(max_n=6)

In [15]:
def disease_loss(inp, disease, variety): return F.cross_entropy(inp[:, :10], disease)
def variety_loss(inp, disease, variety): return F.cross_entropy(inp[:, 10:], variety)
def overall_loss(inp, disease, variety): return disease_loss(inp, disease, variety) + variety_loss(inp, disease, variety)
def disease_err(inp, disease, variety): return error_rate(inp[:,:10], disease)
def variety_err(inp, disease, variety): return error_rate(inp[:,10:], variety)

err_metrics = (disease_err, variety_err)
all_metrics = err_metrics + (disease_loss, variety_loss)

def train_mult(arch,
               item=Resize(192, method=ResizeMethod.Squish),
               batch=aug_transforms(size=128, min_scale=0.75),
               epochs=5, lr=0.01):
    dls = DataBlock(
        blocks=(ImageBlock, CategoryBlock, CategoryBlock),
        n_inp=1,
        get_items=get_image_files,
        get_y=[parent_label, get_variety],
        splitter=RandomSplitter(0.2),
        item_tfms=item,
        batch_tfms=batch,
    ).dataloaders(trn_path)

    learn = vision_learner(dls, arch, n_out=20, loss_func=overall_loss, metrics=all_metrics).to_fp16()
    learn.fine_tune(epochs, lr)
    return learn

In [16]:
arch = 'convnext_small_in22k'
learn = train_mult(arch)



epoch,train_loss,valid_loss,disease_err,variety_err,disease_loss,variety_loss,time


KeyboardInterrupt: 

In [None]:
def train(arch, size, item=Resize(480, method='squish'), accum=1, finetune=True, epochs=12):
    dls = ImageDataLoaders.from_folder(trn_path, valid_pct=0.2, item_tfms=item,
        batch_tfms=aug_transforms(size=size, min_scale=0.75), bs=64//accum)
    cbs = GradientAccumulation(64) if accum else []
    learn = vision_learner(dls, arch, metrics=error_rate, cbs=cbs).to_fp16()
    if finetune:
        learn.fine_tune(epochs, 0.01)
        return learn.tta(dl=dls.test_dl(tst_files))
    else:
        learn.unfreeze()
        learn.fit_one_cycle(epochs, 0.01)

In [None]:
train('convnext_small_in22k', 128, epochs=1, accum=1, finetune=False)

Downloading: "https://dl.fbaipublicfiles.com/convnext/convnext_small_22k_224.pth" to /root/.cache/torch/hub/checkpoints/convnext_small_22k_224.pth


epoch,train_loss,valid_loss,error_rate,time
0,0.0,0.0,0.0,00:03


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return F.conv2d(input, weight, bias, self.stride,


In [None]:
import gc
def report_gpu():
    print(torch.cuda.list_gpu_processes())
    gc.collect()
    torch.cuda.empty_cache()

In [None]:
report_gpu()

GPU:0
process     105190 uses     3436.000 MB GPU memory


In [None]:
train('convnext_small_in22k', 128, epochs=1, accum=2, finetune=False)
report_gpu()

epoch,train_loss,valid_loss,error_rate,time
0,0.0,0.0,0.0,00:02


GPU:0
process     105190 uses     2340.000 MB GPU memory


In [None]:
train('convnext_small_in22k', 128, epochs=1, accum=4, finetune=False)
report_gpu()

epoch,train_loss,valid_loss,error_rate,time
0,0.0,0.0,0.0,00:03


GPU:0
process     105190 uses     1800.000 MB GPU memory


In [None]:
train('convnext_large_in22k', 224, epochs=1, accum=2, finetune=False)
report_gpu()

Downloading: "https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_224.pth" to /root/.cache/torch/hub/checkpoints/convnext_large_22k_224.pth


epoch,train_loss,valid_loss,error_rate,time
0,0.0,0.0,0.0,00:05


GPU:0
process     105190 uses    10458.000 MB GPU memory


In [None]:
train('convnext_large_in22k', (320,240), epochs=1, accum=2, finetune=False)
report_gpu()

epoch,train_loss,valid_loss,error_rate,time
0,0.0,0.0,0.0,00:06


GPU:0
process     105190 uses    13874.000 MB GPU memory


In [None]:
train('vit_large_patch16_224', 224, epochs=1, accum=2, finetune=False)
report_gpu()

epoch,train_loss,valid_loss,error_rate,time
0,0.0,0.0,0.0,00:06


GPU:0
process     105190 uses    14500.000 MB GPU memory


In [None]:
train('swinv2_large_window12_192_22k', 192, epochs=1, accum=2, finetune=False)
report_gpu()

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Downloading: "https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_large_patch4_window12_192_22k.pth" to /root/.cache/torch/hub/checkpoints/swinv2_large_patch4_window12_192_22k.pth


epoch,train_loss,valid_loss,error_rate,time
0,0.0,0.0,0.0,00:07


GPU:0
process     105190 uses    12636.000 MB GPU memory


In [None]:
train('swin_large_patch4_window7_224', 224, epochs=1, accum=2, finetune=False)
report_gpu()

Downloading: "https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window7_224_22kto1k.pth" to /root/.cache/torch/hub/checkpoints/swin_large_patch4_window7_224_22kto1k.pth


epoch,train_loss,valid_loss,error_rate,time
0,0.0,0.0,0.0,00:05


GPU:0
process     105190 uses    11088.000 MB GPU memory


In [None]:
report_gpu()

GPU:0
process       6824 uses    22686.000 MB GPU memory


In [None]:
res = 640, 480

In [None]:
models = {
    'convnext_large_in22k': {
        (Resize(res), 224),
        (Resize(res), (320, 224)),
    },
    'vit_large_patch16_224': {
        (Resize(480, method='squish'), 224),
        (Resize(res), 224),
    },
    'swinv2_large_window12_192_22k': {
        (Resize(480, method='squish'), 192),
        (Resize(res), 192),
    },
    'swin_large_patch4_window7_224': {
        (Resize(480, method='squish'), 224),
        (Resize(res), 224),
    }
}

In [None]:
trn_path = path / 'train_images'

In [None]:
tta_res = []

for arch, details in models.items():
    for item, size in details:
        print('---', arch)
        print(size)
        print(item.name)
        tta_res.append(train(arch, size, item=item, accum=2))
        gc.collect()
        torch.cuda.empty_cache()

--- convnext_large_in22k
(320, 224)
Resize -- {'size': (480, 640), 'method': 'crop', 'pad_mode': 'reflection', 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'p': 1.0}


epoch,train_loss,valid_loss,error_rate,time
0,0.880011,0.504282,0.163383,02:01


  return F.conv2d(input, weight, bias, self.stride,


epoch,train_loss,valid_loss,error_rate,time
0,0.363856,0.194391,0.061989,02:45
1,0.308435,0.180339,0.057184,02:46
2,0.34073,0.203939,0.056704,02:47
3,0.223595,0.192558,0.05334,02:48
4,0.170802,0.132381,0.037963,02:47
5,0.158021,0.127747,0.033157,02:47
6,0.110758,0.131056,0.035079,02:47
7,0.077441,0.106522,0.024507,02:46
8,0.036413,0.08738,0.020183,02:47
9,0.02455,0.087451,0.021144,02:46


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


--- convnext_large_in22k
224
Resize -- {'size': (480, 640), 'method': 'crop', 'pad_mode': 'reflection', 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'p': 1.0}


epoch,train_loss,valid_loss,error_rate,time
0,0.772966,0.427369,0.136953,01:31


epoch,train_loss,valid_loss,error_rate,time
0,0.359564,0.217657,0.069678,02:01
1,0.311512,0.222035,0.063431,02:01
2,0.310884,0.193026,0.049976,02:02
3,0.202883,0.197208,0.04469,02:01
4,0.193838,0.183965,0.047093,02:01
5,0.15075,0.176026,0.032677,02:01
6,0.090591,0.14978,0.029793,02:02
7,0.079848,0.142289,0.028832,02:01
8,0.05412,0.136685,0.025949,02:01
9,0.034761,0.12754,0.024027,02:01


--- vit_large_patch16_224
224
Resize -- {'size': (480, 640), 'method': 'crop', 'pad_mode': 'reflection', 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'p': 1.0}


epoch,train_loss,valid_loss,error_rate,time
0,0.947546,0.540665,0.170111,02:15


epoch,train_loss,valid_loss,error_rate,time
0,0.408457,0.263263,0.08025,02:53
1,0.369028,0.323277,0.095627,02:53
2,0.377645,0.281253,0.081211,02:53
3,0.284341,0.241293,0.067275,02:53
4,0.205599,0.19705,0.052859,02:53
5,0.169238,0.184683,0.046612,02:53
6,0.117077,0.165064,0.03556,02:53
7,0.095942,0.130665,0.032196,02:53
8,0.0569,0.121188,0.028352,02:52
9,0.038753,0.099885,0.024027,02:52


--- vit_large_patch16_224
224
Resize -- {'size': (480, 480), 'method': 'squish', 'pad_mode': 'reflection', 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'p': 1.0}


epoch,train_loss,valid_loss,error_rate,time
0,0.94732,0.589339,0.184527,02:13


epoch,train_loss,valid_loss,error_rate,time
0,0.376395,0.289468,0.084094,02:51
1,0.320674,0.257832,0.08938,02:51
2,0.293757,0.321958,0.086497,02:51
3,0.32735,0.335567,0.093224,02:51
4,0.202855,0.24595,0.068717,02:51
5,0.164421,0.19128,0.047093,02:51
6,0.111686,0.167591,0.037001,02:51
7,0.078822,0.126575,0.027871,02:51
8,0.058033,0.129514,0.029793,02:51
9,0.035588,0.109157,0.020663,02:50


--- swinv2_large_window12_192_22k
192
Resize -- {'size': (480, 480), 'method': 'squish', 'pad_mode': 'reflection', 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'p': 1.0}


epoch,train_loss,valid_loss,error_rate,time
0,0.964144,0.549022,0.176838,02:20


epoch,train_loss,valid_loss,error_rate,time
0,0.378806,0.221341,0.072561,02:52
1,0.316831,0.286819,0.085536,02:52
2,0.354232,0.293207,0.090341,02:52
3,0.268863,0.200127,0.050457,02:52
4,0.200348,0.320595,0.08025,02:52
5,0.163566,0.170629,0.040365,02:52
6,0.125938,0.129287,0.02643,02:52
7,0.081286,0.104425,0.022105,02:52
8,0.051623,0.107196,0.027871,02:52
9,0.037482,0.111372,0.024507,02:52


--- swinv2_large_window12_192_22k
192
Resize -- {'size': (480, 640), 'method': 'crop', 'pad_mode': 'reflection', 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'p': 1.0}


epoch,train_loss,valid_loss,error_rate,time
0,0.880961,0.535879,0.159539,02:22


epoch,train_loss,valid_loss,error_rate,time
0,0.437262,0.211153,0.067275,02:54
1,0.378411,0.220596,0.061509,02:54
2,0.327394,0.325934,0.099952,02:54
3,0.285825,0.197357,0.058145,02:54
4,0.199255,0.188407,0.052379,02:54
5,0.195623,0.199187,0.05334,02:54
6,0.151424,0.116158,0.030754,02:54
7,0.089628,0.083027,0.021624,02:54
8,0.077002,0.073261,0.022105,02:54
9,0.045659,0.075083,0.023066,02:54


--- swin_large_patch4_window7_224
224
Resize -- {'size': (480, 480), 'method': 'squish', 'pad_mode': 'reflection', 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'p': 1.0}


epoch,train_loss,valid_loss,error_rate,time
0,0.997538,0.53017,0.174435,01:53


epoch,train_loss,valid_loss,error_rate,time
0,0.40467,0.223727,0.072081,02:15
1,0.362198,0.222582,0.073042,02:15
2,0.355479,0.21387,0.068717,02:16
3,0.245066,0.177128,0.055262,02:16
4,0.212279,0.121607,0.037482,02:16
5,0.185667,0.115063,0.031716,02:16
6,0.109373,0.124952,0.029793,02:16
7,0.102536,0.098324,0.02643,02:16
8,0.069155,0.084246,0.022585,02:16
9,0.051036,0.07523,0.016819,02:16


--- swin_large_patch4_window7_224
224
Resize -- {'size': (480, 640), 'method': 'crop', 'pad_mode': 'reflection', 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'p': 1.0}


epoch,train_loss,valid_loss,error_rate,time
0,0.990647,0.570013,0.174916,01:55


epoch,train_loss,valid_loss,error_rate,time
0,0.452704,0.22068,0.073042,02:18
1,0.350602,0.202,0.068236,02:18
2,0.295686,0.256219,0.079769,02:18
3,0.268286,0.223318,0.060548,02:19
4,0.201217,0.134035,0.042768,02:19
5,0.203592,0.117535,0.037001,02:18
6,0.125147,0.104489,0.031235,02:18
7,0.104456,0.100258,0.028832,02:18
8,0.068913,0.094596,0.024988,02:18
9,0.071034,0.084696,0.020663,02:18


epoch,train_loss,valid_loss,error_rate,time
0,0.452704,0.22068,0.073042,02:18
1,0.350602,0.202,0.068236,02:18
2,0.295686,0.256219,0.079769,02:18
3,0.268286,0.223318,0.060548,02:19
4,0.201217,0.134035,0.042768,02:19
5,0.203592,0.117535,0.037001,02:18
6,0.125147,0.104489,0.031235,02:18
7,0.104456,0.100258,0.028832,02:18
8,0.068913,0.094596,0.024988,02:18
9,0.071034,0.084696,0.020663,02:18


In [None]:
save_pickle('tta_res.pkl', tta_res)
! mv tta_res.pkl drive/MyDrive/Colab\ Notebooks/tta_res.pkl

In [None]:
?zip

In [None]:
tta_prs = first(zip(*tta_res))

In [None]:
tta_prs += tta_prs[2:4]

In [None]:
pd.DataFrame(tta_prs[0])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,3.681251e-08,6.775891e-08,3.456536e-09,6.425152e-09,4.369403e-08,1.990758e-08,3.940333e-09,9.999998e-01,1.892764e-08,8.632799e-10
1,1.966131e-07,1.570395e-08,1.856091e-09,3.265740e-06,5.817445e-09,1.434195e-08,5.423806e-08,2.062097e-08,9.999965e-01,1.904785e-09
2,1.057718e-08,7.335309e-09,6.222856e-07,9.986652e-01,8.029732e-05,2.944698e-08,1.220030e-03,2.728224e-05,5.743574e-06,8.321451e-07
3,2.337370e-05,4.808617e-07,7.532994e-08,9.999750e-01,2.778676e-08,1.039749e-08,8.532621e-07,1.241216e-07,4.518833e-11,3.589503e-08
4,3.295633e-07,6.889928e-11,3.452145e-07,9.999968e-01,1.094438e-07,1.575185e-10,1.098524e-06,2.217889e-07,1.054176e-06,4.064959e-08
...,...,...,...,...,...,...,...,...,...,...
3464,5.046640e-13,5.954810e-11,4.708323e-12,6.497798e-14,1.134480e-13,1.000000e+00,2.908437e-11,4.632184e-11,4.090151e-11,3.810399e-12
3465,5.904102e-08,7.818710e-10,9.307993e-10,2.489662e-07,1.750527e-14,9.293145e-10,1.052688e-09,9.999693e-01,3.039314e-05,1.143028e-10
3466,4.182481e-09,7.347811e-09,9.792267e-10,8.278795e-09,1.192257e-08,8.901745e-08,1.138290e-10,3.175519e-08,9.999998e-01,1.704914e-10
3467,4.577951e-08,9.998837e-01,1.662535e-06,1.330796e-06,1.120418e-05,7.748718e-08,7.905159e-05,1.411734e-05,1.836856e-06,7.038710e-06


In [None]:
tta_prs[0].shape

torch.Size([3469, 10])

In [None]:
torch.stack(tta_prs).shape

torch.Size([10, 3469, 10])

In [None]:
avg_pr = torch.stack(tta_prs).mean(0)
avg_pr.shape

torch.Size([3469, 10])

In [None]:
dls = ImageDataLoaders.from_folder(trn_path, valid_pct=0.2, item_tfms=Resize(480, method='squish'),
                                   batch_tfms=aug_transforms(size=224, min_scale=0.75))

In [None]:
idxs = avg_pr.argmax(dim=1)
ss = pd.read_csv(path/'sample_submission.csv')
ss['label'] = dls.vocab[idxs]
ss.to_csv('subm.csv', index=False)

In [None]:
!head subm.csv

image_id,label
200001.jpg,hispa
200002.jpg,normal
200003.jpg,blast
200004.jpg,blast
200005.jpg,blast
200006.jpg,brown_spot
200007.jpg,dead_heart
200008.jpg,brown_spot
200009.jpg,hispa


In [None]:
from kaggle import api
api.competition_submit_cli('subm.csv', 'big ensemble tta double vit', comp)

100%|██████████| 70.5k/70.5k [00:00<00:00, 78.8kB/s]


Successfully submitted to Paddy Doctor: Paddy Disease Classification