> 1. The butterfly dataset is larger; check how the training approach works on it.
> 2. Stop training when validation accuracy is no longer improving.
> 3. Use incrementally stronger training data augmentation (random flip/jitter/crop/rotation), stepped up based on validation loss/accuracy.
> 4. Compare resnet-152 vs resnet-50.

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [4]:
import mynnlib
from mynnlib import *

# Root directory of the butterfly dataset; the data, splits, checkpoint and
# test paths used by the cells below are all built from this value.
dataset_dir = "insect-dataset/butterfly"

In [5]:
# Count adult vs. early-stage classes under "<dataset_dir>/data".
# Each class is a sub-directory; a directory whose name ends in "-early" is an
# early-stage class, every other directory is an adult class.
adult_class_cnt = 0
early_class_cnt = 0
adult_data_dir = f"{dataset_dir}/adult-data"

# Delete the "adult-data" directory if it exists (nothing in this cell
# recreates it).
if os.path.exists(adult_data_dir):
    shutil.rmtree(adult_data_dir)

for species_dir in Path(f"{dataset_dir}/data").iterdir():
    # Only directories are classes. Without this guard any stray file in the
    # data folder would fall into the "adult" branch and inflate that count.
    if not species_dir.is_dir():
        continue
    if species_dir.name.endswith('-early'):
        early_class_cnt += 1
    else:
        adult_class_cnt += 1

print(f"Adult class count: {adult_class_cnt}")
print(f"Early class count: {early_class_cnt}")

Adult class count: 1125
Early class count: 429


-------------
# A. Train with subset of data (resnet-152)

In [6]:
# Split the data folder into test / val / train folders under splits/.
# Only a subset of classes is used: class_name_filter_regex presumably keeps
# class names starting with "a" or "b" (246 classes per the recorded output).
split_data_for_train_and_val(
    f"{dataset_dir}/data",
    f"{dataset_dir}/splits/test",
    f"{dataset_dir}/splits/val",
    f"{dataset_dir}/splits/train",
    test_data_weight=0.1,
    val_data_weight=0.2,
    min_file_cnt_for_val=4,
    class_name_filter_regex=r"^[a-b].*$",
)

Class count: 246
Total data count: 10155
Training data count: 7165
Validation data count: 1942
Test data count: 1048


In [10]:
# Create a fresh resnet152 model bundle for the subset splits.
# robustness is presumably the data-augmentation strength (TODO confirm in
# mynnlib); this experiment starts low (0.2) and raises it in later cells.
model_data = init_model_for_training(
    f'{dataset_dir}/splits/train',
    f'{dataset_dir}/splits/val',
    batch_size=32,
    arch="resnet152",
    image_size=224,
    robustness=0.2,
)

train class count: 246
val class count: 212
feature count: 2048
device: cuda:0


In [11]:
# First pass: at most 2 epochs (second argument); the third argument is the
# checkpoint path used throughout the subset experiment.
train(
    model_data,
    2,
    f"{dataset_dir}/checkpoint.butterfly.subset.pth",
)

Epoch    1 /    2  | Train Loss: 3.2710 Acc: 0.2749  | Val Loss: 2.6863 Acc: 0.3641  | Elapsed time: 0:01:59.682746
Epoch    2 /    2  | Train Loss: 1.4196 Acc: 0.6127  | Val Loss: 1.5306 Acc: 0.5917  | Elapsed time: 0:04:05.166557


In [14]:
# Re-prepare the existing model on the same splits with robustness=0.5
# (up from the 0.2 used at initialisation).
model_data = prepare_for_retraining(
    model_data,
    f'{dataset_dir}/splits/train',
    f'{dataset_dir}/splits/val',
    batch_size=32,
    image_size=224,
    robustness=0.5,
)

train class count: 246
val class count: 212
0 new classes added: []
feature count: 2048
device: cuda:0


In [15]:
# Up to 3 epochs. break_at_val_acc_diff is presumably an early-stop threshold
# on validation accuracy (TODO confirm exact rule in mynnlib.train).
train(
    model_data,
    3,
    f"{dataset_dir}/checkpoint.butterfly.subset.pth",
    break_at_val_acc_diff=0.05,
)

Epoch    1 /    3  | Train Loss: 2.0365 Acc: 0.4843  | Val Loss: 2.1198 Acc: 0.4598  | Elapsed time: 0:02:09.879565
Epoch    2 /    3  | Train Loss: 1.6094 Acc: 0.5786  | Val Loss: 1.5173 Acc: 0.5984  | Elapsed time: 0:04:20.903457
Epoch    3 /    3  | Train Loss: 1.4076 Acc: 0.6186  | Val Loss: 1.3476 Acc: 0.6473  | Elapsed time: 0:06:32.295907


In [16]:
# Next augmentation step for the subset run: robustness=1.0.
model_data = prepare_for_retraining(
    model_data,
    f'{dataset_dir}/splits/train',
    f'{dataset_dir}/splits/val',
    batch_size=32,
    image_size=224,
    robustness=1.0,
)

train class count: 246
val class count: 212
0 new classes added: []
feature count: 2048
device: cuda:0


In [17]:
# Up to 5 epochs with break_at_val_acc_diff=0.05.
train(
    model_data,
    5,
    f"{dataset_dir}/checkpoint.butterfly.subset.pth",
    break_at_val_acc_diff=0.05,
)

Epoch    1 /    5  | Train Loss: 1.3403 Acc: 0.6374  | Val Loss: 1.5668 Acc: 0.5850  | Elapsed time: 0:02:13.577118
Epoch    2 /    5  | Train Loss: 1.2182 Acc: 0.6705  | Val Loss: 1.3661 Acc: 0.6370  | Elapsed time: 0:04:31.595314
Epoch    3 /    5  | Train Loss: 0.9580 Acc: 0.7421  | Val Loss: 1.0350 Acc: 0.7276  | Elapsed time: 0:06:44.813407
Epoch    4 /    5  | Train Loss: 0.8063 Acc: 0.7838  | Val Loss: 1.0002 Acc: 0.7364  | Elapsed time: 0:08:57.556221


In [18]:
# Repeat of the previous training call (same epoch budget and threshold),
# continuing from the current model_data.
train(
    model_data,
    5,
    f"{dataset_dir}/checkpoint.butterfly.subset.pth",
    break_at_val_acc_diff=0.05,
)

Epoch    1 /    5  | Train Loss: 0.7660 Acc: 0.7946  | Val Loss: 1.0211 Acc: 0.7327  | Elapsed time: 0:02:11.187269
Epoch    2 /    5  | Train Loss: 0.7135 Acc: 0.8039  | Val Loss: 1.0096 Acc: 0.7400  | Elapsed time: 0:04:24.643225


In [19]:
# Same epoch budget, but with the threshold tightened to 0.01.
train(
    model_data,
    5,
    f"{dataset_dir}/checkpoint.butterfly.subset.pth",
    break_at_val_acc_diff=0.01,
)

Epoch    1 /    5  | Train Loss: 0.6945 Acc: 0.8088  | Val Loss: 1.0044 Acc: 0.7420  | Elapsed time: 0:02:08.412037
Epoch    2 /    5  | Train Loss: 0.6495 Acc: 0.8233  | Val Loss: 0.9854 Acc: 0.7518  | Elapsed time: 0:04:19.903302
Epoch    3 /    5  | Train Loss: 0.6396 Acc: 0.8314  | Val Loss: 0.9098 Acc: 0.7642  | Elapsed time: 0:06:33.198182
Epoch    4 /    5  | Train Loss: 0.5850 Acc: 0.8442  | Val Loss: 0.9160 Acc: 0.7616  | Elapsed time: 0:08:45.635875
Epoch    5 /    5  | Train Loss: 0.5944 Acc: 0.8447  | Val Loss: 0.9069 Acc: 0.7698  | Elapsed time: 0:10:59.092743


In [20]:
# Final augmentation step for the subset run: robustness=2.0.
model_data = prepare_for_retraining(
    model_data,
    f'{dataset_dir}/splits/train',
    f'{dataset_dir}/splits/val',
    batch_size=32,
    image_size=224,
    robustness=2.0,
)

train class count: 246
val class count: 212
0 new classes added: []
feature count: 2048
device: cuda:0


In [21]:
# Up to 5 epochs with break_at_val_acc_diff=0.01.
train(
    model_data,
    5,
    f"{dataset_dir}/checkpoint.butterfly.subset.pth",
    break_at_val_acc_diff=0.01,
)

Epoch    1 /    5  | Train Loss: 0.6543 Acc: 0.8303  | Val Loss: 0.9278 Acc: 0.7667  | Elapsed time: 0:02:07.032418
Epoch    2 /    5  | Train Loss: 0.6352 Acc: 0.8331  | Val Loss: 0.9320 Acc: 0.7657  | Elapsed time: 0:04:19.193135


In [22]:
# Last subset run for resnet-152: threshold tightened further to 0.005.
train(
    model_data,
    5,
    f"{dataset_dir}/checkpoint.butterfly.subset.pth",
    break_at_val_acc_diff=0.005,
)

Epoch    1 /    5  | Train Loss: 0.6592 Acc: 0.8262  | Val Loss: 0.9219 Acc: 0.7667  | Elapsed time: 0:02:07.975810
Epoch    2 /    5  | Train Loss: 0.6441 Acc: 0.8251  | Val Loss: 0.9200 Acc: 0.7714  | Elapsed time: 0:04:20.413373


--------------
# B. Train with subset of data (resnet-50)

In [49]:
# Rebuild the test / val / train splits for the resnet-50 experiment, using
# the same arguments as the resnet-152 subset experiment.
# NOTE(review): the recorded train/val/test counts differ from the section A
# split despite identical arguments, so the split looks non-deterministic and
# the two architectures are not compared on the same data - confirm, and
# consider reusing one split for both.
split_data_for_train_and_val(
    f"{dataset_dir}/data",
    f"{dataset_dir}/splits/test",
    f"{dataset_dir}/splits/val",
    f"{dataset_dir}/splits/train",
    test_data_weight=0.1,
    val_data_weight=0.2,
    min_file_cnt_for_val=4,
    class_name_filter_regex=r"^[a-b].*$",
)

Class count: 246
Total data count: 10155
Training data count: 7060
Validation data count: 2068
Test data count: 1027


In [53]:
# Create a fresh resnet50 model bundle on the subset splits; apart from arch,
# the arguments match the resnet152 initialisation in section A.
model_data = init_model_for_training(
    f'{dataset_dir}/splits/train',
    f'{dataset_dir}/splits/val',
    batch_size=32,
    arch="resnet50",
    image_size=224,
    robustness=0.2,
)

train class count: 246
val class count: 211


Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to C:\Users\User/.cache\torch\hub\checkpoints\resnet50-11ad3fa6.pth
100%|█████████████████████████████████████████████████████████████████████████████| 97.8M/97.8M [00:15<00:00, 6.52MB/s]


feature count: 2048
device: cuda:0


In [54]:
# Up to 5 epochs with break_at_val_acc_diff=0.1.
# NOTE(review): this is the same checkpoint path the resnet-152 subset run
# used, so it presumably overwrites that checkpoint - confirm, or switch to an
# architecture-specific file name.
train(
    model_data,
    5,
    f"{dataset_dir}/checkpoint.butterfly.subset.pth",
    break_at_val_acc_diff=0.1,
)

Epoch    1 /    5  | Train Loss: 3.1341 Acc: 0.3255  | Val Loss: 1.9509 Acc: 0.5058  | Elapsed time: 0:01:24.441603
Epoch    2 /    5  | Train Loss: 1.2130 Acc: 0.6697  | Val Loss: 1.0899 Acc: 0.7070  | Elapsed time: 0:02:51.610204
Epoch    3 /    5  | Train Loss: 0.5965 Acc: 0.8258  | Val Loss: 0.8393 Acc: 0.7664  | Elapsed time: 0:04:19.597776


In [55]:
# resnet-50 run, augmentation step: robustness=0.5.
model_data = prepare_for_retraining(
    model_data,
    f'{dataset_dir}/splits/train',
    f'{dataset_dir}/splits/val',
    batch_size=32,
    image_size=224,
    robustness=0.5,
)

train class count: 246
val class count: 211
0 new classes added: []
feature count: 2048
device: cuda:0


In [56]:
# Up to 5 epochs with break_at_val_acc_diff=0.05.
train(
    model_data,
    5,
    f"{dataset_dir}/checkpoint.butterfly.subset.pth",
    break_at_val_acc_diff=0.05,
)

Epoch    1 /    5  | Train Loss: 2.1215 Acc: 0.4670  | Val Loss: 1.9246 Acc: 0.5227  | Elapsed time: 0:01:26.977625
Epoch    2 /    5  | Train Loss: 1.5911 Acc: 0.5795  | Val Loss: 1.7705 Acc: 0.5271  | Elapsed time: 0:02:56.156365


In [57]:
# resnet-50 run, augmentation step: robustness=1.0.
model_data = prepare_for_retraining(
    model_data,
    f'{dataset_dir}/splits/train',
    f'{dataset_dir}/splits/val',
    batch_size=32,
    image_size=224,
    robustness=1.0,
)

train class count: 246
val class count: 211
0 new classes added: []
feature count: 2048
device: cuda:0


In [58]:
# Up to 5 epochs at the new robustness level, threshold 0.05.
train(
    model_data,
    5,
    f"{dataset_dir}/checkpoint.butterfly.subset.pth",
    break_at_val_acc_diff=0.05,
)

Epoch    1 /    5  | Train Loss: 1.3915 Acc: 0.6283  | Val Loss: 2.4848 Acc: 0.4294  | Elapsed time: 0:01:27.321724
Epoch    2 /    5  | Train Loss: 1.2957 Acc: 0.6483  | Val Loss: 1.5393 Acc: 0.6132  | Elapsed time: 0:02:56.395142
Epoch    3 /    5  | Train Loss: 0.9987 Acc: 0.7288  | Val Loss: 1.2396 Acc: 0.6678  | Elapsed time: 0:04:26.708205
Epoch    4 /    5  | Train Loss: 0.8762 Acc: 0.7646  | Val Loss: 1.1141 Acc: 0.7026  | Elapsed time: 0:05:57.352432


In [59]:
# resnet-50 run, final augmentation step: robustness=2.0.
model_data = prepare_for_retraining(
    model_data,
    f'{dataset_dir}/splits/train',
    f'{dataset_dir}/splits/val',
    batch_size=32,
    image_size=224,
    robustness=2.0,
)

train class count: 246
val class count: 211
0 new classes added: []
feature count: 2048
device: cuda:0


In [60]:
# Up to 5 epochs with break_at_val_acc_diff=0.02.
train(
    model_data,
    5,
    f"{dataset_dir}/checkpoint.butterfly.subset.pth",
    break_at_val_acc_diff=0.02,
)

Epoch    1 /    5  | Train Loss: 0.9439 Acc: 0.7428  | Val Loss: 1.1516 Acc: 0.6963  | Elapsed time: 0:01:28.091775
Epoch    2 /    5  | Train Loss: 0.8551 Acc: 0.7670  | Val Loss: 1.1309 Acc: 0.7041  | Elapsed time: 0:02:58.201346


In [61]:
# Last subset run for resnet-50: threshold tightened to 0.005.
train(
    model_data,
    5,
    f"{dataset_dir}/checkpoint.butterfly.subset.pth",
    break_at_val_acc_diff=0.005,
)

Epoch    1 /    5  | Train Loss: 0.8206 Acc: 0.7742  | Val Loss: 1.1143 Acc: 0.7031  | Elapsed time: 0:01:28.401115
Epoch    2 /    5  | Train Loss: 0.7980 Acc: 0.7800  | Val Loss: 1.0899 Acc: 0.7099  | Elapsed time: 0:02:58.540635
Epoch    3 /    5  | Train Loss: 0.7585 Acc: 0.7929  | Val Loss: 1.1033 Acc: 0.7079  | Elapsed time: 0:04:28.685567


----------------
# C. Train with all data (resnet-152)

In [23]:
# Full-data run: train on the whole data/ folder and validate against the
# separate val/ folder (not the splits/ folders used by the subset runs).
# NOTE(review): the recorded outputs report 27 val classes here, then 36 and
# 40 in later cells, against 1554 train classes. Validation accuracy therefore
# covers a small and changing set of classes - treat it as indicative only.
model_data = init_model_for_training(
    f'{dataset_dir}/data',
    f'{dataset_dir}/val',
    batch_size=32,
    arch="resnet152",
    image_size=224,
    robustness=0.2,
)

train class count: 1554
val class count: 27
feature count: 2048
device: cuda:0


In [24]:
# First pass on all data: at most 2 epochs; the third argument is the
# checkpoint path used throughout the full-data experiment.
train(
    model_data,
    2,
    f"{dataset_dir}/checkpoint.butterfly.all.pth",
)

Epoch    1 /    2  | Train Loss: 3.6304 Acc: 0.3278  | Val Loss: 2.8959 Acc: 0.2857  | Elapsed time: 0:17:09.740284
Epoch    2 /    2  | Train Loss: 0.7917 Acc: 0.7992  | Val Loss: 2.3061 Acc: 0.3214  | Elapsed time: 0:34:38.225316


In [29]:
# Full-data run, augmentation step: robustness=0.5.
# NOTE(review): this cell's execution count (29) is higher than that of the
# training cell that follows it (28), so the cells were last run out of
# document order - re-check with Restart & Run All.
model_data = prepare_for_retraining(
    model_data,
    f'{dataset_dir}/data',
    f'{dataset_dir}/val',
    batch_size=32,
    image_size=224,
    robustness=0.5,
)

train class count: 1554
val class count: 36
0 new classes added: []
feature count: 2048
device: cuda:0


In [28]:
# Single epoch with break_at_val_acc_diff=0.05.
train(
    model_data,
    1,
    f"{dataset_dir}/checkpoint.butterfly.all.pth",
    break_at_val_acc_diff=0.05,
)

Epoch    1 /    1  | Train Loss: 2.9102 Acc: 0.3716  | Val Loss: 1.9418 Acc: 0.4286  | Elapsed time: 0:18:08.790018


In [32]:
# Up to 2 more epochs with break_at_val_acc_diff=0.05.
train(
    model_data,
    2,
    f"{dataset_dir}/checkpoint.butterfly.all.pth",
    break_at_val_acc_diff=0.05,
)

Epoch    1 /    2  | Train Loss: 2.3218 Acc: 0.4747  | Val Loss: 1.7617 Acc: 0.4808  | Elapsed time: 0:18:02.291987
Epoch    2 /    2  | Train Loss: 2.0838 Acc: 0.5218  | Val Loss: 1.3535 Acc: 0.5769  | Elapsed time: 0:36:08.960538


In [33]:
# Repeat of the previous training call (2 epochs, threshold 0.05), continuing
# from the current model_data.
train(
    model_data,
    2,
    f"{dataset_dir}/checkpoint.butterfly.all.pth",
    break_at_val_acc_diff=0.05,
)

Epoch    1 /    2  | Train Loss: 1.9250 Acc: 0.5552  | Val Loss: 1.1755 Acc: 0.5962  | Elapsed time: 0:17:42.872268
Epoch    2 /    2  | Train Loss: 1.7898 Acc: 0.5821  | Val Loss: 1.8912 Acc: 0.5000  | Elapsed time: 0:35:42.065492


In [34]:
# Full-data run, augmentation step: robustness=1.0.
model_data = prepare_for_retraining(
    model_data,
    f'{dataset_dir}/data',
    f'{dataset_dir}/val',
    batch_size=32,
    image_size=224,
    robustness=1.0,
)

train class count: 1554
val class count: 40
0 new classes added: []
feature count: 2048
device: cuda:0


In [35]:
# Up to 2 epochs with the threshold tightened to 0.02.
train(
    model_data,
    2,
    f"{dataset_dir}/checkpoint.butterfly.all.pth",
    break_at_val_acc_diff=0.02,
)

Epoch    1 /    2  | Train Loss: 1.5071 Acc: 0.6462  | Val Loss: 1.0576 Acc: 0.6290  | Elapsed time: 0:17:46.386456
Epoch    2 /    2  | Train Loss: 1.3777 Acc: 0.6775  | Val Loss: 1.0114 Acc: 0.6452  | Elapsed time: 0:35:39.327219


In [36]:
# Up to 3 epochs with the threshold tightened to 0.01.
train(
    model_data,
    3,
    f"{dataset_dir}/checkpoint.butterfly.all.pth",
    break_at_val_acc_diff=0.01,
)

Epoch    1 /    3  | Train Loss: 1.3184 Acc: 0.6889  | Val Loss: 0.9510 Acc: 0.6935  | Elapsed time: 0:17:47.142971
Epoch    2 /    3  | Train Loss: 1.2689 Acc: 0.7005  | Val Loss: 0.9186 Acc: 0.7258  | Elapsed time: 0:35:53.394860
Epoch    3 /    3  | Train Loss: 1.2370 Acc: 0.7089  | Val Loss: 0.8701 Acc: 0.7097  | Elapsed time: 0:54:47.346695


In [67]:
# Revert to the checkpoint saved for the 12th epoch: validation accuracy kept
# decreasing after the 11th epoch.
# NOTE(review): no visible cell writes "checkpoint.butterfly.all.12.pth" -
# presumably produced by train() or copied by hand; confirm where it comes from.
# NOTE(security): weights_only=False lets torch.load unpickle arbitrary
# objects, so only load checkpoint files from a trusted source.
model_data = torch.load(
    f"{dataset_dir}/checkpoint.butterfly.all.12.pth",
    weights_only=False,
)

-----------
# Test

In [63]:
# Load the model bundle to evaluate: the epoch-12 checkpoint.
# Disabled alternative kept from the original cell:
# model_data = torch.load(f"{dataset_dir}/checkpoint.butterfly.all.pth", weights_only=False)
# NOTE(security): weights_only=False unpickles arbitrary objects - trusted
# checkpoint files only.
model_data = torch.load(
    f"{dataset_dir}/checkpoint.butterfly.all.12.pth",
    weights_only=False,
)

In [66]:
# Evaluate on the "my-test" folder. The top-3 call uses the default printing
# options; the top-5 and top-10 calls pass print_preds=False and
# print_top1_accuracy=False (presumably summary-only output).
test_top_k(model_data, f"{dataset_dir}/my-test", 3)
for k in (5, 10):
    test_top_k(
        model_data,
        f"{dataset_dir}/my-test",
        k,
        print_preds=False,
        print_top1_accuracy=False,
    )

acraea-terpsicore             : [32macraea-terpsicore[0m(0.167)  byblia-ilithyia(0.115)  argynnis-hyperbius(0.061)  
athyma-pravara                : athyma-selenophora(0.498)  athyma-kanwa(0.200)  neptis-clinia(0.047)  
colias-fieldii                : [32mcolias-fieldii[0m(0.986)  colias-erate(0.004)  junonia-hierta(0.002)  
danaus-melanippus             : [32mdanaus-melanippus[0m(0.998)  danaus-genutia(0.002)  danaus-chrysippus(0.000)  
delias-descombesi             : [32mdelias-descombesi[0m(0.993)  delias-pasithoe(0.005)  danaus-chrysippus(0.001)  
euploea-core                  : euploea-sylvester(0.664)  [32meuploea-core[0m(0.249)  euploea-klugii(0.062)  
graphium-doson                : [32mgraphium-doson[0m(0.722)  graphium-chironides(0.098)  tirumala-septentrionis(0.074)  
hypolimnas-bolina             : [32mhypolimnas-bolina[0m(0.978)  thaumantis-diores(0.006)  elymnias-hypermnestra(0.005)  
kallima-inachus               : [32mkallima-inachus[0m(0.995)  junonia-h

In [65]:
# Evaluate on the "random-test" folder. The top-3 call uses the default
# printing options; the top-5 and top-10 calls pass print_preds=False and
# print_top1_accuracy=False (presumably summary-only output).
test_top_k(model_data, f"{dataset_dir}/random-test", 3)
for k in (5, 10):
    test_top_k(
        model_data,
        f"{dataset_dir}/random-test",
        k,
        print_preds=False,
        print_top1_accuracy=False,
    )

acraea-terpsicore-2           : [32macraea-terpsicore[0m(0.747)  caprona-alida-early(0.085)  danaus-chrysippus(0.084)  
castalius-rosimon-2           : [32mcastalius-rosimon[0m(0.578)  taraka-hamada(0.357)  phengaris-atroguttata(0.015)  
castalius-rosimon             : [32mcastalius-rosimon[0m(0.964)  caleta-decidia(0.018)  discolampa-ethion(0.006)  
chaetoprocta-odata-2          : [32mchaetoprocta-odata[0m(0.971)  sinthusa-chandrana(0.009)  hypolycaena-erylus(0.006)  
chaetoprocta-odata-3          : [32mchaetoprocta-odata[0m(0.998)  sinthusa-virgo(0.002)  chrysozephyrus-syla(0.000)  
chaetoprocta-odata            : [32mchaetoprocta-odata[0m(0.739)  hypolycaena-erylus(0.074)  hypolycaena-kina(0.031)  
cigaritis-zhengweilie         : [32mcigaritis-zhengweilie[0m(0.998)  cigaritis-elima(0.001)  cigaritis-syama(0.000)  
dodona-dipoea-2               : dodona-eugenes(0.914)  [32mdodona-dipoea[0m(0.085)  dodona-ouida(0.001)  
dodona-dipoea                 : [32mdodona-dipoe