In [1]:
# Load IPython's autoreload extension so the %autoreload magic is available in this kernel.
%load_ext autoreload

In [2]:
# Mode 2: re-import all modules before each cell runs, so edits to mynnlib take effect
# without restarting the kernel.
%autoreload 2

In [3]:
# NOTE(review): os, re, shutil, torch and the helpers called below (init_model_for_training,
# prepare_for_retraining, train, test_top_k) are never imported explicitly in this notebook,
# so they presumably enter the namespace through the star import — confirm against mynnlib.
import mynnlib
from mynnlib import *

# Root of the combined lepidoptera dataset. Cells below read "data" and "val" under it and
# the sibling "../butterfly" and "../moth" directories next to it.
dataset_dir = "insect-dataset/lepidoptera"

In [11]:
# Map every class directory under <dataset_dir>/data to the number of entries it holds.
data_root = f"{dataset_dir}/data"
classes = {
    class_name: len(os.listdir(f"{data_root}/{class_name}"))
    for class_name in os.listdir(data_root)
}

# Classes whose name ends in -spp, -genera or -genera-spp are counted as unidentified.
unidentified_pattern = re.compile(r"^.*-(spp|genera|genera-spp)$")
unidentified_classes = {
    class_name: entry_count
    for class_name, entry_count in classes.items()
    if unidentified_pattern.match(class_name)
}

# Totals are computed once and reused for both the raw and the derived "Identified" figures.
class_total = len(classes)
unidentified_class_total = len(unidentified_classes)
data_total = sum(classes.values())
unidentified_data_total = sum(unidentified_classes.values())

print(f"Total Class count: {class_total} ( Unidentified: {unidentified_class_total} / Identified: {class_total - unidentified_class_total} )")
print(f"Total Data count : {data_total} ( Unidentified: {unidentified_data_total} / Identified: {data_total - unidentified_data_total} )")

Total Class count: 4757 ( Unidentified: 737 / Identified: 4020 )
Total Data count : 111499 ( Unidentified: 15561 / Identified: 95938 )


In [56]:
# Rebuild the combined validation set from scratch: remove any previous copy, then copy
# every class folder from the butterfly and moth validation sets into it.
merged_val_root = f"{dataset_dir}/val/"
if os.path.exists(merged_val_root):
    shutil.rmtree(merged_val_root)

for group in ("butterfly", "moth"):
    group_val_root = f"{dataset_dir}/../{group}/val/"
    for species in os.listdir(group_val_root):
        # copytree raises if the destination already exists, so a class name present in
        # both groups stops the cell here.
        shutil.copytree(f"{group_val_root}{species}", f"{merged_val_root}{species}")

In [57]:
# Count the entries in each class folder of the combined validation set.
val_root = f"{dataset_dir}/val"
val_data = dict(
    (class_name, len(os.listdir(f"{val_root}/{class_name}")))
    for class_name in os.listdir(val_root)
)

print(f"Validation Class count: {len(val_data)}")
print(f"Validation Data count : {sum(val_data.values())}")

Validation Class count: 140
Validation Data count : 326


In [58]:
# Validation class names that have no matching directory in the training data.
set(val_data).difference(classes)

{'vindula-dejone'}

# Older model results (separate butterfly and moth checkpoints)

In [46]:
# Load the older butterfly-only checkpoint so it can be scored in the next cell.
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary Python objects —
# only point this at checkpoint files from a trusted source.
model_data = torch.load(f"{dataset_dir}/../butterfly/checkpoint.butterfly.adult.tb3.ep040000.pth", weights_only=False)

In [47]:
# Score the loaded checkpoint on the butterfly random-test set at top-3, top-5 and top-10.
butterfly_test_dir = f"{dataset_dir}/../butterfly/random-test"
# Only the first call requests the top-1 accuracy line (and passes print_no_match=False).
test_top_k(model_data, butterfly_test_dir, 3, print_preds=False, print_top1_accuracy=True, print_no_match=False)
for k in (5, 10):
    test_top_k(model_data, butterfly_test_dir, k, print_preds=False, print_top1_accuracy=False)

Top   1 accuracy: 113/153 -> 73.86%, genus matched: 139/153 -> 90.85%
Top   3 accuracy: 135/153 -> 88.24%, genus matched: 150/153 -> 98.04%
Top   5 accuracy: 139/153 -> 90.85%, genus matched: 152/153 -> 99.35%
Top  10 accuracy: 145/153 -> 94.77%, genus matched: 153/153 -> 100.00%


In [48]:
# Load the older moth-only checkpoint (resnet101, per its file name) so it can be scored next.
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary Python objects —
# only point this at checkpoint files from a trusted source.
model_data = torch.load(f"{dataset_dir}/../moth/checkpoint.moth.adult.resnet101.tb5.ep040001.pth", weights_only=False)

In [49]:
# Score the loaded checkpoint on the moth random-test set at top-3, top-5 and top-10.
moth_test_dir = f"{dataset_dir}/../moth/random-test"
# Only the first call requests the top-1 accuracy line (and passes print_no_match=False).
test_top_k(model_data, moth_test_dir, 3, print_preds=False, print_top1_accuracy=True, print_no_match=False)
for k in (5, 10):
    test_top_k(model_data, moth_test_dir, k, print_preds=False, print_top1_accuracy=False)

Top   1 accuracy: 110/152 -> 72.37%, genus matched: 134/152 -> 88.16%
Top   3 accuracy: 131/152 -> 86.18%, genus matched: 141/152 -> 92.76%
Top   5 accuracy: 138/152 -> 90.79%, genus matched: 145/152 -> 95.39%
Top  10 accuracy: 144/152 -> 94.74%, genus matched: 148/152 -> 97.37%


# A. All data / ResNet-152 / lr=1e-4 / weight_decay=1e-4

In [53]:
# Experiment A: initialise training state for a resnet152 model, with <dataset_dir>/data as the
# training folder and <dataset_dir>/val as the validation folder.
# NOTE(review): robustness starts at 0.2 and is raised in the later stages of this experiment;
# its meaning is defined in mynnlib — presumably augmentation strength, confirm.
model_data = init_model_for_training(f'{dataset_dir}/data', f'{dataset_dir}/val', 
                                     batch_size=32, arch="resnet152", image_size=224, robustness=0.2,
                                     lr=1e-4, weight_decay=1e-4)

train class count: 4757
val class count: 152
feature count: 2048
device: cuda:0


In [54]:
# Experiment A, stage 1: train for at most 5 epochs using the ".ta.ep01###" checkpoint path pattern
# (the ### placeholder is presumably filled in by train — confirm in mynnlib).
# NOTE(review): judging by the logged epochs, training appears to stop once the epoch-over-epoch
# validation-accuracy gain drops below break_at_val_acc_diff — confirm in mynnlib.
train(model_data, 5, f"{dataset_dir}/checkpoint.lepidoptera.ta.ep01###.pth", break_at_val_acc_diff=0.05)

Epoch    1 /    5  | Train Loss: 3.0172 Acc: 0.5240  | Val Loss: 2.0989 Acc: 0.5289  | Elapsed time: 0:30:18.628318
Epoch    2 /    5  | Train Loss: 0.6855 Acc: 0.8394  | Val Loss: 1.8782 Acc: 0.5714  | Elapsed time: 1:00:26.706301


In [55]:
# Experiment A, stage 2 setup: re-prepare model_data for further training on the same data/val
# folders, with robustness raised from 0.2 to 0.5. The logged output also reports how many
# new classes were added.
model_data = prepare_for_retraining(model_data, f'{dataset_dir}/data', f'{dataset_dir}/val', batch_size=32, image_size=224, robustness=0.5)

train class count: 4757
val class count: 152
0 new classes added: []
feature count: 2048
device: cuda:0


In [56]:
# Experiment A, stage 2: at most 5 more epochs, ".ta.ep02###" checkpoint path pattern,
# with the break_at_val_acc_diff threshold tightened to 0.02.
train(model_data, 5, f"{dataset_dir}/checkpoint.lepidoptera.ta.ep02###.pth", break_at_val_acc_diff=0.02)

Epoch    1 /    5  | Train Loss: 1.6657 Acc: 0.6186  | Val Loss: 1.5444 Acc: 0.6261  | Elapsed time: 0:29:40.059155
Epoch    2 /    5  | Train Loss: 1.3415 Acc: 0.6857  | Val Loss: 1.7768 Acc: 0.5988  | Elapsed time: 0:58:08.351487


In [58]:
# Experiment A, stage 3 setup: same data/val folders, robustness raised to 1.0.
model_data = prepare_for_retraining(model_data, f'{dataset_dir}/data', f'{dataset_dir}/val', batch_size=32, image_size=224, robustness=1.0)

train class count: 4757
val class count: 152
0 new classes added: []
feature count: 2048
device: cuda:0


In [60]:
# Experiment A, stage 3: at most 5 more epochs, ".ta.ep03###" checkpoint path pattern,
# with the break_at_val_acc_diff threshold tightened to 0.01.
train(model_data, 5, f"{dataset_dir}/checkpoint.lepidoptera.ta.ep03###.pth", break_at_val_acc_diff=0.01)

Epoch    1 /    5  | Train Loss: 1.2208 Acc: 0.7149  | Val Loss: 1.7936 Acc: 0.6140  | Elapsed time: 0:32:31.300956
Epoch    2 /    5  | Train Loss: 1.1547 Acc: 0.7311  | Val Loss: 1.8220 Acc: 0.6201  | Elapsed time: 1:01:34.793630


In [61]:
# Experiment A, stage 4 setup: same data/val folders, robustness raised to 2.0.
model_data = prepare_for_retraining(model_data, f'{dataset_dir}/data', f'{dataset_dir}/val', batch_size=32, image_size=224, robustness=2.0)

train class count: 4757
val class count: 152
0 new classes added: []
feature count: 2048
device: cuda:0


In [62]:
# Experiment A, stage 4: at most 5 more epochs, ".ta.ep04###" checkpoint path pattern,
# with the break_at_val_acc_diff threshold tightened to 0.005.
train(model_data, 5, f"{dataset_dir}/checkpoint.lepidoptera.ta.ep04###.pth", break_at_val_acc_diff=0.005)

Epoch    1 /    5  | Train Loss: 1.1848 Acc: 0.7245  | Val Loss: 1.7727 Acc: 0.6292  | Elapsed time: 0:28:31.415015
Epoch    2 /    5  | Train Loss: 0.9349 Acc: 0.7827  | Val Loss: 1.7292 Acc: 0.6383  | Elapsed time: 0:57:28.936496
Epoch    3 /    5  | Train Loss: 0.8609 Acc: 0.8007  | Val Loss: 1.8540 Acc: 0.6049  | Elapsed time: 1:26:37.883498


In [50]:
# Reload a stage-4 checkpoint of experiment A for evaluation.
# NOTE(review): "ep040001" is presumably the second epoch of stage 4 (zero-based index 1) — confirm
# against train's file naming.
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary Python objects —
# only point this at checkpoint files from a trusted source.
model_data = torch.load(f"{dataset_dir}/checkpoint.lepidoptera.ta.ep040001.pth", weights_only=False)

In [51]:
# Score the experiment A checkpoint on the butterfly random-test set at top-3, top-5 and top-10.
butterfly_test_dir = f"{dataset_dir}/../butterfly/random-test"
# Only the first call requests the top-1 accuracy line (and passes print_no_match=False).
test_top_k(model_data, butterfly_test_dir, 3, print_preds=False, print_top1_accuracy=True, print_no_match=False)
for k in (5, 10):
    test_top_k(model_data, butterfly_test_dir, k, print_preds=False, print_top1_accuracy=False)

Top   1 accuracy: 111/153 -> 72.55%, genus matched: 132/153 -> 86.27%
Top   3 accuracy: 132/153 -> 86.27%, genus matched: 147/153 -> 96.08%
Top   5 accuracy: 137/153 -> 89.54%, genus matched: 150/153 -> 98.04%
Top  10 accuracy: 142/153 -> 92.81%, genus matched: 152/153 -> 99.35%


In [52]:
# Score the experiment A checkpoint on the moth random-test set at top-3, top-5 and top-10.
moth_test_dir = f"{dataset_dir}/../moth/random-test"
# Only the first call requests the top-1 accuracy line (and passes print_no_match=False).
test_top_k(model_data, moth_test_dir, 3, print_preds=False, print_top1_accuracy=True, print_no_match=False)
for k in (5, 10):
    test_top_k(model_data, moth_test_dir, k, print_preds=False, print_top1_accuracy=False)

Top   1 accuracy: 84/152 -> 55.26%, genus matched: 103/152 -> 67.76%
Top   3 accuracy: 113/152 -> 74.34%, genus matched: 121/152 -> 79.61%
Top   5 accuracy: 120/152 -> 78.95%, genus matched: 127/152 -> 83.55%
Top  10 accuracy: 126/152 -> 82.89%, genus matched: 131/152 -> 86.18%


# B. All data / ResNet-101 / lr=1e-4 / weight_decay=1e-4

In [4]:
# Experiment B: same setup as experiment A except arch="resnet101" — <dataset_dir>/data for
# training, <dataset_dir>/val for validation, identical batch size, image size, lr and weight decay.
# NOTE(review): robustness starts at 0.2 and is raised in the later stages of this experiment;
# its meaning is defined in mynnlib — presumably augmentation strength, confirm.
model_data = init_model_for_training(f'{dataset_dir}/data', f'{dataset_dir}/val', 
                                     batch_size=32, arch="resnet101", image_size=224, robustness=0.2,
                                     lr=1e-4, weight_decay=1e-4)

train class count: 4757
val class count: 152
feature count: 2048
device: cuda:0


In [5]:
# Experiment B, stage 1: train for at most 5 epochs using the ".tb.ep01###" checkpoint path pattern
# (the ### placeholder is presumably filled in by train — confirm in mynnlib).
# NOTE(review): judging by the logged epochs, training appears to stop once the epoch-over-epoch
# validation-accuracy gain drops below break_at_val_acc_diff — confirm in mynnlib.
train(model_data, 5, f"{dataset_dir}/checkpoint.lepidoptera.tb.ep01###.pth", break_at_val_acc_diff=0.05)

Epoch    1 /    5  | Train Loss: 3.0888 Acc: 0.4991  | Val Loss: 2.0238 Acc: 0.5228  | Elapsed time: 0:24:52.136037
Epoch    2 /    5  | Train Loss: 0.7449 Acc: 0.8240  | Val Loss: 1.8942 Acc: 0.5471  | Elapsed time: 0:49:32.310488


In [6]:
# Experiment B, stage 2 setup: re-prepare model_data for further training on the same data/val
# folders, with robustness raised from 0.2 to 0.5. The logged output also reports how many
# new classes were added.
model_data = prepare_for_retraining(model_data, f'{dataset_dir}/data', f'{dataset_dir}/val', batch_size=32, image_size=224, robustness=0.5)

train class count: 4757
val class count: 152
0 new classes added: []
feature count: 2048
device: cuda:0


In [7]:
# Experiment B, stage 2: at most 5 more epochs, ".tb.ep02###" checkpoint path pattern,
# with the break_at_val_acc_diff threshold tightened to 0.02.
train(model_data, 5, f"{dataset_dir}/checkpoint.lepidoptera.tb.ep02###.pth", break_at_val_acc_diff=0.02)

Epoch    1 /    5  | Train Loss: 1.6470 Acc: 0.6245  | Val Loss: 1.5946 Acc: 0.5957  | Elapsed time: 0:25:35.573505
Epoch    2 /    5  | Train Loss: 1.3504 Acc: 0.6857  | Val Loss: 1.7768 Acc: 0.5775  | Elapsed time: 0:50:47.240504


In [8]:
# Experiment B, stage 3 setup: same data/val folders, robustness raised to 1.0.
model_data = prepare_for_retraining(model_data, f'{dataset_dir}/data', f'{dataset_dir}/val', batch_size=32, image_size=224, robustness=1.0)

train class count: 4757
val class count: 152
0 new classes added: []
feature count: 2048
device: cuda:0


In [9]:
# Experiment B, stage 3: at most 5 more epochs, ".tb.ep03###" checkpoint path pattern,
# with the break_at_val_acc_diff threshold tightened to 0.01.
train(model_data, 5, f"{dataset_dir}/checkpoint.lepidoptera.tb.ep03###.pth", break_at_val_acc_diff=0.01)

Epoch    1 /    5  | Train Loss: 1.2958 Acc: 0.6983  | Val Loss: 1.8237 Acc: 0.5897  | Elapsed time: 0:24:53.372874
Epoch    2 /    5  | Train Loss: 1.2082 Acc: 0.7180  | Val Loss: 1.7294 Acc: 0.6109  | Elapsed time: 0:49:55.193874
Epoch    3 /    5  | Train Loss: 1.1559 Acc: 0.7300  | Val Loss: 1.9144 Acc: 0.5684  | Elapsed time: 1:14:59.372581


In [10]:
# Experiment B, stage 4 setup: same data/val folders, robustness raised to 2.0.
model_data = prepare_for_retraining(model_data, f'{dataset_dir}/data', f'{dataset_dir}/val', batch_size=32, image_size=224, robustness=2.0)

train class count: 4757
val class count: 152
0 new classes added: []
feature count: 2048
device: cuda:0


In [11]:
# Experiment B, stage 4: at most 5 more epochs, ".tb.ep04###" checkpoint path pattern,
# with the break_at_val_acc_diff threshold tightened to 0.005.
train(model_data, 5, f"{dataset_dir}/checkpoint.lepidoptera.tb.ep04###.pth", break_at_val_acc_diff=0.005)

Epoch    1 /    5  | Train Loss: 1.0109 Acc: 0.7653  | Val Loss: 1.8186 Acc: 0.5897  | Elapsed time: 0:25:05.243520
Epoch    2 /    5  | Train Loss: 0.9216 Acc: 0.7859  | Val Loss: 1.7117 Acc: 0.6140  | Elapsed time: 0:50:12.299412
Epoch    3 /    5  | Train Loss: 0.8822 Acc: 0.7949  | Val Loss: 1.6557 Acc: 0.6109  | Elapsed time: 1:15:22.579944


In [53]:
# Reload a stage-4 checkpoint of experiment B for evaluation.
# NOTE(review): "ep040001" is presumably the second epoch of stage 4 (zero-based index 1) — confirm
# against train's file naming.
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary Python objects —
# only point this at checkpoint files from a trusted source.
model_data = torch.load(f"{dataset_dir}/checkpoint.lepidoptera.tb.ep040001.pth", weights_only=False)

In [54]:
# Score the experiment B checkpoint on the butterfly random-test set at top-3, top-5 and top-10.
butterfly_test_dir = f"{dataset_dir}/../butterfly/random-test"
# Only the first call requests the top-1 accuracy line (and passes print_no_match=False).
test_top_k(model_data, butterfly_test_dir, 3, print_preds=False, print_top1_accuracy=True, print_no_match=False)
for k in (5, 10):
    test_top_k(model_data, butterfly_test_dir, k, print_preds=False, print_top1_accuracy=False)

Top   1 accuracy: 104/153 -> 67.97%, genus matched: 126/153 -> 82.35%
Top   3 accuracy: 130/153 -> 84.97%, genus matched: 145/153 -> 94.77%
Top   5 accuracy: 135/153 -> 88.24%, genus matched: 148/153 -> 96.73%
Top  10 accuracy: 142/153 -> 92.81%, genus matched: 152/153 -> 99.35%


In [55]:
# Score the experiment B checkpoint on the moth random-test set at top-3, top-5 and top-10.
moth_test_dir = f"{dataset_dir}/../moth/random-test"
# Only the first call requests the top-1 accuracy line (and passes print_no_match=False).
test_top_k(model_data, moth_test_dir, 3, print_preds=False, print_top1_accuracy=True, print_no_match=False)
for k in (5, 10):
    test_top_k(model_data, moth_test_dir, k, print_preds=False, print_top1_accuracy=False)

Top   1 accuracy: 83/152 -> 54.61%, genus matched: 110/152 -> 72.37%
Top   3 accuracy: 119/152 -> 78.29%, genus matched: 131/152 -> 86.18%
Top   5 accuracy: 123/152 -> 80.92%, genus matched: 132/152 -> 86.84%
Top  10 accuracy: 128/152 -> 84.21%, genus matched: 136/152 -> 89.47%
