In [1]:
# Load IPython's autoreload extension so that edited modules can be re-imported without restarting the kernel.
%load_ext autoreload

In [2]:
# Mode 2: re-import modules before each cell runs, so live edits to imported code are picked up.
%autoreload 2

In [3]:
# Add the parent directory to the import path so that `mynnlib` (presumably located there) can be imported.
import sys
directory = ".."
sys.path.append(directory)

# NOTE(review): later cells use names that are never imported explicitly in this notebook
# (os, re, random, shutil, torch, test_top_k, predict_top_k, train, ...); presumably they all
# arrive through this star import — confirm before replacing it with explicit imports.
import mynnlib
from mynnlib import *

# Root folder of the dataset; the cells below read the sub-folders data/, val/, test/, test2/
# and incremental-data-<n>/ beneath it.
dataset_dir = "../insect-dataset/lepidoptera"

# Patterns matched against class-directory names:
#   - names ending in "-early" are counted as early-stage classes,
#   - names ending in "-spp", "-genera" or "-genera-spp" are counted as unidentified classes,
#   - the third pattern is the union of the two above.
early_regex = r"^.*-(early)$"
unidentified_regex = r"^.*-(spp|genera|genera-spp)$"
early_or_unidentified_regex = r"^.*-(early|spp|genera|genera-spp)$"

# Analyze dataset and model

In [4]:
# Checkpoint that serves as the starting point for incremental training.
model_name = "checkpoint.lepidoptera.te.ep060004.pth"
checkpoint_path = f"{dataset_dir}/{model_name}"

# NOTE(review): weights_only=False lets torch.load unpickle arbitrary Python objects,
# so only load checkpoints from a trusted source.
model_data = torch.load(checkpoint_path, weights_only=False)

In [5]:
# Baseline evaluation of the loaded checkpoint on the test folder (k=3);
# only the accuracy summary is printed, per-image predictions and misses are suppressed.
test_top_k(
    model_data,
    f"{dataset_dir}/test",
    3,
    print_preds=False,
    print_top1_accuracy=True,
    print_no_match=False,
)

Top   1 accuracy: 36/43 -> 83.72%, genus matched: 40/43 -> 93.02%
Top   3 accuracy: 42/43 -> 97.67%, genus matched: 43/43 -> 100.00%


In [6]:
# Number of classes recorded in the checkpoint, for comparison with the dataset folders below.
model_class_count = model_data['num_classes']
print(f"Class count in model: {model_class_count}")

Class count in model: 5550


In [7]:
# Sequence number of the incremental batch being added; bump it for each new batch.
inc_train_seq = 1
# Folder (under dataset_dir) that holds the images of that batch.
inc_data_name = "incremental-data-{}".format(inc_train_seq)

In [8]:
def print_dataset_stats(dataset_type):
    """Print class and image counts for one dataset folder.

    Every sub-directory of ``{dataset_dir}/{dataset_type}`` is treated as one class and
    every entry inside it as one image. Classes are further split by name into
    "early-stage" (``early_regex``) and "unidentified" (``unidentified_regex``); the
    remainder is reported as "identified-adult".

    Args:
        dataset_type: Name of the folder under ``dataset_dir`` to summarise,
            e.g. ``'data'`` or ``'val'``.

    Raises:
        FileNotFoundError: If the folder does not exist (raised by ``os.listdir``).
    """
    split_dir = f"{dataset_dir}/{dataset_type}"
    print(f"\nData stats in \"{split_dir}\"", '\n' + '-' * 50)

    # Only sub-directories are classes; stray files at this level (e.g. ".DS_Store")
    # are skipped instead of making the inner os.listdir fail.
    classes = {}
    for class_dir in os.listdir(split_dir):
        class_path = f"{split_dir}/{class_dir}"
        if os.path.isdir(class_path):
            classes[class_dir] = len(os.listdir(class_path))

    early_classes = { class_name: count for class_name, count in classes.items() if re.match(early_regex, class_name) }
    unidentified_classes = { class_name: count for class_name, count in classes.items() if re.match(unidentified_regex, class_name) }

    # Compute each total once; "identified-adult" is whatever is neither early nor unidentified.
    class_total = len(classes)
    early_class_total = len(early_classes)
    unidentified_class_total = len(unidentified_classes)
    data_total = sum(classes.values())
    early_data_total = sum(early_classes.values())
    unidentified_data_total = sum(unidentified_classes.values())

    print(f"Total Class count : {class_total:6} ( Unidentified: {unidentified_class_total:6} / Early-stage: {early_class_total:6} / Identified-adult: {class_total - unidentified_class_total - early_class_total:6} )")
    print(f"Total  Data count : {data_total:6} ( Unidentified: {unidentified_data_total:6} / Early-stage: {early_data_total:6} / Identified-adult: {data_total - unidentified_data_total - early_data_total:6} )")

# Summarise the full training set, the validation set and the incremental batch, in that order.
for split_name in ('data', 'val', inc_data_name):
    print_dataset_stats(split_name)

# Class folders that make up the incremental batch.
incremental_classes = set(os.listdir(f"{dataset_dir}/{inc_data_name}"))
print(f"Classes: {incremental_classes}")

# Class folders present in the training data that the checkpoint does not know yet.
classes = set(os.listdir(f"{dataset_dir}/data"))
known_classes = set(model_data['class_names'])
new_classes = list(classes - known_classes)
print(f"\nNew classes: {new_classes}")


Data stats in "../insect-dataset/lepidoptera/data" 
--------------------------------------------------
Total Class count :   5552 ( Unidentified:    446 / Early-stage:   1027 / Identified-adult:   4079 )
Total  Data count : 324269 ( Unidentified:  12202 / Early-stage:  25105 / Identified-adult: 286962 )

Data stats in "../insect-dataset/lepidoptera/val" 
--------------------------------------------------
Total Class count :    143 ( Unidentified:      1 / Early-stage:      0 / Identified-adult:    142 )
Total  Data count :    331 ( Unidentified:      1 / Early-stage:      0 / Identified-adult:    330 )

Data stats in "../insect-dataset/lepidoptera/incremental-data-1" 
--------------------------------------------------
Total Class count :      3 ( Unidentified:      0 / Early-stage:      0 / Identified-adult:      3 )
Total  Data count :    235 ( Unidentified:      0 / Early-stage:      0 / Identified-adult:    235 )
Classes: {'albinospila-floresaria', 'lophoptera-squammigera', 'campto

In [9]:
# Make sure every class of the incremental batch has validation images: for each class
# whose val/ folder is missing or empty, copy a few images over from the incremental batch.
#
# The pick is reproducible: the directory listing is sorted (os.listdir order is arbitrary)
# and a fresh, fixed-seed RNG is used per class, so the result does not depend on the
# iteration order of the `incremental_classes` set or on which classes were already populated.
#
# NOTE(review): the images are copied, not moved, so they remain in the training folder too —
# confirm that this train/val overlap is intended.
val_samples_per_class = 2
val_split_seed = 42

for class_name in incremental_classes:
    val_class_dir = f"{dataset_dir}/val/{class_name}"
    os.makedirs(val_class_dir, exist_ok=True)
    if os.listdir(val_class_dir):
        continue  # validation images already present; leave them untouched

    source_class_dir = f"{dataset_dir}/{inc_data_name}/{class_name}"
    files = sorted(os.listdir(source_class_dir))
    picked = random.Random(val_split_seed).sample(files, min(val_samples_per_class, len(files)))
    for file in picked:
        shutil.copy(f"{source_class_dir}/{file}", f"{val_class_dir}/{file}")

# Test updated training methods

In [11]:
# Reload the checkpoint from disk so the experiments below start from the unmodified model.
# NOTE(review): weights_only=False unpickles arbitrary objects; load trusted checkpoints only.
checkpoint_path = f"{dataset_dir}/{model_name}"
model_data = torch.load(checkpoint_path, weights_only=False)

In [12]:
# Spot-check top-1 predictions of the freshly loaded model: three images from test/
# and two images (from test2/) of classes contained in the incremental batch.
spot_check_files = [
    'test/acraea-terpsicore.jpg',
    'test/graphium-doson.jpg',
    'test/troides-minos.jpg',
    'test2/albinospila-floresaria.jpg',
    'test2/lophoptera-squammigera.jpg',
]
for file in spot_check_files:
    top1 = predict_top_k(f'{dataset_dir}/{file}', model_data, 1)
    print(f"{file} --> {top1}")

test/acraea-terpsicore.jpg --> {'acraea-terpsicore': 0.5028307437896729}
test/graphium-doson.jpg --> {'graphium-doson': 0.9164521098136902}
test/troides-minos.jpg --> {'troides-minos': 0.7493312954902649}
test2/albinospila-floresaria.jpg --> {'comostola-hauensteini': 0.3403162956237793}
test2/lophoptera-squammigera.jpg --> {'lophoptera-spp': 0.9972708821296692}


In [13]:
# Plain retraining is expected to be rejected here: the model has more classes than the
# incremental batch (see the ERROR line in the output). Kept as a demonstration of why
# prepare_for_incremental_training is needed.
# The train folder is derived from inc_data_name so it follows inc_train_seq instead of
# being hard-coded to batch 1.
# NOTE(review): model_data is overwritten with whatever the rejected call returns —
# confirm that it hands back the model unchanged.
model_data = prepare_for_retraining(model_data, f'{dataset_dir}/{inc_data_name}', f'{dataset_dir}/val',
                                    batch_size=32, image_size=224, robustness=0.2, silent=False)

ERROR: 5550 classes in model but 3 classes in train dataset


In [14]:
# Prepare the model for incremental training on the new batch; per the log output this
# extends the class list with the classes the model has not seen before.
# The train folder is derived from inc_data_name so it follows inc_train_seq instead of
# being hard-coded to batch 1.
model_data = prepare_for_incremental_training(model_data, f'{dataset_dir}/{inc_data_name}', f'{dataset_dir}/val',
                                              batch_size=32, image_size=224, robustness=0.2, silent=False)

train class count: 5550 updated to 5552
val class count: 143 synced to train class count 5552
2 new classes added: ['albinospila-floresaria', 'lophoptera-squammigera']
Old Weight Std: tensor(0.0197, device='cuda:0')
Old FC Weight Shape: torch.Size([5550, 2048])
Old FC Weights Mean (Before Copy): -0.030855517834424973
New FC Weights Mean (Before Copy): -1.453034087717242e-06
New FC Weight Mean (After Copy):  -0.030855517834424973
New FC Weight Slice Shape: torch.Size([5550, 2048])
device: cuda:0


In [15]:
# Same five images as in the earlier spot check, now run through the model returned by
# prepare_for_incremental_training (before any training), to see how the prediction changed.
images_to_check = (
    'test/acraea-terpsicore.jpg', 'test/graphium-doson.jpg', 'test/troides-minos.jpg',
    'test2/albinospila-floresaria.jpg', 'test2/lophoptera-squammigera.jpg',
)
for file in images_to_check:
    prediction = predict_top_k(f'{dataset_dir}/{file}', model_data, 1)
    print("{} --> {}".format(file, prediction))

test/acraea-terpsicore.jpg --> {'lophoptera-squammigera': 0.9393853545188904}
test/graphium-doson.jpg --> {'lophoptera-squammigera': 0.8485425710678101}
test/troides-minos.jpg --> {'albinospila-floresaria': 0.9019157886505127}
test2/albinospila-floresaria.jpg --> {'lophoptera-squammigera': 0.6468878984451294}
test2/lophoptera-squammigera.jpg --> {'albinospila-floresaria': 0.9970108270645142}


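The model is forgetting everything: right after the classifier is extended with the two new classes, every spot-check image above, including the three that were classified correctly before, is assigned to one of the new classes.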

# Train

In [16]:
# Incremental training in three phases. Each phase re-prepares the model with a higher
# `robustness` value and a smaller `break_at_val_acc_diff`.
# NOTE(review): presumably `robustness` controls augmentation strength and
# `break_at_val_acc_diff` is an early-stopping threshold — confirm against mynnlib.
training_params = [
    { "idx": 1, "robustness": 0.2, "break_at_val_acc_diff": 0.05},
    { "idx": 2, "robustness": 0.5, "break_at_val_acc_diff": 0.02},
    { "idx": 3, "robustness": 1.0, "break_at_val_acc_diff": 0.01}
]
# Value passed as train()'s second argument (the log prints epochs 0..4 for the value 5).
epochs_per_phase = 5
# Derived from inc_data_name so the loop follows inc_train_seq instead of a hard-coded batch.
incremental_train_dir = f'{dataset_dir}/{inc_data_name}'

for param in training_params:
    # Single quotes inside the f-string keep this line valid on Python versions before 3.12.
    print(f"Phase {param['idx']}:")
    model_data = prepare_for_incremental_training(model_data, incremental_train_dir, f'{dataset_dir}/val',
                                                  batch_size=32, image_size=224, robustness=param["robustness"], silent=True)
    train(model_data, epochs_per_phase, f"{dataset_dir}/{model_name}.inc.test.pth",
          break_at_val_acc_diff=param["break_at_val_acc_diff"])

Phase 1:
Epoch    0 /    4  | Train Loss: 55.7671 Acc: 0.0000  | Val Loss: 37.0219 Acc: 0.0000  | Elapsed time: 0:00:08.143682
Epoch    1 /    4  | Train Loss: 55.7782 Acc: 0.0000  | Val Loss: 38.7608 Acc: 0.0030  | Elapsed time: 0:00:17.034200
Phase 2:
Epoch    0 /    4  | Train Loss: 55.3140 Acc: 0.0000  | Val Loss: 39.8080 Acc: 0.0030  | Elapsed time: 0:00:08.674864
Epoch    1 /    4  | Train Loss: 55.2716 Acc: 0.0000  | Val Loss: 40.6783 Acc: 0.0000  | Elapsed time: 0:00:18.162286
Phase 3:
Epoch    0 /    4  | Train Loss: 55.2689 Acc: 0.0000  | Val Loss: 40.6941 Acc: 0.0000  | Elapsed time: 0:00:09.161697
Epoch    1 /    4  | Train Loss: 55.1161 Acc: 0.0000  | Val Loss: 40.8872 Acc: 0.0000  | Elapsed time: 0:00:19.055705


In [17]:
# Final spot check after the three training phases, on the same five images as before.
checked_images = ['test/acraea-terpsicore.jpg', 'test/graphium-doson.jpg', 'test/troides-minos.jpg']
checked_images += ['test2/albinospila-floresaria.jpg', 'test2/lophoptera-squammigera.jpg']

for file in checked_images:
    image_path = f'{dataset_dir}/{file}'
    best_match = predict_top_k(image_path, model_data, 1)
    print(f"{file} --> {best_match}")

test/acraea-terpsicore.jpg --> {'lophoptera-squammigera': 0.9502971172332764}
test/graphium-doson.jpg --> {'lophoptera-squammigera': 0.8710583448410034}
test/troides-minos.jpg --> {'albinospila-floresaria': 0.9235486388206482}
test2/albinospila-floresaria.jpg --> {'lophoptera-squammigera': 0.8892876505851746}
test2/lophoptera-squammigera.jpg --> {'albinospila-floresaria': 0.9614320397377014}
