In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [35]:
import mynnlib
from mynnlib import *

# Root folder of the root-classifier dataset. The cells below read and write
# `<dataset_dir>/data/<class>`, validate against `<dataset_dir>/val`, and store
# training checkpoints directly inside this folder.
dataset_dir = "insect-dataset/root-classifier"

# Create datasets

In [25]:
# Build the "moth" and "butterfly" class folders of the root classifier by randomly
# subsampling roughly `target_data_cnt_per_class` images from each per-species dataset.
target_data_cnt_per_class = 15000
sampling_seed = 42  # fixed seed so that re-running this cell selects the same files

# Dedicated generator: keeps the subsampling reproducible without touching the
# global `random` state that other code may rely on.
sampling_rng = random.Random(sampling_seed)

for class_name in ["moth", "butterfly"]:
    class_dir = f"{dataset_dir}/data/{class_name}"
    # Always start from an empty class folder so files of a previous run never leak in.
    if os.path.exists(class_dir):
        shutil.rmtree(class_dir)
    os.makedirs(class_dir, exist_ok=True)

    src_dir = f"{dataset_dir}/../{class_name}/data"
    # Single sorted traversal of <src_dir>/<species>/<file>:
    #  * sorting makes the result independent of the OS directory order,
    #  * stray non-directory entries in src_dir are skipped instead of crashing,
    #  * only regular files are counted, so `data_cnt` matches what the copy loop sees.
    src_files = [
        (species_dir.name, file)
        for species_dir in sorted(Path(src_dir).iterdir()) if species_dir.is_dir()
        for file in sorted(species_dir.iterdir()) if file.is_file()
    ]
    data_cnt = len(src_files)
    # Each file is kept independently with this probability, so the number of copied
    # files is only approximately `target_data_cnt_per_class`.
    keep_probability = target_data_cnt_per_class / data_cnt if data_cnt else 0.0

    used_data_cnt = 0
    for species_name, file in src_files:
        if sampling_rng.random() <= keep_probability:
            dest = f"{class_dir}/{file.name}"
            if os.path.exists(dest):
                # Same file name already copied from another species folder: keep both
                # files instead of silently overwriting the first one.
                dest = f"{class_dir}/{species_name}__{file.name}"
            shutil.copy(file, dest)
            used_data_cnt += 1
    print(f"class: {class_name:20} | total data count: {data_cnt:10} | used data count: {used_data_cnt:10}")

class: moth                 | total data count:     187506 | used data count:      14804
class: butterfly            | total data count:     124809 | used data count:      14956


In [27]:
# Report how many entries the "other" class folder holds. This folder is not
# produced by the sampling loop in this notebook, so it is only counted here.
other_dir = f"{dataset_dir}/data/other"
other_data_cnt = len(os.listdir(other_dir))
print(f"class: {'other':20} | used data count: {other_data_cnt:10}")

class: other                | used data count:      14053


In [44]:
# Report how many entries the "other-insect" class folder holds.
# The printed label names the folder that is actually counted ("other-insect");
# it previously read "other", which made this line indistinguishable from the
# count of the separate "other" class.
other_insect_dir = f"{dataset_dir}/data/other-insect"
other_insect_data_cnt = len(os.listdir(other_insect_dir))
print(f"class: {'other-insect':20} | used data count: {other_insect_data_cnt:10}")

class: other                | used data count:       4966


# Train
### Model A

In [29]:
# Multi-phase training schedule for Model A (checkpoint prefix "ta").
# Each entry is one phase:
#   idx                   - phase number, also embedded in the checkpoint file name
#   robustness            - forwarded to the mynnlib data/model setup; it grows from phase to phase
#                           (presumably the augmentation strength - TODO confirm in mynnlib)
#   break_at_val_acc_diff - forwarded to train(); presumably an early-stopping threshold on the
#                           validation-accuracy change - TODO confirm in mynnlib
training_params = [
    { "idx": 1, "robustness": 0.2, "break_at_val_acc_diff": 0.05},
    { "idx": 2, "robustness": 0.5, "break_at_val_acc_diff": 0.02},
    { "idx": 3, "robustness": 1.0, "break_at_val_acc_diff": 0.01},
    { "idx": 4, "robustness": 2.0, "break_at_val_acc_diff": -0.000001},
    { "idx": 5, "robustness": 2.0, "break_at_val_acc_diff": -0.000001}
]
for param in training_params:
    # Pull the index into a local: reusing double quotes inside a double-quoted
    # f-string (f"...{param["idx"]}...") is a SyntaxError before Python 3.12.
    phase_idx = param["idx"]
    print(f"Phase {phase_idx}:")
    if phase_idx == 1:
        # Phase 1 sets up a new resnet18 model together with its optimiser settings.
        model_data = init_model_for_training(f'{dataset_dir}/data', f'{dataset_dir}/val', 
                                             batch_size=32, arch="resnet18", image_size=224, robustness=param["robustness"],
                                             lr=1e-4, weight_decay=1e-4, silent=True)
    else:
        # Later phases continue from the model_data left by the previous phase,
        # re-prepared with that phase's robustness value.
        model_data = prepare_for_retraining(model_data, f'{dataset_dir}/data', f'{dataset_dir}/val', 
                                            batch_size=32, image_size=224, robustness=param["robustness"], silent=True)
    # Up to 5 epochs per phase. The "###" in the path is passed through literally;
    # presumably train() replaces it with an epoch counter - TODO confirm in mynnlib.
    train(model_data, 5, f"{dataset_dir}/checkpoint.root-classifier.ta.ep{phase_idx:02}###.pth", 
          break_at_val_acc_diff=param["break_at_val_acc_diff"])

Phase 1:
Epoch    1 /    5  | Train Loss: 0.1766 Acc: 0.9332  | Val Loss: 0.2624 Acc: 0.9305  | Elapsed time: 0:05:49.368873
Epoch    2 /    5  | Train Loss: 0.0988 Acc: 0.9624  | Val Loss: 0.3145 Acc: 0.9109  | Elapsed time: 0:11:26.323126
Phase 2:
Epoch    1 /    5  | Train Loss: 0.2937 Acc: 0.8861  | Val Loss: 0.1139 Acc: 0.9626  | Elapsed time: 0:06:53.654441
Epoch    2 /    5  | Train Loss: 0.2317 Acc: 0.9092  | Val Loss: 0.1865 Acc: 0.9465  | Elapsed time: 0:14:57.317811
Phase 3:
Epoch    1 /    5  | Train Loss: 0.2392 Acc: 0.9052  | Val Loss: 0.3843 Acc: 0.9127  | Elapsed time: 0:05:30.800892
Epoch    2 /    5  | Train Loss: 0.2203 Acc: 0.9146  | Val Loss: 0.1586 Acc: 0.9537  | Elapsed time: 0:11:04.983225
Epoch    3 /    5  | Train Loss: 0.2121 Acc: 0.9169  | Val Loss: 0.1383 Acc: 0.9554  | Elapsed time: 0:16:37.199918
Phase 4:
Epoch    1 /    5  | Train Loss: 0.1971 Acc: 0.9222  | Val Loss: 0.1510 Acc: 0.9519  | Elapsed time: 0:05:31.102094
Epoch    2 /    5  | Train Loss: 0.1

In [30]:
# Restore the Model A checkpoint selected for evaluation.
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary Python objects,
# which can execute code - only load checkpoint files you produced yourself or trust.
model_a_checkpoint = f"{dataset_dir}/checkpoint.root-classifier.ta.ep050001.pth"
model_data = torch.load(model_a_checkpoint, weights_only=False)

In [41]:
# Per-class success count of the loaded Model A checkpoint on the validation folder.
# Note on reading the output: the value after "->" is a fraction of 1
# (e.g. 143/157 is shown as 0.91), even though test_class prints a "%" after it.
model_a_classes = ["moth", "butterfly", "other"]
test_class(model_data, f"{dataset_dir}/val", model_a_classes)

Class: moth            ----> Success: 143/157 -> 0.91%
Class: butterfly       ----> Success: 314/331 -> 0.95%
Class: other           ----> Success: 542/561 -> 0.97%


### Model B (added other-insect class)

In [46]:
# Multi-phase training schedule for Model B (checkpoint prefix "tb").
# The schedule is the same as for Model A; the training call only receives the
# `<dataset_dir>/data` folder, so the additional "other-insect" class is presumably
# picked up from the folder contents - TODO confirm in mynnlib.
# Each entry is one phase:
#   idx                   - phase number, also embedded in the checkpoint file name
#   robustness            - forwarded to the mynnlib data/model setup; it grows from phase to phase
#                           (presumably the augmentation strength - TODO confirm in mynnlib)
#   break_at_val_acc_diff - forwarded to train(); presumably an early-stopping threshold on the
#                           validation-accuracy change - TODO confirm in mynnlib
training_params = [
    { "idx": 1, "robustness": 0.2, "break_at_val_acc_diff": 0.05},
    { "idx": 2, "robustness": 0.5, "break_at_val_acc_diff": 0.02},
    { "idx": 3, "robustness": 1.0, "break_at_val_acc_diff": 0.01},
    { "idx": 4, "robustness": 2.0, "break_at_val_acc_diff": -0.000001},
    { "idx": 5, "robustness": 2.0, "break_at_val_acc_diff": -0.000001}
]
for param in training_params:
    # Pull the index into a local: reusing double quotes inside a double-quoted
    # f-string (f"...{param["idx"]}...") is a SyntaxError before Python 3.12.
    phase_idx = param["idx"]
    print(f"Phase {phase_idx}:")
    if phase_idx == 1:
        # Phase 1 sets up a new resnet18 model together with its optimiser settings.
        model_data = init_model_for_training(f'{dataset_dir}/data', f'{dataset_dir}/val', 
                                             batch_size=32, arch="resnet18", image_size=224, robustness=param["robustness"],
                                             lr=1e-4, weight_decay=1e-4, silent=True)
    else:
        # Later phases continue from the model_data left by the previous phase,
        # re-prepared with that phase's robustness value.
        model_data = prepare_for_retraining(model_data, f'{dataset_dir}/data', f'{dataset_dir}/val', 
                                            batch_size=32, image_size=224, robustness=param["robustness"], silent=True)
    # Up to 5 epochs per phase. The "###" in the path is passed through literally;
    # presumably train() replaces it with an epoch counter - TODO confirm in mynnlib.
    train(model_data, 5, f"{dataset_dir}/checkpoint.root-classifier.tb.ep{phase_idx:02}###.pth", 
          break_at_val_acc_diff=param["break_at_val_acc_diff"])

Phase 1:
Epoch    1 /    5  | Train Loss: 0.2307 Acc: 0.9159  | Val Loss: 0.1703 Acc: 0.9458  | Elapsed time: 0:09:20.039842
Epoch    2 /    5  | Train Loss: 0.1274 Acc: 0.9541  | Val Loss: 0.1647 Acc: 0.9445  | Elapsed time: 0:14:37.725237
Phase 2:
Epoch    1 /    5  | Train Loss: 0.3708 Acc: 0.8613  | Val Loss: 0.1759 Acc: 0.9433  | Elapsed time: 0:08:59.943711
Epoch    2 /    5  | Train Loss: 0.2904 Acc: 0.8883  | Val Loss: 0.1090 Acc: 0.9647  | Elapsed time: 0:15:22.880138
Epoch    3 /    5  | Train Loss: 0.2669 Acc: 0.8988  | Val Loss: 0.1150 Acc: 0.9697  | Elapsed time: 0:21:23.838444
Phase 3:
Epoch    1 /    5  | Train Loss: 0.2877 Acc: 0.8902  | Val Loss: 0.1485 Acc: 0.9521  | Elapsed time: 0:08:24.598403
Epoch    2 /    5  | Train Loss: 0.2695 Acc: 0.8981  | Val Loss: 0.1888 Acc: 0.9407  | Elapsed time: 0:15:23.914549
Phase 4:
Epoch    1 /    5  | Train Loss: 0.2578 Acc: 0.9031  | Val Loss: 0.1462 Acc: 0.9571  | Elapsed time: 0:06:00.299434
Epoch    2 /    5  | Train Loss: 0.2

In [47]:
# Restore the Model B checkpoint selected for evaluation.
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary Python objects,
# which can execute code - only load checkpoint files you produced yourself or trust.
model_b_checkpoint = f"{dataset_dir}/checkpoint.root-classifier.tb.ep050002.pth"
model_data = torch.load(model_b_checkpoint, weights_only=False)

In [48]:
# Per-class success count of the loaded Model B checkpoint on the validation folder,
# now including the "other-insect" class.
# Note on reading the output: the value after "->" is a fraction of 1
# (e.g. 138/157 is shown as 0.88), even though test_class prints a "%" after it.
model_b_classes = ["moth", "butterfly", "other-insect", "other"]
test_class(model_data, f"{dataset_dir}/val", model_b_classes)

Class: moth            ----> Success: 138/157 -> 0.88%
Class: butterfly       ----> Success: 308/331 -> 0.93%
Class: other-insect    ----> Success: 537/563 -> 0.95%
Class: other           ----> Success: 765/793 -> 0.96%
