In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
import mynnlib
from mynnlib import *

# Generate TorchScript (.pt) models and class-name JSON files

In [4]:
# Directory that receives the scripted models and the class-name JSON files.
output_dir = "models"
# Maps each model name to the path of the checkpoint file it is loaded from.
models = {
    "moth": "insect-dataset/moth/checkpoint.moth.tg.ep060001.pth",
    "butterfly": "insect-dataset/butterfly/checkpoint.butterfly.te.ep050000.pth",
    "lepidoptera": "insect-dataset/lepidoptera/checkpoint.lepidoptera.te.ep060004.pth",
    "odonata": "insect-dataset/odonata/checkpoint.odonata.tc.ep060002.pth",
    "cicada": "insect-dataset/cicada/checkpoint.cicada.te.ep060000.pth",
    "root-classifier": "insect-dataset/root-classifier/checkpoint.root-classifier.tj.ep060001.pth"
}

In [None]:
# Export every available checkpoint as a CPU TorchScript model plus a JSON
# file listing its class names.
os.makedirs(output_dir, exist_ok=True)  # save()/open() below fail if the directory is missing
device = torch.device("cpu")
for model_name, model_path in models.items():
    if not os.path.exists(model_path):
        # Report skipped checkpoints instead of ignoring them silently.
        print(f"{model_path} not found, skipping")
        continue
    print(f"{model_path}")
    # NOTE: weights_only=False unpickles arbitrary objects; only load trusted checkpoints.
    # map_location lets a checkpoint that was saved on a GPU load on a CPU-only machine.
    model_data = torch.load(model_path, map_location=device, weights_only=False)
    model = model_data['model']
    model.to(device)
    model.eval()  # switch to inference mode before scripting
    scripted_model = torch.jit.script(model)
    scripted_model_path = f"{output_dir}/m.checkpoint.{model_name}.pt"
    scripted_model.save(scripted_model_path)
    print(f" --> {scripted_model_path}")
    class_file_path = f"{output_dir}/classes.{model_name}.json"
    with open(class_file_path, "w", encoding="utf-8") as file:
        json.dump(model_data['class_names'], file, indent=4)
    print(f" --> {class_file_path}")

insect-dataset/moth/checkpoint.moth.tg.ep060001.pth
 --> models/m.checkpoint.moth.pt
 --> models/classes.moth.json
insect-dataset/butterfly/checkpoint.butterfly.te.ep050000.pth
 --> models/m.checkpoint.butterfly.pt
 --> models/classes.butterfly.json
insect-dataset/lepidoptera/checkpoint.lepidoptera.te.ep060004.pth
 --> models/m.checkpoint.lepidoptera.pt
 --> models/classes.lepidoptera.json
insect-dataset/odonata/checkpoint.odonata.tc.ep060002.pth
 --> models/m.checkpoint.odonata.pt
 --> models/classes.odonata.json
insect-dataset/cicada/checkpoint.cicada.te.ep060000.pth
 --> models/m.checkpoint.cicada.pt
 --> models/classes.cicada.json
insect-dataset/root-classifier/checkpoint.root-classifier.tj.ep060001.pth
 --> models/m.checkpoint.root-classifier.pt
 --> models/classes.root-classifier.json


In [6]:
# shutil.copy("models/m.checkpoint.root-classifier.pt", "insect-id-app/app/src/main/assets/m.checkpoint.root-classifier.pt")

In [5]:
# Store the root classifier's class list inside the app metadata file, then
# delete the standalone class file.
app_metadata_path = "insect-id-app/metadata.json"
root_classes_path = "models/classes.root-classifier.json"
metadata = load_json(app_metadata_path)
metadata['root-classifier']['classes'] = load_json(root_classes_path)
dump_json(app_metadata_path, metadata)
os.remove(root_classes_path)

# Model stats

In [9]:
# Root directory that holds one dataset folder per species type.
base_dir = "insect-dataset"
# JSON file that the stats cells below read and write.
metadata_path = "insect-id-app/metadata.json"

In [10]:
# Count classes and data files for each species type and store the numbers
# under metadata[species_type]['stats'].
metadata = load_json(metadata_path)
for species_type in ['lepidoptera', 'moth', 'butterfly', 'odonata', 'cicada']:
    data_dir = f"{base_dir}/{species_type}/data"
    if not os.path.exists(data_dir):
        continue
    # Reuse the existing stats dict; create one when the key is missing or empty.
    stats = metadata[species_type].get('stats') or {}
    metadata[species_type]['stats'] = stats
    # List the class folders once instead of once per statistic.
    class_names = os.listdir(data_dir)
    stats['class_count'] = len(class_names)
    # Classes whose name does not end in -early, -genera or -spp.
    stats['species_count'] = sum(
        1 for class_name in class_names
        if not re.match(r"^.*-(early|genera|spp)$", class_name)
    )
    stats['spp_class_count'] = sum(
        1 for class_name in class_names
        if re.match(r"^.*-(genera|spp)$", class_name)
    )
    stats['early_stage_class_count'] = sum(
        1 for class_name in class_names
        if re.match(r"^.*-(early)$", class_name)
    )
    # Total number of entries across all class folders.
    stats['data_count'] = sum(
        len(os.listdir(f"{data_dir}/{class_name}")) for class_name in class_names
    )
dump_json(metadata_path, metadata)

In [11]:
# Top up each class's validation folder to a minimum size, measure top-1 and
# top-3 accuracy on it, and record both in the metadata file.
min_val_data_cnt = 2
val_sample_seed = 42  # fixed seed so the val top-up picks the same files on every run
metadata = load_json(metadata_path)
for species_type in ["lepidoptera", "moth", "butterfly", "cicada", "odonata"]:
    print(f"\n{species_type}\n" + ('-' * 30))

    dataset_dir = f"{base_dir}/{species_type}"
    data_dir = f"{dataset_dir}/data"
    val_dir = f"{dataset_dir}/val"
    checkpoint_path = models[species_type]
    # Skip missing inputs with a message, matching the existence checks in the earlier cells.
    if not os.path.exists(data_dir) or not os.path.exists(checkpoint_path):
        print("dataset or checkpoint not found, skipping")
        continue

    # make val dataset uniform
    # NOTE(review): files are copied (not moved) from data/, so if data/ is also
    # used for training, these validation images overlap with it - confirm this is intended.
    rng = random.Random(val_sample_seed)
    for class_name in sorted(os.listdir(data_dir)):
        class_val_dir = f"{val_dir}/{class_name}"
        os.makedirs(class_val_dir, exist_ok=True)
        existing_val_files = set(os.listdir(class_val_dir))
        data_to_add = min_val_data_cnt - len(existing_val_files)
        if data_to_add > 0:
            # Only consider files not already in val/: copying a duplicate name
            # overwrites it and leaves the class below the minimum.
            candidates = [
                file for file in sorted(os.listdir(f"{data_dir}/{class_name}"))
                if file not in existing_val_files
            ]
            for file in rng.sample(candidates, min(data_to_add, len(candidates))):
                shutil.copy2(f"{data_dir}/{class_name}/{file}", f"{class_val_dir}/{file}")

    # get accuracy
    # NOTE: weights_only=False unpickles arbitrary objects; only load trusted checkpoints.
    model_data = torch.load(checkpoint_path, weights_only=False)
    top1 = validate_prediction_in_dir_top_k(val_dir, model_data, 1)
    top3 = validate_prediction_in_dir_top_k(val_dir, model_data, 3)
    if not top1['total'] or not top3['total']:
        # Avoid dividing by zero when the val folder has no samples.
        print("no validation data, skipping accuracy")
        continue
    top1_pct = 100 * top1['success'] / top1['total']
    top3_pct = 100 * top3['success'] / top3['total']
    print(f"Top 1 Success: {top1['success']}/{top1['total']} -> {top1_pct:.2f}%")
    print(f"Top 3 Success: {top3['success']}/{top3['total']} -> {top3_pct:.2f}%")

    # save in json
    # Reuse the existing stats dict; create one when the key is missing or empty.
    stats = metadata[species_type].get('stats') or {}
    metadata[species_type]['stats'] = stats
    stats['accuracy'] = f"{top1_pct:.2f}%"
    stats['accuracy_top3'] = f"{top3_pct:.2f}%"
    # Written after every species type so finished results survive a later failure.
    dump_json(metadata_path, metadata)


lepidoptera
------------------------------
Top 1 Success: 8777/11106 -> 79.03%
Top 3 Success: 9806/11106 -> 88.29%

moth
------------------------------
Top 1 Success: 5692/6541 -> 87.02%
Top 3 Success: 6204/6541 -> 94.85%

butterfly
------------------------------
Top 1 Success: 3017/3467 -> 87.02%
Top 3 Success: 3302/3467 -> 95.24%

cicada
------------------------------
Top 1 Success: 280/449 -> 62.36%
Top 3 Success: 317/449 -> 70.60%

odonata
------------------------------
Top 1 Success: 1416/1701 -> 83.25%
Top 3 Success: 1589/1701 -> 93.42%
