# Target
1. Try with resnet-152 instead of resnet-18
2. Save all training data instead of only state_dict in .pth

# Prerequisites

In [26]:
# !pip install torchviz tensorflow

In [1]:
dataset_dir = "insect-dataset/moth"

In [21]:
import shutil
import os
import time
import datetime
import random
import numpy as np
from pathlib import Path
from PIL import Image
import pprint
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision import models
import torch.nn as nn
import torch.nn.functional as F

In [22]:
def split_data_for_train_and_val(data_dir, test_dir, val_dir, train_dir, test_data_weight, val_data_weight, min_file_cnt_for_val):
    if os.path.exists(test_dir):
        shutil.rmtree(test_dir)
    if os.path.exists(val_dir):
        shutil.rmtree(val_dir)
    if os.path.exists(train_dir):
        shutil.rmtree(train_dir)

    train_data_cnt = 0
    val_data_cnt = 0
    test_data_cnt = 0
    class_cnt = 0
    
    for class_dir in Path(data_dir).iterdir():
        if class_dir.is_dir() and os.listdir(class_dir):
            class_cnt = class_cnt + 1
            file_count = sum(1 for file in class_dir.iterdir() if file.is_file())
            for file in Path(class_dir).iterdir():
                if file.is_file():
                    random_float = random.random()
                    class_dir_name = class_dir.name
                    if file_count >= min_file_cnt_for_val and random_float < test_data_weight:
                        target_dir = test_dir
                        test_data_cnt = test_data_cnt + 1
                    elif file_count >= min_file_cnt_for_val and random_float < test_data_weight + val_data_weight:
                        target_dir = val_dir
                        val_data_cnt = val_data_cnt + 1
                    else:
                        target_dir = train_dir
                        train_data_cnt = train_data_cnt + 1
                    target_dir_path = f"{target_dir}/{class_dir_name}"
                    if not os.path.exists(target_dir_path):
                        os.makedirs(target_dir_path)
                    shutil.copy(file, target_dir_path)

    print(f"Class count: {class_cnt}")
    print(f"Training data count: {train_data_cnt}")
    print(f"Validation data count: {val_data_cnt}")
    print(f"Test data count: {test_data_cnt}")

In [155]:
def init_model_for_training(train_dir, val_dir, batch_size, arch='resnet18'):
    transform = {
        'train': transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.CenterCrop((224, 224)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(15),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]),
        'val': transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.CenterCrop((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]),
    }
    training_datasets = {
        'train': datasets.ImageFolder(root=train_dir, transform=transform['train']),
        'val': datasets.ImageFolder(root=val_dir, transform=transform['val']),
    }
    if len(training_datasets['val'].class_to_idx) != len(training_datasets['train'].class_to_idx):
        training_datasets['val'].class_to_idx =  training_datasets['train'].class_to_idx
        new_val_samples = []
        for path, label in training_datasets['val'].samples:
            class_name = training_datasets['val'].classes[label]
            if class_name in training_datasets['train'].class_to_idx:
                new_val_samples.append((path, training_datasets['train'].class_to_idx[class_name]))
        training_datasets['val'].samples = new_val_samples
        
    dataloaders = {
        'train': DataLoader(training_datasets['train'], batch_size=batch_size, shuffle=True),
        'val': DataLoader(training_datasets['val'], batch_size=batch_size, shuffle=False),
    }
    class_names = training_datasets['train'].classes

    if arch == 'resnet152':
        model = models.resnet152(weights=models.ResNet152_Weights.DEFAULT)
    elif arch == 'resnet50':
        model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
    else:
        model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        
    num_classes = len(class_names)
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, num_classes)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    return {
        'model': model,
        'device': device,
        'transform': transform,
        'datasets': training_datasets,
        'dataloaders': dataloaders,
        'class_names': class_names,
        'num_classes': num_classes,
        'num_features': num_features,
        'criterion': criterion,
        'optimizer': optimizer,
        'scheduler': scheduler
    }

In [24]:
def train(model_data, num_epochs, model_path, phases=['train', 'val']):
    start_time = time.time()
    for epoch in range(num_epochs):
        print(f"Epoch {(epoch+1):4} / {num_epochs:4}", end=' ')
        for phase in phases:
            if phase == 'train':
                model_data['model'].train()
            else:
                model_data['model'].eval()
            running_loss = 0.0
            running_corrects = 0
            for inputs, labels in model_data['dataloaders'][phase]:
                inputs, labels = inputs.to(model_data['device']), labels.to(model_data['device'])
                model_data['optimizer'].zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model_data['model'](inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = model_data['criterion'](outputs, labels)
                    if phase == 'train':
                        loss.backward()
                        model_data['optimizer'].step()
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
    
            epoch_loss = running_loss / len(model_data['datasets'][phase])
            epoch_acc = running_corrects.double() / len(model_data['datasets'][phase])
            print(f" | {phase.capitalize()} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}", end=' ')
            if phase == 'train':
                model_data['scheduler'].step()
        print(f" | Elapsed time: {datetime.timedelta(seconds=(time.time() - start_time))}")
        torch.save(model_data, model_path)

In [25]:
def predict(image_path, model_data):
    image = Image.open(image_path).convert("RGB")
    image = model_data['transform']['val'](image).unsqueeze(0).to(model_data['device'])
    with torch.no_grad():
        outputs = model_data['model'](image)
        _, preds = torch.max(outputs, 1)
    try:
        return model_data['class_names'][preds[0]]
    except (Exception):
        return None

def predict_top_k(image_path, model_data, k):
    image = Image.open(image_path).convert("RGB")
    image = model_data['transform']['val'](image).unsqueeze(0).to(model_data['device'])
    with torch.no_grad():
        outputs = model_data['model'](image)
        probabilities = F.softmax(outputs, dim=1)
        top_probs, top_indices = torch.topk(probabilities, k)
    try:
        return {model_data['class_names'][top_indices[0][i]]: top_probs[0][i].item() for i in range(0, k)}
    except (Exception):
        return None

In [26]:
def validate_prediction_in_dir(test_dir, model_data):
    total = 0
    success = 0
    failures = {}
    for species_dir in Path(test_dir).iterdir():
        if species_dir.is_dir():
            for file in Path(f"{species_dir}").iterdir():
                if file.is_file():
                    species = file.parts[-2]
                    prediction = predict(file, model_data)
                    is_success = (species==prediction)
                    if not is_success:
                        failures[species] = prediction
                    total = total + 1
                    if is_success:
                        success = success + 1
    return {
        'total': total, 
        'success': success,
        'failures': failures
    }

def test(model_data, test_dir, print_failures=True):
    model_data['model'].eval()
    start_time = time.time()
    prediction = validate_prediction_in_dir(test_dir, model_data)
    print(f"Accuracy: {prediction['success']} / {prediction['total']} -> {100*prediction['success']/prediction['total']:.2f}%")
    print(f"Elapsed time: {datetime.timedelta(seconds=(time.time() - start_time))}")
    if print_failures:
        print("-"*10)
        print("Failures:")
        pprint.pprint(prediction['failures'])

In [35]:
def test_top_k(model_data, test_dir, k, print_preds=True, print_accuracy=True):
    model_data['model'].eval()
    top1_success_cnt = 0
    success_cnt = 0
    total_cnt = 0
    for file in Path(test_dir).iterdir():
        if print_preds:
            print(f"{file.name.split('.')[0]:25}:", end=' ')
        total_cnt = total_cnt + 1
        probs = predict_top_k(file, model_data, k)
        for pred, prob in probs.items():
            if pred in file.name:
                success_cnt = success_cnt + 1
            if print_preds:
                print(f"{pred}({prob:.3f}) ", end=' ')
        if [pred for pred, prob in probs.items()][0] in file.name:
            top1_success_cnt = top1_success_cnt + 1
        if print_preds:
            print()
    if print_accuracy:
        if print_preds:
            print("-"*10)
        print(f"Top {k} accuracy: {success_cnt} / {total_cnt} -> {success_cnt/total_cnt:.3f}")
        print(f"Top 1 accuracy: {top1_success_cnt} / {total_cnt} -> {top1_success_cnt/total_cnt:.3f}")

In [28]:
def extract_proto_dataset(data_dir, proto_data_dir, limit):
    file_cnt = 0
    for class_dir in Path(data_dir).iterdir():
        if class_dir.is_dir() and os.listdir(class_dir):
            file_count = sum(1 for file in class_dir.iterdir() if file.is_file())
            class_dir_name = class_dir.name
            for file in Path(class_dir).iterdir():
                if file.is_file():
                    target_dir_path = f"{proto_data_dir}/{class_dir_name}"
                    if not os.path.exists(target_dir_path):
                        os.makedirs(target_dir_path)
                    shutil.copy(file, target_dir_path)
                    file_cnt = file_cnt + 1
                    if(file_cnt >= limit):
                        return

In [156]:
def prepare_for_retraining(model_data, train_dir, val_dir, batch_size):
    model_data['transform'] = {
        'train': transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.CenterCrop((224, 224)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(15),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]),
        'val': transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.CenterCrop((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]),
    }
    
    model_data['datasets'] = {
        'train': datasets.ImageFolder(root=train_dir, transform=model_data['transform']['train']),
        'val': datasets.ImageFolder(root=val_dir, transform=model_data['transform']['val']),
    }
    if len(model_data['datasets']['val'].class_to_idx) != len(model_data['datasets']['train'].class_to_idx):
        model_data['datasets']['val'].class_to_idx =  model_data['datasets']['train'].class_to_idx
        new_val_samples = []
        for path, label in model_data['datasets']['val'].samples:
            class_name = model_data['datasets']['val'].classes[label]
            if class_name in model_data['datasets']['train'].class_to_idx:
                new_val_samples.append((path, model_data['datasets']['train'].class_to_idx[class_name]))
        model_data['datasets']['val'].samples = new_val_samples
        
    model_data['dataloaders'] = {
        'train': DataLoader(model_data['datasets']['train'], batch_size=batch_size, shuffle=True),
        'val': DataLoader(model_data['datasets']['val'], batch_size=batch_size, shuffle=False),
    }
    
    for class_name in model_data['datasets']['train'].classes:
        if class_name not in model_data['class_names']:
            model_data['class_names'].append(class_name)
    old_num_classes = model_data['num_classes']
    model_data['num_classes'] = len(model_data['class_names'])
    
    old_fc_weights = model_data['model'].fc.weight.data[:old_num_classes]
    model_data['model'].fc = nn.Linear(model_data['num_features'], model_data['num_classes'])
    model_data['model'].fc.weight.data[:old_num_classes] = old_fc_weights
    
    model_data['device'] = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model_data['model'] = model_data['model'].to(model_data['device'])
    
    return model_data;

# Extract small dataset and train and test

In [18]:
extract_proto_dataset(f"{dataset_dir}/data", f"{dataset_dir}/proto/data", 5000)

In [19]:
split_data_for_train_and_val(f"{dataset_dir}/proto/data", f"{dataset_dir}/proto/test", f"{dataset_dir}/proto/val", f"{dataset_dir}/proto/train", 0.1, 0.2, 4)

Class count: 335
Training data count: 3586
Validation data count: 941
Test data count: 473


In [20]:
model_data = init_model_for_training(f'{dataset_dir}/proto/train', f'{dataset_dir}/proto/val', 32, 'resnet152')
print(f"device: {model_data['device']}")
print(f"num_classes: {model_data['num_classes']}")
print(f"num_features: {model_data['num_features']}")

device: cuda:0
num_classes: 335
num_features: 2048


In [None]:
train(model_data, 25, f"{dataset_dir}/proto/checkpoint_latest.pth")
shutil.copy(f"{dataset_dir}/proto/checkpoint_latest.pth", f"{dataset_dir}/proto/checkpoint_{int(time.time())}.pth")

In [None]:
test(f"{dataset_dir}/proto/checkpoint_latest.pth", f"{dataset_dir}/proto/test", False)

# Try full dataset

In [12]:
split_data_for_train_and_val(f"{dataset_dir}/data", f"{dataset_dir}/full/test", f"{dataset_dir}/full/val", f"{dataset_dir}/full/train", 0.1, 0.2, 4)

Class count: 3051
Training data count: 31622
Validation data count: 8466
Test data count: 4237


In [13]:
model_data = init_model_for_training(f'{dataset_dir}/full/train', f'{dataset_dir}/full/val', 32, 'resnet152')
print(f"device: {model_data['device']}")
print(f"num_classes: {model_data['num_classes']}")
print(f"num_features: {model_data['num_features']}")

device: cuda:0
num_classes: 3047
num_features: 2048


### Train 2 epochs with 70% train, 20% val, 10% test data

In [14]:
train(model_data, 2, f"{dataset_dir}/full/checkpoint_latest.pth")
shutil.copy(f"{dataset_dir}/full/checkpoint_latest.pth", f"{dataset_dir}/full/checkpoint_{int(time.time())}.pth")

Epoch    1 /    2  | Train Loss: 7.0484 Acc: 0.0277  | Val Loss: 11.1510 Acc: 0.0004  | Elapsed time: 0:13:02.099367
Epoch    2 /    2  | Train Loss: 5.6067 Acc: 0.0975  | Val Loss: 13.5868 Acc: 0.0000  | Elapsed time: 0:22:24.161010


'insect-dataset/moth/full/checkpoint_1737977932.pth'

In [16]:
test(model_data, f"{dataset_dir}/full/test", False)

Accuracy: 529 / 4237 -> 12.49%
Elapsed time: 0:02:16.021992


#### Load & verify file saved properly

In [38]:
model_data = torch.load(f"{dataset_dir}/full/checkpoint_latest.pth", weights_only=False)

In [39]:
test(model_data, f"{dataset_dir}/full/test", False)

Accuracy: 529 / 4237 -> 12.49%
Elapsed time: 0:02:11.483563


### Train 8 more epochs

In [40]:
train(model_data, 8, f"{dataset_dir}/full/checkpoint_latest.pth")
shutil.copy(f"{dataset_dir}/full/checkpoint_latest.pth", f"{dataset_dir}/full/checkpoint_{int(time.time())}.pth")

Epoch    1 /    8  | Train Loss: 4.3428 Acc: 0.2076  | Val Loss: 17.1842 Acc: 0.0000  | Elapsed time: 0:09:12.678239
Epoch    2 /    8  | Train Loss: 3.1974 Acc: 0.3325  | Val Loss: 19.7847 Acc: 0.0000  | Elapsed time: 0:18:26.308181
Epoch    3 /    8  | Train Loss: 2.3313 Acc: 0.4657  | Val Loss: 22.8052 Acc: 0.0001  | Elapsed time: 0:27:44.034766
Epoch    4 /    8  | Train Loss: 1.7133 Acc: 0.5855  | Val Loss: 25.9527 Acc: 0.0000  | Elapsed time: 0:36:55.719091
Epoch    5 /    8  | Train Loss: 1.2721 Acc: 0.6771  | Val Loss: 27.9397 Acc: 0.0001  | Elapsed time: 0:46:16.316828
Epoch    6 /    8  | Train Loss: 0.6752 Acc: 0.8315  | Val Loss: 29.9066 Acc: 0.0000  | Elapsed time: 0:55:28.904590
Epoch    7 /    8  | Train Loss: 0.5179 Acc: 0.8727  | Val Loss: 31.0145 Acc: 0.0000  | Elapsed time: 1:04:42.652712
Epoch    8 /    8  | Train Loss: 0.4426 Acc: 0.8921  | Val Loss: 31.9842 Acc: 0.0000  | Elapsed time: 1:14:02.674499


'insect-dataset/moth/full/checkpoint_1737984137.pth'

In [41]:
test(model_data, f"{dataset_dir}/full/test", False)

Accuracy: 2397 / 4237 -> 56.57%
Elapsed time: 0:02:09.862823


In [42]:
test(model_data, f"{dataset_dir}/full/val", False)

Accuracy: 4727 / 8466 -> 55.84%
Elapsed time: 0:04:13.736466


In [43]:
test_top_k(model_data, f"{dataset_dir}/my-test", 4)

artena-dotata-2          : artena-dotata(0.856)  nephele-hespera(0.073)  mocis-frugalis(0.021)  helicoverpa-armigera(0.019)  
artena-dotata            : artena-dotata(0.854)  carea-angulata(0.055)  simplicia-schaldusalis(0.039)  ampelophaga-rubiginosa(0.022)  
artena-submira-2         : poaphilini-genera-spp(0.561)  acosmeryx-shervillii(0.145)  achaea-janata(0.073)  mocis-undata(0.052)  
artena-submira           : artena-dotata(0.389)  artena-submira(0.217)  thyas-coronata(0.118)  oporophylla-ustulata(0.077)  
clanis-phalaris-2        : theretra-alecto(0.183)  rhagastis-castanea(0.154)  clanis-titan(0.107)  hippotion-rafflesii(0.105)  
clanis-phalaris          : hippotion-rosetta(0.677)  theretra-alecto(0.124)  theretra-clotho(0.093)  palirisa-cervina(0.023)  
clanis-undulosa          : theretra-alecto(0.741)  clanis-titan(0.104)  theretra-nessus(0.050)  theretra-clotho(0.036)  
conogethes-sahyadriensis : conogethes-spp(0.664)  herpetogramma-cynaralis(0.192)  conogethes-punctiferalis(0

### Train 15 more epochs

In [44]:
train(model_data, 15, f"{dataset_dir}/full/checkpoint_latest.pth")
shutil.copy(f"{dataset_dir}/full/checkpoint_latest.pth", f"{dataset_dir}/full/checkpoint_{int(time.time())}.pth")

Epoch    1 /   15  | Train Loss: 0.3784 Acc: 0.9077  | Val Loss: 32.3950 Acc: 0.0000  | Elapsed time: 0:09:29.600801
Epoch    2 /   15  | Train Loss: 0.3337 Acc: 0.9187  | Val Loss: 33.3562 Acc: 0.0000  | Elapsed time: 0:19:06.111443
Epoch    3 /   15  | Train Loss: 0.2815 Acc: 0.9333  | Val Loss: 33.4328 Acc: 0.0000  | Elapsed time: 0:28:21.974020
Epoch    4 /   15  | Train Loss: 0.2407 Acc: 0.9458  | Val Loss: 34.6437 Acc: 0.0000  | Elapsed time: 0:37:35.404415
Epoch    5 /   15  | Train Loss: 0.1888 Acc: 0.9616  | Val Loss: 34.2778 Acc: 0.0000  | Elapsed time: 0:46:47.961458
Epoch    6 /   15  | Train Loss: 0.1789 Acc: 0.9636  | Val Loss: 34.9573 Acc: 0.0000  | Elapsed time: 0:56:01.920862
Epoch    7 /   15  | Train Loss: 0.1710 Acc: 0.9667  | Val Loss: 34.7931 Acc: 0.0000  | Elapsed time: 1:05:14.638715
Epoch    8 /   15  | Train Loss: 0.1644 Acc: 0.9684  | Val Loss: 34.6984 Acc: 0.0000  | Elapsed time: 1:14:26.601579
Epoch    9 /   15  | Train Loss: 0.1603 Acc: 0.9694  | Val Loss:

'insect-dataset/moth/full/checkpoint_1737992855.pth'

In [45]:
test(model_data, f"{dataset_dir}/full/test", False)

Accuracy: 2473 / 4237 -> 58.37%
Elapsed time: 0:02:14.230098


In [50]:
test(model_data, f"{dataset_dir}/full/val", False)

Accuracy: 6006 / 8466 -> 70.94%
Elapsed time: 0:04:01.650916


In [52]:
test(model_data, f"{dataset_dir}/full/train", False)

Accuracy: 22947 / 31622 -> 72.57%
Elapsed time: 0:15:46.898181


In [53]:
test_top_k(model_data, f"{dataset_dir}/my-test", 4)

artena-dotata-2          : artena-dotata(0.752)  simplicia-spp(0.038)  nephele-hespera(0.032)  achaea-serva(0.024)  
artena-dotata            : artena-dotata(0.566)  sympis-rufibasis(0.047)  Odontopera-spp(0.045)  odontopera-bilinearia(0.043)  
artena-submira-2         : artena-submira(0.522)  pseudojana-incandescens(0.072)  calyptra-spp(0.056)  lebeda-nobilis(0.029)  
artena-submira           : artena-dotata(0.443)  artena-submira(0.138)  poaphilini-genera-spp(0.035)  lyssa-zampa(0.034)  
clanis-phalaris-2        : theretra-alecto(0.502)  theretra-clotho(0.065)  marumba-cristata(0.052)  clanis-titan(0.037)  
clanis-phalaris          : hippotion-rosetta(0.238)  theretra-alecto(0.202)  theretra-pallicosta(0.148)  theretra-clotho(0.072)  
clanis-undulosa          : clanis-titan(0.527)  marumba-cristata(0.132)  clanis-undulosa(0.099)  theretra-alecto(0.038)  
conogethes-sahyadriensis : conogethes-spp(0.352)  conogethes-punctiferalis(0.227)  psyra-cuneata(0.209)  pycnarmon-cribrata(0.065) 

### Train with all 100% data for 10 epochs

In [54]:
model_data = prepare_for_retraining(model_data, f'{dataset_dir}/data', f'{dataset_dir}/full/val', 32)
print(f"device: {model_data['device']}")
print(f"num_classes: {model_data['num_classes']}")
print(f"num_features: {model_data['num_features']}")

device: cuda:0
num_classes: 3051
num_features: 2048


In [55]:
train(model_data, 10, f"{dataset_dir}/full/checkpoint_latest.pth", phases=['train'])
shutil.copy(f"{dataset_dir}/full/checkpoint_latest.pth", f"{dataset_dir}/full/checkpoint_{int(time.time())}.pth")

Epoch    1 /   10  | Train Loss: 3.5844 Acc: 0.4054  | Elapsed time: 0:10:56.711135
Epoch    2 /   10  | Train Loss: 1.1778 Acc: 0.7077  | Elapsed time: 0:21:58.167607
Epoch    3 /   10  | Train Loss: 0.6566 Acc: 0.8163  | Elapsed time: 0:33:00.221350
Epoch    4 /   10  | Train Loss: 0.4229 Acc: 0.8757  | Elapsed time: 0:44:02.327792
Epoch    5 /   10  | Train Loss: 0.3086 Acc: 0.9091  | Elapsed time: 0:55:04.145452
Epoch    6 /   10  | Train Loss: 0.2512 Acc: 0.9236  | Elapsed time: 1:06:06.113104
Epoch    7 /   10  | Train Loss: 0.1971 Acc: 0.9400  | Elapsed time: 1:17:08.019445
Epoch    8 /   10  | Train Loss: 0.0678 Acc: 0.9820  | Elapsed time: 1:28:10.299961
Epoch    9 /   10  | Train Loss: 0.0304 Acc: 0.9930  | Elapsed time: 1:39:13.228200
Epoch   10 /   10  | Train Loss: 0.0215 Acc: 0.9951  | Elapsed time: 1:50:15.626157


'insect-dataset/moth/full/checkpoint_1738002319.pth'

In [56]:
test(model_data, f"{dataset_dir}/full/test", False)

Accuracy: 4231 / 4237 -> 99.86%
Elapsed time: 0:02:09.365678


In [57]:
test(model_data, f"{dataset_dir}/full/val", False)

Accuracy: 8456 / 8466 -> 99.88%
Elapsed time: 0:04:09.827129


In [58]:
test(model_data, f"{dataset_dir}/full/train", False)

Accuracy: 31577 / 31622 -> 99.86%
Elapsed time: 0:15:26.177914


#### All data have been trained on. only "my-test" has unseen data now

In [89]:
test_top_k(model_data, f"{dataset_dir}/my-test", 4)

artena-dotata-2          : artena-dotata(1.000)  artena-submira(0.000)  achaea-serva(0.000)  clanidopsis-exusta(0.000)  
artena-dotata            : artena-dotata(1.000)  bastilla-praetermissa(0.000)  bastilla-crameri(0.000)  artena-submira(0.000)  
artena-submira-2         : artena-submira(0.633)  poaphilini-genera-spp(0.189)  mocis-undata(0.070)  thyas-coronata(0.030)  
artena-submira           : artena-dotata(0.499)  artena-submira(0.275)  thyas-coronata(0.122)  episparis-tortuosalis(0.055)  
clanis-phalaris-2        : clanis-phalaris(0.564)  acosmeryx-pseudonaga(0.200)  marumba-irata(0.139)  clanidopsis-exusta(0.042)  
clanis-phalaris          : theretra-clotho(0.722)  clanis-bilineata(0.168)  clanis-phalaris(0.029)  theretra-pallicosta(0.024)  
clanis-undulosa          : clanis-titan(0.387)  clanis-undulosa(0.338)  ambulyx-belli(0.147)  theretra-alecto(0.040)  
conogethes-sahyadriensis : conogethes-spp(0.673)  argina-astrea(0.279)  conogethes-punctiferalis(0.018)  pycnarmon-cribrat

In [90]:
test_top_k(model_data, f"{dataset_dir}/my-test", 10, print_preds=False)

Top 10 accuracy: 19 / 22 -> 0.864
Top 1 accuracy: 12 / 22 -> 0.545


### Train 15 more epochs

In [103]:
model_data = torch.load(f"{dataset_dir}/full/checkpoint.resnet152.2025.01.28.pth", weights_only=False)

In [102]:
model_data = prepare_for_retraining(model_data, f'{dataset_dir}/data', f'{dataset_dir}/full/val', 32)
print(f"device: {model_data['device']}")
print(f"num_classes: {model_data['num_classes']}")
print(f"num_features: {model_data['num_features']}")

device: cuda:0
num_classes: 3051
num_features: 2048


In [106]:
train(model_data, 15, f"{dataset_dir}/full/checkpoint_latest.pth", phases=['train'])
shutil.copy(f"{dataset_dir}/full/checkpoint_latest.pth", f"{dataset_dir}/full/checkpoint_{int(time.time())}.pth")

Epoch    1 /   15  | Train Loss: 0.0171 Acc: 0.9960  | Elapsed time: 0:10:36.996702
Epoch    2 /   15  | Train Loss: 0.0150 Acc: 0.9965  | Elapsed time: 0:21:21.279913
Epoch    3 /   15  | Train Loss: 0.0120 Acc: 0.9973  | Elapsed time: 0:32:23.928651
Epoch    4 /   15  | Train Loss: 0.0112 Acc: 0.9975  | Elapsed time: 0:43:16.842611
Epoch    5 /   15  | Train Loss: 0.0072 Acc: 0.9987  | Elapsed time: 0:54:10.058172
Epoch    6 /   15  | Train Loss: 0.0071 Acc: 0.9986  | Elapsed time: 1:05:04.588430
Epoch    7 /   15  | Train Loss: 0.0064 Acc: 0.9986  | Elapsed time: 1:16:00.932254
Epoch    8 /   15  | Train Loss: 0.0060 Acc: 0.9988  | Elapsed time: 1:26:53.555932
Epoch    9 /   15  | Train Loss: 0.0054 Acc: 0.9990  | Elapsed time: 1:37:44.153952
Epoch   10 /   15  | Train Loss: 0.0049 Acc: 0.9991  | Elapsed time: 1:48:35.034925
Epoch   11 /   15  | Train Loss: 0.0051 Acc: 0.9989  | Elapsed time: 1:59:25.057349
Epoch   12 /   15  | Train Loss: 0.0048 Acc: 0.9991  | Elapsed time: 2:10:13

'insect-dataset/moth/full/checkpoint_1738074381.pth'

In [159]:
test_top_k(model_data, f"{dataset_dir}/my-test", 4)

artena-dotata-2          : artena-dotata(1.000)  simplicia-spp(0.000)  herminiinae-genera-spp(0.000)  achaea-serva(0.000)  
artena-dotata            : artena-dotata(1.000)  bastilla-praetermissa(0.000)  simplicia-bimarginata(0.000)  artena-submira(0.000)  
artena-submira-2         : artena-submira(0.570)  poaphilini-genera-spp(0.175)  achaea-janata(0.126)  buzara-onelia(0.043)  
artena-submira           : artena-submira(0.332)  thyas-coronata(0.320)  artena-dotata(0.263)  episparis-tortuosalis(0.064)  
clanis-phalaris-2        : clanis-phalaris(0.963)  acosmeryx-pseudonaga(0.021)  marumba-irata(0.006)  clanidopsis-exusta(0.004)  
clanis-phalaris          : theretra-clotho(0.587)  clanis-bilineata(0.335)  hippotion-rosetta(0.016)  theretra-pallicosta(0.015)  
clanis-undulosa          : ambulyx-belli(0.478)  clanis-titan(0.356)  clanis-undulosa(0.078)  gangarides-vittipalpis(0.029)  
conogethes-sahyadriensis : conogethes-spp(0.982)  argina-astrea(0.012)  conogethes-punctiferalis(0.003)  

In [158]:
test_top_k(model_data, f"{dataset_dir}/my-test", 10, print_preds=False)

Top 10 accuracy: 17 / 22 -> 0.773
Top 1 accuracy: 13 / 22 -> 0.591


In [116]:
test(model_data, f"{dataset_dir}/data", print_failures=False)

Accuracy: 44296 / 44325 -> 99.93%
Elapsed time: 0:21:41.783535


# Problems Identified
#### Fixing class_to_idx for val dataset in prepare_for_retraining & init_model_for_training
causing to print accuracy as 0% in val phase during train

In [157]:
model_data = torch.load(f"{dataset_dir}/full/checkpoint_1738074381.pth", weights_only=False)
model_data = prepare_for_retraining(model_data, f'{dataset_dir}/data', f'{dataset_dir}/full/val', 32)
print(f"device: {model_data['device']}")
print(f"num_classes: {model_data['num_classes']}")
print(f"num_features: {model_data['num_features']}")
train(model_data, 1, f"{dataset_dir}/full/checkpoint_latest.pth", phases=['train', 'val'])

device: cuda:0
num_classes: 3051
num_features: 2048
Epoch    1 /    1  | Train Loss: 0.0049 Acc: 0.9990  | Val Loss: 0.0020 Acc: 0.9992  | Elapsed time: 0:11:42.693817


# Retest with new data

In [2]:
import mynnlib
from mynnlib import *
model_data = torch.load(f"{dataset_dir}/checkpoint.2025.01.28.resnet152.mothsofindia.raw.25x70%+10x100%.pth", weights_only=False)

In [3]:
test_top_k(model_data, f"{dataset_dir}/my-test", 3)
test_top_k(model_data, f"{dataset_dir}/my-test", 5, print_preds=False, print_top1_accuracy=False)
test_top_k(model_data, f"{dataset_dir}/my-test", 10, print_preds=False, print_top1_accuracy=False)

acidon-nigrobasis             : metanastria-spp(0.980)  pyralidae-genera-spp(0.009)  thosea-spp(0.004)  
acosmeryx-anceus              : acosmeryx-anceus(0.709)  macroglossum-spp(0.218)  macroglossum-nycteris(0.030)  
Adoxophyes-privatana          : adoxophyes-spp(0.933)  selepa-spp(0.029)  hyblaea-puera-complex(0.023)  
agnidra-vinacea               : agnidra-vinacea(0.999)  eupterote-spp(0.001)  oreta-sanguinea(0.000)  
alcanola-speideli             : alcanola-tympanistis(0.985)  nolinae-genera-spp(0.014)  nola-internella-analis-complex(0.000)  
alcanola-spp-2                : alcanola-tympanistis(0.985)  nolinae-genera-spp(0.014)  nola-internella-analis-complex(0.000)  
alcanola-spp                  : alcanola-speideli(0.513)  maliattha-spp(0.219)  nolinae-genera-spp(0.086)  
alcanola-tympanistis-2        : nolinae-genera-spp(0.480)  heterothera-spp(0.206)  alcanola-speideli(0.068)  
alcanola-tympanistis          : ptisciana-seminivea(0.898)  agrius-convolvuli(0.067)  alcanola-speid