# Inport needed package

- import os, sys # to add the parent directory to the path

In [1]:
import os
import sys
import time

- Using torchvision to create a dataset

In [2]:
# Using torchvision to create a dataset
import cv2
from torchvision import transforms
import torch
from torch.utils.data import random_split, DataLoader
import torchvision

import pandas as pd

- import self library

In [3]:
from train.trainer import ClassifierTrainer as Trainer
import dataset as ds  # type: ignore
import models as md  # type: ignore

# Define classification train process

1. Define place where the model is saved

In [4]:
time_str = time.strftime("%Y%m%d_%H%M%S")


2. Define train function

In [5]:
def doTheTrain(dataset, model):
  # define batch_size
  batch_size = 64

  # init train val test ds
  train_val_size = int(0.8 * len(dataset))
  test_size = len(dataset) - train_val_size
  train_ds, test_ds = random_split(dataset, [train_val_size, test_size])

  # define optimizer using Adam and loss function
  optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
  loss_fn = torch.nn.CrossEntropyLoss()

  trainer = Trainer(model, optimizer, loss_fn, random_seed_value=86)
  print('device: ', trainer.device)
  avg_loss, metric = trainer.cross_validate(train_ds, k=10, epochs=10, batch_size=batch_size)
  print('avg_loss: ', avg_loss)

  # score model
  test_dataloader = DataLoader(test_ds, batch_size=batch_size, shuffle=True)
  model_scored = trainer.score(test_dataloader)
  print(f'model_scored: {model_scored:.4f}, avg_accuracy: {100*(1 - model_scored):.4f}')

  # return model scored, train_avg_lost
  return model_scored, avg_loss

3. execute progress

- define the model

In [6]:
models = [
    torchvision.models.resnet50(weights=torchvision.models.ResNet50_Weights.DEFAULT),
    torchvision.models.densenet121(weights=torchvision.models.DenseNet121_Weights.DEFAULT),
    torchvision.models.vgg16(weights=torchvision.models.VGG16_Weights.DEFAULT)
]

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to C:\Users\nnvuf/.cache\torch\hub\checkpoints\vgg16-397923af.pth
100%|██████████| 528M/528M [00:55<00:00, 9.92MB/s] 


- Define tested datasets

In [7]:
datasets = {
    'gi4e_full': ds.Gi4eDataset(
        './datasets/gi4e',
        transform=transforms.Compose([transforms.ToPILImage(), transforms.Resize((224, 224)), transforms.ToTensor()]),
        is_classification=True),
    'gi4e_raw_eyes': ds.ImageDataset(
        './datasets/gi4e_raw_eyes',
        transform=transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()]),
        file_extension='png'),
    'gi4e_detected_eyes': ds.ImageDataset(
        './datasets/gi4e_eyes/20250521_200316',
        transform=transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()]),
        file_extension='png'),
}

- Train all defined model on each registered datasets

In [None]:
result_df = pd.DataFrame(columns=['dataset', 'model', 'avg_loss', 'avg_accuracy', 'total_time'])

for name, dataset in datasets.items():
	for model in models:
		print(f'Running {name} dataset with {model.__class__.__name__}')
		# do the train
		start_time = time.time()
		scored, loss = doTheTrain(dataset, model)
		end_time = time.time()
		total_time = end_time - start_time
		print(f'Finished {name} dataset with {model.__class__.__name__}')
		print('----------------------')

		# save the result
		result_df = pd.concat([result_df, pd.DataFrame({
			'model': [model.__class__.__name__],
			'dataset': [name],
			'avg_loss': [loss],
			'avg_accuracy': [scored],
			'total_time': [total_time]
		})], ignore_index=True)

print('Finished all datasets')

# print the result
print(result_df)



Running gi4e_full dataset with ResNet
device:  cuda
Fold 1/10:


4. print the result

In [None]:
# swap the first two columns
result_df = result_df[['dataset', 'model', 'avg_loss', 'avg_accuracy', 'total_time']]
# scale the avg_accuracy to 0-100
result_df['avg_accuracy'] = 100 * (1 - result_df['avg_accuracy'])
# display the total time in the format HH:MM:SS
result_df['total_time'] = pd.to_timedelta(result_df['total_time'], unit='s')

# save the result to csv
result_df.to_csv(f'results_{time_str}.csv', index=False)
# print the result
print(result_df)

KeyError: "['total_time'] not in index"

In [None]:
print(result_df)

              dataset     model  avg_loss  avg_accuracy
0           gi4e_full    ResNet  0.000008     99.999914
1           gi4e_full  DenseNet  0.000028     99.990581
2           gi4e_full       VGG  0.017698     99.825569
3       gi4e_raw_eyes    ResNet  0.042010     99.709066
4       gi4e_raw_eyes  DenseNet  0.005678     99.373679
5       gi4e_raw_eyes       VGG  0.147762     86.098641
6  gi4e_detected_eyes    ResNet  0.018778     99.837420
7  gi4e_detected_eyes  DenseNet  0.013462     99.948888
8  gi4e_detected_eyes       VGG  0.017937     79.856581


# Embedded Classification

- Define the model

In [None]:
embedded_models = [
    md.embedded_resnet50(),
    md.embedded_densenet121(),
    md.embedded_vgg16()
]

- Train all defined model on each registered datasets

In [None]:
result_df = pd.DataFrame(columns=['model', 'avg_loss', 'avg_accuracy', 'total_time'])

for model in embedded_models:
    print(f'Running embedded model {model.__class__.__name__}')
    # do the train
    start_time = time.time()
    scored, loss = doTheTrain(datasets['gi4e_full'], model)
    end_time = time.time()
    total_time = end_time - start_time
    print(f'Finished embedded model {model.__class__.__name__}')
    print('----------------------')

    # save the result
    result_df = pd.concat([result_df, pd.DataFrame({
        'model': [model.__class__.__name__],
        'avg_loss': [loss],
        'avg_accuracy': [scored],
        'total_time': [total_time]
    })], ignore_index=True)
    
print('Finished all embedded models')

- Print the result

In [None]:
# swap the first two columns
result_df = result_df[['dataset', 'model', 'avg_loss', 'avg_accuracy', 'total_time']]
# scale the avg_accuracy to 0-100
result_df['avg_accuracy'] = 100 * (1 - result_df['avg_accuracy'])
# display the total time in the format HH:MM:SS
result_df['total_time'] = pd.to_timedelta(result_df['total_time'], unit='s')

# save the result to csv
result_df.to_csv(f'results_{time_str}.csv', index=False)
# print the result
print(result_df)