# Inport needed package

- import os, sys # to add the parent directory to the path

In [1]:
import os
import sys
import time

- Using torchvision to create a dataset

In [2]:
# Using torchvision to create a dataset
import cv2
from torchvision import transforms
import torch
from torch.utils.data import random_split, DataLoader
import torchvision

import pandas as pd

- import self library

In [3]:
from train.trainer import ClassifierTrainer as Trainer
import dataset as ds  # type: ignore
import model as md  # type: ignore

# Define classification train process

1. Define place where the model is saved

In [4]:
time_str = time.strftime("%Y%m%d_%H%M%S")


2. Define train function

In [5]:
def doTheTrain(dataset, model):
  # define batch_size
  batch_size = 64

  # init train val test ds
  train_val_size = int(0.8 * len(dataset))
  test_size = len(dataset) - train_val_size
  train_ds, test_ds = random_split(dataset, [train_val_size, test_size])

  # define optimizer using Adam and loss function
  optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
  loss_fn = torch.nn.CrossEntropyLoss()

  trainer = Trainer(model, optimizer, loss_fn, random_seed_value=86)
  print('device: ', trainer.device)
  avg_loss, metric = trainer.cross_validate(train_ds, k=5, epochs=10, batch_size=batch_size)
  print('avg_loss: ', avg_loss)

  # score model
  test_dataloader = DataLoader(test_ds, batch_size=batch_size, shuffle=True)
  model_scored = trainer.score(test_dataloader)
  print(f'model_scored: {model_scored:.4f}, avg_accuracy: {100*(1 - model_scored):.4f}')

  # return model scored, train_avg_lost
  return model_scored, avg_loss

3. execute progress

- define the model

In [6]:
models = [
    md.embedded_resnet50(103),
    md.embedded_densenet121(103),
    md.embedded_vgg16(103)
]



- Define tested datasets

In [7]:
datasets = {
    'gi4e_full': ds.Gi4eDataset(
        './datasets/gi4e',
        transform=transforms.Compose([transforms.ToPILImage(), transforms.Resize((224, 224)), transforms.ToTensor()]),
        is_classification=True),
    'gi4e_raw_eyes': ds.ImageDataset(
        './datasets/gi4e_raw_eyes',
        transform=transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()]),
        file_extension='png'),
    'gi4e_detected_eyes': ds.ImageDataset(
        './datasets/gi4e_eyes/20250521_200316',
        transform=transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()]),
        file_extension='png'),
}

- Train all defined model on each registered datasets

In [8]:
result_df = pd.DataFrame(columns=['dataset', 'model', 'avg_loss', 'avg_accuracy', 'total_time'])

for name, dataset in datasets.items():
	for model in models:
		print(f'Running {name} dataset with {model.__class__.__name__}')
		# do the train
		start_time = time.time()
		scored, loss = doTheTrain(dataset, model)
		end_time = time.time()
		total_time = end_time - start_time
		print(f'Finished {name} dataset with {model.__class__.__name__}')
		print('----------------------')

		# save the result
		result_df = pd.concat([result_df, pd.DataFrame({
			'model': [model.__class__.__name__],
			'dataset': [name],
			'avg_loss': [loss],
			'avg_accuracy': [scored],
			'total_time': [total_time]
		})], ignore_index=True)

print('Finished all datasets')

# print the result
print(result_df)



Running gi4e_full dataset with Classifier
device:  cuda
Fold 1/5:


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


4. print the result

In [None]:
# swap the first two columns
result_df = result_df[['dataset', 'model', 'avg_loss', 'avg_accuracy', 'total_time']]
# scale the avg_accuracy to 0-100
result_df['avg_accuracy'] = 100 * (1 - result_df['avg_accuracy'])
# display the total time in the format HH:MM:SS
result_df['total_time'] = pd.to_timedelta(result_df['total_time'], unit='s')

# save the result to csv
result_df.to_csv(f'results_{time_str}.csv', index=False)
# print the result
print(result_df)

              dataset     model  avg_loss  avg_accuracy  \
0           gi4e_full    ResNet  0.000011     99.981028   
1           gi4e_full  DenseNet  0.000229     99.993595   
2           gi4e_full       VGG  0.059837     99.096932   
3       gi4e_raw_eyes    ResNet  0.004592     94.986418   
4       gi4e_raw_eyes  DenseNet  0.005347     95.743190   
5       gi4e_raw_eyes       VGG  0.119210     78.677075   
6  gi4e_detected_eyes    ResNet  0.122154     94.245342   
7  gi4e_detected_eyes  DenseNet  0.003370     97.697311   
8  gi4e_detected_eyes       VGG  0.031279     93.412539   

                 total_time  
0 0 days 00:26:04.910116911  
1 0 days 00:15:41.770978928  
2 0 days 00:19:16.193440437  
3 0 days 00:12:16.112033606  
4 0 days 00:10:20.350670099  
5 0 days 00:15:50.633701324  
6 0 days 00:09:34.279892683  
7 0 days 00:10:33.354367256  
8 0 days 00:16:07.067535877  


In [None]:
print(result_df)

              dataset     model  avg_loss  avg_accuracy  \
0           gi4e_full    ResNet  0.000011     99.981028   
1           gi4e_full  DenseNet  0.000229     99.993595   
2           gi4e_full       VGG  0.059837     99.096932   
3       gi4e_raw_eyes    ResNet  0.004592     94.986418   
4       gi4e_raw_eyes  DenseNet  0.005347     95.743190   
5       gi4e_raw_eyes       VGG  0.119210     78.677075   
6  gi4e_detected_eyes    ResNet  0.122154     94.245342   
7  gi4e_detected_eyes  DenseNet  0.003370     97.697311   
8  gi4e_detected_eyes       VGG  0.031279     93.412539   

                 total_time  
0 0 days 00:26:04.910116911  
1 0 days 00:15:41.770978928  
2 0 days 00:19:16.193440437  
3 0 days 00:12:16.112033606  
4 0 days 00:10:20.350670099  
5 0 days 00:15:50.633701324  
6 0 days 00:09:34.279892683  
7 0 days 00:10:33.354367256  
8 0 days 00:16:07.067535877  


# Embedded Classification

- Define the model

In [None]:
embedded_models = [
    md.embedded_resnet50(103),
    md.embedded_densenet121(103),
    md.embedded_vgg16(103)
]



- Train all defined model on each registered datasets

In [None]:
result_df = pd.DataFrame(columns=['model', 'avg_loss', 'avg_accuracy', 'total_time'])

for name, dataset in datasets.items():
	for model in embedded_models:
		print(f'Running {name} dataset with {model.__class__.__name__}')
		# do the train
		start_time = time.time()
		scored, loss = doTheTrain(dataset, model)
		end_time = time.time()
		total_time = end_time - start_time
		print(f'Finished {name} dataset with {model.__class__.__name__}')
		print('----------------------')

		# save the result
		result_df = pd.concat([result_df, pd.DataFrame({
			'model': [model.__class__.__name__],
			'dataset': [name],
			'avg_loss': [loss],
			'avg_accuracy': [scored],
			'total_time': [total_time]
		})], ignore_index=True)


Running gi4e_full dataset with Classifier


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


- Print the result

In [None]:
# swap the first two columns
result_df = result_df[['dataset', 'model', 'avg_loss', 'avg_accuracy', 'total_time']]
# scale the avg_accuracy to 0-100
result_df['avg_accuracy'] = 100 * (1 - result_df['avg_accuracy'])
# display the total time in the format HH:MM:SS
result_df['total_time'] = pd.to_timedelta(result_df['total_time'], unit='s')

# save the result to csv
result_df.to_csv(f'results_{time_str}.csv', index=False)
# print the result
print(result_df)