In [1]:
# Standard library imports
import os
import json
import pandas as pd
import timm
import matplotlib.pyplot as plt
from fastai.vision.all import *
from fastai.callback.core import Callback
import torch

In [None]:
import timm

# List all available models in timm
# Check CVT model
available_models = timm.list_models()
print(available_models)

In [None]:
import os
import pandas as pd

# Define the path to your CSV file and training images
csv_path = os.path.join('COMP90086_2024_Project_train', 'train.csv')
train_dir = os.path.join('COMP90086_2024_Project_train', 'train')

# Read the CSV file into a DataFrame
train_data = pd.read_csv(csv_path)

# Add a column with the complete image path
train_data['image_path'] = train_data['id'].apply(lambda x: os.path.join(train_dir, f"{x}.jpg"))

# Ensure that the 'stable_height' column is treated as a string for classification
train_data['stable_height'] = train_data['stable_height'].astype(str)

In [None]:
"""
goal: to have 65+ accuracy and good visiualisations for report
maybe try 2 techniques /shrug



VisionTransformerTrainer / model
    model name
    self.lr = None
    self.metrics = {}
    self.dls = dls
    
dataload - todo Jule
    split into training and validation
    data loader build
        inceptionv4 uses 299 299

testcase
    - VisionTransformerTrainer /model
    - dataloader



        
 https://docs.fast.ai/tutorial.vision.html   

need
    data
        model name
        model class
        Learning rate
        metrics
            Error rate
            epoc
            training loss
            validation loss
            accuracy
    functions
        auto learning rate Jack
        save results in dataframe for easy loading Jack
        post trained model weight Jule
            save model every 5 epoc
        auto test with different techniques
            data augmentation
            active learning????
            Progressive Training (cross that bridge when we get to it)
                number of bricks can i train them on both?
                stability can i train them on both?
                stabel height
            
        load visiualisaion from json data

"""

In [None]:
# Dataloader Objects

# Dataloader for vision transformers
# Resizes to 224
transformer_db = DataBlock(
    blocks=(ImageBlock, CategoryBlock),  # Define the input and output types
    get_x=ColReader('image_path'),       # Read the image paths
    get_y=ColReader('stable_height'),    # Read the target labels
    splitter=RandomSplitter(valid_pct=0.2),  # Split into training and validation sets
    item_tfms=Resize(224),               # Resize images to 224x224 (required by ViT)
    batch_tfms=aug_transforms()          # Apply data augmentation
)

transformer_dl = transformer_db.dataloaders(train_data, bs=8, num_workers=0)

In [None]:
## Class for utilizing the Callback function from fastai
# Define the callback within the function
class SaveEvery5EpochsCallback(Callback):
    def __init__(self, model_name):
        self.model_name = model_name
    
    def after_epoch(self):
        # Check if the current epoch is a multiple of 5
        if (self.epoch + 1) % 5 == 0:
            self.learn.save(f"{self.model_name}_epoch_{self.epoch + 1}")
            print(f"Model saved at epoch {self.epoch + 1} as {self.model_name}_epoch_{self.epoch + 1}")


In [2]:
import json
from typing import Tuple

class VisionTransformerTrainer:
    def __init__(self, model_name='vit_base_patch16_224', dls=None):
        self.model_name = model_name
        self.learn = None
        self.lr = None
        self.metrics = {}
        self.dls = dls

        if self.dls is None:
            raise Exception("No dataloader provided. Please provide a dataloader when initializing the class.")
        else:
            print("Using provided dataloader.")

    def initialize_model(self, pretrained=True):
        self.model = timm.create_model(self.model_name, pretrained=pretrained, num_classes=self.dls.c)
        self.learn = vision_learner(self.dls, self.model_name, metrics=accuracy, pretrained=pretrained,cbs=CSVLogger())
        
        # Check if CUDA is available
        if torch.cuda.is_available():
            self.learn.model = self.learn.model.cuda()
            print("CUDA is available. Using GPU for training.")
        else:
            print("CUDA is not available. Using CPU for training.")

    def set_learning_rate(self):
        """
        Sets the learning rate manually.
        
        Args:
            learning_rate (float): The learning rate to use for training.
        """
        def find_optimal_lr(learner) -> float:
            lr_learn: Tuple[float, float] = learner.lr_find(suggest_funcs=(minimum, steep))
            lr_min, lr_steep = lr_learn
            optimal: float = lr_steep / 5
            return optimal
        

        self.lr = find_optimal_lr(self.learn)
        print(f"Learning rate set to: {self.lr}")

    def fine_tune(self, epochs=5):
        """
        
        """
        if self.learn is None:
            raise Exception("Model has not been initialized. Please run initialize_model() first.")
        if self.lr is None:
            raise Exception("Learning rate not set. Please set it using set_learning_rate() first.")
        
        #This saves every 5 epochs automatically during fine-tuning
        self.learn.add_cb(SaveEvery5EpochsCallback(model_name=self.model_name.replace('/', '_')))
        
        self.learn.fine_tune(epochs, base_lr=self.lr)
        
        torch.cuda.empty_cache()

    def load_model(self, model_name=None):
        if self.learn is None:
            raise Exception("Model has not been initialized. Please run initialize_model() first.")
        
        if model_name is None:
            model_name = self.model_name.replace('/', '_')
        self.learn.load(model_name)
        print(f"Model loaded from {model_name}")

    def predict(self, image_path):
        if self.learn is None:
            raise Exception("Model has not been initialized. Please run initialize_model() first.")
        
        img = PILImage.create(image_path)
        pred, pred_idx, probs = self.learn.predict(img)
        return {'id': os.path.basename(image_path).split('.')[0], 'predicted_stable_height': pred, 'probability': probs[pred_idx].item()}

    def plot_metrics(self):
        pass

    def get_final_accuracy(self):
        """
        I theoretically could store it in a callback after each epoch, but would take 20min + to code
        """

        return self.learn.recorder.metrics[0].value.item()
    
    def get_training_metrics(self) -> pd.DataFrame:
        """
        Get training metrics from CSV log and save to self.
        
        Returns:
            pd.DataFrame: DataFrame containing training metrics
        """
        df: pd.DataFrame = self.learn.csv_logger.read_log()
        self.training_metrics: pd.DataFrame = df
        self.final_accuracy = final_accuracy = df['accuracy'].iloc[-1]
        self.final_epoch = final_epoch = df['epoch'].iloc[-1]
        return df
        
    def save_metrics(self):
        df = self.get_training_metrics()
        df['model_name'] = self.model_name
        final_accuracy = self.final_accuracy
        final_epoch = self.final_epoch
        total_time = pd.to_timedelta(df['time'].apply(lambda x: f'00:{x}')).sum()
        
        file_name = f"acc{final_accuracy:.3f}_epo{final_epoch:02d}_tim{total_time.total_seconds():.0f}s_{self.model_name}.csv"
        file_path = os.path.join('metrics', file_name)
        
        os.makedirs('metrics', exist_ok=True)
        df.to_csv(file_path, index=False)
        
        print(f"Metrics saved to {file_path}")
        


In [None]:
"""
potential models to test
'efficientnet_b0'
mobilenetv4_conv_medium
mobilenetv4_hybrid_medium
segementation

ese_vovnet19b_slim


"""

In [3]:
# List of Vision Transformer models to test from timm
model_names = [  
    'vit_base_patch16_224',     # Vanilla ViT model  
    'beit_base_patch16_224',    # BEiT model
    'cvt-21-224x224',           # CvT model
    'deit_base_patch16_224'     # DeiT model
]



# Paths to your dataset
csv_path = 'COMP90086_2024_Project_train/train.csv'
train_dir = 'COMP90086_2024_Project_train/train'

# Define the learning rate to be used for all models
learning_rate = 3e-4  # You can adjust this value as needed

# Loop through each Vision Transformer model and train
for model_name in model_names:
    print(f"\nTraining with model: {model_name}")
    
    try:
        # Initialize the Vision Transformer Trainer for the current model
        trainer = VisionTransformerTrainer(csv_path, train_dir, model_name=model_name, image_size=224, batch_size=8)
        
        # Initialize the model
        trainer.initialize_model(pretrained=True)
        
        # Set the learning rate manually
        trainer.set_learning_rate(learning_rate=learning_rate)
        
        # Fine-tune the model -  Automatically saves after 5 epochs
        trainer.fine_tune(epochs=5)  # Adjust the number of epochs as needed
        
        # Save the training metrics to a JSON file
        trainer.save_metrics(file_path=f'{model_name}_training_metrics.json')
        
        # Plot the training metrics
        trainer.plot_metrics()
        
    except Exception as e:
        print(f"An error occurred while training model {model_name}: {e}")



Training with model: vit_base_patch16_224
CUDA is available. Using GPU for training.
Learning rate set to: 0.0003


epoch,train_loss,valid_loss,accuracy,time
0,2.539186,1.94039,0.239583,03:53


  x = F.scaled_dot_product_attention(


epoch,train_loss,valid_loss,accuracy,time
0,2.216609,1.711342,0.269531,05:31
1,2.043823,1.628476,0.302734,04:53
2,1.913306,1.516157,0.355469,03:45
3,1.854125,1.474757,0.361979,03:54
4,1.74471,1.463529,0.36849,03:58


Training complete.
Model saved as vit_base_patch16_224
An error occurred while training model vit_base_patch16_224: 'float' object is not iterable

Training with model: beit_base_patch16_224
CUDA is available. Using GPU for training.
Learning rate set to: 0.0003


epoch,train_loss,valid_loss,accuracy,time
0,2.433183,1.788025,0.260417,03:16


epoch,train_loss,valid_loss,accuracy,time
0,2.213568,1.694043,0.295573,05:30
1,1.998419,1.575691,0.327474,04:39
2,1.937005,1.487411,0.342448,06:02
3,1.810161,1.455377,0.353516,05:56
4,1.757939,1.450001,0.354818,05:20


Training complete.
Model saved as beit_base_patch16_224
An error occurred while training model beit_base_patch16_224: 'float' object is not iterable

Training with model: cvt-21-224x224
An error occurred while training model cvt-21-224x224: Unknown model (cvt-21-224x224)

Training with model: deit_base_patch16_224
CUDA is available. Using GPU for training.
Learning rate set to: 0.0003


epoch,train_loss,valid_loss,accuracy,time
0,2.519091,1.854182,0.245443,03:00


epoch,train_loss,valid_loss,accuracy,time
0,2.385565,1.725879,0.27474,17:01
1,2.079112,1.706737,0.279297,13:00
2,1.908475,1.673879,0.289062,05:35
3,1.932471,1.615718,0.28776,05:34
4,1.896061,1.620193,0.289062,05:41


Training complete.
Model saved as deit_base_patch16_224
An error occurred while training model deit_base_patch16_224: 'float' object is not iterable
