<a href="https://colab.research.google.com/github/justpr09rammer/BankingProject/blob/main/koduum_(1).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torchmetrics

In [None]:
import os

# Create the .kaggle directory if it doesn't exist
!mkdir -p ~/.kaggle

# Move kaggle.json to the .kaggle directory
!mv kaggle.json ~/.kaggle/

# Set permissions for the kaggle.json file
!chmod 600 ~/.kaggle/kaggle.json

print("Kaggle API credentials set up successfully!")

In [None]:
import kagglehub

# Download the latest version of the dataset through kagglehub. The value
# returned by dataset_download is kept in `path` and printed below as the
# location of the dataset files.
path = kagglehub.dataset_download("engeddy/astrophysical-objects-image-dataset")

print("Path to dataset files:", path)

Once the custom dataset is defined (in the next cell), we need a `DataLoader` to handle batching, shuffling, and parallel data loading with worker processes. This makes the data loading process much more efficient, especially for large datasets.

In [None]:
import torch
import torch.nn
from PIL import Image
from torchvision import transforms
import os
import numpy as np
from torch.utils.data import Dataset



class custom_dataset(Dataset):
    """Image-folder dataset yielding ``(image, one_hot_label)`` pairs.

    The split directory ``<root>/<mode>`` must contain one sub-directory per
    class. Class ids follow the alphabetical order of the class directory
    names, and every regular file inside a class directory is treated as an
    image of that class.

    Args:
        mode: Name of the split sub-directory under ``root``.
        root: Directory that contains the split sub-directories.
        transforms: Optional callable applied to the opened PIL image.

    Attributes set by ``__init__``: ``mode``, ``root``, ``transforms``,
    ``folder``, ``class_list``, ``image_list`` and ``label_list``.
    """

    def __init__(self, mode = "train", root = "datasets/demo_dataset", transforms = None):
        super().__init__()
        self.mode = mode
        self.root = root
        self.transforms = transforms

        # select split
        self.folder = os.path.join(self.root, self.mode)

        # Only directories are classes. A stray file in the split folder
        # (README, .DS_Store, ...) would otherwise be counted as a class and
        # make the per-class os.listdir below fail.
        self.class_list = sorted(
            entry for entry in os.listdir(self.folder)
            if os.path.isdir(os.path.join(self.folder, entry))
        )

        # parallel lists: image path and its one-hot label
        self.image_list = []
        self.label_list = []

        class_count = len(self.class_list)
        for class_id, class_name in enumerate(self.class_list):
            class_dir = os.path.join(self.folder, class_name)
            # os.listdir returns entries in arbitrary order; sorting makes
            # sample indices reproducible across runs and machines.
            for image in sorted(os.listdir(class_dir)):
                image_path = os.path.join(class_dir, image)
                if not os.path.isfile(image_path):
                    # nested directories cannot be opened as images
                    continue
                self.image_list.append(image_path)
                label = np.zeros(class_count)
                label[class_id] = 1.0
                self.label_list.append(label)

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``.

        ``image`` is the opened PIL image, passed through ``self.transforms``
        when one was given; ``label`` is the one-hot vector as a tensor.
        """
        image = Image.open(self.image_list[index])
        if self.transforms:
            image = self.transforms(image)

        label = torch.tensor(self.label_list[index])

        return image, label

    def __len__(self):
        """Return the number of image files found for the split."""
        return len(self.image_list)

In [None]:
import torchvision.transforms as transforms
import os # Import os to use os.listdir or os.path.join effectively

def _to_rgb(img):
    """Return ``img`` converted to 3-channel RGB mode."""
    return img.convert('RGB')


# Image transformations for the TRAINING set (with data augmentation).
# A named function is used for the RGB conversion instead of a lambda so the
# pipeline stays picklable (needed if the DataLoader is later run with worker
# processes under the spawn start method).
# No grayscale-to-3-channel step is needed after ToTensor: the RGB conversion
# at the start already guarantees three channels.
train_transform = transforms.Compose([
    transforms.Lambda(_to_rgb),        # Ensure 3 channels
    transforms.RandomResizedCrop(224), # Randomly crop and resize to 224x224
    transforms.RandomHorizontalFlip(), # Randomly flip the image horizontally
    transforms.RandomRotation(15),     # Randomly rotate the image by up to 15 degrees
    transforms.ToTensor(),             # Convert PIL Image to PyTorch Tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # Normalize image pixel values
])

# The downloaded data is nested twice: the 'test', 'training' and 'validation'
# split folders live under <path>/astro_dataset_maxia/astro_dataset_maxia,
# so the dataset root has to point at that innermost directory.
actual_data_root = os.path.join(path, 'astro_dataset_maxia', 'astro_dataset_maxia')

# Instantiate the custom dataset for the training split; the split folder is
# named 'training' (not 'train').
train_dataset = custom_dataset(mode="training", root=actual_data_root, transforms=train_transform)

# Print the number of samples in the training dataset
print(f"Number of samples in training dataset: {len(train_dataset)}")

# Fetch one sample as a sanity check of the (image, label) pair
image, label = train_dataset[0]

print(f"\nShape of the first image: {image.shape}")
print(f"Label of the first image (one-hot encoded): {label}")

In [None]:
from torch.utils.data import DataLoader

# Number of samples per training batch
batch_size = 32

# Shuffled loader over the training dataset. Loading happens in the main
# process (num_workers=0), which was chosen for debugging.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    num_workers=0,
    shuffle=True,
)

print(f"\nDataLoader created with batch size: {batch_size}")

# Pull a single batch to show the tensor shapes, then stop iterating
for images, labels in train_loader:
    print(f"Shape of images in one batch: {images.shape}")
    print(f"Shape of labels in one batch: {labels.shape}")
    break

In [None]:
# Get the number of classes from the instantiated dataset
num_classes = len(train_dataset.class_list)
print(f"Number of classes in the dataset: {num_classes}")

Here is the definition for the `ExModel` using a ResNet18 backbone, which we previously updated to accept `num_classes`.

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models


class ExModel(nn.Module):
    """ResNet18 feature extractor followed by a linear classification head.

    Args:
        num_classes: Number of output logits produced by the classifier.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Load the pretrained network and keep every child module except the
        # last one, so the backbone ends before its original output layer.
        backbone = models.resnet18(pretrained=True)
        kept_layers = list(backbone.children())[:-1]
        self.resnet18 = torch.nn.Sequential(*kept_layers)

        self.classifier = torch.nn.Linear(512, num_classes)

    def forward(self, image):
        """Return class logits for a batch of images."""
        features = self.resnet18(image)
        # drop the two trailing singleton spatial dimensions
        features = features.squeeze(-1).squeeze(-1)
        return self.classifier(features)

print("ExModel (ResNet18 backbone) class defined.")

Next, let's define the `VGG16Model` class. Similar to `ExModel`, it will use a pre-trained VGG16 backbone and replace its final classification layer.

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models

class VGG16Model(nn.Module):
    """Pretrained VGG16 with a frozen feature extractor and a new output layer.

    Args:
        num_classes: Number of output logits of the replaced final layer.
    """

    def __init__(self, num_classes):
        super().__init__()

        self.vgg16 = models.vgg16(pretrained=True)

        # Freeze the convolutional feature extractor so that only the
        # classifier layers receive gradient updates.
        for weight in self.vgg16.features.parameters():
            weight.requires_grad_(False)

        # The last classifier layer sits at index 6; swap it for a layer with
        # the same input width and `num_classes` outputs.
        old_head = self.vgg16.classifier[6]
        self.vgg16.classifier[6] = nn.Linear(old_head.in_features, num_classes)

    def forward(self, x):
        """Return class logits; the wrapped VGG16 runs features and classifier."""
        logits = self.vgg16(x)
        return logits

print("VGG16Model class defined.")

# Example of how to instantiate VGG16Model (you can uncomment and run this separately if needed):
# vgg_model = VGG16Model(num_classes=num_classes)
# print("VGG16 Model instantiated:")
# print(vgg_model)

Now, I'll update the `ExModel` class to use the correct `num_classes` in its classifier layer.

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models # Changed model to models for clarity and consistency


class ExModel(nn.Module):
    """ResNet18 backbone with a linear head sized for ``num_classes``.

    The backbone is the pretrained torchvision ResNet18 without its last
    child module. The head is a ``Linear(512, num_classes)`` layer applied to
    the backbone output after its two trailing dimensions are squeezed away.

    Args:
        num_classes: Number of output logits produced by the classifier.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Pretrained network; everything except its last child becomes the
        # feature extractor.
        pretrained_net = models.resnet18(pretrained=True)
        self.resnet18 = torch.nn.Sequential(
            *list(pretrained_net.children())[:-1]
        )

        # 512 is the width of the feature vector fed to the head.
        self.classifier = torch.nn.Linear(512, num_classes)

    def forward(self, image):
        """Return class logits for a batch of images."""
        pooled = self.resnet18(image)
        # squeeze both trailing (1, 1) dimensions before the linear head
        flat = pooled.squeeze(-1).squeeze(-1)
        out = self.classifier(flat)
        return out

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the classifier and place it on the selected device
# (Module.to returns the module itself, so `model` is the same object).
model = ExModel(num_classes=num_classes).to(device)

print(f"Model instantiated and moved to {device}.")
print(model)

In [None]:
import torch
import torch.nn as nn
from torch.nn.functional import softmax
from torch.nn.functional import cross_entropy
from torchmetrics import F1Score
from torchvision import transforms

from torch.utils.data import DataLoader, Dataset
from torch.optim import SGD, Adam
from torch.utils.tensorboard import SummaryWriter
import tqdm

import os

save_model_path = "checkpoints/"
pth_name = "saved_model.pth"


def val(model, data_val, loss_function, writer, epoch, device):
    """Evaluate ``model`` on ``data_val`` and log loss and macro F1.

    Relies on the notebook-level ``num_classes`` for the F1 metric.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        data_val: Iterable of ``(image, one_hot_label)`` batches that
            supports ``len()``.
        loss_function: Loss callable. An ``nn.CrossEntropyLoss`` receives
            class indices; any other loss receives the one-hot labels as
            floats.
        writer: TensorBoard writer receiving "Validation F1" and
            "Validation Loss".
        epoch: Step value used for the logged scalars.
        device: Device the batches and the metric are moved to.

    Returns:
        None.
    """
    num_batches = len(data_val)
    if num_batches == 0:
        # Nothing to evaluate; the mean loss below would divide by zero.
        print("Validation skipped: the validation loader produced no batches.")
        return None

    # average='macro' is explicit: the F1Score wrapper defaults to micro
    # averaging, which for single-label multiclass data equals accuracy.
    f1 = F1Score(num_classes=num_classes, task='multiclass', average='macro').to(device)
    predicted_ids = []
    target_ids = []
    total_loss = 0

    model.eval()  # switch model to evaluation mode
    # The context managers guarantee the progress bar is closed and gradient
    # tracking is restored even if a batch raises.
    with torch.no_grad(), tqdm.tqdm(total=num_batches) as tq:
        tq.set_description('Validation:')

        for image, label in data_val:
            image = image.to(device)
            label = label.to(device)

            # one-hot -> class index; also the target used for the F1 score
            target = torch.argmax(label, dim=1)
            if isinstance(loss_function, nn.CrossEntropyLoss):
                labels_for_loss = target.long()
            else:
                labels_for_loss = label.float()

            pred = model(image)
            loss = loss_function(pred, labels_for_loss)

            # softmax is monotonic, so argmax over the raw logits selects the
            # same class as argmax over the probabilities
            predicted_ids.extend(torch.argmax(pred, dim=1).tolist())
            target_ids.extend(target.tolist())

            total_loss += loss.item()
            tq.update(1)

    f1score = f1(torch.tensor(predicted_ids).to(device), torch.tensor(target_ids).to(device))
    writer.add_scalar("Validation F1", f1score, epoch)
    writer.add_scalar("Validation Loss", total_loss/num_batches, epoch)

    print("F1 score: ", f1score)

    return None

Here is the `train` function, recreated in a new cell for clarity, including all necessary imports to be self-contained.

In [None]:
from torch.utils.tensorboard import SummaryWriter
import torch
import torch.nn as nn
from torch.nn.functional import softmax
from torch.nn.functional import cross_entropy
from torchmetrics import F1Score
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from torch.optim import SGD, Adam
import tqdm
import os

save_model_path = "checkpoints/"
pth_name = "saved_model.pth"


def train(model, train_loader, val_loader, optimizer, loss_fn, n_epochs, device):
    """Train ``model`` for ``n_epochs``, validating and checkpointing each epoch.

    After every epoch the mean training loss is logged to TensorBoard,
    ``val`` is run on ``val_loader``, and a checkpoint (epoch number, model
    and optimizer state dicts) overwrites ``save_model_path + pth_name``.

    Args:
        model: Network to optimise; moved to ``device``.
        train_loader: Iterable of ``(image, one_hot_label)`` batches that
            supports ``len()``.
        val_loader: Validation batches forwarded to ``val``.
        optimizer: Optimizer built over the model parameters.
        loss_fn: Loss callable. An ``nn.CrossEntropyLoss`` receives class
            indices; any other loss receives the one-hot labels as floats.
        n_epochs: Number of passes over ``train_loader``.
        device: Device the model and batches are moved to.
    """
    writer = SummaryWriter()

    # Loop-invariant: create the checkpoint directory once, before any
    # training time is spent.
    os.makedirs(save_model_path, exist_ok=True)
    checkpoint_file = os.path.join(save_model_path, pth_name)

    model.to(device)
    try:
        for epoch in range(n_epochs):

            # val() leaves the model in eval mode, so re-enable training
            # mode at the start of every epoch
            model.train()
            running_loss = 0.0

            with tqdm.tqdm(total=len(train_loader)) as tq:
                tq.set_description('epoch %d' % (epoch))

                for images, labels in train_loader:
                    images = images.to(device)
                    labels = labels.to(device)

                    if isinstance(loss_fn, nn.CrossEntropyLoss):
                        labels_for_loss = torch.argmax(labels, dim=1).long()
                    else:
                        labels_for_loss = labels.float()

                    optimizer.zero_grad()
                    outputs = model(images)
                    loss = loss_fn(outputs, labels_for_loss)
                    loss.backward()
                    optimizer.step()

                    running_loss += loss.item()
                    tq.set_postfix(loss_st='%.6f' % loss.item())
                    tq.update(1)

            epoch_loss = running_loss / len(train_loader)
            writer.add_scalar("Training Loss", epoch_loss, epoch)
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, n_epochs, epoch_loss))

            val(model, val_loader, loss_fn, writer, epoch, device)

            checkpoint = {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            torch.save(checkpoint, checkpoint_file)
            print("saved the model " + checkpoint_file)
    finally:
        # Flush and release the event file even when training is interrupted
        writer.close()

In [None]:
from torch.utils.data import DataLoader # Added import for DataLoader
from torch.optim import SGD, Adam # Added import for optimizers

def _to_rgb(img):
    """Return ``img`` converted to 3-channel RGB mode."""
    return img.convert('RGB')


# Image transformations for the VALIDATION set: deterministic resize and
# normalisation only, so evaluation is consistent between epochs.
# No grayscale-to-3-channel step is needed after ToTensor: the RGB conversion
# at the start already guarantees three channels.
val_transform = transforms.Compose([
    transforms.Lambda(_to_rgb),    # Ensure 3 channels
    transforms.Resize((224, 224)), # Resize images to 224x224
    transforms.ToTensor(),         # Convert PIL Image to PyTorch Tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # Normalize image pixel values
])

# Custom dataset for the validation split (the split folder is named 'validation')
val_dataset = custom_dataset(mode="validation", root=actual_data_root, transforms=val_transform)

# drop_last=False: every validation sample must be evaluated. Dropping the
# final partial batch would silently exclude samples from the metrics.
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=0, drop_last=False)

# `model` was already instantiated and moved to `device` in an earlier cell.

# Optimizer and loss function
optimizer = SGD(model.parameters(), lr=0.001)

# CrossEntropyLoss: train/val convert the one-hot labels to class indices for it
loss_fn = nn.CrossEntropyLoss()

# Set number of epochs
n_epochs = 15

print(f"Starting training on {device} for {n_epochs} epochs...")

# Start the training process
train(model, train_loader, val_loader, optimizer, loss_fn, n_epochs, device)

print("Training complete!")

To visualize the training and validation metrics (loss and F1 score) that were logged by `SummaryWriter`, launch TensorBoard by running the cell below. The `%tensorboard` magic renders the TensorBoard interface inline beneath the cell, reading the event files from the `runs` directory.

In [None]:
%load_ext tensorboard
%tensorboard --logdir runs

In [None]:
# Show the reference training script. The encoding is explicit so the read
# does not depend on the platform's default locale.
with open('/content/train.py', 'r', encoding='utf-8') as f:
    print(f.read())

First, let's define the `val` function from `train.py`. This function will be used to evaluate the model's performance on the validation set after each training epoch.

Next, we'll define the main `train` function, which handles the training loop, loss calculation, backpropagation, and model updates.

Now, let's set up the training environment. This involves preparing the validation dataset, creating data loaders for both training and validation, instantiating the `ExModel`, defining the optimizer and loss function, and finally calling the `train` function.

First, let's define the necessary image transformations and then instantiate the `train_dataset` using your `custom_dataset` class.

# Task
Perform a comparative study of different deep learning models (ResNet18 and VGG16), pretraining strategies (pretrained and from scratch), and optimizers (SGD and Adam) for astrophysical object image classification. This involves modifying model architectures and training/validation functions to support pretraining and accuracy logging, running 8 distinct experiments, visualizing their performance using TensorBoard, evaluating the best-performing model on a test set, and finally summarizing the findings to recommend the optimal configuration.

## Prepare Data Loaders

### Subtask:
Ensure that `train_loader`, `val_loader`, and `num_classes` are correctly defined and accessible. This is a prerequisite for all training experiments.


**Reasoning**:
Verify that `train_loader`, `val_loader`, and `num_classes` are correctly defined and accessible as requested.



In [None]:
print(f"num_classes is defined and its value is: {num_classes}")
print(f"train_loader is defined and its type is: {type(train_loader)}")
print(f"val_loader is defined and its type is: {type(val_loader)}")

## Modify Model Architectures for pretrained argument

### Subtask:
Update the `ExModel` (ResNet18) and `VGG16Model` classes to accept a `pretrained` boolean argument in their `__init__` methods, allowing us to easily switch between pre-trained and randomly initialized weights for the backbone.


**Reasoning**:
To allow for switching between pre-trained and randomly initialized weights, I will update the `ExModel` and `VGG16Model` classes to accept a `pretrained` boolean argument in their `__init__` methods. This argument will be passed directly to the `models.resnet18()` and `models.vgg16()` calls respectively. I'll include both updated classes in a single code block.



In [None]:
import torch
import torch.nn as nn
import torchvision.models as models


class ExModel(nn.Module):
    """ResNet18 feature extractor followed by a linear classification head.

    Args:
        num_classes: Number of output logits produced by the classifier.
        pretrained: Forwarded to ``models.resnet18``; ``True`` loads
            pretrained weights, ``False`` keeps the random initialisation.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()

        backbone = models.resnet18(pretrained=pretrained)

        # Read the feature width from the layer being replaced instead of
        # hard-coding it (512 for ResNet18), so the head always matches the
        # backbone output.
        feature_dim = backbone.fc.in_features

        # Keep every child module except the final fully connected layer
        self.resnet18 = torch.nn.Sequential(*(list(backbone.children())[:-1]))

        # Classifier layer for the specific number of classes
        self.classifier = torch.nn.Linear(feature_dim, num_classes)

    def forward(self, image):
        """Return class logits for a batch of images."""
        # drop the two trailing singleton spatial dimensions
        resnet_features = self.resnet18(image).squeeze(-1).squeeze(-1)
        out = self.classifier(resnet_features)

        return out


class VGG16Model(nn.Module):
    """VGG16 whose final classifier layer is resized to ``num_classes``.

    Args:
        num_classes: Number of output logits of the replaced final layer.
        pretrained: Forwarded to ``models.vgg16``. When true, the
            convolutional feature extractor is also frozen.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()

        network = models.vgg16(pretrained=pretrained)

        # Pretrained convolutional features are kept fixed; a randomly
        # initialised network stays fully trainable.
        if pretrained:
            network.features.requires_grad_(False)

        # Swap the last classifier layer (index 6) for one with the same
        # input width and `num_classes` outputs.
        final_layer = network.classifier[6]
        network.classifier[6] = nn.Linear(final_layer.in_features, num_classes)

        self.vgg16 = network

    def forward(self, x):
        """Return class logits; the wrapped VGG16 runs features and classifier."""
        logits = self.vgg16(x)
        return logits

print("ExModel and VGG16Model classes updated to accept a 'pretrained' argument.")

## Update Training and Validation Functions with Accuracy Logging

### Subtask:
Modify the `train` function to accept a `run_name` parameter for TensorBoard logging, and ensure both `train` and `val` functions calculate and log 'accuracy' alongside 'F1 score' and 'loss' to TensorBoard. The `val` function also needs to ensure `label` is moved to the correct device before processing for loss and metrics.


**Reasoning**:
I need to update both the `val` and `train` functions according to the instructions, including importing `Accuracy`, correctly initializing and using `Accuracy` and `F1Score`, moving labels to the device, and logging accuracy to TensorBoard with a `run_name` parameter for the `train` function. I will regenerate the entire code block for both functions to ensure all changes are applied correctly and cohesively.



In [None]:
import torch
import torch.nn as nn
from torch.nn.functional import softmax
from torch.nn.functional import cross_entropy
from torchmetrics import F1Score, Accuracy # Import Accuracy
from torchvision import transforms

from torch.utils.data import DataLoader, Dataset
from torch.optim import SGD, Adam
from torch.utils.tensorboard import SummaryWriter
import tqdm

import os

save_model_path = "checkpoints/"
pth_name = "saved_model.pth"


def val(model, data_val, loss_function, writer, epoch, device):
    """Evaluate ``model`` on ``data_val`` and log loss, macro F1 and accuracy.

    Relies on the notebook-level ``num_classes`` for the metrics.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        data_val: Iterable of ``(image, one_hot_label)`` batches that
            supports ``len()``.
        loss_function: Loss callable. An ``nn.CrossEntropyLoss`` receives
            class indices; any other loss receives the one-hot labels as
            floats.
        writer: TensorBoard writer receiving "Validation F1",
            "Validation Accuracy" and "Validation Loss".
        epoch: Step value used for the logged scalars.
        device: Device the batches and metrics are moved to.

    Returns:
        None.
    """
    num_batches = len(data_val)
    if num_batches == 0:
        # Nothing to evaluate; the mean loss below would divide by zero.
        print("Validation skipped: the validation loader produced no batches.")
        return None

    # average='macro' is explicit for F1: the F1Score wrapper defaults to
    # micro averaging, which for single-label multiclass data is numerically
    # identical to the accuracy logged next to it.
    f1_metric = F1Score(num_classes=num_classes, task='multiclass', average='macro').to(device)
    accuracy_metric = Accuracy(num_classes=num_classes, task='multiclass').to(device)

    total_loss = 0

    model.eval()  # switch model to evaluation mode
    # The context managers guarantee the progress bar is closed and gradient
    # tracking is restored even if a batch raises.
    with torch.no_grad(), tqdm.tqdm(total=num_batches) as tq:
        tq.set_description('Validation:')

        for image, label in data_val:
            image = image.to(device)
            label = label.to(device)

            # one-hot -> class index; also the target for both metrics
            true_classes = torch.argmax(label, dim=1)
            if isinstance(loss_function, nn.CrossEntropyLoss):
                labels_for_loss = true_classes.long()
            else:
                labels_for_loss = label.float()

            pred = model(image)
            loss = loss_function(pred, labels_for_loss)

            # softmax is monotonic, so argmax over the raw logits selects the
            # same class as argmax over the probabilities
            predicted_classes = torch.argmax(pred, dim=1)

            f1_metric.update(predicted_classes, true_classes)
            accuracy_metric.update(predicted_classes, true_classes)

            total_loss += loss.item()
            tq.update(1)

    # Compute final metrics over all accumulated batches
    val_f1score = f1_metric.compute()
    val_accuracy = accuracy_metric.compute()

    writer.add_scalar("Validation F1", val_f1score, epoch)
    writer.add_scalar("Validation Accuracy", val_accuracy, epoch)
    writer.add_scalar("Validation Loss", total_loss/num_batches, epoch)

    print(f"F1 score: {val_f1score:.4f}, Accuracy: {val_accuracy:.4f}")

    return None


def train(model, train_loader, val_loader, optimizer, loss_fn, n_epochs, device, run_name):
    """Train ``model`` and log per-epoch metrics under ``runs/<run_name>``.

    After every epoch the mean training loss and training accuracy are
    logged, ``val`` is run on ``val_loader``, and two files are written to
    ``save_model_path``: the full model object and a checkpoint dict (epoch
    number, model and optimizer state dicts). Both file names contain
    ``run_name`` and the epoch number.

    Relies on the notebook-level ``num_classes`` for the accuracy metric.

    Args:
        model: Network to optimise; moved to ``device``.
        train_loader: Iterable of ``(image, one_hot_label)`` batches that
            supports ``len()``.
        val_loader: Validation batches forwarded to ``val``.
        optimizer: Optimizer built over the model parameters.
        loss_fn: Loss callable. An ``nn.CrossEntropyLoss`` receives class
            indices; any other loss receives the one-hot labels as floats.
        n_epochs: Number of passes over ``train_loader``.
        device: Device the model, batches and metric are moved to.
        run_name: TensorBoard run directory name and checkpoint file tag.
    """
    writer = SummaryWriter(log_dir=f'runs/{run_name}')

    # Accuracy accumulator for the training batches of one epoch
    accuracy_metric = Accuracy(num_classes=num_classes, task='multiclass').to(device)

    # Loop-invariant: create the checkpoint directory once, before any
    # training time is spent.
    os.makedirs(save_model_path, exist_ok=True)

    model.to(device)
    try:
        for epoch in range(n_epochs):

            # val() leaves the model in eval mode, so re-enable training
            # mode at the start of every epoch
            model.train()
            running_loss = 0.0
            accuracy_metric.reset()  # start each epoch from a clean metric state

            with tqdm.tqdm(total=len(train_loader)) as tq:
                tq.set_description('epoch %d' % (epoch))

                for images, labels in train_loader:
                    images = images.to(device)
                    labels = labels.to(device)

                    # one-hot -> class index; also the accuracy target
                    true_classes = torch.argmax(labels, dim=1)
                    if isinstance(loss_fn, nn.CrossEntropyLoss):
                        labels_for_loss = true_classes.long()
                    else:
                        labels_for_loss = labels.float()

                    optimizer.zero_grad()
                    outputs = model(images)
                    loss = loss_fn(outputs, labels_for_loss)
                    loss.backward()
                    optimizer.step()

                    running_loss += loss.item()
                    tq.set_postfix(loss_st='%.6f' % loss.item())
                    tq.update(1)

                    # Detach so the metric does not hold on to the autograd
                    # graph. argmax over logits equals argmax over softmax.
                    predicted_classes = torch.argmax(outputs.detach(), dim=1)
                    accuracy_metric.update(predicted_classes, true_classes)

            # Epoch-level training metrics
            train_accuracy = accuracy_metric.compute()
            epoch_loss = running_loss / len(train_loader)
            writer.add_scalar("Training Loss", epoch_loss, epoch)
            writer.add_scalar("Training Accuracy", train_accuracy, epoch)

            print('Epoch [{}/{}], Loss: {:.4f}, Accuracy: {:.4f}'.format(epoch+1, n_epochs, epoch_loss, train_accuracy))

            # Validation logs to the same writer at the same step
            val(model, val_loader, loss_fn, writer, epoch, device)

            checkpoint = {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            # Unique file names per run and epoch so runs never overwrite
            # each other's files
            full_model_file = os.path.join(save_model_path, f"full_{run_name}_epoch{epoch+1}_{pth_name}")
            checkpoint_file = os.path.join(save_model_path, f"checkpoint_{run_name}_epoch{epoch+1}_{pth_name}")

            # Save the full model object after each epoch
            torch.save(model, full_model_file)
            print(f"Saved full model for {run_name} at epoch {epoch+1} to {full_model_file}")

            torch.save(checkpoint, checkpoint_file)
            print(f"saved the checkpoint for {run_name} at epoch {epoch+1} to {checkpoint_file}")
    finally:
        # Flush and release the event file even when training is interrupted
        writer.close()

## Experiment 1: ResNet18 (Pretrained) + SGD

### Subtask:
Instantiate ExModel (ResNet18) with pretrained weights, define an SGD optimizer, and run the training process.


**Reasoning**:
I need to instantiate the ExModel with pretrained weights, define the SGD optimizer, the CrossEntropyLoss function, and then call the train function with all required parameters, including the specified run_name.



In [None]:
# Experiment 1 configuration: pretrained ResNet18 optimised with SGD
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_epochs_exp1 = 15  # same epoch budget as the earlier baseline run
loss_fn = nn.CrossEntropyLoss()
model_resnet18_pretrained_sgd = ExModel(num_classes=num_classes, pretrained=True)
optimizer_sgd = SGD(model_resnet18_pretrained_sgd.parameters(), lr=0.001)

print(f"Starting Experiment 1: ResNet18 (Pretrained) + SGD on {device} for {n_epochs_exp1} epochs...")
train(
    model=model_resnet18_pretrained_sgd,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer_sgd,
    loss_fn=loss_fn,
    n_epochs=n_epochs_exp1,
    device=device,
    run_name='ResNet18_Pretrained_SGD',
)
print("Experiment 1 complete!")

## Instantiate ResNet18 (From Scratch)

### Subtask:
Instantiate the `ExModel` (ResNet18) with `pretrained=False` to use randomly initialized weights.


**Reasoning**:
To instantiate the `ExModel` with randomly initialized weights as requested, I need to call the `ExModel` constructor with `num_classes` and `pretrained=False`, assigning the result to `model_resnet18_fromscratch_sgd`.



In [None]:
model_resnet18_fromscratch_sgd = ExModel(num_classes=num_classes, pretrained=False)
print("ExModel (ResNet18, from scratch) instantiated with randomly initialized weights.")

**Reasoning**:
The previous step successfully instantiated the model. Now, I need to define the SGD optimizer for this model and then initiate the training process for Experiment 2, logging its performance with a distinct run name.



In [None]:
# Experiment 2 configuration: randomly initialised ResNet18 optimised with
# SGD (the model itself was instantiated in the previous cell)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_epochs_exp2 = 15  # same epoch budget as the other experiments
loss_fn = nn.CrossEntropyLoss()
optimizer_sgd_fromscratch = SGD(model_resnet18_fromscratch_sgd.parameters(), lr=0.001)

print(f"Starting Experiment 2: ResNet18 (From Scratch) + SGD on {device} for {n_epochs_exp2} epochs...")
train(
    model=model_resnet18_fromscratch_sgd,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer_sgd_fromscratch,
    loss_fn=loss_fn,
    n_epochs=n_epochs_exp2,
    device=device,
    run_name='ResNet18_FromScratch_SGD',
)
print("Experiment 2 complete!")

## Experiment 3: ResNet18 (Pretrained) + Adam

### Subtask:
Instantiate ExModel (ResNet18) with pretrained weights, define an Adam optimizer, and run the training process. Log to TensorBoard under 'ResNet18_Pretrained_Adam'.


In [None]:
# Experiment 3 configuration: pretrained ResNet18 optimised with Adam
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_epochs_exp3 = 15
loss_fn = nn.CrossEntropyLoss()
model_resnet18_pretrained_adam = ExModel(num_classes=num_classes, pretrained=True)
optimizer_adam = Adam(model_resnet18_pretrained_adam.parameters(), lr=0.001)

print(f"Starting Experiment 3: ResNet18 (Pretrained) + Adam on {device} for {n_epochs_exp3} epochs...")
train(
    model=model_resnet18_pretrained_adam,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer_adam,
    loss_fn=loss_fn,
    n_epochs=n_epochs_exp3,
    device=device,
    run_name='ResNet18_Pretrained_Adam',
)
print("Experiment 3 complete!")

## Experiment 4: ResNet18 (From Scratch) + Adam

### Subtask:
Instantiate ExModel (ResNet18) with randomly initialized weights (from scratch), define an Adam optimizer, and run the training process. Log to TensorBoard under 'ResNet18_FromScratch_Adam'.


In [None]:
# Experiment 4 configuration: randomly initialised ResNet18 optimised with Adam
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_epochs_exp4 = 15  # same epoch budget as the other experiments
loss_fn = nn.CrossEntropyLoss()
model_resnet18_fromscratch_adam = ExModel(num_classes=num_classes, pretrained=False)
optimizer_adam_fromscratch = Adam(model_resnet18_fromscratch_adam.parameters(), lr=0.001)

print(f"Starting Experiment 4: ResNet18 (From Scratch) + Adam on {device} for {n_epochs_exp4} epochs...")
train(
    model=model_resnet18_fromscratch_adam,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer_adam_fromscratch,
    loss_fn=loss_fn,
    n_epochs=n_epochs_exp4,
    device=device,
    run_name='ResNet18_FromScratch_Adam',
)
print("Experiment 4 complete!")

## Experiment 5: VGG16 (Pretrained) + SGD

### Subtask:
Instantiate VGG16Model with pretrained weights, define an SGD optimizer, and run the training process. Log to TensorBoard under 'VGG16_Pretrained_SGD'.

In [None]:
# Experiment 5: VGG16Model built with pretrained=True, optimised with SGD.
model_vgg16_pretrained_sgd = VGG16Model(pretrained=True, num_classes=num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_epochs_exp5 = 15
loss_fn = nn.CrossEntropyLoss()
optimizer_sgd_vgg16 = SGD(
    model_vgg16_pretrained_sgd.parameters(),
    lr=0.001,
)

print(f"Starting Experiment 5: VGG16 (Pretrained) + SGD on {device} for {n_epochs_exp5} epochs...")
# run_name keeps this run's logs separate from the other experiments.
train(model_vgg16_pretrained_sgd, train_loader, val_loader,
      optimizer_sgd_vgg16, loss_fn, n_epochs_exp5, device,
      run_name='VGG16_Pretrained_SGD')
print("Experiment 5 complete!")

## Experiment 6: VGG16 (From Scratch) + SGD

### Subtask:
Instantiate VGG16Model with randomly initialized weights (from scratch), define an SGD optimizer, and run the training process. Log to TensorBoard under 'VGG16_FromScratch_SGD'.

In [None]:
# Experiment 6: VGG16Model built with pretrained=False, optimised with SGD.
model_vgg16_fromscratch_sgd = VGG16Model(pretrained=False, num_classes=num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_epochs_exp6 = 15
loss_fn = nn.CrossEntropyLoss()
optimizer_sgd_vgg16_fromscratch = SGD(
    model_vgg16_fromscratch_sgd.parameters(),
    lr=0.001,
)

print(f"Starting Experiment 6: VGG16 (From Scratch) + SGD on {device} for {n_epochs_exp6} epochs...")
# run_name keeps this run's logs separate from the other experiments.
train(model_vgg16_fromscratch_sgd, train_loader, val_loader,
      optimizer_sgd_vgg16_fromscratch, loss_fn, n_epochs_exp6, device,
      run_name='VGG16_FromScratch_SGD')
print("Experiment 6 complete!")

## Experiment 7: VGG16 (Pretrained) + Adam

### Subtask:
Instantiate VGG16Model with pretrained weights, define an Adam optimizer, and run the training process. Log to TensorBoard under 'VGG16_Pretrained_Adam'.

In [None]:
# Experiment 7: VGG16Model built with pretrained=True, optimised with Adam.
model_vgg16_pretrained_adam = VGG16Model(pretrained=True, num_classes=num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_epochs_exp7 = 15
loss_fn = nn.CrossEntropyLoss()
optimizer_adam_vgg16 = Adam(
    model_vgg16_pretrained_adam.parameters(),
    lr=0.001,
)

print(f"Starting Experiment 7: VGG16 (Pretrained) + Adam on {device} for {n_epochs_exp7} epochs...")
# run_name keeps this run's logs separate from the other experiments.
train(model_vgg16_pretrained_adam, train_loader, val_loader,
      optimizer_adam_vgg16, loss_fn, n_epochs_exp7, device,
      run_name='VGG16_Pretrained_Adam')
print("Experiment 7 complete!")

## Experiment 8: VGG16 (From Scratch) + Adam

### Subtask:
Instantiate VGG16Model with randomly initialized weights (from scratch), define an Adam optimizer, and run the training process. Log to TensorBoard under 'VGG16_FromScratch_Adam'.

In [None]:
# Experiment 8: VGG16Model built with pretrained=False, optimised with Adam.
model_vgg16_fromscratch_adam = VGG16Model(pretrained=False, num_classes=num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_epochs_exp8 = 15
loss_fn = nn.CrossEntropyLoss()
optimizer_adam_vgg16_fromscratch = Adam(
    model_vgg16_fromscratch_adam.parameters(),
    lr=0.001,
)

print(f"Starting Experiment 8: VGG16 (From Scratch) + Adam on {device} for {n_epochs_exp8} epochs...")
# run_name keeps this run's logs separate from the other experiments.
train(model_vgg16_fromscratch_adam, train_loader, val_loader,
      optimizer_adam_vgg16_fromscratch, loss_fn, n_epochs_exp8, device,
      run_name='VGG16_FromScratch_Adam')
print("Experiment 8 complete!")

**Reasoning**:
I need to instantiate the ExModel with pretrained weights, define the Adam optimizer, the CrossEntropyLoss function, and then call the train function with all required parameters, including the specified run_name.



**Reasoning**:
I need to instantiate the ExModel with randomly initialized weights, define the Adam optimizer, the CrossEntropyLoss function, and then call the train function with all required parameters, including the specified run_name for Experiment 4.



# Task
Perform a comparative study of different deep learning models (ResNet18 and VGG16), pretraining strategies (pretrained and from scratch), and optimizers (SGD and Adam) for astrophysical object image classification. This involves modifying model architectures and training/validation functions to support pretraining and accuracy logging, running 8 distinct experiments, visualizing their performance using TensorBoard, evaluating the best-performing model on a test set, and finally summarizing the findings to recommend the optimal configuration.

## Experiment 5: VGG16 (Pretrained) + SGD

### Subtask:
Instantiate VGG16Model with pretrained weights, define an SGD optimizer, and run the training process. Log to TensorBoard under 'VGG16_Pretrained_SGD'.


**Reasoning**:
I need to instantiate the VGG16Model with pretrained weights, define an SGD optimizer, the CrossEntropyLoss function, and then call the train function with all required parameters, including the specified run_name.



In [None]:
# Experiment 5: VGG16Model built with pretrained=True, optimised with SGD.
model_vgg16_pretrained_sgd = VGG16Model(pretrained=True, num_classes=num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_epochs_exp5 = 15
loss_fn = nn.CrossEntropyLoss()
optimizer_sgd_vgg16 = SGD(
    model_vgg16_pretrained_sgd.parameters(),
    lr=0.001,
)

print(f"Starting Experiment 5: VGG16 (Pretrained) + SGD on {device} for {n_epochs_exp5} epochs...")
# run_name keeps this run's logs separate from the other experiments.
train(model_vgg16_pretrained_sgd, train_loader, val_loader,
      optimizer_sgd_vgg16, loss_fn, n_epochs_exp5, device,
      run_name='VGG16_Pretrained_SGD')
print("Experiment 5 complete!")

**Reasoning**:
The `NameError: name 'VGG16Model' is not defined` means that the cell defining the `VGG16Model` class had not been executed in the current session before the class was used. To fix this, I need to include the `VGG16Model` class definition in the same cell as the instantiation and training code, so that it is guaranteed to be defined at execution time.



In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
from torch.optim import SGD

class VGG16Model(nn.Module):
    """VGG16 classifier whose last linear layer is resized to ``num_classes``.

    Args:
        num_classes: number of outputs of the replacement final layer.
        pretrained: when True, load torchvision's pretrained VGG16 weights and
            freeze the convolutional feature extractor (``features``); when
            False, start from random initialisation with all layers trainable.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()

        # torchvision >= 0.13 deprecates `pretrained=` in favour of `weights=`;
        # IMAGENET1K_V1 is the checkpoint `pretrained=True` used to select.
        # Older torchvision has no VGG16_Weights enum, so fall back to the
        # legacy keyword there.
        weights_enum = getattr(models, "VGG16_Weights", None)
        if weights_enum is not None:
            weights = weights_enum.IMAGENET1K_V1 if pretrained else None
            self.vgg16 = models.vgg16(weights=weights)
        else:
            self.vgg16 = models.vgg16(pretrained=pretrained)

        # With pretrained weights only the classifier is fine-tuned: freeze
        # every parameter of the convolutional feature extractor.
        if pretrained:
            for param in self.vgg16.features.parameters():
                param.requires_grad = False

        # classifier[6] is the last layer of the classifier stack; keep its
        # input width and swap in a fresh (trainable) layer with num_classes outputs.
        num_ftrs = self.vgg16.classifier[6].in_features
        self.vgg16.classifier[6] = nn.Linear(num_ftrs, num_classes)

    def forward(self, x):
        """Return the wrapped VGG16's output for the input batch ``x``."""
        return self.vgg16(x)

# Experiment 5: VGG16Model built with pretrained=True, optimised with SGD.
model_vgg16_pretrained_sgd = VGG16Model(pretrained=True, num_classes=num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_epochs_exp5 = 15
loss_fn = nn.CrossEntropyLoss()
optimizer_sgd_vgg16 = SGD(
    model_vgg16_pretrained_sgd.parameters(),
    lr=0.001,
)

print(f"Starting Experiment 5: VGG16 (Pretrained) + SGD on {device} for {n_epochs_exp5} epochs...")
# run_name keeps this run's logs separate from the other experiments.
train(model_vgg16_pretrained_sgd, train_loader, val_loader,
      optimizer_sgd_vgg16, loss_fn, n_epochs_exp5, device,
      run_name='VGG16_Pretrained_SGD')
print("Experiment 5 complete!")

**Reasoning**:
The `NameError: name 'num_classes' is not defined` means that `num_classes`, which is required to instantiate `VGG16Model`, had not been defined in the running session when the previous code block executed. To resolve this, I will define `num_classes` explicitly in the current code block, using the value obtained from a prior execution (12 classes), before instantiating the `VGG16Model` and proceeding with the training setup.



In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
from torch.optim import SGD

# Number of target classes, hard-coded here so this cell does not rely on an
# earlier cell having been run. It sizes VGG16Model's final layer below.
# NOTE(review): 12 is copied from a previous run (cell 2bc9006b) — confirm it
# still matches the number of class folders in the dataset.
num_classes = 12 # Value obtained from cell 2bc9006b

class VGG16Model(nn.Module):
    """VGG16 classifier whose last linear layer is resized to ``num_classes``.

    Args:
        num_classes: number of outputs of the replacement final layer.
        pretrained: when True, load torchvision's pretrained VGG16 weights and
            freeze the convolutional feature extractor (``features``); when
            False, start from random initialisation with all layers trainable.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()

        # torchvision >= 0.13 deprecates `pretrained=` in favour of `weights=`;
        # IMAGENET1K_V1 is the checkpoint `pretrained=True` used to select.
        # Older torchvision has no VGG16_Weights enum, so fall back to the
        # legacy keyword there.
        weights_enum = getattr(models, "VGG16_Weights", None)
        if weights_enum is not None:
            weights = weights_enum.IMAGENET1K_V1 if pretrained else None
            self.vgg16 = models.vgg16(weights=weights)
        else:
            self.vgg16 = models.vgg16(pretrained=pretrained)

        # With pretrained weights only the classifier is fine-tuned: freeze
        # every parameter of the convolutional feature extractor.
        if pretrained:
            for param in self.vgg16.features.parameters():
                param.requires_grad = False

        # classifier[6] is the last layer of the classifier stack; keep its
        # input width and swap in a fresh (trainable) layer with num_classes outputs.
        num_ftrs = self.vgg16.classifier[6].in_features
        self.vgg16.classifier[6] = nn.Linear(num_ftrs, num_classes)

    def forward(self, x):
        """Return the wrapped VGG16's output for the input batch ``x``."""
        return self.vgg16(x)

# Experiment 5: VGG16Model built with pretrained=True, optimised with SGD.
model_vgg16_pretrained_sgd = VGG16Model(pretrained=True, num_classes=num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_epochs_exp5 = 15
loss_fn = nn.CrossEntropyLoss()
optimizer_sgd_vgg16 = SGD(
    model_vgg16_pretrained_sgd.parameters(),
    lr=0.001,
)

print(f"Starting Experiment 5: VGG16 (Pretrained) + SGD on {device} for {n_epochs_exp5} epochs...")
# run_name keeps this run's logs separate from the other experiments.
train(model_vgg16_pretrained_sgd, train_loader, val_loader,
      optimizer_sgd_vgg16, loss_fn, n_epochs_exp5, device,
      run_name='VGG16_Pretrained_SGD')
print("Experiment 5 complete!")

# Task
The comparative study of deep learning models, pretraining strategies, and optimizers for astrophysical object image classification has been completed, including running all experiments, visualizing results, evaluating the best model, and summarizing the findings.

## Correct Experiment 5 Setup

### Subtask:
Regenerate the code cell for Experiment 5, including all necessary imports, the `num_classes` definition, the `val` and `train` function definitions, and the `VGG16Model` class definition, followed by the instantiation of the model, optimizer, loss function, and the call to the `train` function for 'VGG16_Pretrained_SGD'.


**Reasoning**:
The previous attempts to run Experiment 5 failed due to `NameError` because `VGG16Model`, `num_classes`, and the `val`/`train` functions were not defined or accessible within the scope of the execution. To resolve this, I will regenerate the entire code cell for Experiment 5, including all necessary imports, the `num_classes` definition, the `VGG16Model` class, and the updated `val` and `train` function definitions, before instantiating the model and running the training process. This ensures all dependencies are present and correctly defined in a single block.



In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
from torch.nn.functional import softmax
from torch.nn.functional import cross_entropy
from torchmetrics import F1Score, Accuracy
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from torch.optim import SGD, Adam
from torch.utils.tensorboard import SummaryWriter
import tqdm
import os

# Directory that train() creates (if missing) and writes per-epoch files into.
save_model_path = "checkpoints/"
# Common filename suffix; train() prefixes it with the run name and epoch number.
pth_name = "saved_model.pth"

# Number of target classes, hard-coded so this cell does not rely on an earlier
# cell having been run. Read as a global by val()/train() when building metrics.
# NOTE(review): 12 is copied from a previous run (cell 2bc9006b) — confirm it
# still matches the number of class folders in the dataset.
num_classes = 12 # Value obtained from cell 2bc9006b


class VGG16Model(nn.Module):
    """VGG16 classifier whose last linear layer is resized to ``num_classes``.

    Args:
        num_classes: number of outputs of the replacement final layer.
        pretrained: when True, load torchvision's pretrained VGG16 weights and
            freeze the convolutional feature extractor (``features``); when
            False, start from random initialisation with all layers trainable.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()

        # torchvision >= 0.13 deprecates `pretrained=` in favour of `weights=`;
        # IMAGENET1K_V1 is the checkpoint `pretrained=True` used to select.
        # Older torchvision has no VGG16_Weights enum, so fall back to the
        # legacy keyword there.
        weights_enum = getattr(models, "VGG16_Weights", None)
        if weights_enum is not None:
            weights = weights_enum.IMAGENET1K_V1 if pretrained else None
            self.vgg16 = models.vgg16(weights=weights)
        else:
            self.vgg16 = models.vgg16(pretrained=pretrained)

        # With pretrained weights only the classifier is fine-tuned: freeze
        # every parameter of the convolutional feature extractor.
        if pretrained:
            for param in self.vgg16.features.parameters():
                param.requires_grad = False

        # classifier[6] is the last layer of the classifier stack; keep its
        # input width and swap in a fresh (trainable) layer with num_classes outputs.
        num_ftrs = self.vgg16.classifier[6].in_features
        self.vgg16.classifier[6] = nn.Linear(num_ftrs, num_classes)

    def forward(self, x):
        """Return the wrapped VGG16's output for the input batch ``x``."""
        return self.vgg16(x)

def val(model, data_val, loss_function, writer, epoch, device):
    """Evaluate ``model`` on ``data_val`` and log the results to TensorBoard.

    Logs three scalars at step ``epoch``: "Validation F1",
    "Validation Accuracy" and "Validation Loss" (mean loss per batch), and
    prints the F1 score and accuracy.

    Args:
        model: network to evaluate; it is switched to eval mode and left there.
        data_val: sized iterable of ``(image, label)`` batches. Labels are
            reduced to class indices with ``argmax`` over dim 1, i.e. they are
            treated as one-hot / per-class score rows.
        loss_function: criterion applied to the raw model outputs. For
            ``nn.CrossEntropyLoss`` it receives class indices, otherwise the
            labels cast to float.
        writer: object exposing ``add_scalar(tag, value, step)``.
        epoch: step index used for the logged scalars.
        device: device the batches and metrics are moved to.

    Returns:
        None.
    """
    model.eval()  # switch model to evaluation mode

    n_batches = len(data_val)
    if n_batches == 0:
        # An empty loader (e.g. drop_last=True with fewer samples than one
        # batch) would otherwise divide by zero when averaging the loss.
        print("Validation skipped: the validation loader yielded no batches.")
        return None

    # Metrics read the module-level num_classes and live on the same device as the data.
    # NOTE(review): with task='multiclass' the default F1 averaging is presumably
    # 'micro', which coincides with accuracy — confirm, and consider average='macro'.
    f1_metric = F1Score(num_classes=num_classes, task='multiclass').to(device)
    accuracy_metric = Accuracy(num_classes=num_classes, task='multiclass').to(device)

    total_loss = 0.0

    # The context manager closes the progress bar even if a batch raises.
    with torch.no_grad(), tqdm.tqdm(total=n_batches) as tq:
        tq.set_description('Validation:')

        for image, label in data_val:
            image = image.to(device)
            label = label.to(device)

            # Class index of each sample, shared by the loss and the metrics.
            true_classes = torch.argmax(label, dim=1)

            if isinstance(loss_function, nn.CrossEntropyLoss):
                labels_for_loss = true_classes
            else:
                labels_for_loss = label.float()

            pred = model(image)
            loss = loss_function(pred, labels_for_loss)

            # argmax over the raw outputs: softmax is monotonic, so applying
            # it first would not change the selected class.
            predicted_classes = torch.argmax(pred, dim=1)

            f1_metric.update(predicted_classes, true_classes)
            accuracy_metric.update(predicted_classes, true_classes)

            total_loss += loss.item()
            tq.update(1)

    # Aggregate over every batch seen in this pass.
    val_f1score = f1_metric.compute()
    val_accuracy = accuracy_metric.compute()

    writer.add_scalar("Validation F1", val_f1score, epoch)
    writer.add_scalar("Validation Accuracy", val_accuracy, epoch)
    writer.add_scalar("Validation Loss", total_loss/n_batches, epoch)

    print(f"F1 score: {val_f1score:.4f}, Accuracy: {val_accuracy:.4f}")

    return None

def train(model, train_loader, val_loader, optimizer, loss_fn, n_epochs, device, run_name):
    """Train ``model`` for ``n_epochs``, validating and checkpointing every epoch.

    Each epoch logs "Training Loss" (mean per batch) and "Training Accuracy"
    to TensorBoard under ``runs/<run_name>``, calls ``val`` on ``val_loader``,
    and writes two files into ``save_model_path``: the full model object and a
    checkpoint dict with the epoch number, model state_dict and optimizer
    state_dict.

    Args:
        model: network to train; moved to ``device``.
        train_loader: sized iterable of ``(images, labels)`` batches. Labels
            are reduced to class indices with ``argmax`` over dim 1.
        val_loader: batches forwarded to ``val`` after every epoch.
        optimizer: optimizer already bound to ``model``'s parameters.
        loss_fn: criterion applied to the raw model outputs. For
            ``nn.CrossEntropyLoss`` it receives class indices, otherwise the
            labels cast to float.
        n_epochs: number of passes over ``train_loader``.
        device: device the model, batches and metric are moved to.
        run_name: used for the TensorBoard log directory and the saved filenames.

    Returns:
        None.
    """
    writer = SummaryWriter(log_dir=f'runs/{run_name}')

    # Training accuracy metric; reads the module-level num_classes.
    accuracy_metric = Accuracy(num_classes=num_classes, task='multiclass').to(device)

    model.to(device)
    try:
        for epoch in range(n_epochs):

            model.train()
            running_loss = 0.0
            accuracy_metric.reset()  # accuracy is reported per epoch, not cumulatively

            # The context manager closes the progress bar even if a batch raises.
            with tqdm.tqdm(total=len(train_loader)) as tq:
                tq.set_description('epoch %d' % (epoch))

                for images, labels in train_loader:
                    images = images.to(device)
                    labels = labels.to(device)

                    # Class index of each sample, shared by the loss and the metric.
                    true_classes = torch.argmax(labels, dim=1)

                    if isinstance(loss_fn, nn.CrossEntropyLoss):
                        labels_for_loss = true_classes
                    else:
                        labels_for_loss = labels.float()

                    optimizer.zero_grad()
                    outputs = model(images)
                    loss = loss_fn(outputs, labels_for_loss)
                    loss.backward()
                    optimizer.step()

                    running_loss += loss.item()
                    tq.set_postfix(loss_st='%.6f' % loss.item())
                    tq.update(1)

                    # argmax over the raw outputs: softmax is monotonic, so
                    # applying it first would not change the selected class.
                    predicted_classes = torch.argmax(outputs, dim=1)
                    accuracy_metric.update(predicted_classes, true_classes)

            train_accuracy = accuracy_metric.compute()
            epoch_loss = running_loss / len(train_loader)
            writer.add_scalar("Training Loss", epoch_loss, epoch)
            writer.add_scalar("Training Accuracy", train_accuracy, epoch)

            print('Epoch [{}/{}], Loss: {:.4f}, Accuracy: {:.4f}'.format(epoch+1, n_epochs, epoch_loss, train_accuracy))

            # Validation logs into the same writer at the same step.
            val(model, val_loader, loss_fn, writer, epoch, device)

            os.makedirs(save_model_path, exist_ok=True)
            checkpoint = {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            # Full model object, one file per run and epoch.
            full_model_path = os.path.join(save_model_path, f"full_{run_name}_epoch{epoch+1}_{pth_name}")
            torch.save(model, full_model_path)
            print(f"Saved full model for {run_name} at epoch {epoch+1} to {full_model_path}")

            # State-dict checkpoint, one file per run and epoch.
            checkpoint_path = os.path.join(save_model_path, f"checkpoint_{run_name}_epoch{epoch+1}_{pth_name}")
            torch.save(checkpoint, checkpoint_path)
            print(f"saved the checkpoint for {run_name} at epoch {epoch+1} to {checkpoint_path}")
    finally:
        # Flush pending events and release the log file, also when training is
        # interrupted; otherwise the last scalars may not reach TensorBoard.
        writer.close()




# Experiment 5: VGG16Model built with pretrained=True, optimised with SGD.
model_vgg16_pretrained_sgd = VGG16Model(pretrained=True, num_classes=num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_epochs_exp5 = 15
loss_fn = nn.CrossEntropyLoss()
optimizer_sgd_vgg16 = SGD(
    model_vgg16_pretrained_sgd.parameters(),
    lr=0.001,
)

print(f"Starting Experiment 5: VGG16 (Pretrained) + SGD on {device} for {n_epochs_exp5} epochs...")
# run_name keeps this run's logs separate from the other experiments.
train(model_vgg16_pretrained_sgd, train_loader, val_loader,
      optimizer_sgd_vgg16, loss_fn, n_epochs_exp5, device,
      run_name='VGG16_Pretrained_SGD')
print("Experiment 5 complete!")

**Reasoning**:
The `NameError: name 'val_loader' is not defined` indicates that the `train_loader` and `val_loader` variables were not accessible within the scope of the previous code block, nor were their dependencies like `train_dataset`, `val_dataset`, and the `custom_dataset` class. To resolve this, I will regenerate the entire code block for Experiment 5, ensuring that all necessary imports, the `custom_dataset` class definition, the dataset and DataLoader instantiations for both training and validation, the `num_classes` definition, the `VGG16Model` class definition, and the `val` and `train` function definitions are all included and properly ordered before initiating the training process. This will make sure all variables and functions are defined and in scope when called.



In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
from torch.nn.functional import softmax
from torch.nn.functional import cross_entropy
from torchmetrics import F1Score, Accuracy
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from torch.optim import SGD, Adam
from torch.utils.tensorboard import SummaryWriter
import tqdm
import os
import numpy as np
from PIL import Image

# Directory that train() creates (if missing) and writes per-epoch files into.
save_model_path = "checkpoints/"
# Common filename suffix; train() prefixes it with the run name and epoch number.
pth_name = "saved_model.pth"

# --- custom_dataset class definition ---
class custom_dataset(Dataset):
    """Image-folder dataset with one sub-directory per class.

    Expects ``<root>/<mode>/<class_name>/<image files>``. Class ids follow the
    alphabetical order of the class directory names, and every sample's label
    is a one-hot vector over those classes.

    Args:
        mode: name of the split sub-directory under ``root``.
        root: dataset root directory.
        transforms: optional callable applied to each opened PIL image.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, mode = "train", root = "datasets/demo_dataset", transforms = None):
        super().__init__()
        self.mode = mode
        self.root = root
        self.transforms = transforms

        # Directory of the selected split.
        self.folder = os.path.join(self.root, self.mode)

        # Parallel lists: image file path and its one-hot label (numpy array).
        self.image_list = []
        self.label_list = []

        # Only sub-directories are classes. A stray file in the split folder
        # (README, .DS_Store, ...) must neither become a class nor be listed
        # as if it were a directory.
        self.class_list = sorted(
            entry
            for entry in os.listdir(self.folder)
            if os.path.isdir(os.path.join(self.folder, entry))
        )

        n_classes = len(self.class_list)
        for class_id, class_name in enumerate(self.class_list):
            class_dir = os.path.join(self.folder, class_name)
            # os.listdir order is arbitrary; sorting keeps sample indices
            # reproducible across machines and runs.
            for image in sorted(os.listdir(class_dir)):
                self.image_list.append(os.path.join(class_dir, image))
                label = np.zeros(n_classes)
                label[class_id] = 1.0
                self.label_list.append(label)

    def __getitem__(self, index):
        """Return ``(image, label)`` for sample ``index``.

        The image is opened with PIL and passed through ``self.transforms``
        when one was given; the label is the one-hot vector as a tensor.
        """
        image_name = self.image_list[index]
        label = self.label_list[index]

        image = Image.open(image_name)
        if self.transforms:
            image = self.transforms(image)

        label = torch.tensor(label)

        return image, label

    def __len__(self):
        """Return the number of samples found in the split."""
        return len(self.image_list)

# --- Data transformations ---
# Training pipeline: force RGB, apply random augmentation, convert to a tensor
# and normalise with the per-channel mean/std below.
# The conversion to RGB already guarantees three channels, so no later
# channel-repeat step is needed.
train_transform = transforms.Compose([
    transforms.Lambda(lambda img: img.convert('RGB')), # Ensure 3 channels
    transforms.RandomResizedCrop(224), # Randomly crop and resize to 224x224
    transforms.RandomHorizontalFlip(), # Randomly flip the image horizontally
    transforms.RandomRotation(15),     # Randomly rotate the image by a small degree
    transforms.ToTensor(),             # Convert PIL Image to PyTorch Tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # Normalize image pixel values
])

# Validation pipeline: deterministic resize only, same normalisation.
val_transform = transforms.Compose([
    transforms.Lambda(lambda img: img.convert('RGB')), # Ensure 3 channels
    transforms.Resize((224, 224)), # Resize images to 224x224
    transforms.ToTensor(),       # Convert PIL Image to PyTorch Tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # Normalize image pixel values
])

# --- Data Root ---
# Hard-coded so this cell does not depend on the earlier download cell.
# NOTE(review): assumes the dataset is available at this path in the current
# runtime — confirm against the path returned by kagglehub.dataset_download.
path = '/kaggle/input/astrophysical-objects-image-dataset'
actual_data_root = os.path.join(path, 'astro_dataset_maxia', 'astro_dataset_maxia')

# --- Dataset and DataLoader Instantiation ---
train_dataset = custom_dataset(mode="training", root=actual_data_root, transforms=train_transform)
val_dataset = custom_dataset(mode="validation", root=actual_data_root, transforms=val_transform)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
# drop_last=False: every validation sample must contribute to the metrics.
# Dropping the final partial batch would exclude up to batch_size - 1 samples
# and leave the loader empty for a validation set smaller than one batch.
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0, drop_last=False)

# Number of classes, taken from the class folders found in the training split.
num_classes = len(train_dataset.class_list) # Dynamically get from dataset


class VGG16Model(nn.Module):
    """VGG16 classifier whose last linear layer is resized to ``num_classes``.

    Args:
        num_classes: number of outputs of the replacement final layer.
        pretrained: when True, load torchvision's pretrained VGG16 weights and
            freeze the convolutional feature extractor (``features``); when
            False, start from random initialisation with all layers trainable.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()

        # torchvision >= 0.13 deprecates `pretrained=` in favour of `weights=`;
        # IMAGENET1K_V1 is the checkpoint `pretrained=True` used to select.
        # Older torchvision has no VGG16_Weights enum, so fall back to the
        # legacy keyword there.
        weights_enum = getattr(models, "VGG16_Weights", None)
        if weights_enum is not None:
            weights = weights_enum.IMAGENET1K_V1 if pretrained else None
            self.vgg16 = models.vgg16(weights=weights)
        else:
            self.vgg16 = models.vgg16(pretrained=pretrained)

        # With pretrained weights only the classifier is fine-tuned: freeze
        # every parameter of the convolutional feature extractor.
        if pretrained:
            for param in self.vgg16.features.parameters():
                param.requires_grad = False

        # classifier[6] is the last layer of the classifier stack; keep its
        # input width and swap in a fresh (trainable) layer with num_classes outputs.
        num_ftrs = self.vgg16.classifier[6].in_features
        self.vgg16.classifier[6] = nn.Linear(num_ftrs, num_classes)

    def forward(self, x):
        """Return the wrapped VGG16's output for the input batch ``x``."""
        return self.vgg16(x)

def val(model, data_val, loss_function, writer, epoch, device):
    """Evaluate ``model`` on ``data_val`` and log the results to TensorBoard.

    Logs three scalars at step ``epoch``: "Validation F1",
    "Validation Accuracy" and "Validation Loss" (mean loss per batch), and
    prints the F1 score and accuracy.

    Args:
        model: network to evaluate; it is switched to eval mode and left there.
        data_val: sized iterable of ``(image, label)`` batches. Labels are
            reduced to class indices with ``argmax`` over dim 1, i.e. they are
            treated as one-hot / per-class score rows.
        loss_function: criterion applied to the raw model outputs. For
            ``nn.CrossEntropyLoss`` it receives class indices, otherwise the
            labels cast to float.
        writer: object exposing ``add_scalar(tag, value, step)``.
        epoch: step index used for the logged scalars.
        device: device the batches and metrics are moved to.

    Returns:
        None.
    """
    model.eval()  # switch model to evaluation mode

    n_batches = len(data_val)
    if n_batches == 0:
        # An empty loader (e.g. drop_last=True with fewer samples than one
        # batch) would otherwise divide by zero when averaging the loss.
        print("Validation skipped: the validation loader yielded no batches.")
        return None

    # Metrics read the module-level num_classes and live on the same device as the data.
    # NOTE(review): with task='multiclass' the default F1 averaging is presumably
    # 'micro', which coincides with accuracy — confirm, and consider average='macro'.
    f1_metric = F1Score(num_classes=num_classes, task='multiclass').to(device)
    accuracy_metric = Accuracy(num_classes=num_classes, task='multiclass').to(device)

    total_loss = 0.0

    # The context manager closes the progress bar even if a batch raises.
    with torch.no_grad(), tqdm.tqdm(total=n_batches) as tq:
        tq.set_description('Validation:')

        for image, label in data_val:
            image = image.to(device)
            label = label.to(device)

            # Class index of each sample, shared by the loss and the metrics.
            true_classes = torch.argmax(label, dim=1)

            if isinstance(loss_function, nn.CrossEntropyLoss):
                labels_for_loss = true_classes
            else:
                labels_for_loss = label.float()

            pred = model(image)
            loss = loss_function(pred, labels_for_loss)

            # argmax over the raw outputs: softmax is monotonic, so applying
            # it first would not change the selected class.
            predicted_classes = torch.argmax(pred, dim=1)

            f1_metric.update(predicted_classes, true_classes)
            accuracy_metric.update(predicted_classes, true_classes)

            total_loss += loss.item()
            tq.update(1)

    # Aggregate over every batch seen in this pass.
    val_f1score = f1_metric.compute()
    val_accuracy = accuracy_metric.compute()

    writer.add_scalar("Validation F1", val_f1score, epoch)
    writer.add_scalar("Validation Accuracy", val_accuracy, epoch)
    writer.add_scalar("Validation Loss", total_loss/n_batches, epoch)

    print(f"F1 score: {val_f1score:.4f}, Accuracy: {val_accuracy:.4f}")

    return None

def train(model, train_loader, val_loader, optimizer, loss_fn, n_epochs, device, run_name):
    """Train ``model`` for ``n_epochs``, validating and checkpointing every epoch.

    Each epoch logs "Training Loss" (mean per batch) and "Training Accuracy"
    to TensorBoard under ``runs/<run_name>``, calls ``val`` on ``val_loader``,
    and writes two files into ``save_model_path``: the full model object and a
    checkpoint dict with the epoch number, model state_dict and optimizer
    state_dict.

    Args:
        model: network to train; moved to ``device``.
        train_loader: sized iterable of ``(images, labels)`` batches. Labels
            are reduced to class indices with ``argmax`` over dim 1.
        val_loader: batches forwarded to ``val`` after every epoch.
        optimizer: optimizer already bound to ``model``'s parameters.
        loss_fn: criterion applied to the raw model outputs. For
            ``nn.CrossEntropyLoss`` it receives class indices, otherwise the
            labels cast to float.
        n_epochs: number of passes over ``train_loader``.
        device: device the model, batches and metric are moved to.
        run_name: used for the TensorBoard log directory and the saved filenames.

    Returns:
        None.
    """
    writer = SummaryWriter(log_dir=f'runs/{run_name}')

    # Training accuracy metric; reads the module-level num_classes.
    accuracy_metric = Accuracy(num_classes=num_classes, task='multiclass').to(device)

    model.to(device)
    try:
        for epoch in range(n_epochs):

            model.train()
            running_loss = 0.0
            accuracy_metric.reset()  # accuracy is reported per epoch, not cumulatively

            # The context manager closes the progress bar even if a batch raises.
            with tqdm.tqdm(total=len(train_loader)) as tq:
                tq.set_description('epoch %d' % (epoch))

                for images, labels in train_loader:
                    images = images.to(device)
                    labels = labels.to(device)

                    # Class index of each sample, shared by the loss and the metric.
                    true_classes = torch.argmax(labels, dim=1)

                    if isinstance(loss_fn, nn.CrossEntropyLoss):
                        labels_for_loss = true_classes
                    else:
                        labels_for_loss = labels.float()

                    optimizer.zero_grad()
                    outputs = model(images)
                    loss = loss_fn(outputs, labels_for_loss)
                    loss.backward()
                    optimizer.step()

                    running_loss += loss.item()
                    tq.set_postfix(loss_st='%.6f' % loss.item())
                    tq.update(1)

                    # argmax over the raw outputs: softmax is monotonic, so
                    # applying it first would not change the selected class.
                    predicted_classes = torch.argmax(outputs, dim=1)
                    accuracy_metric.update(predicted_classes, true_classes)

            train_accuracy = accuracy_metric.compute()
            epoch_loss = running_loss / len(train_loader)
            writer.add_scalar("Training Loss", epoch_loss, epoch)
            writer.add_scalar("Training Accuracy", train_accuracy, epoch)

            print('Epoch [{}/{}], Loss: {:.4f}, Accuracy: {:.4f}'.format(epoch+1, n_epochs, epoch_loss, train_accuracy))

            # Validation logs into the same writer at the same step.
            val(model, val_loader, loss_fn, writer, epoch, device)

            os.makedirs(save_model_path, exist_ok=True)
            checkpoint = {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            # Full model object, one file per run and epoch.
            full_model_path = os.path.join(save_model_path, f"full_{run_name}_epoch{epoch+1}_{pth_name}")
            torch.save(model, full_model_path)
            print(f"Saved full model for {run_name} at epoch {epoch+1} to {full_model_path}")

            # State-dict checkpoint, one file per run and epoch.
            checkpoint_path = os.path.join(save_model_path, f"checkpoint_{run_name}_epoch{epoch+1}_{pth_name}")
            torch.save(checkpoint, checkpoint_path)
            print(f"saved the checkpoint for {run_name} at epoch {epoch+1} to {checkpoint_path}")
    finally:
        # Flush pending events and release the log file, also when training is
        # interrupted; otherwise the last scalars may not reach TensorBoard.
        writer.close()




# Experiment 5: VGG16Model built with pretrained=True, optimised with SGD.
model_vgg16_pretrained_sgd = VGG16Model(pretrained=True, num_classes=num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_epochs_exp5 = 15
loss_fn = nn.CrossEntropyLoss()
optimizer_sgd_vgg16 = SGD(
    model_vgg16_pretrained_sgd.parameters(),
    lr=0.001,
)

print(f"Starting Experiment 5: VGG16 (Pretrained) + SGD on {device} for {n_epochs_exp5} epochs...")
# run_name keeps this run's logs separate from the other experiments.
train(model_vgg16_pretrained_sgd, train_loader, val_loader,
      optimizer_sgd_vgg16, loss_fn, n_epochs_exp5, device,
      run_name='VGG16_Pretrained_SGD')
print("Experiment 5 complete!")

# Task
Starting Experiment 5: VGG16 (Pretrained) + SGD on cpu for 15 epochs...

## Correct Device Assignment

### Subtask:
Modify the line `device = torch.device("cuda" if torch.cuda.is_available() else "gpu")` to `device = torch.device("cuda" if torch.cuda.is_available() else "cpu")` to ensure a valid device is always selected. Then, re-instantiate the model and move it to the corrected device.


**Reasoning**:
To fix the RuntimeError and ensure a valid device is always selected, I will modify the device assignment to use 'cpu' as a fallback, then re-instantiate the ExModel with the correct number of classes, and move the model to this newly defined device.



In [None]:
import torch

# Pick the GPU when one is available, otherwise fall back to the CPU
# ("cpu" is the valid fallback device string).
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Build a fresh ExModel (class and num_classes come from earlier cells; the
# `pretrained` argument is left at its default) and place it on the device.
# Module.to() returns the module itself, so the two steps can be chained.
model = ExModel(num_classes=num_classes).to(device)

print(f"Model instantiated and moved to {device}.")
print(model)

**Reasoning**:
The previous step successfully corrected the device assignment and instantiated the model. Now, I need to execute Experiment 1 as per the task, which involves instantiating `ExModel` (ResNet18) with pretrained weights, defining an SGD optimizer, and running the training process, logging its performance with a distinct run name.



In [None]:
# Experiment 1: ExModel built with pretrained=True, optimised with SGD.
model_resnet18_pretrained_sgd = ExModel(pretrained=True, num_classes=num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_epochs_exp1 = 15  # same epoch count as the other experiments
loss_fn = nn.CrossEntropyLoss()
optimizer_sgd = SGD(
    model_resnet18_pretrained_sgd.parameters(),
    lr=0.001,
)

print(f"Starting Experiment 1: ResNet18 (Pretrained) + SGD on {device} for {n_epochs_exp1} epochs...")
# run_name keeps this run's logs separate from the other experiments.
train(model_resnet18_pretrained_sgd, train_loader, val_loader,
      optimizer_sgd, loss_fn, n_epochs_exp1, device,
      run_name='ResNet18_Pretrained_SGD')
print("Experiment 1 complete!")