In [None]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader, random_split

class CarDataset(Dataset):
    """Image-classification dataset backed by an annotation DataFrame.

    The frame must provide an ``image`` column (file name relative to
    ``root_dir``) and a ``Class`` column. Labels are shifted down by one when
    the dataset is built, so a ``Class`` value of 1 becomes target 0.

    Args:
        annotations_df: DataFrame with ``image`` and ``Class`` columns. It is
            re-indexed into a new frame; the shift is applied to that frame.
        root_dir: Directory that the ``image`` file names are joined onto.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, annotations_df, root_dir, transform=None):
        # reset_index gives a fresh 0..n-1 index so positional and label
        # lookups agree after an upstream shuffle/split.
        frame = annotations_df.reset_index(drop=True)
        frame['Class'] = frame['Class'] - 1

        self.annotations = frame
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        ``label`` is a 0-dim ``torch.long`` tensor; ``image`` is whatever the
        transform returns, or the RGB PIL image when no transform is set.
        """
        row = self.annotations.iloc[idx]
        image_path = os.path.join(self.root_dir, row['image'])
        image = Image.open(image_path).convert('RGB')
        label = int(row['Class'])
        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(label, dtype=torch.long)
        

In [None]:
import pandas as pd
# Load the annotation table (one row per image) from the CSV next to the notebook.
file_path = "car_boundingBox_class_and_image_number.csv"
car_dataset_df = pd.read_csv(file_path)

# Number of distinct labels; used later to size the classifier head.
# NOTE(review): CarDataset shifts labels by -1, so this only matches the label
# range if the CSV's Class values are contiguous starting at 1 - confirm.
class_labels = car_dataset_df['Class']
num_classes = class_labels.nunique()

num_classes

In [None]:
# Peek at the first five annotation rows (head() defaults to n=5).
car_dataset_df.head()

In [None]:
from torchvision import transforms

# Picks one of two fixed quarter-turn rotations; each RandomRotation is given a
# degenerate (min == max) range, so the angle is exactly -90 or +90 degrees.
quarter_turn = transforms.RandomChoice([
    transforms.RandomRotation(degrees=(-90, -90)),
    transforms.RandomRotation(degrees=(90, 90)),
])

# Training pipeline: bicubic resize to 272x272, then (30% of the time) a quarter
# turn, then (30% of the time) a horizontal flip, then conversion to a tensor.
transform = transforms.Compose([
    transforms.Resize(
        (272, 272),
        interpolation=transforms.InterpolationMode.BICUBIC,
    ),
    transforms.RandomApply([quarter_turn], p=0.3),
    transforms.RandomHorizontalFlip(p=0.3),
    transforms.ToTensor(),
])

# Validation pipeline: same resize and tensor conversion, no random augmentation.
transform_validation = transforms.Compose([
    transforms.Resize(
        (272, 272),
        interpolation=transforms.InterpolationMode.BICUBIC,
    ),
    transforms.ToTensor(),
])

In [None]:
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision.models import swin_v2_b, Swin_V2_B_Weights
import torch.nn as nn
import torch.optim as optim
from PIL import Image
import os
import random
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the annotation rows for validation; the fixed random_state
# makes the split repeatable.
train_df, val_df = train_test_split(
    car_dataset_df,
    test_size=0.2,
    random_state=22,
)

# Both splits read images from the same directory; only the transform differs.
image_root = "cars_training_image/cars_train/"
train_ds = CarDataset(annotations_df=train_df, root_dir=image_root, transform=transform)
val_ds = CarDataset(annotations_df=val_df, root_dir=image_root, transform=transform_validation)

In [None]:
# Batches of 4 for both splits; only the training loader shuffles.
loader_kwargs = dict(batch_size=4)
train_loader = DataLoader(train_ds, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_ds, shuffle=False, **loader_kwargs)

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Sanity check: pull the first three training batches and print their shapes
# and labels. range(3) is the first zip argument, so iteration stops before a
# fourth batch is requested from the loader.
for i, (images, labels) in zip(range(3), train_loader):
    images = images.to(device)
    labels = labels.to(device)

    print(f"Batch {i+1}")
    print("Images shape:", images.shape)
    print("Labels:", labels)

In [None]:
import torch
import torch.nn as nn
from torchvision.models import swin_v2_b, Swin_V2_B_Weights

class SwinV2BClassifier(nn.Module):
    """Swin-V2-B backbone with its head replaced by a ``num_classes`` linear layer.

    Args:
        num_classes: Output size of the new linear head.
        pretrained_weights: Passed through as ``weights=`` to ``swin_v2_b``.
        lr: Learning rate of the Adam optimizer created over the backbone's
            parameters and exposed as ``self.optimizer``.
    """

    def __init__(self, num_classes, pretrained_weights=Swin_V2_B_Weights.IMAGENET1K_V1, lr=0.001):
        super().__init__()
        backbone = swin_v2_b(weights=pretrained_weights)

        # Keep the head's input width, swap in a layer sized for our labels.
        head_width = backbone.head.in_features
        backbone.head = nn.Linear(head_width, num_classes)

        self.model = backbone
        self.optimizer = optim.Adam(backbone.parameters(), lr=lr)

    def forward(self, x):
        """Run ``x`` through the wrapped backbone and return its output."""
        return self.model(x)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import timm

class EfficientNetV2XLClassifier(nn.Module):
    """timm ``tf_efficientnetv2_xl.in21k`` model with its classifier reset.

    Args:
        num_classes: Passed to the timm model's ``reset_classifier``.
        lr: Learning rate of the Adam optimizer created over the model's
            parameters and exposed as ``self.optimizer``.
    """

    def __init__(self, num_classes, lr=0.001):
        super().__init__()
        backbone = timm.create_model('tf_efficientnetv2_xl.in21k', pretrained=True)

        # Rebuild the classifier for our label count before the optimizer
        # collects the parameters.
        backbone.reset_classifier(num_classes)

        self.model = backbone
        self.optimizer = optim.Adam(backbone.parameters(), lr=lr)

    def forward(self, x):
        """Run ``x`` through the wrapped timm model and return its output."""
        return self.model(x)


In [None]:
# Build the classifier (which creates its own Adam optimizer in __init__) and
# move it to the selected device; Module.to returns the module itself.
model = EfficientNetV2XLClassifier(num_classes=num_classes)
model = model.to(device)

# Loss for the training/validation loops, and a shorthand for the model's optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = model.optimizer

In [None]:
def fgsm_attack(model, images, labels, epsilon, loss_fn=None):
    """Return FGSM-perturbed copies of ``images``.

    Each image is moved by ``epsilon`` in the direction of the sign of the
    loss gradient with respect to the input, then clamped to ``[0, 1]``.

    The input tensor is not modified: the gradient is taken on a detached
    copy, so ``images.requires_grad`` and ``images.grad`` are left as they
    were, and non-leaf inputs are accepted. Model parameter gradients are not
    touched either, because only the input gradient is requested.

    Args:
        model: Callable mapping a batch of images to logits. It is called in
            whatever train/eval mode it is currently in.
        images: Batch of input images.
        labels: Targets passed to the loss together with the model output.
        epsilon: Step size of the perturbation.
        loss_fn: Loss callable ``loss_fn(outputs, labels)``. Defaults to the
            notebook-level ``criterion``.

    Returns:
        A detached tensor shaped like ``images`` with values in ``[0, 1]``.
    """
    if loss_fn is None:
        # Fall back to the loss defined at notebook level.
        loss_fn = criterion

    # Differentiate w.r.t. a private leaf copy so the caller's tensor keeps
    # its autograd state.
    adv_inputs = images.detach().clone().requires_grad_(True)
    loss = loss_fn(model(adv_inputs), labels)

    # autograd.grad returns only the input gradient and does not accumulate
    # into any parameter's .grad.
    (data_grad,) = torch.autograd.grad(loss, adv_inputs)

    perturbed_images = adv_inputs.detach() + epsilon * data_grad.sign()
    return torch.clamp(perturbed_images, 0, 1).detach()


In [None]:
import os
import time
import matplotlib.pyplot as plt
from torchvision.transforms.functional import to_pil_image

# --- Training schedule ---
num_epochs = 10
epsilon = 0.01  # step size handed to fgsm_attack in the training loop

# --- Early stopping bookkeeping ---
patience = 3
epochs_no_improve = 0
best_val_loss = float('inf')

# --- Checkpoint directory (created if it does not exist yet) ---
save_dir = "saved_models"
os.makedirs(save_dir, exist_ok=True)

# Cosine learning-rate schedule spanning all epochs, with a floor of 1e-5.
scheduler = CosineAnnealingLR(optimizer, eta_min=1e-5, T_max=num_epochs)

def show_fgsm_images(original_image, augmented_image):
    """Plot an image and its FGSM-perturbed version next to each other.

    Both tensors are moved to the CPU and converted to PIL images, then drawn
    in a 1x2 figure with titles and hidden axes.

    Args:
        original_image: Image tensor before the perturbation.
        augmented_image: Image tensor after the perturbation.
    """
    # Convert both inputs before any figure is created.
    panels = (
        ("Original Image", to_pil_image(original_image.cpu())),
        ("FGSM Augmented Image", to_pil_image(augmented_image.cpu())),
    )

    fig, axes = plt.subplots(1, 2, figsize=(8, 4))
    for ax, (title, picture) in zip(axes, panels):
        ax.imshow(picture)
        ax.set_title(title)
        ax.axis('off')
    plt.show()

# Main training loop: one pass over train_loader (with FGSM perturbation applied
# to a random ~30% of batches), one validation pass, a checkpoint per epoch, a
# separate "best" checkpoint on validation-loss improvement, and early stopping
# after `patience` epochs without improvement.
for epoch in range(num_epochs):
    print(f"\nStarting Epoch {epoch+1}/{num_epochs}")
    epoch_start_time = time.time()
    model.train()
    running_loss = 0.0
    # Render the FGSM comparison figure only once per epoch. Drawing it for
    # every perturbed batch produces hundreds of figures per epoch, which
    # floods the notebook output and slows training.
    fgsm_preview_shown = False

    for batch_idx, (images, labels) in enumerate(train_loader):
        batch_start_time = time.time()
        
        images = images.to(device)
        labels = labels.to(device)
        
        # With probability 0.3, train on the FGSM-perturbed batch instead of
        # the clean one.
        if random.random() < 0.3:
            original_images = images.clone()
            images = fgsm_attack(model, images, labels, epsilon)

            if not fgsm_preview_shown:
                show_fgsm_images(original_images[0], images[0])
                fgsm_preview_shown = True
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        # Weight the batch loss by batch size so the epoch average below is
        # per-sample.
        running_loss += loss.item() * images.size(0)
        
        batch_end_time = time.time()
        print(f"Batch {batch_idx+1}/{len(train_loader)}, Loss: {loss.item():.4f}, Time: {batch_end_time - batch_start_time:.2f} seconds")
    
    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_end_time = time.time()
    print(f"Epoch {epoch+1} Training Loss: {epoch_loss:.4f}, Time: {epoch_end_time - epoch_start_time:.2f} seconds")
  
    # Advance the LR schedule once per epoch, after all optimizer steps.
    scheduler.step()

    # Validation pass: no gradients, no FGSM.
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_epoch_loss = val_loss / len(val_loader.dataset)
    val_accuracy = 100 * correct / total
    print(f"Epoch {epoch+1} Validation Loss: {val_epoch_loss:.4f}, Accuracy: {val_accuracy:.2f}%")
    
    # Read after scheduler.step(), so this is the rate the next epoch will use.
    current_lr = scheduler.get_last_lr()[0]
    print(f"Current Learning Rate: {current_lr:.6f}")
    
    # Per-epoch checkpoint.
    model_path = os.path.join(save_dir, f"model_epoch_{epoch+1}.pth")
    torch.save(model.state_dict(), model_path)
    print(f"Model weights saved to {model_path}")
    
    # Track the best validation loss; any epoch that does not beat it counts
    # towards early stopping.
    if val_epoch_loss < best_val_loss:
        best_val_loss = val_epoch_loss
        epochs_no_improve = 0
        best_model_path = os.path.join(save_dir, "best_model.pth")
        torch.save(model.state_dict(), best_model_path)
        print(f"Best model updated and saved to {best_model_path}")
    else:
        epochs_no_improve += 1
        print(f"No improvement for {epochs_no_improve} epoch(s).")

    if epochs_no_improve >= patience:
        print("Early stopping triggered.")
        break
