In [1]:
import timm
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

import matplotlib.pyplot as plt
from tqdm import tqdm

from config import *
from stratified_dataset_splitter import StratifiedDatasetSplitter

# Build a pretrained DeiT-base classifier with `num_classes` outputs and place
# it on the GPU when CUDA is available, otherwise on the CPU.
model = timm.create_model(
    "deit_base_patch16_224",
    pretrained=True,
    num_classes=num_classes,  # change to match the dataset
).to("cuda" if torch.cuda.is_available() else "cpu")


In [2]:
# Split the dataset rooted at 'data' into its train / validation / test parts.
# NOTE(review): the names assume split_dataset() returns three DataLoaders;
# its implementation is not visible in this notebook -- confirm.
splitter = StratifiedDatasetSplitter('data')
train_loader, val_loader, test_loader = splitter.split_dataset()

In [4]:
# Optimisation setup: cross-entropy objective, AdamW over every model
# parameter (no lr is passed, so the optimizer's default applies), and a
# cosine learning-rate schedule whose period is `num_epochs` steps.
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(params=model.parameters(), weight_decay=0.05)
scheduler = optim.lr_scheduler.CosineAnnealingLR(
    optimizer=optimizer,
    T_max=num_epochs,
)

In [5]:
def plot_training_curves(train_losses, test_accuracies, epochs):
    """Draw the loss and accuracy curves side by side, save them, show them.

    Args:
        train_losses: One loss value per epoch; drawn in the left panel.
        test_accuracies: One accuracy value per epoch; drawn in the right
            panel, whose y-axis is labelled as a percentage.
        epochs: Number of epochs. The x-axis runs from 1 to ``epochs``, so
            both sequences need exactly this many entries.

    The figure is written to the relative path 'training_curves.png' before
    it is displayed.
    """
    epoch_axis = range(1, epochs + 1)
    # (series, panel title, y-axis label) for the left and right panels.
    panels = (
        (train_losses, 'Training Loss', 'Loss'),
        (test_accuracies, 'Test Accuracy', 'Accuracy (%)'),
    )

    plt.figure(figsize=(12, 5))
    for position, (series, title, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(epoch_axis, series)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)

    plt.tight_layout()
    plt.savefig('training_curves.png')
    plt.show()


In [10]:
# Training loop: fine-tune `model` for `num_epochs` epochs, recording the mean
# training loss and the test-set accuracy after every epoch, then plot both.
#
# NOTE(review): the recorded run of this cell stopped on its first batch with
# "AttributeError: 'int' object has no attribute 'to'". That is the error one
# gets when `train_loader` yields single (image, int_label) samples rather
# than batched tensors -- presumably split_dataset() handed back datasets, not
# DataLoaders. Confirm before re-running.


def _accuracy_percent(net, loader):
    """Return the top-1 accuracy of `net` over `loader`, in percent.

    Computed locally because `evaluate_model` is not defined in any visible
    cell, and the other visible call to it passes `device=` and forwards its
    result to `print_evaluation_results` instead of formatting it as a float.
    """
    net.eval()
    hits = 0
    seen = 0
    with torch.no_grad():
        for batch_inputs, batch_labels in loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            predictions = net(batch_inputs).argmax(dim=1)
            hits += (predictions == batch_labels).sum().item()
            seen += batch_labels.size(0)
    return 100 * hits / seen


train_losses = []
test_accuracies = []

for epoch in range(num_epochs):
    # _accuracy_percent() leaves the model in eval mode, so re-enter training
    # mode at the start of every epoch.
    model.train()
    running_loss = 0.0

    # Progress bar for training
    pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}')

    for inputs, labels in pbar:
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward and optimize
        loss.backward()
        optimizer.step()

        # Update statistics (read the scalar once; it is used twice)
        batch_loss = loss.item()
        running_loss += batch_loss
        pbar.set_postfix({'loss': batch_loss})

    # Mean of the per-batch losses for this epoch
    epoch_loss = running_loss / len(train_loader)
    train_losses.append(epoch_loss)

    # Evaluate on test set
    test_accuracy = _accuracy_percent(model, test_loader)
    test_accuracies.append(test_accuracy)

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, '
          f'Test Accuracy: {test_accuracy:.2f}%')

    # Update learning rate
    scheduler.step()

# Plot training curves; the x-axis length is the number of recorded epochs so
# it always matches the two series.
plot_training_curves(train_losses, test_accuracies, len(train_losses))

Epoch 1/100:   0%|                                                                              | 0/976 [00:00<?, ?it/s]


AttributeError: 'int' object has no attribute 'to'

In [None]:
# Final evaluation: run the model over the test loader on `device`, then hand
# whatever evaluate_model returns to the reporting helper.
results = evaluate_model(model=model, dataloader=test_loader, device=device)
print_evaluation_results(results)

In [None]:
# Configurations
# -- Hardware: prefer CUDA when it is available, otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# -- Task size and schedule.
num_classes = 3
num_epochs = 125
batch_size = 32

# -- Optimisation and regularisation. The value in parentheses is the
#    alternative that was kept commented out next to each setting.
learning_rate = 1e-4  # (3e-6)
dropout_rate = 0.5    # (0.2)
weight_decay = 1e-4   # (1e-3)

# -- Architecture selector: choose from 'vgg', 'resnet', 'densenet'.
model_name = "resnet"

In [None]:
# Preprocessing applied to every image loaded with this transform: resize,
# convert to a tensor, then normalise each of the three channels.
# NOTE(review): mean/std of 0.5 is used here; the pretrained DeiT weights
# presumably expect ImageNet statistics -- confirm against the model's
# pretrained config.
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),  # DeiT requires 224x224 input
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
])


import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import StratifiedShuffleSplit
import numpy as np


# Load the dataset: one ImageFolder rooted at `data_dir`, with `transform`
# attached to it.
data_dir = 'data'
dataset = datasets.ImageFolder(root=data_dir, transform=transform)

# Class index of every sample, in dataset order
targets = np.array(dataset.targets)

# Split proportions, and the seed shared by both splits for reproducibility
train_ratio, val_ratio, test_ratio = 0.7, 0.1, 0.2
random_seed = 42

# Stage 1: hold out the test share; what remains is train + validation.
# Only the labels drive the stratification, so a zero array stands in for X.
strat_split = StratifiedShuffleSplit(n_splits=1, test_size=test_ratio, random_state=random_seed)
train_val_indices, test_indices = next(
    strat_split.split(np.zeros(len(targets)), targets)
)

# Stage 2: carve validation out of the remainder. The fraction is rescaled
# because it now applies to the train+val pool rather than the whole dataset.
train_val_targets = targets[train_val_indices]
strat_split = StratifiedShuffleSplit(
    n_splits=1,
    test_size=val_ratio / (1 - test_ratio),
    random_state=random_seed,
)
train_positions, val_positions = next(
    strat_split.split(np.zeros(len(train_val_targets)), train_val_targets)
)

# Stage-2 results are positions within the pool; map them back to indices
# into the full dataset.
train_indices = train_val_indices[train_positions]
val_indices = train_val_indices[val_positions]


# Wrap each index set in a Subset view of the full dataset
train_dataset, val_dataset, test_dataset = (
    Subset(dataset, split_indices)
    for split_indices in (train_indices, val_indices, test_indices)
)

# Batched loaders; only the training loader shuffles its samples
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


# Sanity check: per-class sample counts in each split, to confirm that the
# stratification kept the class proportions similar.
train_labels = list(targets[train_indices])
val_labels = list(targets[val_indices])
test_labels = list(targets[test_indices])

for caption, split_labels in (
    ("Training set label distribution:", train_labels),
    ("Validation set label distribution:", val_labels),
    ("Testing set label distribution:", test_labels),
):
    print(caption, np.bincount(split_labels))


# Peek at a single training batch: the image tensor's shape and its labels
images, labels = next(iter(train_loader))
print(images.shape, labels)


Training set label distribution: [285 394 388]
Validation set label distribution: [41 56 56]
Testing set label distribution: [ 81 113 111]
torch.Size([32, 3, 224, 224]) tensor([2, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 2, 1, 0, 1, 2, 1, 2, 1, 1,
        2, 2, 2, 0, 0, 0, 2, 0])


In [None]:
# Objective and optimiser for fine-tuning: cross-entropy loss, and AdamW over
# all model parameters with a 5e-5 learning rate and 1e-4 weight decay.
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(
    params=model.parameters(),
    lr=5e-5,
    weight_decay=1e-4,
)


In [None]:
def evaluate(model, dataloader, criterion, device=None):
    """Measure the mean batch loss and top-1 accuracy of ``model`` on ``dataloader``.

    Args:
        model: Network to evaluate; it is switched to eval mode and left there.
        dataloader: Sized iterable of ``(inputs, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss
            tensor.
        device: Device each batch is moved to. Defaults to the device of the
            model's first parameter (CPU for a parameter-free model), so the
            function no longer relies on a notebook-level ``device`` variable
            having been defined by some other cell.

    Returns:
        ``(val_loss, accuracy)``: the per-batch losses averaged over
        ``len(dataloader)`` batches, and the percentage of samples whose
        highest-scoring class equals the label.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()

            # Predicted class = index of the largest score along dim 1.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss /= len(dataloader)
    accuracy = 100 * correct / total
    return val_loss, accuracy


In [None]:
# Fine-tuning run: `num_epochs` passes over the training loader, each followed
# by a validation pass whose loss and accuracy are printed.
num_epochs = 10
device = "cuda" if torch.cuda.is_available() else "cpu"

for epoch in range(num_epochs):
    # evaluate() leaves the model in eval mode, so switch back every epoch.
    model.train()
    loss_total = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # One optimisation step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        loss_total += loss.item()

    # Mean of the per-batch training losses for this epoch
    train_loss = loss_total / len(train_loader)
    val_loss, val_accuracy = evaluate(model, val_loader, criterion)
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, "
          f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")

print("Training complete!")


Epoch 1/10, Train Loss: 0.2639, Val Loss: 0.1945, Val Accuracy: 92.81%
Epoch 2/10, Train Loss: 0.0686, Val Loss: 0.1499, Val Accuracy: 93.46%
Epoch 3/10, Train Loss: 0.0211, Val Loss: 0.1355, Val Accuracy: 94.77%
Epoch 4/10, Train Loss: 0.0228, Val Loss: 0.3278, Val Accuracy: 92.16%
Epoch 5/10, Train Loss: 0.0128, Val Loss: 0.1555, Val Accuracy: 94.77%
Epoch 6/10, Train Loss: 0.0023, Val Loss: 0.2011, Val Accuracy: 93.46%
Epoch 7/10, Train Loss: 0.0012, Val Loss: 0.1729, Val Accuracy: 94.12%
Epoch 8/10, Train Loss: 0.0009, Val Loss: 0.1763, Val Accuracy: 94.12%
Epoch 9/10, Train Loss: 0.0007, Val Loss: 0.1773, Val Accuracy: 94.12%
Epoch 10/10, Train Loss: 0.0006, Val Loss: 0.1816, Val Accuracy: 93.46%
Training complete!


In [None]:
# Final validation accuracy. This reuses the evaluate() helper defined in this
# notebook rather than a second hand-written copy of the same counting loop;
# evaluate() puts the model in eval mode and disables gradients itself.
# Element [1] of its return value is the accuracy in percent.
accuracy = evaluate(model, val_loader, criterion)[1]
print(f"Validation Accuracy: {accuracy:.2f}%")


Validation Accuracy: 93.46%
