In [1]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Data pre-processing transformations
transform = transforms.Compose([
    transforms.Lambda(lambda img: img.convert('RGB')),  # Convert grayscale to RGB
    transforms.Resize((128, 128)),
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
])

# Download Caltech101 dataset
dataset = datasets.Caltech101(root='./data', download=True, transform=transform)

# Split into train, validation, and test sets
train_size = int(0.7 * len(dataset))
val_size = int(0.2 * len(dataset))
test_size = len(dataset) - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# Initialize data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

131740031it [00:04, 26813512.32it/s]


Extracting ./data/caltech101/101_ObjectCategories.tar.gz to ./data/caltech101


14028800it [00:00, 37850980.16it/s]


Extracting ./data/caltech101/Annotations.tar to ./data/caltech101


In [2]:
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torch.optim import lr_scheduler

# Define the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load pre-trained VGG-19 with batch normalization
model = models.vgg19_bn(weights='DEFAULT')

# Modify the last layer to match the number of classes in the Caltech101 dataset (102)
num_classes = 102  # Including background class
model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)

# Move the model to the device
model = model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.00148, momentum=0.9395, weight_decay=0.001415, dampening=0.00289)
#optimizer = optim.AdamW(model.parameters(), lr=0.00008, weight_decay=0.05)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=1, verbose=True)

# Train and validate the model
num_epochs = 20

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    # Training phase
    model.train()
    running_loss = 0.0
    for images, labels in tqdm(train_loader, desc="Training"):
        images, labels = images.to(device), labels.to(device)  # Move data to the device
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    train_loss = running_loss / len(train_loader)
    print(f"Training Loss: {train_loss}")
    
    # Validation phase
    model.eval()
    val_running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc="Validation"):
            images, labels = images.to(device), labels.to(device)  # Move data to the device
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    val_loss = val_running_loss / len(val_loader)
    val_accuracy = 100 * correct / total
    print(f"Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}%")
    scheduler.step(val_loss)

Epoch 1/20


Training: 100%|█████████████████████████████████| 95/95 [00:53<00:00,  1.77it/s]


Training Loss: 1.9474914585289202


Validation: 100%|███████████████████████████████| 28/28 [00:11<00:00,  2.42it/s]


Validation Loss: 0.41873027490718023, Validation Accuracy: 88.53025936599424%
Epoch 2/20


Training: 100%|█████████████████████████████████| 95/95 [00:52<00:00,  1.79it/s]


Training Loss: 0.26294397664697544


Validation: 100%|███████████████████████████████| 28/28 [00:08<00:00,  3.41it/s]


Validation Loss: 0.25676875321992804, Validation Accuracy: 93.19884726224784%
Epoch 3/20


Training: 100%|█████████████████████████████████| 95/95 [00:50<00:00,  1.87it/s]


Training Loss: 0.09472770126242387


Validation: 100%|███████████████████████████████| 28/28 [00:10<00:00,  2.60it/s]


Validation Loss: 0.23226211479465877, Validation Accuracy: 94.06340057636888%
Epoch 4/20


Training: 100%|█████████████████████████████████| 95/95 [00:50<00:00,  1.89it/s]


Training Loss: 0.03533951949916388


Validation: 100%|███████████████████████████████| 28/28 [00:10<00:00,  2.58it/s]


Validation Loss: 0.20818325950364983, Validation Accuracy: 94.35158501440922%
Epoch 5/20


Training: 100%|█████████████████████████████████| 95/95 [00:49<00:00,  1.92it/s]


Training Loss: 0.01745248100426244


Validation: 100%|███████████████████████████████| 28/28 [00:10<00:00,  2.66it/s]


Validation Loss: 0.19645906110028072, Validation Accuracy: 94.5821325648415%
Epoch 6/20


Training: 100%|█████████████████████████████████| 95/95 [00:50<00:00,  1.90it/s]


Training Loss: 0.011447401327620212


Validation: 100%|███████████████████████████████| 28/28 [00:10<00:00,  2.60it/s]


Validation Loss: 0.1996697986365429, Validation Accuracy: 94.69740634005764%
Epoch 7/20


Training: 100%|█████████████████████████████████| 95/95 [00:52<00:00,  1.79it/s]


Training Loss: 0.009602478854848367


Validation: 100%|███████████████████████████████| 28/28 [00:07<00:00,  3.57it/s]


Validation Loss: 0.19906419974618725, Validation Accuracy: 94.5821325648415%
Epoch 00007: reducing learning rate of group 0 to 1.4800e-04.
Epoch 8/20


Training: 100%|█████████████████████████████████| 95/95 [00:52<00:00,  1.81it/s]


Training Loss: 0.006483998875697388


Validation: 100%|███████████████████████████████| 28/28 [00:08<00:00,  3.46it/s]


Validation Loss: 0.19666583636509521, Validation Accuracy: 94.75504322766571%
Epoch 9/20


Training: 100%|█████████████████████████████████| 95/95 [00:56<00:00,  1.68it/s]


Training Loss: 0.005347094183640652


Validation: 100%|███████████████████████████████| 28/28 [00:07<00:00,  3.52it/s]


Validation Loss: 0.19423933120976603, Validation Accuracy: 94.69740634005764%
Epoch 10/20


Training: 100%|█████████████████████████████████| 95/95 [00:56<00:00,  1.69it/s]


Training Loss: 0.004843225672620495


Validation: 100%|███████████████████████████████| 28/28 [00:07<00:00,  3.58it/s]


Validation Loss: 0.19257320556789637, Validation Accuracy: 94.63976945244957%
Epoch 11/20


Training: 100%|█████████████████████████████████| 95/95 [00:57<00:00,  1.66it/s]


Training Loss: 0.004660854965301328


Validation: 100%|███████████████████████████████| 28/28 [00:07<00:00,  3.70it/s]


Validation Loss: 0.1925897456239909, Validation Accuracy: 94.52449567723343%
Epoch 12/20


Training: 100%|█████████████████████████████████| 95/95 [00:53<00:00,  1.79it/s]


Training Loss: 0.004301900003031877


Validation: 100%|███████████████████████████████| 28/28 [00:09<00:00,  2.97it/s]


Validation Loss: 0.19466013152019254, Validation Accuracy: 94.5821325648415%
Epoch 00012: reducing learning rate of group 0 to 1.4800e-05.
Epoch 13/20


Training: 100%|█████████████████████████████████| 95/95 [00:50<00:00,  1.88it/s]


Training Loss: 0.004744647476687341


Validation: 100%|███████████████████████████████| 28/28 [00:12<00:00,  2.21it/s]


Validation Loss: 0.19360807790820087, Validation Accuracy: 94.69740634005764%
Epoch 14/20


Training: 100%|█████████████████████████████████| 95/95 [00:49<00:00,  1.91it/s]


Training Loss: 0.0045647267705613845


Validation: 100%|███████████████████████████████| 28/28 [00:14<00:00,  1.93it/s]


Validation Loss: 0.19332863387119556, Validation Accuracy: 94.92795389048992%
Epoch 00014: reducing learning rate of group 0 to 1.4800e-06.
Epoch 15/20


Training: 100%|█████████████████████████████████| 95/95 [00:51<00:00,  1.86it/s]


Training Loss: 0.004247020986727684


Validation: 100%|███████████████████████████████| 28/28 [00:10<00:00,  2.71it/s]


Validation Loss: 0.1920000876499606, Validation Accuracy: 94.69740634005764%
Epoch 16/20


Training: 100%|█████████████████████████████████| 95/95 [00:53<00:00,  1.78it/s]


Training Loss: 0.005100276632430522


Validation: 100%|███████████████████████████████| 28/28 [00:10<00:00,  2.59it/s]


Validation Loss: 0.1927275916095823, Validation Accuracy: 94.81268011527378%
Epoch 17/20


Training: 100%|█████████████████████████████████| 95/95 [00:59<00:00,  1.60it/s]


Training Loss: 0.0042989414013726145


Validation: 100%|███████████████████████████████| 28/28 [00:08<00:00,  3.28it/s]


Validation Loss: 0.19086923327163927, Validation Accuracy: 94.69740634005764%
Epoch 18/20


Training: 100%|█████████████████████████████████| 95/95 [00:57<00:00,  1.65it/s]


Training Loss: 0.004502331145340576


Validation: 100%|███████████████████████████████| 28/28 [00:07<00:00,  3.76it/s]


Validation Loss: 0.19415822455526463, Validation Accuracy: 94.69740634005764%
Epoch 19/20


Training: 100%|█████████████████████████████████| 95/95 [00:54<00:00,  1.74it/s]


Training Loss: 0.0046251087694575916


Validation: 100%|███████████████████████████████| 28/28 [00:07<00:00,  3.60it/s]


Validation Loss: 0.19313025072083942, Validation Accuracy: 94.5821325648415%
Epoch 00019: reducing learning rate of group 0 to 1.4800e-07.
Epoch 20/20


Training: 100%|█████████████████████████████████| 95/95 [00:52<00:00,  1.80it/s]


Training Loss: 0.003646195843497193


Validation: 100%|███████████████████████████████| 28/28 [00:12<00:00,  2.28it/s]

Validation Loss: 0.19140979920380882, Validation Accuracy: 94.63976945244957%





In [3]:
from sklearn.metrics import classification_report
import numpy as np
from tqdm import tqdm

# Initialize variables for the classification report
y_true = np.array([])
y_pred = np.array([])

# Test the model
model.eval()  # Set model to evaluation mode
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in tqdm(test_loader, desc="Testing"):
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Append data for the classification report
        y_true = np.concatenate((y_true, labels.cpu().numpy()))
        y_pred = np.concatenate((y_pred, predicted.cpu().numpy()))

# Calculate the accuracy of the model on the test set
test_accuracy = 100 * correct / total
print(f"Test Accuracy: {test_accuracy}%")

# Generate a classification report
report = classification_report(y_true, y_pred)
print("Classification Report:")
print(report)

Testing: 100%|██████████████████████████████████| 14/14 [00:04<00:00,  3.26it/s]

Test Accuracy: 94.82163406214039%
Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      0.97      0.99        37
         1.0       0.98      1.00      0.99        45
         2.0       0.95      1.00      0.98        21
         3.0       1.00      1.00      1.00        76
         4.0       1.00      1.00      1.00         7
         5.0       1.00      1.00      1.00        90
         6.0       0.20      1.00      0.33         1
         7.0       1.00      0.67      0.80         3
         8.0       0.75      0.75      0.75         4
         9.0       1.00      1.00      1.00         5
        10.0       0.50      1.00      0.67         1
        11.0       0.75      1.00      0.86         3
        12.0       1.00      1.00      1.00        18
        13.0       1.00      0.75      0.86         4
        14.0       0.80      1.00      0.89         4
        15.0       1.00      1.00      1.00         8
        16.0       0.80 




In [4]:
torch.save(model.state_dict(), 'caltech101_vgg19bn.pth')
print("Model has been saved to caltech101_vgg19bn.pth")

Model has been saved to caltech101_vgg19bn.pth


In [2]:
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
import optuna

def objective(trial):
    # Define the device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Load pre-trained VGG-19 with batch normalization
    model = models.vgg19_bn(weights='DEFAULT')
    
    # Modify the last layer to match the number of classes in the Caltech101 dataset (102)
    num_classes = 102  # Including background class
    model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)
    
    # Move the model to the device
    model = model.to(device)

    # Define hyperparameters to be optimized
    weight_decay = trial.suggest_float('weight_decay', 0, 0.005)
    dampening = trial.suggest_float('dampening', 0, 0.005)

    # Loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.00148, momentum=0.9395, dampening=dampening, weight_decay=weight_decay)

    # Train and validate the model
    num_epochs = 7

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        for images, labels in tqdm(train_loader, desc="Training"):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Validation phase
        model.eval()
        val_running_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in tqdm(val_loader, desc="Validation"):
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_running_loss += loss.item()
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

        val_accuracy = 100 * correct / total

    return val_accuracy

if __name__ == '__main__':
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=50)

[I 2023-10-20 23:29:37,602] A new study created in memory with name: no-name-8c7d4047-20ac-4a30-a520-1514f086f09a
Training: 100%|█████████████████████████████████| 95/95 [00:35<00:00,  2.65it/s]
Validation: 100%|███████████████████████████████| 28/28 [00:05<00:00,  5.24it/s]
Training: 100%|█████████████████████████████████| 95/95 [00:37<00:00,  2.56it/s]
Validation: 100%|███████████████████████████████| 28/28 [00:05<00:00,  5.39it/s]
Training: 100%|█████████████████████████████████| 95/95 [00:36<00:00,  2.58it/s]
Validation: 100%|███████████████████████████████| 28/28 [00:05<00:00,  4.99it/s]
Training: 100%|█████████████████████████████████| 95/95 [00:39<00:00,  2.43it/s]
Validation: 100%|███████████████████████████████| 28/28 [00:05<00:00,  5.38it/s]
Training: 100%|█████████████████████████████████| 95/95 [00:39<00:00,  2.43it/s]
Validation: 100%|███████████████████████████████| 28/28 [00:05<00:00,  5.52it/s]
Training: 100%|█████████████████████████████████| 95/95 [00:38<00:00,  2.49i