In [1]:
!pip install gdown grad-cam ttach

Collecting grad-cam
  Downloading grad-cam-1.5.4.tar.gz (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m47.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting ttach
  Downloading ttach-0.0.3-py3-none-any.whl.metadata (5.2 kB)
Downloading ttach-0.0.3-py3-none-any.whl (9.8 kB)
Building wheels for collected packages: grad-cam
  Building wheel for grad-cam (pyproject.toml) ... [?25l[?25hdone
  Created wheel for grad-cam: filename=grad_cam-1.5.4-py3-none-any.whl size=39648 sha256=642d2ab297373aab9e4c56bad68d20d75a92b4a4aa6a57393753b3639d0aebec
  Stored in directory: /root/.cache/pip/wheels/8b/0d/d2/b12bec1ccc028921fb98158042ade2d19dae73925dfc636954
Successfully built grad-cam
Installing collected packages: ttach, grad-cam
Successfully installed grad-cam-1.5.4 ttach-0.0.3


In [2]:
# Import required libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import confusion_matrix, classification_report
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
import matplotlib.pyplot as plt
import numpy as np
from sklearn.manifold import TSNE
from tqdm.notebook import tqdm
import os
import requests
import tarfile
from google.colab import drive
import gdown

In [3]:
# Download and extract Caltech-101 dataset
def download_caltech101():
    """
    Download Caltech-101 dataset using gdown from Google Drive mirror
    """
    print("Downloading Caltech-101 dataset...")

    # Google Drive file ID for Caltech-101
    file_id = '137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp'

    # Download path
    output = '101_ObjectCategories.tar.gz'

    # Download the file from Google Drive
    gdown.download(f'https://drive.google.com/uc?id={file_id}', output, quiet=False)

    print("Extracting dataset...")
    with tarfile.open(output, "r:gz") as tar:
        tar.extractall()

    # Clean up the tar.gz file
    os.remove(output)
    print("Dataset downloaded and extracted successfully!")

In [4]:
# Data preprocessing
def get_data_transforms():
    return transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(30),
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
        transforms.RandomAffine(degrees=15, translate=(0.1, 0.1), scale=(0.9, 1.1)),
        transforms.RandomGrayscale(p=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

In [5]:
# Model setup
def setup_model(num_classes):
    print(f"Setting up model with {num_classes} classes")
    model = models.resnet50(pretrained=True)
    model.fc = nn.Linear(2048, num_classes)
    return model

# Training function
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, device='cuda'):
    best_val_acc = 0
    history = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': []}

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0
        correct = 0
        total = 0

        for images, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        train_acc = 100 * correct / total
        train_loss = train_loss / len(train_loader)

        # Validation phase
        model.eval()
        val_loss = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

        val_acc = 100 * correct / total
        val_loss = val_loss / len(val_loader)

        # Save history
        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['train_acc'].append(train_acc)
        history['val_acc'].append(val_acc)

        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%')

        # Save best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), '/content/drive/MyDrive/best_model.pth')

    return history

In [6]:
# Grad-CAM visualization
def visualize_gradcam(model, test_loader, class_names, num_images=5, device='cuda'):
    target_layer = model.layer4[-1]
    cam = GradCAM(model=model, target_layer=target_layer, use_cuda=torch.cuda.is_available())

    model.eval()
    images, labels = next(iter(test_loader))
    images = images[:num_images].to(device)
    labels = labels[:num_images].to(device)

    plt.figure(figsize=(20, 4))
    for idx in range(num_images):
        grayscale_cam = cam(input_tensor=images[idx].unsqueeze(0))
        rgb_img = images[idx].cpu().permute(1, 2, 0).numpy()
        rgb_img = (rgb_img - rgb_img.min()) / (rgb_img.max() - rgb_img.min())
        cam_image = show_cam_on_image(rgb_img, grayscale_cam[0])

        plt.subplot(1, num_images, idx + 1)
        plt.imshow(cam_image)
        plt.axis('off')
        plt.title(f'Class: {class_names[labels[idx]]}')

    plt.tight_layout()
    plt.savefig('/content/drive/MyDrive/gradcam_visualizations.png')
    plt.show()

In [7]:
# Main execution
if __name__ == "__main__":
    # Install required package if not already installed
    try:
        import gdown
    except ImportError:
        !pip install gdown
        import gdown

    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Download dataset if needed
    if not os.path.exists('101_ObjectCategories'):
        download_caltech101()

    # Set up data
    transform = get_data_transforms()
    dataset = datasets.ImageFolder(root='101_ObjectCategories', transform=transform)

    # Get the number of classes and class names
    num_classes = len(dataset.classes)
    class_names = dataset.classes
    print(f"Number of classes detected: {num_classes}")
    print("Class names:", class_names)

    # Split dataset
    train_size = int(0.8 * len(dataset))
    val_size = int(0.1 * len(dataset))
    test_size = len(dataset) - train_size - val_size
    train_data, val_data, test_data = random_split(dataset, [train_size, val_size, test_size])

    # Create data loaders
    train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=32)
    test_loader = DataLoader(test_data, batch_size=32)

    # Initialize model with correct number of classes
    model = setup_model(num_classes)
    model = model.to(device)

    # Set up training
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train model
    history = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, device=device)

    # Visualize Grad-CAM
    visualize_gradcam(model, test_loader, class_names, num_images=5, device=device)

Using device: cuda
Downloading Caltech-101 dataset...


Downloading...
From (original): https://drive.google.com/uc?id=137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp
From (redirected): https://drive.google.com/uc?id=137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp&confirm=t&uuid=271fbf31-8c90-4f79-981e-da352ed82688
To: /content/101_ObjectCategories.tar.gz
100%|██████████| 132M/132M [00:09<00:00, 14.5MB/s]


Extracting dataset...
Dataset downloaded and extracted successfully!
Number of classes detected: 102
Class names: ['BACKGROUND_Google', 'Faces', 'Faces_easy', 'Leopards', 'Motorbikes', 'accordion', 'airplanes', 'anchor', 'ant', 'barrel', 'bass', 'beaver', 'binocular', 'bonsai', 'brain', 'brontosaurus', 'buddha', 'butterfly', 'camera', 'cannon', 'car_side', 'ceiling_fan', 'cellphone', 'chair', 'chandelier', 'cougar_body', 'cougar_face', 'crab', 'crayfish', 'crocodile', 'crocodile_head', 'cup', 'dalmatian', 'dollar_bill', 'dolphin', 'dragonfly', 'electric_guitar', 'elephant', 'emu', 'euphonium', 'ewer', 'ferry', 'flamingo', 'flamingo_head', 'garfield', 'gerenuk', 'gramophone', 'grand_piano', 'hawksbill', 'headphone', 'hedgehog', 'helicopter', 'ibis', 'inline_skate', 'joshua_tree', 'kangaroo', 'ketch', 'lamp', 'laptop', 'llama', 'lobster', 'lotus', 'mandolin', 'mayfly', 'menorah', 'metronome', 'minaret', 'nautilus', 'octopus', 'okapi', 'pagoda', 'panda', 'pigeon', 'pizza', 'platypus', 'py

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 164MB/s]


Epoch 1/10:   0%|          | 0/229 [00:00<?, ?it/s]

Epoch 1/10:
Train Loss: 3.6089 | Train Acc: 24.51%
Val Loss: 3.2995 | Val Acc: 27.57%


Epoch 2/10:   0%|          | 0/229 [00:00<?, ?it/s]

Epoch 2/10:
Train Loss: 2.9186 | Train Acc: 34.49%
Val Loss: 3.0116 | Val Acc: 32.82%


Epoch 3/10:   0%|          | 0/229 [00:00<?, ?it/s]

Epoch 3/10:
Train Loss: 2.5345 | Train Acc: 41.03%
Val Loss: 2.3367 | Val Acc: 47.37%


Epoch 4/10:   0%|          | 0/229 [00:00<?, ?it/s]

Epoch 4/10:
Train Loss: 2.1801 | Train Acc: 47.16%
Val Loss: 2.2166 | Val Acc: 47.48%


Epoch 5/10:   0%|          | 0/229 [00:00<?, ?it/s]

Epoch 5/10:
Train Loss: 1.8963 | Train Acc: 52.30%
Val Loss: 1.8189 | Val Acc: 53.39%


Epoch 6/10:   0%|          | 0/229 [00:00<?, ?it/s]

Epoch 6/10:
Train Loss: 1.6614 | Train Acc: 57.32%
Val Loss: 1.6850 | Val Acc: 57.55%


Epoch 7/10:   0%|          | 0/229 [00:00<?, ?it/s]

Epoch 7/10:
Train Loss: 1.4639 | Train Acc: 61.45%
Val Loss: 1.5319 | Val Acc: 60.61%


Epoch 8/10:   0%|          | 0/229 [00:00<?, ?it/s]

Epoch 8/10:
Train Loss: 1.3162 | Train Acc: 64.78%
Val Loss: 1.4656 | Val Acc: 61.82%


Epoch 9/10:   0%|          | 0/229 [00:00<?, ?it/s]

Epoch 9/10:
Train Loss: 1.1639 | Train Acc: 68.42%
Val Loss: 1.2567 | Val Acc: 67.51%


Epoch 10/10:   0%|          | 0/229 [00:00<?, ?it/s]

Exception ignored in: <function BaseCAM.__del__ at 0x7df2ce2a8a40>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/pytorch_grad_cam/base_cam.py", line 189, in __del__
    self.activations_and_grads.release()
    ^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'GradCAM' object has no attribute 'activations_and_grads'


Epoch 10/10:
Train Loss: 1.0633 | Train Acc: 71.55%
Val Loss: 1.4770 | Val Acc: 64.00%


TypeError: GradCAM.__init__() got an unexpected keyword argument 'target_layer'

In [8]:
from google.colab import files
import os
import torch
import json
import matplotlib.pyplot as plt

def save_and_download_results(model, history, class_names, base_path='/content/model_results'):
    """
    Save and download model, training history, and visualizations
    """
    # Create directory if it doesn't exist
    os.makedirs(base_path, exist_ok=True)

    # 1. Save model state
    model_path = os.path.join(base_path, 'final_model.pth')
    torch.save({
        'model_state_dict': model.state_dict(),
        'class_names': class_names,
        'num_classes': len(class_names)
    }, model_path)

    # 2. Save training history
    history_path = os.path.join(base_path, 'training_history.json')
    with open(history_path, 'w') as f:
        json.dump(history, f)

    # 3. Plot and save training curves
    plt.figure(figsize=(12, 4))

    # Loss plot
    plt.subplot(1, 2, 1)
    plt.plot(history['train_loss'], label='Train Loss')
    plt.plot(history['val_loss'], label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    # Accuracy plot
    plt.subplot(1, 2, 2)
    plt.plot(history['train_acc'], label='Train Accuracy')
    plt.plot(history['val_acc'], label='Validation Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.legend()

    plt.tight_layout()
    plt.savefig(os.path.join(base_path, 'training_curves.png'))
    plt.close()

    # 4. Create a README file
    readme_content = f"""# Caltech-101 Classification Model

Training completed with:
- Number of classes: {len(class_names)}
- Final training accuracy: {history['train_acc'][-1]:.2f}%
- Final validation accuracy: {history['val_acc'][-1]:.2f}%

Files included:
- final_model.pth: Trained PyTorch model
- training_history.json: Training metrics history
- training_curves.png: Visualization of training progress
- gradcam_visualizations.png: Grad-CAM visualizations

To load the model:
```python
import torch
checkpoint = torch.load('final_model.pth')
model.load_state_dict(checkpoint['model_state_dict'])
class_names = checkpoint['class_names']
```
"""

    with open(os.path.join(base_path, 'README.md'), 'w') as f:
        f.write(readme_content)

    # 5. Copy Grad-CAM visualizations if they exist
    if os.path.exists('/content/drive/MyDrive/gradcam_visualizations.png'):
        import shutil
        shutil.copy('/content/drive/MyDrive/gradcam_visualizations.png',
                   os.path.join(base_path, 'gradcam_visualizations.png'))

    # 6. Create zip file
    !zip -r /content/model_results.zip {base_path}

    # 7. Download zip file
    files.download('/content/model_results.zip')

    print("All files have been saved and download should start automatically!")

In [9]:
save_and_download_results(model, history, dataset.classes)

  adding: content/model_results/ (stored 0%)
  adding: content/model_results/training_history.json (deflated 48%)
  adding: content/model_results/final_model.pth (deflated 7%)
  adding: content/model_results/README.md (deflated 42%)
  adding: content/model_results/training_curves.png (deflated 5%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

All files have been saved and download should start automatically!


In [10]:
def visualize_gradcam(model, test_loader, class_names, num_images=5, device='cuda'):
    """
    Updated Grad-CAM visualization function with correct API usage
    """
    # Get the target layer for Grad-CAM
    target_layers = [model.layer4[-1]]

    # Initialize Grad-CAM with correct parameters
    cam = GradCAM(
        model=model,
        target_layers=target_layers,  # Changed from target_layer to target_layers
        use_cuda=torch.cuda.is_available()
    )

    model.eval()
    images, labels = next(iter(test_loader))
    images = images[:num_images].to(device)
    labels = labels[:num_images].to(device)

    plt.figure(figsize=(20, 4))
    for idx in range(num_images):
        # Get the input tensor
        input_tensor = images[idx].unsqueeze(0)

        # Generate Grad-CAM
        grayscale_cam = cam(input_tensor=input_tensor, targets=None)[0]

        # Convert the image for visualization
        rgb_img = images[idx].cpu().permute(1, 2, 0).numpy()
        rgb_img = (rgb_img - rgb_img.min()) / (rgb_img.max() - rgb_img.min())

        # Create the CAM visualization
        visualization = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)

        # Plot
        plt.subplot(1, num_images, idx + 1)
        plt.imshow(visualization)
        plt.axis('off')
        plt.title(f'Class: {class_names[labels[idx]]}')

    plt.tight_layout()
    plt.savefig('/content/drive/MyDrive/gradcam_visualizations.png')
    plt.show()

In [12]:
visualize_gradcam(model, test_loader, class_names, num_images=5)

Exception ignored in: <function BaseCAM.__del__ at 0x7df2ce2a8a40>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/pytorch_grad_cam/base_cam.py", line 189, in __del__
    self.activations_and_grads.release()
    ^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'GradCAM' object has no attribute 'activations_and_grads'


TypeError: GradCAM.__init__() got an unexpected keyword argument 'use_cuda'