In [None]:
import os
import cv2
import torch
import numpy as np
from PIL import Image
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

In [None]:
# Config
DATASET_PATHS = {
    'caltech101': '../data/101_ObjectCategories',
    'cifar10': '../data'
}
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 32
EPOCHS = 15
LR = 0.001
IMAGE_SIZE = 128

In [None]:
# Transform
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

In [None]:
# Caltech Dataset
class CaltechDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.images = []
        self.labels = []
        self.transform = transform
        self.label_encoder = LabelEncoder()
        class_names = sorted([d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d)) and not d.startswith("BACKGROUND")])
        self.class_to_idx = {name: idx for idx, name in enumerate(class_names)}

        for label_name in class_names:
            folder = os.path.join(root_dir, label_name)
            for img_file in os.listdir(folder):
                img_path = os.path.join(folder, img_file)
                self.images.append(img_path)
                self.labels.append(label_name)

        self.labels = self.label_encoder.fit_transform(self.labels)
        self.classes = self.label_encoder.classes_

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        label = self.labels[idx]
        img = Image.open(img_path).convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img, label

In [None]:
# Model
class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * (IMAGE_SIZE // 8) * (IMAGE_SIZE // 8), 512),
            nn.ReLU(), nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        return self.classifier(x)

In [None]:
# Load Dataset

def load_dataset(name):
    if name == 'caltech101':
        dataset = CaltechDataset(DATASET_PATHS['caltech101'], transform=transform)
    elif name == 'cifar10':
        dataset = datasets.CIFAR10(root=DATASET_PATHS['cifar10'], train=True, download=True, transform=transform)
        dataset.classes = dataset.classes
    else:
        raise ValueError("Dataset not supported. Choose from 'caltech101' or 'cifar10'")
    return dataset

In [None]:
# Train

def train_model(model, train_loader, test_loader, epochs=EPOCHS):
    model.to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LR)

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        correct = 0
        for images, labels in train_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()

        acc = correct / len(train_loader.dataset) * 100
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss:.4f}, Train Acc: {acc:.2f}%")

In [None]:
# Evaluate

def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()

    acc = correct / len(test_loader.dataset) * 100
    print(f"\n✅ Test Accuracy: {acc:.2f}%")

In [None]:
# Run

def run_pipeline(dataset_name):
    full_dataset = load_dataset(dataset_name)
    train_size = int(0.8 * len(full_dataset))
    test_size = len(full_dataset) - train_size
    train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

    model = SimpleCNN(num_classes=len(full_dataset.classes))
    train_model(model, train_loader, test_loader)
    evaluate_model(model, test_loader)

In [None]:
run_pipeline("caltech101")

Epoch [1/15], Loss: 708.2667, Train Acc: 31.68%
Epoch [2/15], Loss: 481.8526, Train Acc: 50.06%
Epoch [3/15], Loss: 337.3019, Train Acc: 61.78%
Epoch [4/15], Loss: 226.3683, Train Acc: 73.62%


KeyboardInterrupt: 

In [2]:
run_pipeline("cifar10")

Epoch [1/15], Loss: 1835.2062, Train Acc: 46.82%


KeyboardInterrupt: 

In [None]:
import os
import cv2
import torch
import numpy as np
from PIL import Image
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

# ✅ Config
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 32
EPOCHS = 15
LR = 0.001

# ✅ Dataset-specific config
data_config = {
    'caltech101': {
        'path': '../data/101_ObjectCategories',
        'image_size': None
    },
    'cifar10': {
        'path': '../data',
        'image_size': None
    }
}

# ✅ Custom Caltech Dataset
class CaltechDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.images = []
        self.labels = []
        self.transform = transform
        self.label_encoder = LabelEncoder()
        class_names = sorted([d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d)) and not d.startswith("BACKGROUND")])
        self.class_to_idx = {name: idx for idx, name in enumerate(class_names)}

        for label_name in class_names:
            folder = os.path.join(root_dir, label_name)
            for img_file in os.listdir(folder):
                img_path = os.path.join(folder, img_file)
                self.images.append(img_path)
                self.labels.append(label_name)

        self.labels = self.label_encoder.fit_transform(self.labels)
        self.classes = self.label_encoder.classes_

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        label = self.labels[idx]
        img = Image.open(img_path).convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img, label

# ✅ Model definitions
class SimpleCNNCaltech(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNNCaltech, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 25 * 25, 512),
            nn.ReLU(), nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        return self.classifier(x)

class SimpleCNNCIFAR(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNNCIFAR, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 4 * 4, 256),
            nn.ReLU(), nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        return self.classifier(x)

# ✅ Training function
def train_model(model, train_loader, test_loader, epochs=EPOCHS):
    model.to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LR)

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        correct = 0
        for images, labels in train_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()

        acc = correct / len(train_loader.dataset) * 100
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss:.4f}, Train Acc: {acc:.2f}%")

# ✅ Evaluation function
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()

    acc = correct / len(test_loader.dataset) * 100
    print(f"\n✅ Test Accuracy: {acc:.2f}%")

# ✅ Run function
def run(dataset_name):
    if dataset_name == 'caltech101':
        transform = transforms.Compose([
            transforms.CenterCrop(200),
            transforms.ToTensor(),
            transforms.Normalize([0.5]*3, [0.5]*3)
        ])
        full_dataset = CaltechDataset(data_config[dataset_name]['path'], transform=transform)
        model = SimpleCNNCaltech(num_classes=len(full_dataset.classes))

    elif dataset_name == 'cifar10':
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.5]*3, [0.5]*3)
        ])
        full_dataset = datasets.CIFAR10(root=data_config[dataset_name]['path'], train=True, download=True, transform=transform)
        model = SimpleCNNCIFAR(num_classes=10)

    else:
        raise ValueError("Unsupported dataset")

    train_size = int(0.8 * len(full_dataset))
    test_size = len(full_dataset) - train_size
    train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

    train_model(model, train_loader, test_loader)
    evaluate_model(model, test_loader)

# ✅ Run one at a time:
run('caltech101')
# run('cifar10')