In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Step 1: Load and preprocess the CIFAR-10 dataset
# Each RGB channel is normalized with mean 0.5 and std 0.5 after conversion to a tensor.
channel_mean = (0.5, 0.5, 0.5)
channel_std = (0.5, 0.5, 0.5)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

# Build the train split first, then the test split (download=True, stored under ./data)
trainset, testset = (
    torchvision.datasets.CIFAR10(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Mini-batch loaders; only the training loader shuffles
testloader = DataLoader(testset, batch_size=64, shuffle=False)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)

# Step 2: Define the MLP model with Dense Layers
class MLP(nn.Module):
    """Fully connected classifier: four hidden dense layers and a 10-unit output layer.

    Every hidden layer is followed by ReLU and dropout (p=0.5). ``forward``
    returns the raw output of the last linear layer, with no activation applied.
    """

    def __init__(self):
        super().__init__()

        # Collapses all non-batch dimensions (a 3x32x32 image becomes 3072 features)
        self.flatten = nn.Flatten()

        # Dense stack: 3072 -> 1024 -> 512 -> 256 -> 128 -> 10
        self.fc1 = nn.Linear(3 * 32 * 32, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 128)
        self.fc5 = nn.Linear(128, 10)

        # Shared, parameter-free activation and regularisation modules
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Map a batch of images to one score per class (10 values per sample)."""
        hidden = self.flatten(x)

        # Each hidden stage is linear -> ReLU -> dropout, applied in order
        for dense in (self.fc1, self.fc2, self.fc3, self.fc4):
            hidden = self.dropout(self.relu(dense(hidden)))

        # Output layer is left un-activated; the loss used with it works on raw scores
        return self.fc5(hidden)

# Step 3: Build the model, the loss function and the optimizer
def _count_correct(net, loader, target_device):
    """Return (correct, total) for ``net`` over ``loader`` using arg-max predictions.

    The network is switched to evaluation mode and gradients are disabled.
    """
    net.eval()
    hits = 0
    seen = 0
    with torch.no_grad():
        for batch_inputs, batch_labels in loader:
            batch_inputs = batch_inputs.to(target_device)
            batch_labels = batch_labels.to(target_device)
            _, guesses = torch.max(net(batch_inputs), 1)
            seen += batch_labels.size(0)
            hits += (guesses == batch_labels).sum().item()
    return hits, seen


# Prefer the GPU when CUDA is available, otherwise stay on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MLP()
model.to(device)

# Multi-class classification loss and Adam optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Step 4: Train the model
num_epochs = 10

for epoch in range(num_epochs):
    model.train()  # Training mode (dropout active)
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard step: reset gradients, forward, loss, backward, update
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # Report the mean loss of every completed group of 100 mini-batches
        running_loss += loss.item()
        if (i + 1) % 100 == 0:
            print(f"[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

    # Per-epoch check; note that it is measured on the test loader
    correct, total = _count_correct(model, testloader, device)
    accuracy = 100 * correct / total
    print(f"Epoch {epoch + 1} Validation Accuracy: {accuracy:.2f}%")

print("Finished Training")

# Step 5: Final accuracy on the test set
correct, total = _count_correct(model, testloader, device)

print(f"Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%")


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Step 1: Load and preprocess the CIFAR-10 dataset
# Images are converted to tensors and every RGB channel is normalized with mean/std 0.5.
rgb_mean = (0.5, 0.5, 0.5)
rgb_std = (0.5, 0.5, 0.5)
to_normalized_tensor = [
    transforms.ToTensor(),
    transforms.Normalize(rgb_mean, rgb_std),
]
transform = transforms.Compose(to_normalized_tensor)

# CIFAR-10 train and test splits (download=True, stored under ./data)
cifar_kwargs = dict(root='./data', download=True, transform=transform)
trainset = torchvision.datasets.CIFAR10(train=True, **cifar_kwargs)
testset = torchvision.datasets.CIFAR10(train=False, **cifar_kwargs)

# Data loaders: shuffled for training, fixed order for testing
trainloader = DataLoader(trainset, shuffle=True, batch_size=64)
testloader = DataLoader(testset, shuffle=False, batch_size=64)

# Step 2: Define the CNN model
class SimpleCNN(nn.Module):
    """Small CNN for 3x32x32 images: two conv+ReLU+max-pool stages, then two linear layers.

    Both convolutions use 3x3 kernels with padding 1 (spatial size preserved);
    each 2x2 max-pool halves height and width, so a 32x32 input reaches the
    linear layers as 32 channels of 8x8.
    """

    def __init__(self):
        super().__init__()

        # 3 input channels (RGB) -> 16 feature maps
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1)

        # 16 -> 32 feature maps
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)

        # 2x2 max pooling, reused after each convolution
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Classifier head on the flattened 32x8x8 feature map
        self.fc1 = nn.Linear(32 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, 10)  # One output per CIFAR-10 class

    def forward(self, x):
        """Return raw class scores for a batch of 3x32x32 images.

        Raises:
            ValueError: if the feature map before the linear layers is not
                32x8x8, i.e. the input images were not 32x32.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))

        # Guard the reshape: with a wrong input size, view(-1, ...) would not fail
        # but silently move spatial data into the batch dimension.
        if tuple(x.shape[-3:]) != (32, 8, 8):
            raise ValueError(
                f"SimpleCNN expects 3x32x32 inputs; got a feature map of shape {tuple(x.shape)}"
            )
        x = x.view(-1, 32 * 8 * 8)

        x = torch.relu(self.fc1(x))
        return self.fc2(x)

# Step 3: Instantiate the model, define the loss function, and the optimizer
# Use the GPU when CUDA is available (same policy as the MLP cell), otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleCNN().to(device)

# Loss function (CrossEntropyLoss for multi-class classification)
criterion = nn.CrossEntropyLoss()

# Optimizer (SGD with learning rate 0.001 and momentum 0.9)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Step 4: Train the model
num_epochs = 10  # Number of passes over the training set

for epoch in range(num_epochs):
    model.train()  # Explicit training mode, in case the model was left in eval mode
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        # Batches must live on the same device as the model
        inputs, labels = inputs.to(device), labels.to(device)

        # Reset gradients, run forward/backward, then update the parameters
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Print the mean loss of every 100 mini-batches
        running_loss += loss.item()
        if i % 100 == 99:
            print(f"[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

print("Finished Training")

# Step 5: Test the model on the test set
model.eval()  # Evaluation mode
correct = 0
total = 0

# Gradients are not needed for testing
with torch.no_grad():
    for inputs, labels in testloader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)  # Index of the highest score = predicted class
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%")


In [None]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Step 1: Load and preprocess the CIFAR-10 dataset
# Training images get random augmentation (horizontal flip and padded random crop).
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),  # Randomly flip images horizontally
    transforms.RandomCrop(32, padding=4),  # Randomly crop images with padding
    transforms.ToTensor(),  # Convert image to tensor
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Per-channel mean and std
])

# Test images must NOT be augmented: random flips/crops would make the reported
# accuracy noisy and not comparable between runs. Only the deterministic steps remain.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Download CIFAR-10 train and test datasets
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)

# Create data loaders
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Step 2: Define the improved CNN model
class AdvancedCNN(nn.Module):
    def __init__(self):
        super(AdvancedCNN, self).__init__()

        # First convolutional layer
        self.conv1 = nn.Conv2d(in_channels=3,  # Input channels (RGB)
                               out_channels=32,  # Output channels (number of filters)
                               kernel_size=3,  # Size of the convolutional kernel/filter
                               stride=1,  # Step size for sliding the filter
                               padding=1)  # Padding to maintain image size

        # Batch normalization layer for the first convolutional layer
        self.bn1 = nn.BatchNorm2d(32)

        # Second convolutional layer
        self.conv2 = nn.Conv2d(in_channels=32,  # Input channels (from previous layer)
                               out_channels=64,  # Output channels (number of filters)
                               kernel_size=3,  # Size of the convolutional kernel/filter
                               stride=1,  # Step size for sliding the filter
                               padding=1)  # Padding to maintain image size

        # Batch normalization layer for the second convolutional layer
        self.bn2 = nn.BatchNorm2d(64)

        # Third convolutional layer
        self.conv3 = nn.Conv2d(in_channels=64,  # Input channels (from previous layer)
                               out_channels=128,  # Output channels (number of filters)
                               kernel_size=3,  # Size of the convolutional kernel/filter
                               stride=1,  # Step size for sliding the filter
                               padding=1)  # Padding to maintain image size

        # Max pooling layer (downsampling)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Dropout layer (to avoid overfitting)
        self.dropout = nn.Dropout(0.5)

        # Fully connected layers
        self.fc1 = nn.Linear(128 * 4 * 4, 256)  # Flattened size is 128 channels, 4x4 image after pooling
        self.fc2 = nn.Linear(256, 10)  # Output layer for 10 classes in CIFAR-10 dataset

    def forward(self, x):
        # Apply first convolutional layer, batch normalization, and ReLU activation
        x = self.pool(torch.relu(self.bn1(self.conv1(x))))

        # Apply second convolutional layer, batch normalization, and ReLU activation
        x = self.pool(torch.relu(self.bn2(self.conv2(x))))

        # Apply third convolutional layer and ReLU activation
        x = self.pool(torch.relu(self.conv3(x)))

        # Flatten the output before passing it into the fully connected layer
        x = x.view(-1, 128 * 4 * 4)  # Flatten the tensor to shape (batch_size, 128*4*4)

        # Pass through the first fully connected layer and apply dropout
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)

        # Pass through the second fully connected layer (output layer)
        x = self.fc2(x)

        return x

# Step 3: Instantiate the model, define the loss function, and the optimizer
# Use the GPU when CUDA is available (same policy as the MLP cell), otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AdvancedCNN().to(device)

# Loss function (CrossEntropyLoss for multi-class classification)
criterion = nn.CrossEntropyLoss()

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Step 4: Learning rate scheduler (multiply the LR by 0.1 every 10 epochs)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# Step 5: Train the model
num_epochs = 15

for epoch in range(num_epochs):
    model.train()  # Dropout active, BatchNorm uses batch statistics
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        # Batches must live on the same device as the model
        inputs, labels = inputs.to(device), labels.to(device)

        # Reset gradients, run forward/backward, then update the parameters
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Print the mean loss of every 100 mini-batches
        running_loss += loss.item()
        if i % 100 == 99:
            print(f"[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

    # Advance the schedule once per epoch, after the optimizer updates
    scheduler.step()

print("Finished Training")

# Step 6: Test the model on the test set
# eval() is required here: without it dropout keeps zeroing activations and
# BatchNorm normalizes with per-batch statistics, which distorts the accuracy.
model.eval()
correct = 0
total = 0

# Gradients are not needed for testing
with torch.no_grad():
    for inputs, labels in testloader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)  # Index of the highest score = predicted class
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%")


In [None]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms

class CustomImageDataset(Dataset):
    """Image dataset read from a directory tree with one sub-directory per class.

    Class names are the sorted names of the immediate sub-directories of
    ``root_dir``; the label of a class is its position in that sorted list.
    Image files are collected recursively below each class directory, in
    sorted order so that indices are reproducible regardless of filesystem order.
    """

    def __init__(self, root_dir, transform=None):
        """
        Args:
            root_dir (str): Root directory path.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.root_dir = root_dir
        self.transform = transform

        # Get the class names from the subdirectory names (scandir handle closed promptly)
        with os.scandir(root_dir) as entries:
            self.classes = sorted(entry.name for entry in entries if entry.is_dir())
        self.class_to_idx = {cls_name: idx for idx, cls_name in enumerate(self.classes)}

        # List of (image_path, label) tuples, grouped by class
        self.image_labels = []
        for cls in self.classes:
            label = self.class_to_idx[cls]
            cls_dir = os.path.join(root_dir, cls)
            # Sort directories and file names: os.walk order is filesystem-dependent
            for current_dir, _, files in sorted(os.walk(cls_dir)):
                self.image_labels.extend(
                    (os.path.join(current_dir, name), label)
                    for name in sorted(files)
                    if self.is_image_file(name)
                )

    def __len__(self):
        """Return the number of collected image files."""
        return len(self.image_labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is loaded as RGB and passed through ``transform`` when one is set.

        Raises:
            IndexError: if ``idx`` is negative or not smaller than ``len(self)``.
        """
        if idx < 0 or idx >= len(self):
            raise IndexError("Index out of bounds")

        img_path, label = self.image_labels[idx]
        # The context manager closes the underlying file once the RGB copy exists
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label

    @staticmethod
    def is_image_file(filename):
        """Check if a file is an image, judged by its (case-insensitive) extension."""
        return filename.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.gif'))


In [None]:
# Mount Google Drive at /content/drive (needs the google.colab runtime)
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Extract custom-dataset.zip into the current working directory
!unzip custom-dataset.zip -d ./

In [None]:
from torch.utils.data import DataLoader
from torchvision import transforms

# Preprocessing: resize to 224x224, convert to a tensor, normalize with ImageNet statistics
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

# Folder-per-class dataset served in shuffled batches of four by four worker processes
dataset = CustomImageDataset(root_dir='./custom-dataset', transform=transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=4)

# Sanity check: show the first batch only
for batch_idx, batch in enumerate(dataloader):
    images, labels = batch
    print(f'Batch {batch_idx + 1}')
    print(f'Images shape: {images.shape}')
    print(f'Labels: {labels}')
    break

In [None]:
# Install the `datasets` package (imported by the following cell)
!pip install datasets

In [None]:
from transformers import AutoImageProcessor, ResNetForImageClassification
import torch
from datasets import load_dataset
from PIL import Image
import matplotlib.pyplot as plt

# Load the dataset from the local image folder.
# Only one dataset is loaded; the Hub alternatives below are kept for reference
# (the original loaded "RGurung/Animal_dataset" and immediately overwrote it).
# dataset = load_dataset("huggingface/cats-image")
# dataset = load_dataset("AI-Lab-Makerere/beans")
# dataset = load_dataset("RGurung/Animal_dataset")
dataset = load_dataset('imagefolder', data_dir="./custom-dataset")

# Display the dataset structure
print(dataset)

# Select one image from the train split. Index the row first so only that image
# is decoded (dataset["train"]["image"] would decode the whole column), and
# force RGB so the processor always receives three channels.
image = dataset["train"][5]["image"].convert("RGB")

# Load the image processor and model
processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")

# Preprocess the image
inputs = processor(image, return_tensors="pt")

# Perform inference without tracking gradients
with torch.no_grad():
    logits = model(**inputs).logits

# Get the predicted class index
predicted_class_idx = logits.argmax(-1).item()

# Retrieve the label corresponding to the predicted index
predicted_label = model.config.id2label[predicted_class_idx]

# Print the predicted class index and label
print(f"Predicted Class Index: {predicted_class_idx}")
print(f"Predicted Label: {predicted_label}")

# Plot the image with the predicted label
plt.figure(figsize=(8, 8))
plt.imshow(image)
plt.title(f"Predicted Label: {predicted_label}", fontsize=16)
plt.axis('off')  # Hide the axis
plt.show()


In [None]:
# Install the `evaluate` package (imported by the following cell)
!pip install evaluate

In [None]:
# 1. Import Necessary Libraries
from transformers import (
    AutoImageProcessor,
    ResNetForImageClassification,
    TrainingArguments,
    Trainer
)
import torch
from datasets import load_dataset
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from torchvision.transforms import (
    Compose,
    Resize,
    CenterCrop,
    ToTensor,
    Normalize,
    RandomHorizontalFlip,
    RandomRotation
)
import evaluate
import torch.nn as nn

# 2. Load the Beans Dataset and show its structure
dataset = load_dataset("AI-Lab-Makerere/beans")
print(dataset)

# Class count and class names both come from the 'labels' feature of the train split
label_feature = dataset['train'].features['labels']
num_classes = label_feature.num_classes
print(f"Number of classes: {num_classes}")
class_names = label_feature.names
print(f"Class names: {class_names}")

# 3. Define Image Transformations
# Both pipelines normalize with the ImageNet channel statistics.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Training: resize, random flip and rotation (up to 15 degrees), tensor, normalize
train_transforms = Compose([
    Resize((224, 224)),
    RandomHorizontalFlip(),
    RandomRotation(15),
    ToTensor(),
    Normalize(mean=imagenet_mean, std=imagenet_std),
])

# Validation/test: deterministic resize and center crop, tensor, normalize
validation_transforms = Compose([
    Resize((224, 224)),
    CenterCrop(224),
    ToTensor(),
    Normalize(mean=imagenet_mean, std=imagenet_std),
])

# 4. Define the Preprocessing Function
def preprocess_function(examples, split):
    """
    Transform a batch of images with the pipeline that belongs to the given split.

    Args:
        examples (dict): A batch of examples; its 'image' entry is iterated and each
            item is passed to the transform pipeline.
        split (str): 'train' selects the training pipeline; any other value
            selects the validation pipeline.

    Returns:
        dict: The same batch with the transformed images stored under 'pixel_values'.
    """
    pipeline = train_transforms if split == 'train' else validation_transforms
    examples['pixel_values'] = [pipeline(picture) for picture in examples['image']]
    return examples

# 5. Apply Preprocessing to Each Split
# NOTE(review): map() runs the transforms once and stores the result, so the random
# training augmentations are presumably frozen rather than re-drawn each epoch - confirm.
for split_name in ('train', 'validation', 'test'):
    dataset[split_name] = dataset[split_name].map(
        lambda examples, split=split_name: preprocess_function(examples, split),
        batched=True,
    )

# 6. Set the Format for PyTorch: only these two columns are returned, as tensors
dataset.set_format(type='torch', columns=['pixel_values', 'labels'])

# 7. Load the Image Processor and Model
checkpoint = "microsoft/resnet-50"
processor = AutoImageProcessor.from_pretrained(checkpoint)

# Label maps derived from the dataset's class names
id2label = {str(i): label for i, label in enumerate(class_names)}
label2id = {label: i for i, label in enumerate(class_names)}

model = ResNetForImageClassification.from_pretrained(
    checkpoint,
    num_labels=num_classes,        # Head sized for this dataset
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,  # Tolerate the classifier size mismatch with the checkpoint
)

# Optional alternative: replace the classifier by hand for more control.
# model.classifier = nn.Linear(in_features=2048, out_features=num_classes)

# 8. Define Training Arguments
training_args = TrainingArguments(
    # Where checkpoints and logs go
    output_dir="./resnet50-beans",
    logging_dir="./logs",
    # Optimisation settings
    num_train_epochs=10,
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Evaluate and checkpoint once per epoch, keeping at most two checkpoints
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    # Reload the checkpoint with the highest accuracy at the end
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    # Logging
    logging_steps=10,
    report_to="none",
)

# 9. Define Evaluation Metrics
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """
    Score a batch of predictions with the accuracy metric.

    Args:
        eval_pred (tuple): Pair of (logits, labels).

    Returns:
        dict: Result of the accuracy metric for the arg-max predictions.
    """
    scores, references = eval_pred
    # The predicted class is the index of the largest score along the last axis
    predicted_ids = np.argmax(scores, axis=-1)
    return accuracy.compute(references=references, predictions=predicted_ids)

# 10. Initialize the Trainer (no 'tokenizer=processor' argument, to avoid the FutureWarning)
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    eval_dataset=dataset['validation'],
    train_dataset=dataset['train'],
)

# 11. Fine-Tune the Model
trainer.train()

# 12. Evaluate the Model on the Test Set
results = trainer.evaluate(dataset['test'])
test_accuracy = results['eval_accuracy']
print(f"Test Accuracy: {test_accuracy:.4f}")

# 13. Save the Fine-Tuned Model and Processor into the same directory
save_dir = "./resnet50-beans-finetuned"
for artifact in (model, processor):
    artifact.save_pretrained(save_dir)

# 14. Define the Prediction Function
def predict(image):
    """
    Predicts the label of a given image using the fine-tuned model.

    Args:
        image (PIL.Image.Image): The input image.

    Returns:
        str: The predicted label.
    """
    # After training the model may live on the GPU; the inputs must be on the same device
    inputs = processor(images=image, return_tensors="pt").to(model.device)
    model.eval()
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_class_idx = logits.argmax(-1).item()

    # The config normally stores id2label with integer keys, so a str(...) lookup
    # raises KeyError; fall back to the string key in case the mapping was set by hand.
    id2label = model.config.id2label
    if predicted_class_idx in id2label:
        return id2label[predicted_class_idx]
    return id2label[str(predicted_class_idx)]

# 15. Example Prediction and Visualization
# Select an image from the test set. set_format(columns=[...]) above hides the
# 'image' column, so read it through a view with the default format restored
# (with_format returns a new dataset object; the formatted one is left untouched).
test_image = dataset['test'].with_format(None)[0]['image']  # This is a PIL Image

# Make a prediction
predicted_label = predict(test_image)
print(f"Predicted Label: {predicted_label}")

# Plot the image with the predicted label
plt.figure(figsize=(8, 8))
plt.imshow(test_image)
plt.title(f"Predicted Label: {predicted_label}", fontsize=16)
plt.axis('off')
plt.show()


In [None]:
# 1. Import Necessary Libraries
from transformers import (
    AutoImageProcessor,
    ResNetForImageClassification,
    TrainingArguments,
    Trainer
)
import torch
from datasets import load_dataset
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from torchvision.transforms import (
    Compose,
    Resize,
    CenterCrop,
    ToTensor,
    Normalize,
    RandomHorizontalFlip,
    RandomRotation
)
import evaluate
import torch.nn as nn

# 2. Load the Beans Dataset and show its structure
dataset = load_dataset("AI-Lab-Makerere/beans")
print(dataset)

# Class count and class names both come from the 'labels' feature of the train split
label_feature = dataset['train'].features['labels']
num_classes = label_feature.num_classes
print(f"Number of classes: {num_classes}")
class_names = label_feature.names
print(f"Class names: {class_names}")

# 3. Define Image Transformations
# Both pipelines normalize with the ImageNet channel statistics.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Training: resize, random flip and rotation (up to 15 degrees), tensor, normalize
train_transforms = Compose([
    Resize((224, 224)),
    RandomHorizontalFlip(),
    RandomRotation(15),
    ToTensor(),
    Normalize(mean=imagenet_mean, std=imagenet_std),
])

# Validation/test: deterministic resize and center crop, tensor, normalize
validation_transforms = Compose([
    Resize((224, 224)),
    CenterCrop(224),
    ToTensor(),
    Normalize(mean=imagenet_mean, std=imagenet_std),
])

# 4. Define the Preprocessing Function
def preprocess_function(examples, split):
    """
    Transform a batch of images with the pipeline that belongs to the given split.

    Args:
        examples (dict): A batch of examples; its 'image' entry is iterated and each
            item is passed to the transform pipeline.
        split (str): 'train' selects the training pipeline; any other value
            selects the validation pipeline.

    Returns:
        dict: The same batch with the transformed images stored under 'pixel_values'.
    """
    pipeline = train_transforms if split == 'train' else validation_transforms
    examples['pixel_values'] = [pipeline(picture) for picture in examples['image']]
    return examples

# 5. Apply Preprocessing to Each Split
# NOTE(review): map() runs the transforms once and stores the result, so the random
# training augmentations are presumably frozen rather than re-drawn each epoch - confirm.
for split_name in ('train', 'validation', 'test'):
    dataset[split_name] = dataset[split_name].map(
        lambda examples, split=split_name: preprocess_function(examples, split),
        batched=True,
    )

# 6. Set the Format for PyTorch: only these two columns are returned, as tensors
dataset.set_format(type='torch', columns=['pixel_values', 'labels'])

# 7. Load the Image Processor and Model
processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")

# Replace the classification head.
# The original assigned a new layer to `model.fc`, but this model's head is the
# attribute `model.classifier` (it is what the freezing code below unfreezes), so the
# new layer was never used and the 1000-class ImageNet head was trained instead.
# Passing num_labels with ignore_mismatched_sizes=True lets from_pretrained build a
# freshly initialised head of the right size and keeps the config's label maps in sync.
model = ResNetForImageClassification.from_pretrained(
    "microsoft/resnet-50",
    num_labels=num_classes,
    id2label={i: label for i, label in enumerate(class_names)},
    label2id={label: i for i, label in enumerate(class_names)},
    ignore_mismatched_sizes=True,
)

# 8. Freeze All Layers Except the Last Layer
for param in model.parameters():
    param.requires_grad = False  # Freeze the whole network first

# Unfreeze the classifier head so it is the only part that is trained
for param in model.classifier.parameters():
    param.requires_grad = True

# 9. Define Training Arguments
training_args = TrainingArguments(
    # Where checkpoints and logs go
    output_dir="./resnet50-beans",
    logging_dir="./logs",
    # Optimisation settings
    num_train_epochs=10,
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Evaluate and checkpoint once per epoch, keeping at most two checkpoints
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    # Reload the checkpoint with the highest accuracy at the end
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    # Logging
    logging_steps=10,
    report_to="none",
)

# 10. Define Evaluation Metrics
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """
    Score a batch of predictions with the accuracy metric.

    Args:
        eval_pred (tuple): Pair of (logits, labels).

    Returns:
        dict: Result of the accuracy metric for the arg-max predictions.
    """
    scores, references = eval_pred
    # The predicted class is the index of the largest score along the last axis
    predicted_ids = np.argmax(scores, axis=-1)
    return accuracy.compute(references=references, predictions=predicted_ids)

# 11. Initialize the Trainer (no 'tokenizer=processor' argument, to avoid the FutureWarning)
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    eval_dataset=dataset['validation'],
    train_dataset=dataset['train'],
)

# 12. Fine-Tune the Model
trainer.train()

# 13. Evaluate the Model on the Test Set
results = trainer.evaluate(dataset['test'])
test_accuracy = results['eval_accuracy']
print(f"Test Accuracy: {test_accuracy:.4f}")

# 14. Save the Fine-Tuned Model and Processor into the same directory
save_dir = "./resnet50-beans-finetuned"
for artifact in (model, processor):
    artifact.save_pretrained(save_dir)

# 15. Define the Prediction Function
def predict(image):
    """
    Predicts the label of a given image using the fine-tuned model.

    Args:
        image (PIL.Image.Image): The input image.

    Returns:
        str: The predicted label.
    """
    # After training the model may live on the GPU; the inputs must be on the same device
    inputs = processor(images=image, return_tensors="pt").to(model.device)
    model.eval()
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_class_idx = logits.argmax(-1).item()

    # id2label is keyed by integers in this cell, so a str(...) lookup raises
    # KeyError; fall back to the string key in case the mapping was set differently.
    id2label = model.config.id2label
    if predicted_class_idx in id2label:
        return id2label[predicted_class_idx]
    return id2label[str(predicted_class_idx)]

# 16. Example Prediction and Visualization
# Select an image from the test set. set_format(columns=[...]) above hides the
# 'image' column, so read it through a view with the default format restored
# (with_format returns a new dataset object; the formatted one is left untouched).
test_image = dataset['test'].with_format(None)[0]['image']  # This is a PIL Image

# Make a prediction
predicted_label = predict(test_image)
print(f"Predicted Label: {predicted_label}")

# Plot the image with the predicted label
plt.figure(figsize=(8, 8))
plt.imshow(test_image)
plt.title(f"Predicted Label: {predicted_label}", fontsize=16)
plt.axis('off')
plt.show()
