# Pokémon images classification challenge: Task 2: a CNN built from scratch

## 1. Previously

- Introduction
- EDA (Exploratory Data Analysis) of the dataset
- pre-processing of the data
- classification using an MLP (Multilayer Perceptron).

This work is available at this link: https://drive.google.com/file/d/1fci5SJnuwGc3tGgtzdE0X4BfafhX83B3/view?usp=sharing

In this notebook, we perform a classification using a **CNN (Convolutional Neural Network)**. The dataset consists of **images of Pokémon**, each identified by an ID (corresponding to an image file with a `.png` extension) and a label indicating its **type**.

## 2. Setting up the environment and the data

### Library imports

In [None]:
import os

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Dataset, TensorDataset

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

### Data location

In [None]:
# #use of google drive to import data
# from google.colab import drive
# drive.mount('/content/drive')

MessageError: Error: credential propagation was unsuccessful

In [None]:
# ! ls drive/MyDrive/AP_Assignement1_Task2

ls: cannot access 'drive/MyDrive/AP_Assignement1_Task2': No such file or directory


In [None]:
# Choose the directory where the dataset is stored (keep exactly one `my_path` active).
# Keep the trailing slash: other cells may build file paths as my_path + "<name>".

# # Google Colab
# my_path = 'drive/MyDrive/AP_Assignement1_Task3/'

# Kaggle
# my_path = '/kaggle/input/the-pokemon-are-out-there-task-1/'

# Local
my_path = '../data/'

## 3. Data preprocessing

### Data Load

In [None]:
# Load labels
# Paths are built with os.path.join so they stay valid whether or not
# `my_path` ends with a path separator.
data_path = os.path.join(my_path, "train_labels.csv")  # CSV holding the image labels
image_folder = os.path.join(my_path, "Train")  # directory holding the training images
labels_df = pd.read_csv(data_path)

In [None]:
# Undersampling to balance the data: every class is cut down to the size of
# the rarest class.
min_class_count = labels_df['label'].value_counts().min()

# GroupBy.sample draws the rows of each group directly. Unlike
# groupby(...).apply(lambda x: x.sample(...)) it does not depend on the
# grouping column being passed to the applied function (deprecated pandas
# behaviour that emits a warning), so the 'label' column is always kept.
# The fixed seed makes the balanced subset reproducible between runs.
balanced_df = (
    labels_df.groupby('label')
    .sample(n=int(min_class_count), random_state=42)
    .reset_index(drop=True)
)

  balanced_df = labels_df.groupby('label').apply(lambda x: x.sample(min_class_count)).reset_index(drop=True)


In [None]:
# Train-validation split (80% / 20%), stratified on the label so both subsets
# keep the same class proportions. The fixed seed makes the split reproducible,
# which also keeps any label ordering derived from train_df stable across runs.
train_df, val_df = train_test_split(
    balanced_df,
    test_size=0.2,
    stratify=balanced_df['label'],
    random_state=42,
)

In [None]:
# Image preprocessing pipeline
transform = transforms.Compose([
    # Bring every image to the same 64x64 resolution
    transforms.Resize(size=(64, 64)),
    # PIL image -> PyTorch tensor
    transforms.ToTensor(),
    # Per-channel normalisation with fixed mean / std values
    transforms.Normalize(
        mean=[0.4464, 0.4480, 0.4158],
        std=[0.1823, 0.1728, 0.1813],
        inplace=False,
    ),
    # Data augmentation. Random cropping is deliberately not used: because of
    # our data preprocessing a random crop can remove the Pokémon from the
    # picture and hurt performance.
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=10),
])

In [None]:
# Create datasets

# Label-to-index mapping: each label gets the position of its first
# appearance in train_df as its integer class index.
unique_labels = train_df["label"].unique()
label_map = {label: idx for idx, label in enumerate(unique_labels)}

# Deterministic preprocessing for the validation set: same resize and
# normalisation as `transform`, but without the random flip / rotation, so
# validation metrics are measured on un-augmented images (this matches the
# transform used for the test images at prediction time).
eval_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize([0.4464, 0.4480, 0.4158], [0.1823, 0.1728, 0.1813], inplace=False),
])


def load_dataset(df, img_folder, transform):
    """Load every image listed in ``df`` into memory as tensors.

    Args:
        df: DataFrame with an "Id" column (image file name without the
            ".png" extension) and a "label" column.
        img_folder: directory containing the "<Id>.png" files.
        transform: callable applied to each RGB PIL image; it must return
            tensors of identical shape so they can be stacked.

    Returns:
        Tuple ``(images_tensor, labels_tensor)``: the stacked transformed
        images and a ``torch.long`` tensor of class indices taken from the
        module-level ``label_map``.

    Raises:
        ValueError: if ``df`` contains no rows.
        KeyError: if a label in ``df`` is missing from ``label_map``.
    """
    images = []
    labels = []

    for img_id, label in zip(df["Id"], df["label"]):
        img_path = os.path.join(img_folder, img_id + ".png")  # Construct full image path
        # The context manager closes the underlying file as soon as the
        # pixel data has been converted.
        with Image.open(img_path) as img:
            images.append(transform(img.convert("RGB")))
        labels.append(label_map[label])  # Convert label to integer

    if not images:
        # torch.stack fails with an opaque error on an empty list
        raise ValueError("load_dataset received an empty DataFrame")

    images_tensor = torch.stack(images)  # Stack list of images into a single tensor
    labels_tensor = torch.tensor(labels, dtype=torch.long)  # Convert labels to tensor

    return images_tensor, labels_tensor


# Load train and validation sets.
# NOTE: the random augmentations in `transform` are drawn once per image here,
# at load time; the resulting tensors are then reused unchanged every epoch.
train_images, train_labels = load_dataset(train_df, image_folder, transform)
val_images, val_labels = load_dataset(val_df, image_folder, eval_transform)

# Create TensorDataset
train_dataset = TensorDataset(train_images, train_labels)
val_dataset = TensorDataset(val_images, val_labels)

# Create dataloaders (iterate over the data in batches of 32).
# Only the training data needs shuffling; evaluation does not depend on order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

KeyboardInterrupt: 

In [None]:
# Select the compute device: CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

## 4. Model development: CNN (Convolutional Neural Network)

### Model architecture

In [None]:
import torch
import torch.nn as nn

class CNNModel(nn.Module):
    """Small convolutional classifier for 3-channel 64x64 images.

    Three identical stages (3x3 convolution without padding -> ReLU ->
    2x2 max-pooling) are followed by a fully connected head with dropout.

    Args:
        num_classes: number of output units of the final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()

        # (in_channels, out_channels) of each convolutional stage. For a
        # 64x64 input the spatial size evolves as
        # 64 -> 62 -> 31 -> 29 -> 14 -> 12 -> 6.
        stage_channels = ((3, 16), (16, 32), (32, 64))
        conv_layers = []
        for in_channels, out_channels in stage_channels:
            conv_layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3))
            conv_layers.append(nn.ReLU())
            conv_layers.append(nn.MaxPool2d(2))
        self.features = nn.Sequential(*conv_layers)

        flattened_size = 64 * 6 * 6  # 64 channels of 6x6 feature maps = 2304
        self.classifier = nn.Sequential(
            nn.Flatten(),                    # -> (batch_size, 2304)
            nn.Linear(flattened_size, 512),  # 2304 -> 512
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),     # 512 -> num_classes
        )

    def forward(self, x):
        """Run the feature extractor, then the classifier head, on ``x``."""
        return self.classifier(self.features(x))

In [None]:
# Install torchsummary, the package the model-summary cell imports `summary` from
! pip install torchsummary

In [None]:
# Print model summary: number of parameters, layers of the model
from torchsummary import summary

# One output class per distinct label of the training split
num_classes = len(train_df['label'].unique())

# Build the network and place it on the selected device
model = CNNModel(num_classes).to(device)

# input_size is the per-sample shape (channels, height, width),
# not the flattened pixel count
input_size = (3, 64, 64)
summary(model, input_size=input_size)

### Setting up MLFlow

Execute this section only if you have a Databricks token.

In [None]:
# !pip install mlflow

In [None]:
# from random import random, randint
# from mlflow import log_metric, log_param, log_artifacts
# from mlflow.tracking import MlflowClient

In [None]:
# import mlflow
# import mlflow.pytorch

In [None]:
# # check databricks.txt
# mlflow.login()

In [None]:
# mlflow.set_tracking_uri("databricks")

In [None]:
# experiment_name = "[put_your_link]/Pokemon_Classification_CNN_task3"

# existing_experiment = mlflow.get_experiment_by_name(experiment_name)

# if existing_experiment is None:
#     mlflow.create_experiment(
#         experiment_name,
#         artifact_location="dbfs:/Volumes/test/mlflow/Pokemon_Classification",
#     )

# mlflow.set_experiment(experiment_name)

### Train and evaluate model

In [None]:
# Model initialization: a fresh network with one output per Pokémon class
# found in the training split, placed on the GPU (or CPU)
num_classes = len(train_df['label'].unique())
model = CNNModel(num_classes).to(device)

In [None]:
# Loss and optimizer definition
# Cross-entropy as the loss function
criterion = nn.CrossEntropyLoss()
# RMSprop over all model parameters, learning rate 0.001
optimizer = optim.RMSprop(params=model.parameters(), lr=1e-3)

In [None]:
# Switch read by the training loop: the early-stopping check only runs when
# this is True; with False, training goes through every epoch.
early_stop = False

In [None]:
# Train and evaluate model

import numpy as np
import torch
from collections import Counter
from sklearn.metrics import f1_score

# Loop parameters
epochs = 100  # Maximum number of epochs
patience = 10  # Number of epochs without improvement tolerated before stopping
best_val_accuracy = 0  # Best validation accuracy seen so far (early stopping only)
early_stopping_epoch = 0  # 1-based epoch where early stopping fired (0 if it never did)
counter = 0  # Consecutive epochs without validation-accuracy improvement

# Store accuracies to plot later
train_accuracies = []
val_accuracies = []

# MLflow is optional: its import cells are commented out by default, so an
# unconditional mlflow.autolog() call raises NameError. Enable autologging
# only when the module has actually been imported in this session.
if "mlflow" in globals():
    mlflow.autolog()

for epoch in range(epochs):
    # Training phase (training mode enables dropout)
    model.train()
    total_loss = 0  # Sum of the per-batch losses over this epoch
    correct = 0
    total = 0

    for images, label in train_loader:
        images, label = images.to(device), label.to(device)
        optimizer.zero_grad()

        # Forward pass, loss, backward pass and parameter update
        outputs = model(images)
        loss = criterion(outputs, label)
        loss.backward()
        optimizer.step()

        # Accumulate statistics
        total_loss += loss.item()
        _, predicted = torch.max(outputs, 1)  # Index of the highest score = predicted class
        correct += (predicted == label).sum().item()
        total += label.size(0)

    # Calculate and store training accuracy
    train_accuracy = correct / total
    train_accuracies.append(train_accuracy)

    # # uncomment to use MLFLow
    # mlflow.log_metric("train_loss", total_loss, step=epoch)
    # mlflow.log_metric("train_acc", train_accuracy, step=epoch)

    # Validation phase (evaluation mode disables dropout, no gradients needed).
    # `predictions` / `actuals` are rebuilt every epoch, so after the loop they
    # hold the results of the last epoch that ran.
    model.eval()
    predictions, actuals = [], []
    with torch.no_grad():
        for images, label in val_loader:
            images, label = images.to(device), label.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            predictions.extend(preds.cpu().numpy())
            actuals.extend(label.cpu().numpy())

    # Compute validation accuracy and macro-averaged f1-score
    val_accuracy = np.mean(np.array(predictions) == np.array(actuals))
    val_accuracies.append(val_accuracy)
    f1 = f1_score(actuals, predictions, average='macro')

    # # uncomment to use MLFlow
    # mlflow.log_metric("val_accuracy", val_accuracy, step=epoch)
    # mlflow.log_metric("val_f1_score", f1, step=epoch)

    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, "
          f"Validation Accuracy: {val_accuracy:.4f}, F1 Score: {f1:.4f}")

    # Early stopping check (only active when `early_stop` is True): stop once
    # the validation accuracy has not improved for `patience` epochs in a row.
    if early_stop:
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            counter = 0
        else:
            counter += 1
        if counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            early_stopping_epoch = epoch + 1
            break  # Stop training

In [None]:
# Plot the accuracy over the epochs (x axis starts at epoch 1)
epochs_range = range(1, len(train_accuracies) + 1)

plt.figure(figsize=(8, 5))
for _curve, _curve_label, _curve_color in (
    (train_accuracies, "Train Accuracy", 'blue'),
    (val_accuracies, "Validation Accuracy", 'red'),
):
    plt.plot(epochs_range, _curve, label=_curve_label, color=_curve_color)

plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.title("Training vs Validation Accuracy (Early Stopping)")
plt.legend()
plt.show()

In [None]:
# Confusion matrix

# Class names in class-index order: integer class i was assigned to the i-th
# entry of train_df['label'].unique() when the label mapping was built.
POKEMON_TYPES = train_df['label'].unique()

# Pass the full list of class indices explicitly so the matrix always has one
# row/column per class, in tick-label order, even if some class never occurs
# in `actuals` or `predictions`.
class_indices = list(range(len(POKEMON_TYPES)))
conf_matrix = confusion_matrix(actuals, predictions, labels=class_indices)
print(conf_matrix)

# Plot confusion matrix (rows = true labels, columns = predictions)
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap="Blues", xticklabels=POKEMON_TYPES, yticklabels=POKEMON_TYPES)
plt.xlabel("Predictions")
plt.ylabel("True Labels")
plt.title("Confusion Matrix")
plt.show()

## 5. Some training metrics without using MLFlow

In [None]:
from sklearn.metrics import classification_report

# Evaluation mode for the pass over the training data
model.eval()

# True and predicted class indices, collected batch by batch
train_predictions = []
train_actuals = []

with torch.no_grad():  # inference only, no gradients needed
    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Predicted class = index of the highest score in each row
        batch_preds = torch.max(model(batch_images), dim=1).indices

        train_predictions.extend(batch_preds.cpu().numpy())
        train_actuals.extend(batch_labels.cpu().numpy())

# Per-class metrics on the training set, printed with 4 digits
train_report = classification_report(train_actuals, train_predictions, digits=4)
print("Training Set Metrics:\n", train_report)

In [None]:
# Compute the classification report for the validation set, using the
# `actuals` / `predictions` lists filled by the last validation pass of the
# training loop
val_report = classification_report(actuals, predictions, digits=4)
print("Validation Set Metrics:\n", val_report)

## 6. Prediction on the test data

In [None]:
import pandas as pd
import os
import torch
from torchvision import transforms
from PIL import Image

# Deterministic test-time preprocessing: same resize and normalisation as for
# training, without the random augmentations. It gets its own name so the
# training `transform` is not overwritten when this cell runs.
test_transform = transforms.Compose([
    transforms.Resize((64, 64)),  # Ensure same size as training images
    transforms.ToTensor(),
    transforms.Normalize([0.4464, 0.4480, 0.4158], [0.1823, 0.1728, 0.1813], inplace=False)
])

# Path to the folder containing test images
test_folder = os.path.join(my_path, "Test")

# List the test images; sorted so the submission rows come out in a stable
# order (os.listdir returns entries in arbitrary order)
test_images = sorted(f for f in os.listdir(test_folder) if f.endswith(".png"))

# Index-to-label mapping, the inverse of the label-to-index mapping used for
# training (same train_df["label"].unique() order). It is deliberately NOT
# called `label_map`: load_dataset reads the global `label_map`
# (label -> index), and rebinding it here would break a later re-run of the
# data-loading cell.
unique_labels = train_df["label"].unique()
index_to_label = dict(enumerate(unique_labels))

# Put model in evaluation mode
model.eval()

# Store results as [image id, predicted label] rows
submission_results = []

# Disable gradient computation for inference
with torch.no_grad():
    for img_name in test_images:
        img_path = os.path.join(test_folder, img_name)
        # The context manager closes the image file once it has been converted
        with Image.open(img_path) as img:
            image = test_transform(img.convert("RGB"))
        image = image.unsqueeze(0).to(device)  # Add batch dimension & move to the selected device

        # Get model predictions
        outputs = model(image)
        _, predicted = torch.max(outputs, 1)  # Get predicted class index

        # Strip only the file extension to obtain the image id
        # (str.replace would also remove ".png" occurring inside the name)
        img_id = os.path.splitext(img_name)[0]
        submission_results.append([img_id, index_to_label[predicted.item()]])

# Convert results to DataFrame
submission_df = pd.DataFrame(submission_results, columns=["Id", "Label"])

# Save to CSV file
submission_df.to_csv("../submission_CNN.csv", index=False, header=True)

print("Submission file 'submission_CNN.csv' has been created successfully!")
