In [1]:
import copy
import io
import os
import xml.etree.ElementTree as ET
import zipfile
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import requests
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader


In [4]:
# Step 1: Describe where the archive lives and where it is unpacked
url = "https://github.com/ravipeddi-05/EEN1072-ASSIGNMENT/archive/refs/heads/main.zip"
extract_to = "cnn_dataset"

# Step 2: Define paths
base_path = Path(extract_to) / "EEN1072-ASSIGNMENT-main" / "CNN dataset"
images_path = base_path / "images"
annotations_path = base_path / "annotations"

# Only hit the network when the archive has not been unpacked yet, so that
# re-running the notebook does not download the whole repository again.
if not (images_path.is_dir() and annotations_path.is_dir()):
    response = requests.get(url, timeout=60)
    # Fail loudly on an HTTP error instead of handing an error page to ZipFile.
    response.raise_for_status()
    with zipfile.ZipFile(io.BytesIO(response.content)) as z:
        z.extractall(extract_to)

# Step 3: Load images and corresponding annotations
dataset = []

# Sorted so the order of `dataset` does not depend on filesystem listing order.
for image_file in sorted(images_path.glob("*.png")):
    image_id = image_file.stem  # e.g., 'img1'
    annotation_file = annotations_path / f"{image_id}.xml"

    # Images without a matching XML file are left out of `dataset`.
    if not annotation_file.exists():
        continue

    # Copy the pixels into memory and close the underlying file right away;
    # otherwise one file handle per image stays open.
    with Image.open(image_file) as opened:
        image = opened.copy()

    # Parse XML annotation
    root = ET.parse(annotation_file).getroot()

    objects = []
    for obj in root.findall("object"):
        bbox = obj.find("bndbox")
        objects.append({
            "label": obj.find("name").text,
            "xmin": int(bbox.find("xmin").text),
            "ymin": int(bbox.find("ymin").text),
            "xmax": int(bbox.find("xmax").text),
            "ymax": int(bbox.find("ymax").text)
        })

    # Append paired image and annotation
    dataset.append({
        "image_id": image_id,
        "image": image,
        "annotations": objects
    })

print(f"✅ Loaded {len(dataset)} image-annotation pairs from CNN dataset.")

✅ Loaded 2330 image-annotation pairs from CNN dataset.


In [4]:
# Dataset directories read by the split cell and by LeafDataset.
# They point at the folders produced by the download/extract cell above; the
# notebook previously read them from '/kaggle/input/leaf-types-object-detection'.
# Without these definitions a fresh "Restart & Run All" fails with NameError.
IMAGES_DIR = str(images_path)
ANNOTATIONS_DIR = str(annotations_path)


In [5]:
#Extracting bounding boxes
def parse_voc_annotation(xml_file):
    """Read every bounding box from a Pascal-VOC style XML annotation.

    Args:
        xml_file: Path or file object accepted by ``ET.parse``.

    Returns:
        A list with one ``[xmin, ymin, xmax, ymax, label]`` entry per
        ``<object>`` element, in document order. Coordinates are ints; the
        label is the text of the object's ``<name>`` element. An annotation
        without ``<object>`` elements yields an empty list.

    Raises:
        AttributeError: if an object lacks ``<name>``, ``<bndbox>`` or one of
            the four coordinate tags.
        ValueError: if a coordinate is not numeric.
    """
    root = ET.parse(xml_file).getroot()
    boxes = []
    for obj in root.findall("object"):
        bbox = obj.find("bndbox")
        # Go through float() first so coordinates written as "12.0" are
        # accepted; plain integer text parses to the same value as before.
        coords = [
            int(round(float(bbox.find(tag).text)))
            for tag in ("xmin", "ymin", "xmax", "ymax")
        ]
        boxes.append(coords + [obj.find("name").text])
    return boxes


In [6]:
#Splitting Data
# List the PNG images once, in sorted order, so that the seeded splits below
# are reproducible. (An earlier multi-extension listing was immediately
# overwritten by this one and has been removed as dead code.)
image_files = sorted(f for f in os.listdir(IMAGES_DIR) if f.endswith('.png'))

# Hold out 10% for testing, then 10% of the remainder for validation.
train_imgs, test_imgs = train_test_split(image_files, test_size=0.1, random_state=42)
train_imgs, val_imgs = train_test_split(train_imgs, test_size=0.1, random_state=42)

print(f"Train: {len(train_imgs)} | Val: {len(val_imgs)} | Test: {len(test_imgs)}")


Train: 1887 | Val: 210 | Test: 233


In [7]:

class LeafDataset(Dataset):
    """Image / single-bounding-box dataset backed by files on disk.

    Each item is ``(image, box)`` where ``box`` is a float32 tensor
    ``[xmin, ymin, xmax, ymax]`` taken from the first object of the image's
    XML annotation, or four zeros when the annotation has no objects.

    Images are read from ``IMAGES_DIR`` and annotations from
    ``ANNOTATIONS_DIR`` (module-level names).

    NOTE(review): the box is returned exactly as stored in the XML, while
    ``transform`` is applied to the image only. If the transform resizes the
    image, the box is not rescaled with it — confirm this is intended.
    """

    def __init__(self, image_filenames, transform=None):
        """
        Args:
            image_filenames: sequence of image file names inside ``IMAGES_DIR``.
            transform: optional callable applied to the RGB image array.
        """
        self.image_filenames = image_filenames
        self.transform = transform

    def __len__(self):
        """Number of images in this split."""
        return len(self.image_filenames)

    def __getitem__(self, idx):
        """Load image ``idx`` and its first annotated box.

        Raises:
            FileNotFoundError: if the image file is missing or cannot be decoded.
        """
        img_name = self.image_filenames[idx]
        img_path = os.path.join(IMAGES_DIR, img_name)
        xml_path = os.path.join(ANNOTATIONS_DIR, get_xml_filename(img_name))

        image = cv2.imread(img_path)
        # cv2.imread signals failure by returning None rather than raising;
        # surface that here with the offending path instead of letting
        # cvtColor fail with an unrelated-looking error.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        boxes = parse_voc_annotation(xml_path)
        # Only the first box is used; images without any box get a zero box
        # (they are NOT skipped).
        box = boxes[0][:4] if boxes else [0, 0, 0, 0]
        label_tensor = torch.tensor(box, dtype=torch.float32)

        if self.transform:
            image = self.transform(image)

        return image, label_tensor



In [9]:
# Preprocessing applied to every image: array -> PIL image -> 224x224 -> tensor.
transform = transforms.Compose(
    [transforms.ToPILImage(), transforms.Resize((224, 224)), transforms.ToTensor()]
)

# One dataset per split, all sharing the same preprocessing.
train_set, val_set, test_set = (
    LeafDataset(split_files, transform=transform)
    for split_files in (train_imgs, val_imgs, test_imgs)
)

# Only the training loader shuffles; validation and test keep file order.
train_loader = DataLoader(train_set, batch_size=8, shuffle=True)
val_loader, test_loader = (
    DataLoader(split_set, batch_size=8) for split_set in (val_set, test_set)
)


In [10]:
class LeafCNN(nn.Module):
    """Baseline CNN that regresses a single bounding box (x1, y1, x2, y2).

    Three conv -> ReLU -> 2x2 max-pool stages (32, 64, 128 channels) feed a
    two-layer fully connected head. The head's input size of 128 * 28 * 28
    matches a 224x224 input halved three times.
    """

    def __init__(self):
        super().__init__()

        # Build the convolutional stages in order so layer indices inside the
        # Sequential stay 0..8.
        stages = []
        in_channels = 3
        for out_channels in (32, 64, 128):
            stages += [
                nn.Conv2d(in_channels, out_channels, 3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]
            in_channels = out_channels
        self.features = nn.Sequential(*stages)

        head = [
            nn.Flatten(),
            nn.Linear(128 * 28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 4),  # one box: (x1, y1, x2, y2)
        ]
        self.regressor = nn.Sequential(*head)

    def forward(self, x):
        """Map an image batch to a ``[batch, 4]`` tensor of box coordinates."""
        return self.regressor(self.features(x))


In [11]:
def calculate_iou(box1, box2):
    """Intersection-over-union of two axis-aligned boxes.

    Each box is indexable as ``[xmin, ymin, xmax, ymax]``. Negative widths or
    heights are clamped to zero, and 0 is returned when the union is empty.
    """
    def _area(b):
        return max(0, (b[2] - b[0])) * max(0, (b[3] - b[1]))

    # Corners of the overlapping rectangle (may be inverted if disjoint).
    left, top = max(box1[0], box2[0]), max(box1[1], box2[1])
    right, bottom = min(box1[2], box2[2]), min(box1[3], box2[3])
    overlap = max(0, right - left) * max(0, bottom - top)

    combined = _area(box1) + _area(box2) - overlap
    if combined != 0:
        return overlap / combined
    return 0


In [12]:
def evaluate_iou(model, loader):
    """Average IoU between predicted and target boxes over ``loader``.

    Puts ``model`` in eval mode, runs it without gradients on every batch
    (moved to the module-level ``device``), scores each prediction/target pair
    with ``calculate_iou``, prints the mean and returns it. Returns 0 when no
    pair was scored.
    """
    model.eval()
    iou_sum = 0
    n_scored = 0

    with torch.no_grad():
        for batch_images, batch_targets in loader:
            batch_images = batch_images.to(device)
            batch_targets = batch_targets.to(device)

            batch_preds = model(batch_images)  # expected to be [batch_size, 4]

            # A lone 1-D box is promoted to a batch of one.
            if batch_preds.ndim == 1:
                batch_preds = batch_preds.unsqueeze(0)
            if batch_targets.ndim == 1:
                batch_targets = batch_targets.unsqueeze(0)

            for pred_row, target_row in zip(batch_preds, batch_targets):
                pred_box = pred_row.cpu().numpy()
                true_box = target_row.cpu().numpy()
                # Only 4-coordinate boxes are scored.
                if len(pred_box) != 4 or len(true_box) != 4:
                    continue
                iou_sum += calculate_iou(pred_box, true_box)
                n_scored += 1

    avg_iou = iou_sum / n_scored if n_scored > 0 else 0
    print(f"📊 Average IoU on test set: {avg_iou:.4f}")
    return avg_iou



In [14]:
def get_xml_filename(img_name):
    """Return the annotation file name for an image file name.

    The image's final extension (if any) is replaced by ``.xml``,
    e.g. ``leaf1.jpg`` -> ``leaf1.xml``.
    """
    stem, _extension = os.path.splitext(img_name)
    return f"{stem}.xml"


In [15]:
# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LeafCNN().to(device)

# Baseline training setup: mean-squared error on the four box coordinates,
# optimised with Adam.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train(model, loader, val_loader, epochs=10):
    """Train ``model`` on ``loader`` and print the mean batch loss per epoch.

    Uses the module-level ``device``, ``criterion`` and ``optimizer``.

    Args:
        model: network to optimise (updated in place).
        loader: iterable of ``(images, targets)`` batches to train on.
        val_loader: accepted for interface compatibility; currently unused.
        epochs: number of passes over ``loader``.
    """
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0
        # Iterate the loader that was passed in. The previous version read the
        # global `train_loader` here, so the `loader` argument was ignored
        # while the average below was still divided by len(loader).
        for images, targets in loader:
            images = images.to(device)
            targets = targets.to(device)  # shape [batch_size, 4]

            outputs = model(images)
            loss = criterion(outputs, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        # Average over the batches actually seen; an empty loader reports NaN
        # instead of raising ZeroDivisionError.
        epoch_loss = running_loss / num_batches if num_batches else float("nan")
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}")

# Fit the baseline model on the training loader with the default epoch count.
train(model, train_loader, val_loader)

def evaluate(model, loader):
    """Print the first prediction and its target box from the first batch.

    Puts ``model`` in eval mode, runs it without gradients on the first batch
    of ``loader`` (moved to the module-level ``device``) and prints the
    predicted and the actual box for that batch's first sample.
    """
    model.eval()
    with torch.no_grad():
        for images, targets in loader:
            # Keep all four coordinates. The previous `targets[:, 0]` kept
            # only the xmin column, so "Actual" printed a single number next
            # to a four-value prediction.
            images, targets = images.to(device), targets.to(device)
            outputs = model(images)
            print("Prediction:", outputs[0].cpu().numpy())
            print("Actual:", targets[0].cpu().numpy())
            break  # only the first batch is inspected

# Score the trained baseline model on the test loader (mean IoU).
avg_iou = evaluate_iou(model, test_loader)

Epoch [1/10], Loss: 573.5683
Epoch [2/10], Loss: 462.5101
Epoch [3/10], Loss: 435.5832
Epoch [4/10], Loss: 410.8650
Epoch [5/10], Loss: 403.0633
Epoch [6/10], Loss: 369.0020
Epoch [7/10], Loss: 341.4133
Epoch [8/10], Loss: 296.0272
Epoch [9/10], Loss: 240.5239
Epoch [10/10], Loss: 190.1183
📊 Average IoU on test set: 0.3245


In [19]:
class LeafCNN_V1(nn.Module):
    """Box-regression CNN variant with batch normalisation and dropout.

    Three conv -> BatchNorm -> ReLU -> 2x2 max-pool stages (32, 64, 128
    channels), followed by a fully connected head with dropout (p=0.4) before
    the final 4-value output.
    """

    def __init__(self):
        super().__init__()

        channels = (3, 32, 64, 128)
        stages = []
        # Consecutive channel pairs: (3, 32), (32, 64), (64, 128).
        for c_in, c_out in zip(channels, channels[1:]):
            stages.extend([
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ])
        self.features = nn.Sequential(*stages)

        self.regressor = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 28 * 28, 512),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(512, 4),
        )

    def forward(self, x):
        """Map an image batch to a ``[batch, 4]`` tensor of box coordinates."""
        extracted = self.features(x)
        return self.regressor(extracted)



In [35]:
class LeafCNN_V2(nn.Module):
    """Deeper box-regression CNN with a fourth convolutional stage.

    Four conv -> ReLU -> 2x2 max-pool stages (32, 64, 128, 256 channels). The
    head's input size of 256 * 14 * 14 matches a 224x224 input halved four
    times.
    """

    def __init__(self):
        super().__init__()

        def stage(c_in, c_out):
            # One conv stage: 3x3 conv (same padding), ReLU, 2x2 max-pool.
            return [nn.Conv2d(c_in, c_out, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2)]

        self.features = nn.Sequential(
            *stage(3, 32),
            *stage(32, 64),
            *stage(64, 128),
            *stage(128, 256),
        )

        flattened_size = 256 * 14 * 14
        self.regressor = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flattened_size, 512),
            nn.ReLU(),
            nn.Linear(512, 4),
        )

    def forward(self, x):
        """Map an image batch to a ``[batch, 4]`` tensor of box coordinates."""
        return self.regressor(self.features(x))

In [36]:
from itertools import product

# Hyper-parameter grid: every (learning rate, batch size, hidden size) triple.
lr_list = [0.001, 0.0005]
batch_sizes = [8, 16]
hidden_sizes = [256, 512]

# Materialised as a list so it can be iterated more than once.
search_space = [*product(lr_list, batch_sizes, hidden_sizes)]


In [37]:
class TunedLeafCNN(nn.Module):
    """Box-regression CNN whose hidden fully connected width is configurable.

    Same three conv -> ReLU -> 2x2 max-pool stages (32, 64, 128 channels) as
    the baseline; only the width of the hidden linear layer varies.

    Args:
        hidden_size: number of units in the hidden fully connected layer.
    """

    def __init__(self, hidden_size):
        super().__init__()

        conv_stack = []
        previous = 3
        for width in (32, 64, 128):
            conv_stack.append(nn.Conv2d(previous, width, 3, padding=1))
            conv_stack.append(nn.ReLU())
            conv_stack.append(nn.MaxPool2d(2))
            previous = width
        self.features = nn.Sequential(*conv_stack)

        self.regressor = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 28 * 28, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 4),
        )

    def forward(self, x):
        """Map an image batch to a ``[batch, 4]`` tensor of box coordinates."""
        feature_maps = self.features(x)
        return self.regressor(feature_maps)


In [50]:
def run_tuning(search_space, train_imgs, val_imgs, epochs=10):
    """Grid-search ``TunedLeafCNN`` and return the best model found.

    For every ``(lr, batch_size, hidden_size)`` in ``search_space`` a fresh
    model is trained with Adam and SmoothL1 loss, and validated after each
    epoch. The weights from the epoch with the lowest validation loss are
    kept per configuration; the configuration with the lowest such loss wins.

    Uses the module-level ``transform`` and ``device``.

    Args:
        search_space: iterable of ``(lr, batch_size, hidden_size)`` tuples.
        train_imgs: image file names for training.
        val_imgs: image file names for validation.
        epochs: training epochs per configuration (default 10).

    Returns:
        ``(best_model, best_params)`` where ``best_model`` carries the
        best-epoch weights and is in eval mode, and ``best_params`` is the
        winning ``(lr, batch_size, hidden_size)`` tuple.

    Raises:
        ValueError: if no configuration produced a usable validation loss
            (for example when ``search_space`` is empty).
    """
    best_loss = float('inf')
    best_params = None
    best_model = None

    # The datasets do not depend on the configuration; build them once.
    train_set = LeafDataset(train_imgs, transform=transform)
    val_set = LeafDataset(val_imgs, transform=transform)

    for lr, batch_size, hidden_size in search_space:
        print(f"\n🔧 Testing config: LR={lr}, Batch={batch_size}, Hidden={hidden_size}")

        train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_set, batch_size=batch_size)

        # Model + Optimizer
        model = TunedLeafCNN(hidden_size).to(device)
        optimizer = optim.Adam(model.parameters(), lr=lr)
        criterion = nn.SmoothL1Loss()  # Can replace with IoU loss later

        best_val_loss_for_config = float('inf')
        best_state_for_config = None

        for epoch in range(epochs):
            model.train()
            train_loss = 0

            for images, targets in train_loader:
                images, targets = images.to(device), targets.to(device)
                outputs = model(images)

                loss = criterion(outputs, targets)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                train_loss += loss.item()

            avg_train_loss = train_loss / len(train_loader)

            # Validation per epoch
            model.eval()
            val_loss = 0
            with torch.no_grad():
                for images, targets in val_loader:
                    images, targets = images.to(device), targets.to(device)
                    outputs = model(images)
                    loss = criterion(outputs, targets)
                    val_loss += loss.item()
            avg_val_loss = val_loss / len(val_loader)

            print(f"Epoch {epoch+1:02d} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

            # Snapshot the weights of the best epoch. Keeping a reference to
            # `model` is not enough: later epochs keep mutating the same
            # object, so the "best" model would silently become the last one.
            if avg_val_loss < best_val_loss_for_config:
                best_val_loss_for_config = avg_val_loss
                best_state_for_config = copy.deepcopy(model.state_dict())

        # Track best model across all configs
        print(f"🔎 Final Val Loss for config: {best_val_loss_for_config:.4f}")
        if best_state_for_config is not None and best_val_loss_for_config < best_loss:
            # Rewind this config's model to its best epoch before keeping it.
            model.load_state_dict(best_state_for_config)
            model.eval()
            best_loss = best_val_loss_for_config
            best_params = (lr, batch_size, hidden_size)
            best_model = model

    if best_params is None:
        raise ValueError("run_tuning found no usable configuration; is search_space empty?")

    print(f"\n✅ Best Config Overall → LR={best_params[0]}, Batch={best_params[1]}, Hidden={best_params[2]}")
    return best_model, best_params

# Run the grid search over `search_space`, then score the selected model on
# the test loader and print its mean IoU.
best_model, best_params = run_tuning(search_space, train_imgs, val_imgs)
avg_iou = evaluate_iou(best_model, test_loader)
print(f"✅ Final Average IoU: {avg_iou:.4f}")


🔧 Testing config: LR=0.001, Batch=8, Hidden=256
Epoch 01 | Train Loss: 18.6849 | Val Loss: 17.5073
Epoch 02 | Train Loss: 17.0696 | Val Loss: 16.1190
Epoch 03 | Train Loss: 16.3890 | Val Loss: 15.4930
Epoch 04 | Train Loss: 15.8385 | Val Loss: 15.2044
Epoch 05 | Train Loss: 15.5713 | Val Loss: 16.2533
Epoch 06 | Train Loss: 15.2611 | Val Loss: 15.1906
Epoch 07 | Train Loss: 14.7214 | Val Loss: 14.6279
Epoch 08 | Train Loss: 14.5746 | Val Loss: 14.3078
Epoch 09 | Train Loss: 14.2589 | Val Loss: 14.6688
Epoch 10 | Train Loss: 13.4664 | Val Loss: 16.0170
🔎 Final Val Loss for config: 14.3078

🔧 Testing config: LR=0.001, Batch=8, Hidden=512
Epoch 01 | Train Loss: 18.4107 | Val Loss: 17.4747
Epoch 02 | Train Loss: 17.2358 | Val Loss: 15.7940
Epoch 03 | Train Loss: 16.4915 | Val Loss: 15.2623
Epoch 04 | Train Loss: 15.9545 | Val Loss: 15.1991
Epoch 05 | Train Loss: 15.9219 | Val Loss: 15.3399
Epoch 06 | Train Loss: 15.3183 | Val Loss: 16.4173
Epoch 07 | Train Loss: 15.0801 | Val Loss: 14.613