In [1]:
#!pip install opencv-python

In [2]:
#!pip install scikit-learn

In [19]:
import os
import cv2
import xml.etree.ElementTree as ET
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torchvision import transforms

In [20]:
# ==============================================
# GPU CONFIGURATION (USING CUDA:3 AS INSTRUCTED)
# ==============================================
# torch.cuda.is_available() only reports that *some* GPU exists. On a machine
# with fewer than four GPUs "cuda:3" is an invalid ordinal and fails at first
# use, so check the device count before picking the preferred index.
PREFERRED_GPU_INDEX = 3
if not torch.cuda.is_available():
    device = torch.device("cpu")
elif torch.cuda.device_count() > PREFERRED_GPU_INDEX:
    device = torch.device(f"cuda:{PREFERRED_GPU_INDEX}")
else:
    # A GPU exists but not the preferred one: use the first GPU instead.
    device = torch.device("cuda:0")
print(f"Using device: {device}")

Using device: cuda:3


In [21]:
# Parse XML file to extract bounding boxes and labels
def parse_xml(xml_file):
    """Extract bounding boxes and class labels from an annotation XML file.

    The file is expected to contain ``<object>`` elements directly under the
    root, each with a ``<name>`` child and a ``<bndbox>`` child holding
    ``<xmin>``, ``<ymin>``, ``<xmax>`` and ``<ymax>``.

    Args:
        xml_file: Path or file object accepted by ``ET.parse``.

    Returns:
        A ``(boxes, labels)`` tuple with one entry per ``<object>`` element, in
        document order. ``boxes`` holds ``[xmin, ymin, xmax, ymax]`` integer
        lists and ``labels`` holds the text of each ``<name>`` element.

    Raises:
        AttributeError: If an ``<object>`` lacks ``<name>``, ``<bndbox>`` or one
            of the four coordinate elements.
        ValueError: If a coordinate is not numeric.
    """
    root = ET.parse(xml_file).getroot()
    boxes, labels = [], []

    for obj in root.findall("object"):
        label = obj.find("name").text
        bbox = obj.find("bndbox")
        # Coordinates may be written as floats ("12.0"); int() alone raises
        # ValueError on those, so parse via float() first (truncates toward zero).
        box = [
            int(float(bbox.find(tag).text))
            for tag in ("xmin", "ymin", "xmax", "ymax")
        ]
        boxes.append(box)
        labels.append(label)

    return boxes, labels

In [22]:
# Load images and their corresponding XML files
def load_dataset(folder_path):
    """Load every annotated ``.jpg`` in ``folder_path`` together with its labels.

    Each ``<stem>.jpg`` is paired with ``<stem>.xml`` in the same folder. An
    image with several annotated objects contributes one entry per object: the
    same image array is appended once for each label.

    Args:
        folder_path: Directory containing the images and XML annotations.

    Returns:
        A ``(images, labels)`` tuple of equal-length lists. ``images`` holds the
        arrays returned by ``cv2.imread`` and ``labels`` the matching class
        names from ``parse_xml``.
    """
    images, labels = [], []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split of it) reproducible across runs.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".jpg"):
            continue

        # splitext swaps only the extension; str.replace would also rewrite a
        # ".jpg" that happens to appear earlier in the file name.
        stem, _ = os.path.splitext(filename)
        img_path = os.path.join(folder_path, filename)
        xml_path = os.path.join(folder_path, stem + ".xml")

        if not os.path.exists(xml_path):
            print(f"Warning: XML annotation missing for {filename}")
            continue

        # cv2.imread signals failure by returning None rather than raising.
        img = cv2.imread(img_path)
        if img is None:
            print(f"Warning: could not read image {filename}; skipping")
            continue

        _, label_list = parse_xml(xml_path)

        # If multiple labels exist, duplicate the image entry
        for label in label_list:
            images.append(img)
            labels.append(label)

    return images, labels

In [23]:
# Folder holding the training .jpg images and their same-named .xml annotations.
# The path is relative, so it is resolved against the current working directory.
dataset_path = "Object detection dataset/Object detection dataset/train/train"  # Replace with your dataset folder path

In [24]:
# Load the dataset: load_dataset returns one (image, label) entry per annotated
# object, so an image with several objects appears several times in `images`.
images, all_labels = load_dataset(dataset_path)

In [25]:
# Sanity check: every loaded image must have exactly one label entry
assert len(all_labels) == len(images), "Mismatch between images and labels!"

In [26]:
# Map each class-name string to an integer class id
label_encoder = LabelEncoder()
label_encoder.fit(all_labels)
encoded_labels = label_encoder.transform(all_labels)

In [27]:
# One-hot encode the labels: row i gets a 1 in the column of its class id
num_classes = len(np.unique(encoded_labels))
y = np.zeros((len(encoded_labels), num_classes))
y[np.arange(len(encoded_labels)), encoded_labels] = 1


In [28]:
# Resize every image to 128x128 (the model's input size), then divide by 255.0
# to normalize the pixel values
X = np.array([cv2.resize(img, (128, 128)) for img in images]) / 255.0

In [29]:
# Split the data into training and test sets (20% held out, fixed seed).
# NOTE(review): load_dataset appends the same image once per annotated object,
# so copies of one image can land on both sides of this split — confirm whether
# that leakage is acceptable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [30]:
# Report how many samples ended up on each side of the split
print("Training samples: {}, Test samples: {}".format(len(X_train), len(X_test)))


Training samples: 372, Test samples: 93


In [31]:
# Data augmentation for training
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [32]:
# Augmentation settings for the training images.
# Shift ranges are given as fractions of the image size. Keras interprets a
# float >= 1 as a pixel count, so the earlier value of 2.0 meant "shift by at
# most 2 pixels" — next to no augmentation on a 128x128 input.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.15,
    horizontal_flip=True,
)
# fit() only computes statistics for featurewise centering / std normalization
# / ZCA whitening; none of those are enabled here, so this call is kept purely
# for compatibility with the original cell.
datagen.fit(X_train)

In [33]:
# Example Model Definition (Using a Simple CNN)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

In [None]:
# Small CNN classifier: two conv + max-pool stages feeding a dense head with
# one softmax output per class
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

In [None]:
# Categorical cross-entropy matches the one-hot targets and softmax output
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)