In [1]:
import torch

# Index of the GPU this notebook intends to use.
TARGET_GPU_INDEX = 3

# Check if CUDA is available at all
cuda_available = torch.cuda.is_available()
print("CUDA available:", cuda_available)

# Check how many GPUs are available
gpu_count = torch.cuda.device_count()
print("Number of GPUs:", gpu_count)

# Get the name of all GPUs
for i in range(gpu_count):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

# This is the most important check: see if the target device exists.
# The device is only selected when its index is within the visible device
# count; otherwise we fall back to the CPU and report that explicitly instead
# of claiming success for a GPU that was never used.
target_available = cuda_available and gpu_count > TARGET_GPU_INDEX
device = torch.device(f"cuda:{TARGET_GPU_INDEX}" if target_available else "cpu")
if not target_available:
    print(f"\ncuda:{TARGET_GPU_INDEX} is not available; falling back to CPU.")
print(f"\nYour target device is: {device}")

try:
    # A quick test to see if we can actually place a tensor on the device
    test_tensor = torch.tensor([1.0, 2.0]).to(device)
    print(f"Success! Can use {device}")
except Exception as e:
    # Best-effort diagnostic cell: report the problem rather than abort the notebook.
    print(f"Error with {device}: {e}")

CUDA available: True
Number of GPUs: 8
GPU 0: NVIDIA H200
GPU 1: NVIDIA H200
GPU 2: NVIDIA H200
GPU 3: NVIDIA H200
GPU 4: NVIDIA H200
GPU 5: NVIDIA H200
GPU 6: NVIDIA H200
GPU 7: NVIDIA H200

Your target device is: cuda:3
Success! Can use cuda:3


## 1. Installing the Libraries

In [2]:
# Optional: uncomment the lines below to install the libraries imported in the
# next cell. They are left disabled so a normal run does not reinstall packages.
#!pip install tensorflow
#!pip install opencv-python
#!pip install scikit-learn
#!pip install numpy

## Importing the packages

In [3]:
import os
import cv2
import xml.etree.ElementTree as ET
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator

2025-08-21 10:28:10.841169: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-21 10:28:10.854118: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755752290.867909 3965512 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755752290.871838 3965512 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1755752290.882883 3965512 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [4]:
# --- GPU MEMORY GROWTH ---
# Ask TensorFlow to grow its GPU allocations on demand, for every GPU it can
# see, instead of grabbing all the memory at once.
gpus = tf.config.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)
else:
    # Only report success when at least one GPU was actually configured.
    if gpus:
        print(f"Enabled memory growth for {len(gpus)} GPU(s).")
# -----------------------------------------

Enabled memory growth for 8 GPU(s).


In [5]:
def parse_xml(xml_file):
    """Parse an annotation XML file into bounding boxes and labels.

    Every ``<object>`` element under the document root contributes one entry.
    Coordinates are read from ``<bndbox>/<xmin|ymin|xmax|ymax>`` and may be
    written as integers or as floats (e.g. ``"12.0"``); they are rounded to the
    nearest integer.

    Args:
        xml_file: Path to the XML file, or a file object accepted by
            ``ET.parse``.

    Returns:
        A tuple ``(boxes, labels)`` where ``boxes`` is a list of
        ``[xmin, ymin, xmax, ymax]`` integer lists and ``labels`` is the list
        of matching ``<name>`` texts (surrounding whitespace removed). Both
        lists are empty when the file contains no ``<object>`` elements.

    Raises:
        ValueError: If an ``<object>`` lacks ``<name>``, ``<bndbox>`` or one of
            the four coordinate tags, or a coordinate is not numeric.
    """
    root = ET.parse(xml_file).getroot()
    boxes, labels = [], []

    for obj in root.findall("object"):
        name_node = obj.find("name")
        bbox = obj.find("bndbox")
        if name_node is None or name_node.text is None or bbox is None:
            raise ValueError(
                f"Malformed <object> in {xml_file}: missing <name> or <bndbox>"
            )

        coords = []
        for tag in ("xmin", "ymin", "xmax", "ymax"):
            node = bbox.find(tag)
            if node is None or node.text is None:
                raise ValueError(f"Malformed <bndbox> in {xml_file}: missing <{tag}>")
            # int("12.0") raises, so go through float first to accept both forms.
            coords.append(int(round(float(node.text))))

        boxes.append(coords)
        labels.append(name_node.text.strip())

    return boxes, labels

In [6]:
def load_dataset(folder_path):
    """Load every annotated ``.jpg`` image in a folder together with its labels.

    For each ``.jpg`` file (extension matched case-insensitively) the function
    looks for an ``.xml`` file with the same base name in the same folder and
    reads its object labels with ``parse_xml``. Bounding boxes are ignored
    here because only the labels are needed for classification.

    An image that contains several annotated objects is appended once per
    object, each time paired with a different label.

    Args:
        folder_path: Directory containing the images and their XML files.

    Returns:
        A tuple ``(images, all_labels)`` of equal-length lists: the images as
        returned by ``cv2.imread`` and the matching label strings.
    """
    images, all_labels = [], []

    print(f"Loading dataset from: {folder_path}")
    # Sort the listing so the sample order (and any seeded split made from it)
    # does not depend on the arbitrary order returned by os.listdir.
    for filename in sorted(os.listdir(folder_path)):
        stem, ext = os.path.splitext(filename)
        if ext.lower() != ".jpg":
            continue

        img_path = os.path.join(folder_path, filename)
        # Build the annotation name from the stem so only the extension changes.
        xml_path = os.path.join(folder_path, stem + ".xml")

        if not os.path.exists(xml_path):
            print(f"Warning: XML annotation missing for {filename}")
            continue

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            print(f"Warning: could not read image {filename}; skipping")
            continue

        # We only need the labels for this classification task
        _, label_list = parse_xml(xml_path)

        # If an image has multiple objects, we add it multiple times
        for label in label_list:
            images.append(img)
            all_labels.append(label)

    return images, all_labels

In [7]:
# Folder holding the training images and their XML annotation files.
dataset_path = "Object detection dataset/Object detection dataset/train/train"

images, all_labels = load_dataset(dataset_path)

# Sanity check: every loaded image must have exactly one label paired with it.
n_images, n_labels = len(images), len(all_labels)
print(f"Loaded {n_images} images and {n_labels} labels.")
assert n_images == n_labels, "Mismatch between images and labels!"

Loading dataset from: Object detection dataset/Object detection dataset/train/train
Loaded 465 images and 465 labels.


In [8]:
# Map the string labels to integer ids, then one-hot encode them for the
# softmax / categorical cross-entropy setup used by the model.
label_encoder = LabelEncoder().fit(all_labels)
encoded_labels = label_encoder.transform(all_labels)

class_names = label_encoder.classes_
num_classes = len(class_names)
y = tf.keras.utils.to_categorical(encoded_labels, num_classes=num_classes)

print(f"Found {num_classes} classes: {class_names}")

Found 3 classes: ['apple' 'banana' 'orange']


In [9]:
# Side length (in pixels) that every image is resized to; the model input
# shape must use the same value.
IMG_SIZE = 128
print(f"Resizing images to {IMG_SIZE}x{IMG_SIZE}.")

# Build the array as float32 rather than NumPy's default float64: it halves
# the memory footprint and matches the dtype used by the detection pipeline.
# Pixel values are scaled from [0, 255] to [0, 1].
X = np.array(
    [cv2.resize(img, (IMG_SIZE, IMG_SIZE)) for img in images],
    dtype=np.float32,
) / 255.0

Resizing images to 128x128.


In [10]:
# Hold out 20% of the samples; the fixed random_state keeps the shuffle
# repeatable for a given sample order.
# NOTE(review): load_dataset appends a multi-object image once per label, so
# copies of the same image may land on both sides of the split — confirm this
# is acceptable for the reported validation numbers.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

print(f"Training samples: {len(X_train)}, Test samples: {len(X_test)}")

Training samples: 372, Test samples: 93


In [11]:
# Small CNN classifier: two conv/pool stages followed by a dense head that
# outputs one softmax probability per class.
model = Sequential([
    # Declare the input with an explicit Input object instead of passing
    # `input_shape` to the first layer, which newer Keras versions warn about.
    # The shape must match IMG_SIZE used when resizing the images.
    tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),

    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax')
])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1755752309.770050 3965512 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1612 MB memory:  -> device: 0, name: NVIDIA H200, pci bus id: 0000:1b:00.0, compute capability: 9.0
I0000 00:00:1755752309.772872 3965512 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 139253 MB memory:  -> device: 1, name: NVIDIA H200, pci bus id: 0000:43:00.0, compute capability: 9.0
I0000 00:00:1755752309.775320 3965512 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 137461 MB memory:  -> device: 2, name: NVIDIA H200, pci bus id: 0000:52:00.0, compute capability: 9.0
I0000 00:00:1755752309.778099 3965512 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:3 with 101971 MB memory:  -> device: 3, name: NVIDIA H200, pci bus id: 0000:61:00.0, compute capability: 9.0
I0000 00:00:1755752309.781402 

In [12]:
# Training hyperparameters for the classifier.
BATCH_SIZE = 16  # reduced batch size
EPOCHS = 10

# Compile the model: one-hot targets + softmax output -> categorical cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train the model; the held-out split is evaluated after every epoch.
print("\nStarting model training...")
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)
print("Model training finished.")


Starting model training...
Epoch 1/10


I0000 00:00:1755752312.912616 3967257 service.cc:152] XLA service 0x7f4654005bb0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1755752312.912682 3967257 service.cc:160]   StreamExecutor device (0): NVIDIA H200, Compute Capability 9.0
I0000 00:00:1755752312.912688 3967257 service.cc:160]   StreamExecutor device (1): NVIDIA H200, Compute Capability 9.0
I0000 00:00:1755752312.912692 3967257 service.cc:160]   StreamExecutor device (2): NVIDIA H200, Compute Capability 9.0
I0000 00:00:1755752312.912695 3967257 service.cc:160]   StreamExecutor device (3): NVIDIA H200, Compute Capability 9.0
I0000 00:00:1755752312.912697 3967257 service.cc:160]   StreamExecutor device (4): NVIDIA H200, Compute Capability 9.0
I0000 00:00:1755752312.912700 3967257 service.cc:160]   StreamExecutor device (5): NVIDIA H200, Compute Capability 9.0
I0000 00:00:1755752312.912702 3967257 service.cc:160]   StreamExecutor device (6): NVIDIA H200, Compute Capability 9.

[1m20/24[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 8ms/step - accuracy: 0.4922 - loss: 2.0164

I0000 00:00:1755752317.121193 3967257 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 217ms/step - accuracy: 0.5112 - loss: 1.8612 - val_accuracy: 0.7204 - val_loss: 0.7721
Epoch 2/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - accuracy: 0.7714 - loss: 0.5359 - val_accuracy: 0.8602 - val_loss: 0.5255
Epoch 3/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - accuracy: 0.8767 - loss: 0.2975 - val_accuracy: 0.8280 - val_loss: 0.4853
Epoch 4/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - accuracy: 0.8602 - loss: 0.3376 - val_accuracy: 0.8280 - val_loss: 0.4130
Epoch 5/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - accuracy: 0.9095 - loss: 0.2265 - val_accuracy: 0.8602 - val_loss: 0.4705
Epoch 6/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.9122 - loss: 0.1813 - val_accuracy: 0.8710 - val_loss: 0.5694
Epoch 7/10
[1m24/24[0m [32m━━━━━━━━━━━━━

In [13]:
def preprocess_image(image_path, target_size=None):
    """Load a single image and prepare it as a one-image batch for prediction.

    The image is resized to the same square size the model was built with
    (``IMG_SIZE``) unless ``target_size`` is given, scaled to the [0, 1] range
    and given a leading batch dimension.

    Args:
        image_path: Path of the image file to read.
        target_size: Optional side length in pixels; defaults to ``IMG_SIZE``.

    Returns:
        A tuple ``(img_expanded, img)``: the preprocessed batch and the
        original image as read by ``cv2.imread``.

    Raises:
        OSError: If the image cannot be read.
    """
    size = IMG_SIZE if target_size is None else target_size

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising when the read fails.
        raise OSError(f"Could not read image: {image_path}")

    # Resize to the dimensions the model expects, then scale pixels to [0, 1].
    img_resized = cv2.resize(img, (size, size)) / 255.0
    img_expanded = np.expand_dims(img_resized, axis=0)  # Add batch dimension
    return img_expanded, img  # Return preprocessed and original images

def visualize_prediction(original_img, predicted_label, confidence):
    """Show the image in a window with the predicted label drawn on it.

    The text is drawn on a copy, so the caller's ``original_img`` is left
    unmodified. The call blocks until a key is pressed in the window.

    NOTE(review): ``cv2.imshow`` opens a native GUI window; this presumably
    does not work on a headless/remote kernel — confirm the runtime has a
    display.

    Args:
        original_img: Image array to annotate and display.
        predicted_label: Class name to print on the image.
        confidence: Confidence value, shown with two decimals and a ``%`` sign.
    """
    # We'll just put the text on the image for this classifier
    text = f"{predicted_label} ({confidence:.2f}%)"
    annotated = original_img.copy()
    cv2.putText(annotated, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Display the image
    try:
        cv2.imshow("Prediction", annotated)
        cv2.waitKey(0)  # Wait for a key press to close the window
    finally:
        # Always release the window, even if displaying was interrupted.
        cv2.destroyAllWindows()

In [None]:
# --- Make sure IMG_SIZE is defined. It must equal the size used when the
# training images were resized and the model input was declared (128);
# a different value here would feed the model inputs of the wrong shape. ---
IMG_SIZE = 128 

# 7.1. Helper Functions for Prediction
def preprocess_image(image_path, target_size=None):
    """Load a single image and prepare it as a one-image batch for prediction.

    The image is resized to the same square size the model was trained on
    (``IMG_SIZE``) unless ``target_size`` is given, scaled to the [0, 1] range
    and given a leading batch dimension.

    Args:
        image_path: Path of the image file to read.
        target_size: Optional side length in pixels; defaults to ``IMG_SIZE``.

    Returns:
        A tuple ``(img_expanded, img)``: the preprocessed batch and the
        original image as read by ``cv2.imread``.

    Raises:
        OSError: If the image cannot be read.
    """
    size = IMG_SIZE if target_size is None else target_size

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising when the read fails.
        raise OSError(f"Could not read image: {image_path}")

    # Ensure we resize to the SAME dimensions the model was trained on.
    img_resized = cv2.resize(img, (size, size)) / 255.0
    img_expanded = np.expand_dims(img_resized, axis=0)  # Add batch dimension
    return img_expanded, img  # Return preprocessed and original images

def visualize_prediction(original_img, predicted_label, confidence):
    """Show the image in a window with the predicted label drawn on it.

    The text is drawn on a copy, so the caller's ``original_img`` is left
    unmodified. The call blocks until a key is pressed in the window.

    NOTE(review): ``cv2.imshow`` opens a native GUI window; this presumably
    does not work on a headless/remote kernel — confirm the runtime has a
    display.

    Args:
        original_img: Image array to annotate and display.
        predicted_label: Class name to print on the image.
        confidence: Confidence value, shown with two decimals and a ``%`` sign.
    """
    text = f"{predicted_label} ({confidence:.2f}%)"
    annotated = original_img.copy()
    cv2.putText(annotated, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Display the image
    try:
        cv2.imshow("Prediction", annotated)
        cv2.waitKey(0)  # Wait for a key press to close the window
    finally:
        # Always release the window, even if displaying was interrupted.
        cv2.destroyAllWindows()

# 7.2. Make the Prediction
# --- IMPORTANT: Change this path to your test image! ---
image_path_to_predict = "Object detection dataset/Object detection dataset/test/test/orange_86.jpg"

# 1. Turn the file into a one-image batch (keeping the original for display)
processed_img, original_img = preprocess_image(image_path_to_predict)

# 2. Run the model; the batch holds a single image, so take its first row
batch_probabilities = model.predict(processed_img)
prediction_probabilities = batch_probabilities[0]

# 3. The winning class is the one with the highest probability
predicted_label_index = prediction_probabilities.argmax()
confidence = 100 * prediction_probabilities[predicted_label_index]
predicted_label = label_encoder.inverse_transform([predicted_label_index])[0]

print("\n--- Prediction Result ---")
print(f"Predicted Label: {predicted_label}")
print(f"Confidence: {confidence:.2f}%")

# 4. Show the annotated image
visualize_prediction(original_img, predicted_label, confidence)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 659ms/step

--- Prediction Result ---
Predicted Label: banana
Confidence: 61.12%


# Object Detection Using Bounding Boxes

In [1]:
import os
import cv2
import xml.etree.ElementTree as ET
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense

# Let TensorFlow grow GPU memory on demand (for every visible GPU) to prevent
# memory errors from allocating everything up front.
gpus = tf.config.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # Raised when the setting is applied too late; report it and carry on.
    print(e)
else:
    # Only report success when at least one GPU was actually configured.
    if gpus:
        print(f"Enabled memory growth for {len(gpus)} GPU(s).")

2025-08-21 10:30:12.622112: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-21 10:30:12.658803: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755752412.685434 3978607 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755752412.692998 3978607 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1755752412.714358 3978607 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

Enabled memory growth for 8 GPU(s).


In [2]:
def parse_xml(xml_file):
    """Parse an annotation XML file into bounding boxes and labels.

    Every ``<object>`` element under the document root contributes one entry.
    Coordinates are read from ``<bndbox>/<xmin|ymin|xmax|ymax>`` and may be
    written as integers or as floats (e.g. ``"12.0"``); they are rounded to the
    nearest integer.

    Args:
        xml_file: Path to the XML file, or a file object accepted by
            ``ET.parse``.

    Returns:
        A tuple ``(boxes, labels)`` where ``boxes`` is a list of
        ``[xmin, ymin, xmax, ymax]`` integer lists and ``labels`` is the list
        of matching ``<name>`` texts (surrounding whitespace removed). Both
        lists are empty when the file contains no ``<object>`` elements.

    Raises:
        ValueError: If an ``<object>`` lacks ``<name>``, ``<bndbox>`` or one of
            the four coordinate tags, or a coordinate is not numeric.
    """
    root = ET.parse(xml_file).getroot()
    boxes, labels = [], []
    for obj in root.findall("object"):
        name_node = obj.find("name")
        bbox = obj.find("bndbox")
        if name_node is None or name_node.text is None or bbox is None:
            raise ValueError(
                f"Malformed <object> in {xml_file}: missing <name> or <bndbox>"
            )
        coords = []
        for tag in ("xmin", "ymin", "xmax", "ymax"):
            node = bbox.find(tag)
            if node is None or node.text is None:
                raise ValueError(f"Malformed <bndbox> in {xml_file}: missing <{tag}>")
            # int("12.0") raises, so go through float first to accept both forms.
            coords.append(int(round(float(node.text))))
        boxes.append(coords)
        labels.append(name_node.text.strip())
    return boxes, labels

def load_full_dataset(folder_path):
    """Collect image paths with one label and one bounding box per image.

    For each ``.jpg`` file (extension matched case-insensitively) the function
    looks for an ``.xml`` file with the same base name in the same folder and
    parses it with ``parse_xml``. Images are not read here; only their paths
    are returned.

    The model built from this data predicts a single box per image, so only
    the FIRST annotated object of every image is kept. Images whose annotation
    lists no objects are skipped.

    Args:
        folder_path: Directory containing the images and their XML files.

    Returns:
        A tuple ``(image_paths, all_labels, all_boxes)`` of equal-length lists:
        image file paths, label strings and ``[xmin, ymin, xmax, ymax]`` boxes.
    """
    image_paths, all_labels, all_boxes = [], [], []
    print(f"Loading dataset from: {folder_path}")
    # Sort the listing so the sample order (and any seeded split made from it)
    # does not depend on the arbitrary order returned by os.listdir.
    for filename in sorted(os.listdir(folder_path)):
        stem, ext = os.path.splitext(filename)
        if ext.lower() != ".jpg":
            continue

        img_path = os.path.join(folder_path, filename)
        # Build the annotation name from the stem so only the extension changes.
        xml_path = os.path.join(folder_path, stem + ".xml")
        if not os.path.exists(xml_path):
            print(f"Warning: XML annotation missing for {filename}")
            continue

        # For simplicity, we keep one object per image.
        # Real-world detectors are more complex.
        boxes, labels = parse_xml(xml_path)
        if boxes:  # At least one object: use the first, ignore any others
            image_paths.append(img_path)
            all_labels.append(labels[0])
            all_boxes.append(boxes[0])
    return image_paths, all_labels, all_boxes

In [None]:
# 1. Load the raw data
dataset_path = "Object detection dataset/Object detection dataset/train/train"
image_paths, text_labels, raw_boxes = load_full_dataset(dataset_path)
if not image_paths:
    # Fail here with a clear message instead of deep inside the split/training code.
    raise ValueError(f"No annotated images found in: {dataset_path}")

# 2. Define image size and prepare lists
IMG_SIZE = 128
processed_images, normalized_boxes = [], []

print("Processing images and normalizing bounding boxes...")
for img_path, (xmin, ymin, xmax, ymax) in zip(image_paths, raw_boxes):
    # Read image and get original dimensions
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising when the read fails.
        raise OSError(f"Could not read image: {img_path}")
    h, w = img.shape[:2]

    # Resize image and add to list
    img_resized = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    processed_images.append(img_resized)

    # Express the box as fractions of the ORIGINAL width/height so it stays
    # valid after resizing and fits the sigmoid range of the box head.
    normalized_boxes.append([xmin / w, ymin / h, xmax / w, ymax / h])

# 3. Convert lists to NumPy arrays (pixels scaled to [0, 1])
X_images = np.array(processed_images, dtype=np.float32) / 255.0
y_boxes = np.array(normalized_boxes, dtype=np.float32)

# 4. Encode labels to categorical (one-hot) format
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(text_labels)
num_classes = len(label_encoder.classes_)
y_labels = tf.keras.utils.to_categorical(encoded_labels, num_classes=num_classes)

# 5. Split all data into training and testing sets.
# Passing the three arrays together keeps images, labels and boxes aligned.
(X_train, X_test,
 y_labels_train, y_labels_test,
 y_boxes_train, y_boxes_test) = train_test_split(
    X_images, y_labels, y_boxes, test_size=0.2, random_state=42
)

print(f"Data prepared: {len(X_train)} training samples, {len(X_test)} test samples.")

Loading dataset from: Object detection dataset/Object detection dataset/train/train
Processing images and normalizing bounding boxes...


In [4]:
# Define the input layer
inputs = Input(shape=(IMG_SIZE, IMG_SIZE, 3))

# Feature extraction layers (the "backbone"), shared by both heads
x = Conv2D(32, (3, 3), activation='relu')(inputs)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D((2, 2))(x)
x = Flatten()(x)

# Head for bounding box regression: four values squashed to (0, 1) by the
# sigmoid, matching box coordinates normalized by image width/height.
bbox_output = Dense(4, activation='sigmoid', name='bounding_box')(x)

# Head for class label prediction
class_output = Dense(num_classes, activation='softmax', name='class_label')(x)

# Combine into a single model with two outputs
model = Model(inputs=inputs, outputs=[bbox_output, class_output])

# Compile the model with one loss per output, keyed by output-layer name
losses = {
    "bounding_box": "mean_squared_error",
    "class_label": "categorical_crossentropy"
}
# Metrics are also given per output: accuracy is only meaningful for the
# classification head, and a flat list is ambiguous for a two-output model.
metrics = {
    "class_label": ["accuracy"]
}
model.compile(optimizer='adam', loss=losses, metrics=metrics)
model.summary()

I0000 00:00:1755752459.704895 3978607 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1612 MB memory:  -> device: 0, name: NVIDIA H200, pci bus id: 0000:1b:00.0, compute capability: 9.0
I0000 00:00:1755752459.709859 3978607 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 139253 MB memory:  -> device: 1, name: NVIDIA H200, pci bus id: 0000:43:00.0, compute capability: 9.0
I0000 00:00:1755752459.713039 3978607 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 137461 MB memory:  -> device: 2, name: NVIDIA H200, pci bus id: 0000:52:00.0, compute capability: 9.0
I0000 00:00:1755752459.716379 3978607 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:3 with 101973 MB memory:  -> device: 3, name: NVIDIA H200, pci bus id: 0000:61:00.0, compute capability: 9.0
I0000 00:00:1755752459.720698 3978607 gpu_device.cc:2019] Created device /job:localhost/replica:0/task

In [8]:
print("\nStarting object detector training...")

# Targets are supplied as dictionaries whose keys must exactly match the
# output layer names of the model ("bounding_box" and "class_label").
y_train_dict = dict(bounding_box=y_boxes_train, class_label=y_labels_train)
y_test_dict = dict(bounding_box=y_boxes_test, class_label=y_labels_test)

history = model.fit(
    X_train,
    y_train_dict,
    validation_data=(X_test, y_test_dict),
    batch_size=8,
    epochs=5,
    verbose=1,
)

print("Model training finished.")


Starting object detector training...
Epoch 1/5


ValueError: Attr 'Toutput_types' of 'OptionalFromValue' Op passed list of length 0 less than minimum 1.