In [1]:
# Pin this process to a single physical GPU. The environment variables are set
# before TensorFlow is imported so they are already in place when TensorFlow
# enumerates devices below.
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "3"  # Expose only physical GPU 3 (dleg28-dleg36)

import tensorflow as tf
# Only one device is exposed, so inside this process it is listed as GPU:0
# (see the PhysicalDevice name printed below) even though it is physical GPU 3.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Enable memory growth on the first (and only) visible GPU.
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print(f"Using GPU: {gpus[0]} (CUDA 3)")
    except RuntimeError as e:
        # Reported rather than re-raised so the remaining cells can still run.
        print(f"GPU configuration error: {e}")

2025-08-14 10:31:18.503861: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-14 10:31:18.539999: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755147678.566722   74938 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755147678.574266   74938 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1755147678.595374   74938 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

Using GPU: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU') (CUDA 3)


In [2]:
import cv2
import xml.etree.ElementTree as ET
import numpy as np
from sklearn.preprocessing import LabelEncoder

def _parse_coordinate(text):
    """Convert a coordinate string to int, accepting "12" as well as "12.0".

    Float-formatted values are truncated toward zero. Raises TypeError when
    `text` is None and ValueError/OverflowError when it is not a finite number.
    """
    try:
        return int(text)
    except ValueError:
        # Some annotation tools write pixel coordinates as floats.
        return int(float(text))


def parse_xml(xml_file):
    """Parse an XML annotation file to extract bounding boxes and labels.

    Every <object> element under the document root is expected to contain a
    <name> (the class label) and a <bndbox> with <xmin>, <ymin>, <xmax> and
    <ymax> children.

    Args:
        xml_file: Path (or file object) accepted by ``ET.parse``.

    Returns:
        A ``(boxes, labels)`` tuple of equal-length lists. Each box is
        ``[xmin, ymin, xmax, ymax]`` as ints; each label is the <name> text
        with surrounding whitespace removed. Both lists are empty when the
        file has no <object> elements.

    Raises:
        ValueError: If an <object> lacks a non-empty <name>, lacks a <bndbox>,
            or has a missing or non-numeric coordinate.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    root = ET.parse(xml_file).getroot()
    boxes, labels = [], []

    for obj in root.findall("object"):
        label = (obj.findtext("name") or "").strip()
        bbox = obj.find("bndbox")
        if not label or bbox is None:
            raise ValueError(
                f"Malformed <object> in {xml_file}: missing <name> or <bndbox>"
            )

        try:
            box = [
                _parse_coordinate(bbox.findtext(tag))
                for tag in ("xmin", "ymin", "xmax", "ymax")
            ]
        except (TypeError, ValueError, OverflowError) as exc:
            raise ValueError(
                f"Invalid bounding box for '{label}' in {xml_file}: {exc}"
            ) from exc

        boxes.append(box)
        labels.append(label)
    return boxes, labels

def load_dataset(folder_path):
    """Load images and their XML annotations from a single folder.

    For every ``.jpg``/``.jpeg``/``.png`` file in `folder_path` the annotation
    is looked up under the same base name with an ``.xml`` extension. Files
    without an annotation, and files OpenCV cannot decode, are skipped.

    Each image is appended once per annotated object, so an image containing
    several objects appears several times (the same array object) paired with
    each of its labels. Bounding boxes are parsed but not used here.

    Args:
        folder_path: Directory containing the images and XML files.

    Returns:
        ``(images, labels)``: equal-length lists of image arrays as returned
        by ``cv2.imread`` (OpenCV's BGR channel order) and label strings.
    """
    images, labels = [], []

    # os.listdir yields entries in arbitrary order; sorting keeps the sample
    # order, and therefore any seeded train/test split, reproducible.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue

        img_path = os.path.join(folder_path, filename)
        xml_path = os.path.splitext(img_path)[0] + '.xml'

        # No annotation means no label, so the image is skipped.
        if not os.path.exists(xml_path):
            continue

        img = cv2.imread(img_path)
        if img is None:  # unreadable or corrupt image
            continue

        _, label_list = parse_xml(xml_path)
        for label in label_list:
            images.append(img)
            labels.append(label)

    print(f"Loaded {len(images)} images with {len(labels)} labels")
    return images, labels

In [3]:
# Load dataset
dataset_path = "Object detection dataset/Object detection dataset/train/train"
images, all_labels = load_dataset(dataset_path)

# Fail early with a clear message instead of an obscure shape error further down.
if not images:
    raise ValueError(f"No annotated images found in {dataset_path!r}")

# Encode string labels as integer class ids
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(all_labels)
num_classes = len(label_encoder.classes_)

# Convert to one-hot encoding (matches the categorical_crossentropy loss)
y = tf.keras.utils.to_categorical(encoded_labels, num_classes)

# Preprocess images: resize to the 224x224 model input and scale to [0, 1].
# Images stay in the channel order produced by cv2.imread. float32 halves the
# host memory needed compared with NumPy's default float64.
X = np.array(
    [cv2.resize(img, (224, 224)).astype(np.float32) / 255.0 for img in images],
    dtype=np.float32,
)

# Train-test split (80/20, fixed seed)
# NOTE(review): the split is not stratified; consider stratify=encoded_labels
# if class balance between the two sets matters.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

Loaded 465 images with 465 labels


In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input
from tensorflow.keras.mixed_precision import set_global_policy

# Enable mixed precision for better GPU utilization.
# This sets the global Keras dtype policy, so it must run before the model is
# built; the model's output layer explicitly requests float32.
set_global_policy('mixed_float16')

def build_model(input_shape, num_classes):
    """Create and compile a small CNN image classifier.

    Architecture: two Conv2D(3x3, ReLU) + MaxPooling(2x2) stages with 32 and
    64 filters, then Flatten, Dense(128, ReLU) and a softmax output layer.

    Args:
        input_shape: Shape of one input image, e.g. ``(224, 224, 3)``.
        num_classes: Number of output classes (size of the softmax layer).

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer,
        categorical cross-entropy loss (expects one-hot targets) and an
        accuracy metric.
    """
    model = Sequential([
        # Declare the input with an explicit Input layer; passing input_shape=
        # to the first Conv2D triggers a Keras deprecation warning.
        Input(shape=input_shape),
        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        # Keep the softmax output in float32 even when a mixed-precision
        # global policy is active.
        Dense(num_classes, activation='softmax', dtype='float32'),
    ])

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

# Build the classifier for 224x224 three-channel inputs, with one output unit
# per class found by the label encoder, then print the layer summary.
model = build_model((224, 224, 3), num_classes)
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1755147711.772530   74938 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 139784 MB memory:  -> device: 0, name: NVIDIA H200, pci bus id: 0000:61:00.0, compute capability: 9.0


In [5]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Data augmentation: random rotations, shifts, zooms and horizontal flips
# applied on the fly to the training batches only.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.15,
    horizontal_flip=True
)

# Train with GPU acceleration.
# Training batches come from the augmenting generator; the validation data is
# passed as plain arrays, so it is not augmented.
# NOTE(review): no random seed is set for the generator or the model weights,
# so results will differ between runs.
with tf.device('/GPU:0'):  # GPU:0 is the single GPU made visible in the first cell
    history = model.fit(
        datagen.flow(X_train, y_train, batch_size=32),
        epochs=5,
        validation_data=(X_test, y_test),
        verbose=1
    )

  self._warn_if_super_not_called()


Epoch 1/5


I0000 00:00:1755147716.071921   93335 service.cc:152] XLA service 0x7fe37400b4c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1755147716.072005   93335 service.cc:160]   StreamExecutor device (0): NVIDIA H200, Compute Capability 9.0
2025-08-14 10:31:56.112989: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1755147716.523449   93335 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m 1/12[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1:16[0m 7s/step - accuracy: 0.3125 - loss: 1.1179

I0000 00:00:1755147721.047639   93335 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 728ms/step - accuracy: 0.2905 - loss: 22.5147







[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 1s/step - accuracy: 0.2944 - loss: 22.1859 - val_accuracy: 0.3763 - val_loss: 1.0336
Epoch 2/5
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 291ms/step - accuracy: 0.3534 - loss: 1.0829 - val_accuracy: 0.5484 - val_loss: 0.8473
Epoch 3/5
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 295ms/step - accuracy: 0.6660 - loss: 0.7385 - val_accuracy: 0.8495 - val_loss: 0.4279
Epoch 4/5
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 295ms/step - accuracy: 0.8237 - loss: 0.5046 - val_accuracy: 0.8387 - val_loss: 0.5120
Epoch 5/5
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 299ms/step - accuracy: 0.8594 - loss: 0.4115 - val_accuracy: 0.8602 - val_loss: 0.5243


In [6]:
def predict_and_visualize(image_path, model, label_encoder,
                          target_size=(224, 224), show=True):
    """Classify one image and optionally display it with the predicted label.

    The image is preprocessed the same way as the training data in this
    notebook: read with ``cv2.imread`` (BGR channel order, no colour
    conversion), resized to `target_size` and scaled to [0, 1].

    Args:
        image_path: Path of the image file to classify.
        model: Trained Keras model whose ``predict`` returns class probabilities.
        label_encoder: Fitted encoder used to map the predicted class index
            back to its label via ``inverse_transform``.
        target_size: ``(width, height)`` passed to ``cv2.resize``; must match
            the size the model was trained on.
        show: When True, open an OpenCV window with a frame around the whole
            image and the label text, and block until a key is pressed. Pass
            False on machines without a display.

    Returns:
        ``(label, confidence)`` where confidence is a percentage in [0, 100],
        or ``None`` if the image could not be read or prediction failed (the
        error is printed).
    """
    try:
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError(f"Could not read image at {image_path}")

        # Keep OpenCV's BGR order: the training images were fed to the model
        # straight from cv2.imread, so converting to RGB here would swap the
        # channels relative to what the model learned.
        img_resized = cv2.resize(img, target_size).astype(np.float32) / 255.0
        img_input = np.expand_dims(img_resized, axis=0)  # add batch dimension

        # No clear_session()/set_memory_growth() here: the former resets Keras
        # state while a live model is in use, and the latter can only be
        # changed before the GPU has been initialised.
        with tf.device('/GPU:0'):
            prediction = model.predict(img_input, verbose=0)

        pred_idx = int(np.argmax(prediction))
        label = label_encoder.inverse_transform([pred_idx])[0]
        confidence = float(np.max(prediction)) * 100

        if show:
            # The model is a whole-image classifier, so the "box" is simply a
            # frame around the full image.
            h, w = img.shape[:2]
            cv2.rectangle(img, (0, 0), (w, h), (0, 255, 0), 3)
            cv2.putText(img, f"{label} ({confidence:.1f}%)",
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            try:
                cv2.imshow("Prediction", img)
                cv2.waitKey(0)
                cv2.destroyAllWindows()
            except cv2.error as display_err:
                # A missing GUI backend must not discard the prediction.
                print(f"Prediction: {label} ({confidence:.1f}%) "
                      f"- display unavailable: {display_err}")

        return label, confidence

    except Exception as e:
        # Best-effort helper: report the problem instead of aborting the notebook.
        print(f"Error during prediction: {e}")
        return None

In [None]:
# Smoke test of the prediction helper on a single test image.

# 1. Verify the image file exists
test_path = "Object detection dataset/Object detection dataset/test/test/apple_77.jpg"
print(f"Image exists: {os.path.exists(test_path)}")

# 2. Check that OpenCV can decode it (cv2.imread returns None on failure).
#    This is only a sanity check; the helper below reads the file again itself.
test_img = cv2.imread(test_path)
print(f"Image loaded successfully: {test_img is not None}")

# 3. Run the prediction with the trained model and fitted label encoder.
#    By default this opens an OpenCV window and blocks until a key is pressed.
predict_and_visualize(test_path, model, label_encoder)

Image exists: True
Image loaded successfully: True




