In [None]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
!pip install ultralytics
!pip install torch-geometric
!pip install opencv-python
!pip install networkx
!pip install numpy
!pip install torch-geometric-temporal
!pip install --upgrade torch-geometric

In [None]:
!pip install torch-geometric==2.0.4 torch-scatter torch-sparse torch-cluster torch-spline-conv -f https://data.pyg.org/whl/torch-2.0.1+cu124.html

Dynamic GNNs Code

In [2]:
import cv2
import torch
from ultralytics import YOLO
import time
import numpy as np
from torch_geometric.nn import GCNConv
import os
import networkx as nx

# Debugging aid: enable CUDA_LAUNCH_BLOCKING for this process
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Prefer the GPU when PyTorch reports one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Load the trained YOLOv11 weights, then move the model onto the chosen device
model = YOLO("E:/paper_code/small_object_detection/runs/detect/train4/weights/best.pt")
model = model.to(device)

# Open the input video (pass 0 instead of a path to read from a webcam)
cap = cv2.VideoCapture("E:/paper_code/small_object_detection/traffic_1.mp4")

# Define a Dynamic GNN Model (DGNN)
class DGNNTracker(torch.nn.Module):
    """Two-layer graph convolutional network applied to the per-frame object graph.

    Each node is one detected object. In this notebook the node features are
    ``[center_x, center_y, width, height]``, hence the default of 4 input channels.

    Args:
        in_channels: Number of features per node.
        hidden_channels: Output size of the first graph convolution.
        out_channels: Size of the embedding returned for every node.

    The defaults reproduce the original fixed 4 -> 16 -> 8 architecture, so
    ``DGNNTracker()`` behaves exactly as before.
    """

    def __init__(self, in_channels=4, hidden_channels=16, out_channels=8):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Return one embedding per node.

        Args:
            x: Node feature tensor; the last dimension must equal ``in_channels``.
            edge_index: Long tensor of graph edges passed straight to ``GCNConv``
                (the frame loop builds it as a 2 x num_edges tensor).

        Returns:
            Tensor with ``out_channels`` features per node.
        """
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index)
        return x

# Build the DGNN, place it on the selected device and switch it to inference mode
dgnn_model = DGNNTracker().to(device).eval()

# Graph of object interactions; its edges are maintained by the frame loop
G = nx.Graph()

# Per-object motion history filled in by the frame loop:
# last known centre position and last computed velocity
object_previous_positions = dict()
object_velocities = dict()

# Region of interest (ROI) used to filter which detections are tracked
def define_roi(frame):
    """Return the tracking ROI for ``frame`` as ``(left, top, right, bottom)``.

    The ROI currently spans the entire frame: the origin is ``(0, 0)`` and the
    far corner is ``(frame width, frame height)``, read from ``frame.shape``.
    """
    dims = frame.shape  # (height, width, ...) — rows first, then columns
    return 0, 0, dims[1], dims[0]

# Post-process bounding boxes so that none is smaller than a minimum size
def refine_bounding_box(box, min_width=20, min_height=20):
    """Enlarge a box that is narrower/shorter than the given minimum size.

    Args:
        box: ``(x_min, y_min, x_max, y_max)`` corner coordinates.
        min_width: Smallest allowed box width.
        min_height: Smallest allowed box height.

    Returns:
        ``[x_min, y_min, x_max, y_max]``. A dimension that already meets the
        minimum is returned untouched. A dimension that is too small is grown
        symmetrically around its centre, so the box centre (which callers use as
        the object's position) is not shifted by the enlargement. The returned
        coordinates are not clipped and may therefore be negative.
    """
    x_min, y_min, x_max, y_max = box

    # Grow around the centre rather than from the top-left corner: extending only
    # x_max / y_max would drag the centre of small boxes to the right / downwards.
    if x_max - x_min < min_width:
        center_x = (x_min + x_max) / 2
        x_min = center_x - min_width / 2
        x_max = center_x + min_width / 2
    if y_max - y_min < min_height:
        center_y = (y_min + y_max) / 2
        y_min = center_y - min_height / 2
        y_max = center_y + min_height / 2

    return [x_min, y_min, x_max, y_max]

# Loop through Video Frames
#
# For every frame: run YOLO detection + tracking, turn the tracked boxes into
# graph nodes ([center_x, center_y, width, height]), link nodes that are close
# together or moving alike, run the DGNN over that graph, then draw and show the
# annotated frame. Press 'q' in the video window to stop.

# Two objects are linked when their centres are closer than this many pixels ...
EDGE_MAX_DISTANCE = 50
# ... or when the norm of their velocity difference (pixels per frame) is below this.
EDGE_MAX_VELOCITY_DIFF = 10

try:
    while cap.isOpened():
        start_time = time.time()
        ret, frame = cap.read()

        if not ret:
            print("End of video or error in capturing frames.")
            break

        # Region of interest used to filter detections
        roi_left, roi_top, roi_right, roi_bottom = define_roi(frame)

        # Run YOLOv11 detection + tracking on the untouched frame. Overlays are
        # drawn afterwards, on the annotated copy, so they never become part of
        # the detector's input.
        results = model.track(frame, persist=True, conf=0.5)  # Adjust confidence threshold here (0.5)

        has_detections = bool(results) and results[0].boxes is not None and len(results[0].boxes) > 0
        annotated_frame = results[0].plot() if results else frame.copy()

        # Visualize the ROI
        cv2.rectangle(
            annotated_frame,
            (int(roi_left), int(roi_top)),
            (int(roi_right), int(roi_bottom)),
            (0, 255, 0),
            2,
        )

        # `nodes` and `velocities` are index-aligned: entry k of both lists
        # describes the same object, which is what the edge rule below relies on.
        nodes = []
        velocities = []

        if has_detections:
            boxes = results[0].boxes
            detections = boxes.xyxy.cpu().numpy()  # Extract bounding boxes

            # Identify objects by tracker ID, not by their position in the
            # detection list: list order is not stable between frames, so an
            # index-based history would compare unrelated objects.
            if boxes.id is not None:
                track_ids = boxes.id.int().cpu().tolist()
            else:
                # No IDs assigned for this frame: nothing to match against.
                track_ids = [None] * len(detections)

            for box, track_id in zip(detections, track_ids):
                x_min, y_min, x_max, y_max = box
                refined_box = refine_bounding_box([x_min, y_min, x_max, y_max])

                center_x = float(refined_box[0] + refined_box[2]) / 2
                center_y = float(refined_box[1] + refined_box[3]) / 2
                width = float(refined_box[2] - refined_box[0])
                height = float(refined_box[3] - refined_box[1])

                # Velocity = displacement of the centre since the object was
                # last seen; zero for objects without history.
                previous = object_previous_positions.get(track_id)
                if previous is not None:
                    velocity = np.array([center_x, center_y]) - np.array(previous)
                else:
                    velocity = np.zeros(2)

                if track_id is not None:
                    object_previous_positions[track_id] = [center_x, center_y]
                    object_velocities[track_id] = velocity

                # Only objects whose centre lies strictly inside the ROI become nodes
                if roi_left < center_x < roi_right and roi_top < center_y < roi_bottom:
                    nodes.append([center_x, center_y, width, height])
                    velocities.append(velocity)
        else:
            print("No detections in this frame.")

        if nodes:
            x = torch.tensor(nodes, dtype=torch.float, device=device)

            # Rebuild the graph for the current frame. Node k is nodes[k]; since
            # that numbering changes every frame, edges from earlier frames
            # cannot be carried over.
            G.clear()
            G.add_nodes_from(range(len(nodes)))
            for i in range(len(nodes)):
                for j in range(i + 1, len(nodes)):
                    dist = np.linalg.norm(np.array(nodes[i][:2]) - np.array(nodes[j][:2]))
                    velocity_diff = np.linalg.norm(velocities[i] - velocities[j])

                    # NOTE(review): the original comment described this rule as
                    # "distance and velocity threshold" while the code used `or`.
                    # The `or` is kept unchanged — confirm which one is intended.
                    if dist < EDGE_MAX_DISTANCE or velocity_diff < EDGE_MAX_VELOCITY_DIFF:
                        G.add_edge(i, j)

            edges = list(G.edges)
            if edges:
                # nx.Graph lists each undirected edge once; emit both (i, j) and
                # (j, i) so messages flow in both directions through GCNConv.
                pairs = torch.tensor(edges, dtype=torch.long, device=device)
                edge_index = torch.cat([pairs, pairs.flip(1)], dim=0).t().contiguous()

                # Use DGNN model to refine associations.
                # NOTE: `out` is not used by anything later in this cell.
                with torch.no_grad():
                    out = dgnn_model(x, edge_index)

            # Slim box plus centre dot for every node
            for cx, cy, w, h in nodes:
                cv2.rectangle(
                    annotated_frame,
                    (int(cx - w / 2), int(cy - h / 2)),
                    (int(cx + w / 2), int(cy + h / 2)),
                    (0, 0, 255),
                    1,  # Slimmer line thickness
                )
                cv2.circle(annotated_frame, (int(cx), int(cy)), 3, (0, 255, 0), -1)

        # Display FPS with smaller font (guard against a zero-length interval)
        elapsed = time.time() - start_time
        fps = 1 / elapsed if elapsed > 0 else 0.0
        cv2.putText(
            annotated_frame,
            f"FPS: {fps:.2f}",
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,  # Smaller font size
            (0, 255, 0),
            1,  # Thinner font weight
        )

        # Show every frame, including those without detections, so the window
        # keeps following the video instead of freezing on the last hit.
        cv2.imshow("YOLOv11 Tracking with DGNN", annotated_frame)

        # Break the loop if 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even if the loop is interrupted or raises
    cap.release()
    cv2.destroyAllWindows()

Using device: cuda

0: 192x288 3 Cars, 6 Cngs, 1 Covered-Van, 1 Motor-Cycle, 75.3ms
Speed: 6.6ms preprocess, 75.3ms inference, 0.0ms postprocess per image at shape (1, 3, 192, 288)

0: 192x288 3 Cars, 6 Cngs, 1 Covered-Van, 1 Motor-Cycle, 96.7ms
Speed: 2.3ms preprocess, 96.7ms inference, 6.3ms postprocess per image at shape (1, 3, 192, 288)

0: 192x288 2 Cars, 3 Cngs, 1 Covered-Van, 55.4ms
Speed: 2.5ms preprocess, 55.4ms inference, 0.0ms postprocess per image at shape (1, 3, 192, 288)

0: 192x288 3 Cars, 2 Cngs, 1 Covered-Van, 1 Motor-Cycle, 64.0ms
Speed: 0.0ms preprocess, 64.0ms inference, 0.0ms postprocess per image at shape (1, 3, 192, 288)

0: 192x288 2 Cars, 4 Cngs, 1 Covered-Van, 1 Motor-Cycle, 96.2ms
Speed: 15.9ms preprocess, 96.2ms inference, 15.6ms postprocess per image at shape (1, 3, 192, 288)

0: 192x288 4 Cars, 6 Cngs, 1 Covered-Van, 90.7ms
Speed: 3.1ms preprocess, 90.7ms inference, 7.1ms postprocess per image at shape (1, 3, 192, 288)

0: 192x288 2 Cars, 6 Cngs, 1 Motor-C

In [None]:
import torch
from ultralytics import YOLO

def train_model(
    data="E:/Ml_Course/pythonProject_object/i2-9/dataset.yaml",
    epochs=40,
    imgsz=280,
    weights="yolo11m.pt",
):
    """Train a YOLOv11 model and return the training results.

    Args:
        data: Path to the dataset YAML.
        epochs: Number of training epochs.
        imgsz: Image size used for training.
            NOTE(review): 280 is not a multiple of 32 — confirm how Ultralytics
            adjusts it.
        weights: Checkpoint the model is initialised from.

    Returns:
        Whatever ``model.train(...)`` returns, for later analysis.

    All defaults equal the values that used to be hard-coded, so calling
    ``train_model()`` without arguments trains the same configuration.
    """
    # Pick the device once and use it both for loading and for training.
    # Previously training was always asked to run on "cuda", which contradicted
    # the CPU fallback used when loading the model.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Load the YOLOv11 model
    model = YOLO(weights).to(device)

    # Report where the model weights actually live
    if next(model.parameters()).is_cuda:
        print("Model is running on GPU.")
    else:
        print("Model is running on CPU.")

    # Train the model
    train_results = model.train(
        data=data,
        epochs=epochs,
        imgsz=imgsz,
        device=device,
    )

    # Return training results for analysis
    return train_results

# Run the training function
train_model()