In [None]:
!pip install ultralytics

In [None]:
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt
from collections import Counter

In [None]:
# Load the 'yolov8n.pt' checkpoint through the Ultralytics YOLO wrapper; the
# resulting `model` is called once per frame in the processing loop below.
model = YOLO('yolov8n.pt')  # Use 'yolov8n.pt', 'yolov8s.pt', etc., depending on the model size

In [None]:
# Video input and output paths
# video_path is opened with cv2.VideoCapture in the next cell; output_path is
# handed to cv2.VideoWriter and receives the annotated frames.
video_path = "/kaggle/input/cars-traffic/3.mp4"
output_path = "/kaggle/working/output_video.mp4"

In [None]:
# Open the source video and fail fast if OpenCV cannot read it; otherwise the
# processing loop further down would silently do nothing.
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise IOError(f"Could not open video: {video_path}")

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as a float: truncating e.g. 29.97 to 29 changes the
# playback speed of the output. Fall back to 30 when the container reports 0.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

# Video writer for saving the output
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
if not out.isOpened():
    # Do not leave the capture handle dangling when the writer cannot be created.
    cap.release()
    raise IOError(f"Could not open video writer for: {output_path}")

In [None]:
# Detection filter settings.
CAR_CLASS_ID = 2            # assumed to be the id of 'car' for this checkpoint — verify with model.names
CONFIDENCE_THRESHOLD = 0.5  # detections at or below this score are ignored

# Running sum of car detections over all frames. A car visible in N frames
# contributes N to this value, so it is NOT a count of unique vehicles.
car_count = 0

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break  # end of stream (or a read error)

    # Run YOLOv8 on the frame; verbose=False keeps the per-frame inference log
    # from flooding the notebook output.
    results = model(frame, verbose=False)

    # Count and annotate the cars detected in THIS frame only.
    frame_car_count = 0
    for box in results[0].boxes:  # Access the first result (since batch size = 1)
        if float(box.conf) <= CONFIDENCE_THRESHOLD or int(box.cls) != CAR_CLASS_ID:
            continue
        frame_car_count += 1
        x1, y1, x2, y2 = map(int, box.xyxy[0])  # Pixel coordinates of the box corners
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, "Car", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

    car_count += frame_car_count

    # Overlay the number of cars visible in the current frame. The cumulative
    # total would grow with every frame and says nothing about the scene.
    cv2.putText(frame, f"Car Count: {frame_car_count}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    # Write the frame to output video
    out.write(frame)

In [None]:
# Release the capture and the writer handles, reader first.
for handle in (cap, out):
    handle.release()

print(f"Processing complete. Output saved to {output_path}")

# Single Frame Processing

In [None]:
import torch
import cv2
import matplotlib.pyplot as plt
from ultralytics import YOLO

# Load a YOLOv5 checkpoint through the Ultralytics API.
# NOTE(review): recent ultralytics releases appear to remap the legacy name
# 'yolov5s.pt' to the 'yolov5su.pt' weights — confirm for the installed version.
model = YOLO('yolov5s.pt')  # You can choose the model variant: yolov5s, yolov5m, yolov5l, yolov5x

# Open the video file (for single frame extraction)
video_path = "/kaggle/input/cars-traffic/3.mp4"  # Replace with your video path
cap = cv2.VideoCapture(video_path)

# Raise instead of calling exit(): inside a notebook exit() shuts the kernel down.
if not cap.isOpened():
    raise IOError("Error: Couldn't open video.")

# Capture a single frame (first frame); the capture is not needed afterwards.
ret, frame = cap.read()
cap.release()
if not ret:
    raise RuntimeError("Failed to capture the first frame.")

# Run inference. The Ultralytics API returns a list with one Results object per
# input image, so the detections for our single frame live in results[0].
results = model(frame)
detections = results[0]

labels = detections.names  # Mapping of class id -> class name (e.g. 'car', 'truck')
boxes = detections.boxes   # One entry per detected object

# Count and annotate the detections whose class name is 'car'.
car_count = 0
for box in boxes:
    if labels[int(box.cls)] != 'car':
        continue
    car_count += 1

    # xyxy holds absolute pixel corners, so no rescaling by the frame size is needed.
    x1, y1, x2, y2 = map(int, box.xyxy[0])

    # Draw the bounding box around the car
    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.putText(frame, "Car", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

# Overlay the total on the frame itself
cv2.putText(frame, f"Car Count: {car_count}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

# Convert to RGB for matplotlib (since OpenCV uses BGR)
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

# Show the frame with bounding boxes and car count
plt.imshow(frame_rgb)
plt.title(f"Car Count: {car_count}")
plt.axis("off")
plt.show()

# Optionally, save the frame with detections.
# cv2.imwrite returns False on failure, so report it explicitly rather than
# letting it go unnoticed.
output_path = "/path/to/output_frame.jpg"  # Replace with your desired output path
if not cv2.imwrite(output_path, frame):
    print(f"Warning: could not write annotated frame to {output_path}")

# DETR Model

In [None]:
!pip install torch torchvision

In [None]:
!pip install transformers

In [None]:
import torch
import cv2
from transformers import DetrImageProcessor, DetrForObjectDetection

In [None]:
# Fetch the DETR image processor and detection model from one shared
# checkpoint id, then switch the model to evaluation mode.
detr_checkpoint = "facebook/detr-resnet-50"
processor = DetrImageProcessor.from_pretrained(detr_checkpoint)
model = DetrForObjectDetection.from_pretrained(detr_checkpoint)
model.eval()

In [None]:
# Source video for the DETR experiments and, judging by its file name, the
# path meant for the DETR-annotated output video.
# NOTE(review): confirm which later cell is expected to consume output_path.
video_path = "/kaggle/input/cars-traffic/3.mp4"
output_path = "/kaggle/working/output_video_detr.mp4"

# Single Frame Processing

In [None]:
import torch
import cv2
import matplotlib.pyplot as plt
from transformers import DetrImageProcessor, DetrForObjectDetection

# Instantiate the DETR processor/model pair used for this single-frame demo.
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

# Grab the first frame of the video; the capture can be closed straight away.
video_path = "/kaggle/input/cars-traffic/3.mp4"
cap = cv2.VideoCapture(video_path)
ret, frame = cap.read()
cap.release()

if ret:
    # DETR expects RGB input while OpenCV decodes frames as BGR.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Turn the image into model-ready tensors.
    inputs = processor(images=frame_rgb, return_tensors="pt")

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        outputs = model(**inputs)

    # Rescale the predictions to the frame's (height, width) and keep those
    # scoring above 0.7.
    target_sizes = torch.tensor([frame_rgb.shape[:2]])
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=0.7
    )[0]

    # Keep the boxes whose label id is 3 ('car' according to the original
    # notebook) and draw them on the BGR frame.
    detections = zip(results["boxes"], results["labels"], results["scores"])
    car_boxes = [box for box, label, score in detections if label == 3]
    for car_box in car_boxes:
        x1, y1, x2, y2 = map(int, car_box.tolist())
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, "Car", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

    # matplotlib wants RGB, so convert the annotated frame before showing it.
    frame_rgb_with_boxes = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    plt.imshow(frame_rgb_with_boxes)
    plt.axis('off')  # Hide axis
    plt.show()
else:
    print("Failed to capture a frame from the video.")


# Full Video Processing

In [None]:
import torch
import cv2
from transformers import DetrImageProcessor, DetrForObjectDetection

# Load the pre-trained DETR model and processor
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
model.eval()

# Label id treated as 'car' (per the original notebook; verify against
# model.config.id2label for the loaded checkpoint).
CAR_LABEL_ID = 3

# Input video and destination for the annotated copy.
video_path = "/kaggle/input/cars-traffic/3.mp4"
output_path = "/kaggle/working/output_video_detr.mp4"

cap = cv2.VideoCapture(video_path)

# Raise instead of calling exit(): inside a notebook exit() shuts the kernel down.
if not cap.isOpened():
    raise IOError("Error: Couldn't open video.")

# Writer for the annotated frames; without it the boxes drawn below would be
# computed and then thrown away. Fall back to 30 fps when the container
# reports 0.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
frame_size = (
    int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
    int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
)
out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, frame_size)

# Sum of per-frame car detections. A car visible in N frames contributes N,
# so this is not a count of unique vehicles.
total_car_count = 0

try:
    # Process each frame
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Convert frame from BGR (OpenCV) to RGB (for DETR)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Preprocess the frame
        inputs = processor(images=rgb_frame, return_tensors="pt")

        # Perform object detection
        with torch.no_grad():
            outputs = model(**inputs)

        # Rescale predictions to the frame's (height, width); keep scores > 0.7
        target_sizes = torch.tensor([rgb_frame.shape[:2]])
        results = processor.post_process_object_detection(
            outputs, target_sizes=target_sizes, threshold=0.7
        )[0]

        # Count and annotate the cars of this frame in a single pass.
        frame_car_count = 0
        for box, label in zip(results["boxes"], results["labels"]):
            if label.item() != CAR_LABEL_ID:
                continue
            frame_car_count += 1
            x1, y1, x2, y2 = map(int, box.tolist())
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, "Car", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

        # Add the count of cars in this frame to the total
        total_car_count += frame_car_count

        # Display the vehicle count on the frame
        cv2.putText(frame, f"Car Count: {frame_car_count}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        out.write(frame)
finally:
    # Release both handles even when inference fails part-way through.
    cap.release()
    out.release()

print(f"Total number of cars detected in the video: {total_car_count}")
print(f"Annotated video saved to {output_path}")


# RT-DETR

In [None]:
!pip install transformers torch torchvision opencv-python-headless

In [None]:
import torch
import cv2
from PIL import Image
import matplotlib.pyplot as plt
from transformers import RTDetrForObjectDetection, RTDetrImageProcessor

# Fetch the RT-DETR processor and model from one checkpoint id and switch the
# model to evaluation mode.
checkpoint = "PekingU/rtdetr_r50vd"
image_processor = RTDetrImageProcessor.from_pretrained(checkpoint)
model = RTDetrForObjectDetection.from_pretrained(checkpoint)
model.eval()

# Input video location
video_path = '/kaggle/input/cars-traffic/3.mp4'

# Only the first frame is needed, so the capture is closed right after the read.
cap = cv2.VideoCapture(video_path)
success, frame = cap.read()
cap.release()

if not success:
    raise ValueError("Could not read frame from video.")

# OpenCV decodes BGR; convert to RGB and wrap the array in a PIL image.
image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

# Forward pass without gradient tracking.
inputs = image_processor(images=image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# Map the raw predictions back to the image's (height, width), keeping
# detections scoring above 0.3.
target_sizes = torch.tensor([(image.height, image.width)])
results = image_processor.post_process_object_detection(
    outputs, target_sizes=target_sizes, threshold=0.3
)

# Draw every detection (all classes) with its class name and score.
box_colour = (0, 255, 0)
for result in results:
    detections = zip(result["scores"], result["labels"], result["boxes"])
    for score_t, label_t, box_t in detections:
        caption = f"{model.config.id2label[label_t.item()]} {score_t.item():.2f}"
        x1, y1, x2, y2 = (round(value) for value in box_t.tolist())
        frame = cv2.rectangle(frame, (x1, y1), (x2, y2), box_colour, 2)
        frame = cv2.putText(
            frame, caption, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_colour, 2
        )

# Back to RGB so matplotlib shows the colours correctly.
annotated_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

plt.figure(figsize=(12, 8))
plt.imshow(annotated_image)
plt.axis("off")
plt.show()


In [None]:
import torch
import cv2
from PIL import Image
from transformers import RTDetrForObjectDetection, RTDetrImageProcessor

# Load the RT-DETR model and image processor
image_processor = RTDetrImageProcessor.from_pretrained("PekingU/rtdetr_r50vd")
model = RTDetrForObjectDetection.from_pretrained("PekingU/rtdetr_r50vd")
model.eval()

# Video paths
input_video_path = '/kaggle/input/cars-traffic/3.mp4'
output_video_path = '/kaggle/working/processed_output.mp4'

# Open the input video and fail fast when it cannot be read; otherwise the
# loop below is skipped and the final message would wrongly report success.
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    raise IOError(f"Could not open video: {input_video_path}")

# Get video properties. The frame rate stays a float so that e.g. 29.97 fps is
# not truncated to 29; fall back to 30 when the container reports 0.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
if not out.isOpened():
    cap.release()
    raise IOError(f"Could not open video writer for: {output_video_path}")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break  # end of stream (or a read error)

        # Convert frame to PIL image (RGB; OpenCV decodes BGR)
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Process the image with RT-DETR
        inputs = image_processor(images=image, return_tensors="pt")
        with torch.no_grad():
            outputs = model(**inputs)

        # Rescale predictions to the frame's (height, width); keep scores > 0.3
        results = image_processor.post_process_object_detection(
            outputs,
            target_sizes=torch.tensor([(image.height, image.width)]),
            threshold=0.3,
        )

        # Draw bounding boxes and labels for every detection (all classes)
        for result in results:
            for score, label_id, box in zip(result["scores"], result["labels"], result["boxes"]):
                label = model.config.id2label[label_id.item()]
                score = score.item()
                x1, y1, x2, y2 = (round(coord) for coord in box.tolist())
                # Draw the bounding box
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                # Draw the label and score
                cv2.putText(
                    frame,
                    f"{label} {score:.2f}",
                    (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    (0, 255, 0),
                    2,
                )

        # Write the processed frame to the output video
        out.write(frame)
finally:
    # Release both handles even when inference fails part-way through.
    cap.release()
    out.release()

print(f"Processed video saved to {output_video_path}")


In [None]:
from IPython.display import Video
# Embed the processed video in the notebook output.
# NOTE(review): this file is written with the 'mp4v' FourCC by the cell above;
# some browsers cannot play that codec inline — confirm playback, or re-encode
# to H.264 if the player stays blank.
Video('/kaggle/working/processed_output.mp4', embed=True)
