In [20]:
# Cell 0: Setup
import os
import random
import cv2  # OpenCV for video processing
import time

# For inference with YOLO (assuming you use ultralytics)
from ultralytics import YOLO

# Define directory paths (modify as per your folder structure)
test_videos_dir = 'data/test_raw'  # Directory containing your test videos
extracted_frames_dir = 'data/test_frames3'  # Directory to save extracted frames

# Create folder if it doesn't exist
os.makedirs(extracted_frames_dir, exist_ok=True)


In [8]:
# select 3 random vids
import random
random.seed(162)

# Cell 1: Randomly choose 3 candidate videos
all_videos = [f for f in os.listdir(test_videos_dir) if f.lower().endswith(('.mp4', '.avi', '.mov'))]
print(f"Total videos found: {len(all_videos)}")

# Select 3 random videos (ensure there are at least 3)
num_candidates = 3
candidate_videos = random.sample(all_videos, min(num_candidates, len(all_videos)))
print("Selected candidate videos:")
for vid in candidate_videos:
    print(vid)



Total videos found: 10
Selected candidate videos:
bicycle_test.mp4
car_test.mp4
people_test.mp4


In [21]:
# use all vids for inference
import os
candidate_videos = []

test_raw_dir = r'C:\Users\redoks\Documents\skripzii\data\test_raw'
candidate_videos = [f for f in os.listdir(test_raw_dir) if f.lower().endswith('.mp4')]
candidate_videos = [vid for vid in candidate_videos if vid not in ['bus_test.mp4', 'boat_test.mp4', 'dog-chair-bottle_test.mp4','table_test.mp4']]
print(f"Total videos found: {len(candidate_videos)}")

special_candidate = ['bus_test.mp4', 'boat_test.mp4', 'dog-chair-bottle_test.mp4']
fastframe_candidate = ['table_test.mp4']



Total videos found: 6


In [None]:
import random
random.seed(162)

# Cell 2: Extract one frame from each segment
# Define the segment length and the gap between segments
segment_length = 6   # seconds per segment
segment_gap = 1      # gap between segments, so segments start at 0, 7, 13, etc.
special_length = 2 # for special candidates (bus and boat)
fast_length = 1 # for fast frame candidates (table)

# We'll choose the middle of each segment. For a 6-sec segment, the midpoint is 3 seconds after the segment's start.
def extract_segment_frames(video_path, output_dir, seg_length, seg_gap):
    cap = cv2.VideoCapture(video_path)
    
    # Get FPS and total frame count
    video_fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    duration_sec = total_frames / video_fps
    print(f"Video: {os.path.basename(video_path)} | FPS: {video_fps:.2f} | Duration: {duration_sec:.2f}s")
    
    # Calculate the start times for segments; segments start at times 0, (seg_length + seg_gap), (2*(seg_length+seg_gap)), etc.
    segment_interval = seg_length + seg_gap
    segment_starts = [t for t in range(0, int(duration_sec), segment_interval) if t + seg_length <= duration_sec]
    
    print(f"Number of segments to extract: {len(segment_starts)}")
    
    # For each segment, choose the midpoint frame (start time + seg_length/2)
    for seg_start in segment_starts:
        target_time = seg_start + seg_length / 2  # in seconds
        target_frame_index = int(target_time * video_fps)
        
        # Set the video capture position to the target frame index
        cap.set(cv2.CAP_PROP_POS_FRAMES, target_frame_index)
        ret, frame = cap.read()
        if ret:
            # Define filename and save the extracted frame
            frame_filename = os.path.join(output_dir, f"frame_{target_frame_index:04d}.jpg")
            cv2.imwrite(frame_filename, frame)
            print(f"Saved frame at {target_time:.1f}s (Frame {target_frame_index}) -> {frame_filename}")
        else:
            print(f"Failed to capture frame at {target_time:.1f}s (Frame {target_frame_index})")
    
    cap.release()

# Process each candidate video
for vid_name in candidate_videos:
    vid_path = os.path.join(test_videos_dir, vid_name)
    # Create a subdirectory for frames for this video
    video_frames_dir = os.path.join(extracted_frames_dir, os.path.splitext(vid_name)[0])
    os.makedirs(video_frames_dir, exist_ok=True)
    
    # Extract frames with non-continuous segments
    extract_segment_frames(vid_path, video_frames_dir, segment_length, segment_gap)
    print()

# Process each candidate video
for vid_name in special_candidate:
    vid_path = os.path.join(test_videos_dir, vid_name)
    # Create a subdirectory for frames for this video
    video_frames_dir = os.path.join(extracted_frames_dir, os.path.splitext(vid_name)[0])
    os.makedirs(video_frames_dir, exist_ok=True)
    
    # Extract frames with non-continuous segments
    extract_segment_frames(vid_path, video_frames_dir, special_length, segment_gap)
    print()

# Process each candidate video
for vid_name in fastframe_candidate:
    vid_path = os.path.join(test_videos_dir, vid_name)
    # Create a subdirectory for frames for this video
    video_frames_dir = os.path.join(extracted_frames_dir, os.path.splitext(vid_name)[0])
    os.makedirs(video_frames_dir, exist_ok=True)
    
    # Extract frames with non-continuous segments
    extract_segment_frames(vid_path, video_frames_dir, fast_length, segment_gap)
    print()


Video: table_test.mp4 | FPS: 25.00 | Duration: 14.60s
Number of segments to extract: 7
Saved frame at 0.5s (Frame 12) -> data/test_frames3\table_test\frame_0012.jpg
Saved frame at 2.5s (Frame 62) -> data/test_frames3\table_test\frame_0062.jpg
Saved frame at 4.5s (Frame 112) -> data/test_frames3\table_test\frame_0112.jpg
Saved frame at 6.5s (Frame 162) -> data/test_frames3\table_test\frame_0162.jpg
Saved frame at 8.5s (Frame 212) -> data/test_frames3\table_test\frame_0212.jpg
Saved frame at 10.5s (Frame 262) -> data/test_frames3\table_test\frame_0262.jpg
Saved frame at 12.5s (Frame 312) -> data/test_frames3\table_test\frame_0312.jpg



# TEST CLASS CONVERTER

In [None]:
# TEST
# names: ['Bicycle', 'Car', 'Motorbike', 'People']
# 0, 1, 2, 3

# ACTUAL
#names: ['Bicycle', 'Boat', 'Bottle', 'Bus', 'Car', 'Cat', 'Chair', 'Cup', 'Dog', 'Motorbike', 'People', 'Table']
# 0, 4, 9, 10

# Inference


In [None]:
from sklearn.metrics import average_precision_score
from ultralytics import YOLO
import os

# Cell 4: Run inference on all images in a folder, measure inference time, and calculate mAP


# Load your pre-trained YOLO model (update the model path accordingly)
model = YOLO("C:\\Users\\redoks\\Documents\\skripzii\\products6k\\YOLOv9\\YOLOv9s\\batch8_lr0.01\\weights\\best.pt")  # Replace with your model file

def run_inference_on_image(image_path):
    img = cv2.imread(image_path)
    start_time = time.time()
    # Run prediction; here we use conf=0.25 as default
    results = model.predict(source=img, conf=0.489, imgsz=640)
    elapsed = time.time() - start_time
    return results, elapsed

def parse_results(results):
    """Extract bounding boxes and classes from YOLO results."""
    boxes = results[0].boxes.xyxy.cpu().numpy()  # Bounding boxes (x1, y1, x2, y2)
    scores = results[0].boxes.conf.cpu().numpy()  # Confidence scores
    classes = results[0].boxes.cls.cpu().numpy()  # Class IDs
    return boxes, scores, classes

def parse_ground_truth(label_path):
    """Parse ground truth labels from a YOLO-format label file."""
    with open(label_path, 'r') as f:
        lines = f.readlines()
    gt_boxes = []
    gt_classes = []
    for line in lines:
        parts = line.strip().split()
        cls = int(parts[0])  # Class ID
        x_center, y_center, width, height = map(float, parts[1:])
        gt_classes.append(cls)
        # Convert YOLO format (x_center, y_center, width, height) to (x1, y1, x2, y2)
        x1 = x_center - width / 2
        y1 = y_center - height / 2
        x2 = x_center + width / 2
        y2 = y_center + height / 2
        gt_boxes.append([x1, y1, x2, y2])
    return gt_boxes, gt_classes

# Define the folder containing images and labels
images_dir = 'data/test_ready/test/images'
labels_dir = 'data/test_ready/test/labels'

# Get all image files in the folder
image_files = sorted([f for f in os.listdir(images_dir) if f.endswith('.jpg') or f.endswith('.png')])
print(f"Found {len(image_files)} images in {images_dir}")

# Run inference on all images
all_inference_times = []
all_aps = []  # Store average precision for each image
for image_file in image_files:
    image_path = os.path.join(images_dir, image_file)
    label_path = os.path.join(labels_dir, os.path.splitext(image_file)[0] + '.txt')  # Corresponding label file
    # print(image_path)
    # print(label_path)
    # exit()

    # Check if the label file exists
    if not os.path.exists(label_path):
        print(f"Warning: Label file not found for {image_file}. Skipping...")
        continue

    # Run inference
    results, elapsed = run_inference_on_image(image_path)
    all_inference_times.append(elapsed)

    # Parse results and ground truth
    pred_boxes, pred_scores, pred_classes = parse_results(results)
    gt_boxes, gt_classes = parse_ground_truth(label_path)

    # Calculate mAP (for simplicity, we calculate AP per class and average them)
    aps = []
    for cls in set(gt_classes + list(pred_classes)):
        gt_binary = [1 if c == cls else 0 for c in gt_classes]
        # Filter pred_scores for predictions matching the current class:
        pred_scores_cls = [score for score, c in zip(pred_scores, pred_classes) if c == cls]
        # Also create a pred_binary that has the same length as pred_scores_cls (typically all ones, since these are only predictions for this class)
        pred_binary = [1] * len(pred_scores_cls)

        if sum(gt_binary) > 0 and len(pred_scores_cls) > 0:
            aps.append(average_precision_score(gt_binary, pred_scores_cls))


    # Print results
    print(f"Image {image_file}: Inference time: {elapsed*1000:.2f} ms")
    print(f"Results: {results}")  # You can process results further if needed

# Calculate and display average inference time and mAP
if all_inference_times:
    avg_time = sum(all_inference_times) / len(all_inference_times)
    print(f"Average inference time per image: {avg_time*1000:.2f} ms")

if all_aps:
    mean_ap = sum(all_aps) / len(all_aps)
    print(f"Mean Average Precision (mAP): {mean_ap:.4f}")


# 2nd way to inference

In [24]:
from ultralytics import YOLO
import time

# === Load your trained model ===
model = YOLO("C:\\Users\\redoks\\Documents\\skripzii\\products6k\\YOLOv9\\YOLOv9s\\batch8_lr0.01\\weights\\best.pt")  # Replace with your model file

# === Start timer for full validation phase ===
start_time = time.time()

# === Run validation using the test set ===
results = model.val(
    data=r'C:\Users\redoks\Documents\skripzii\data\test_ready2\data.yaml',
    split='test',       # Force it to use the test split
    conf=0.489,           # Confidence threshold
    iou=0.5,             # IoU threshold
    imgsz=640,           # Image size (default from YOLO config)
    # max_det=300,         # Max detections per image
    device='cpu',
    cache=False,
)

# === End timer ===
end_time = time.time()

# === Print relevant metrics ===
print("\n=== Evaluation Results ===")
print(f"mAP@0.5:        {results.box.map50:.4f}")
print(f"mAP@0.5:0.95:   {results.box.map:.4f}")
print(f"Inference Time: {results.speed['inference']:.2f} ms/image")
print(f"Total Time:     {end_time - start_time:.2f} seconds")


Ultralytics 8.3.107  Python-3.12.0 torch-2.6.0+cpu CPU (Intel Core(TM) i5-8265U 1.60GHz)
YOLOv9s summary (fused): 197 layers, 7,171,732 parameters, 0 gradients, 26.8 GFLOPs


[34m[1mval: [0mScanning C:\Users\redoks\Documents\skripzii\data\test_ready2\test\labels... 56 images, 0 backgrounds, 0 corrupt: 100%|██████████| 56/56 [00:00<00:00, 468.78it/s]

[34m[1mval: [0mNew cache created: C:\Users\redoks\Documents\skripzii\data\test_ready2\test\labels.cache



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:13<00:00,  3.39s/it]


                   all         56        533      0.535      0.233      0.396      0.181
               Bicycle         12         21      0.833      0.238      0.555      0.108
                  Boat          7         12        0.4      0.167      0.284       0.16
                Bottle         14         26        0.4     0.0769      0.215     0.0648
                   Bus          3          3      0.333      0.333      0.446      0.223
                   Car          8         14        0.4      0.286      0.414      0.231
                   Cat          9         23      0.714      0.217      0.467      0.214
                 Chair         19        119      0.614      0.294      0.431      0.249
                   Cup          6        114          0          0          0          0
                   Dog          6         12        0.6       0.25      0.473       0.24
             Motorbike          6         18      0.714      0.278      0.489      0.208
                Peopl

# 3rd way inference

In [None]:
from ultralytics import YOLO
import torch
import time
from pathlib import Path
from tqdm import tqdm
from ultralytics.utils.metrics import ConfusionMatrix
from ultralytics.utils.ops import xywh2xyxy

# === Load full model (trained with 12 classes) ===
model = YOLO("C:\\Users\\redoks\\Documents\\skripzii\\products6k\\YOLOv9\\YOLOv9s\\batch8_lr0.01\\weights\\best.pt")  # Replace with your model file

# === Define only test classes to evaluate ===
selected_classes = [0, 4, 9, 10]  # Adjust as needed
class_names = model.names

# === Load test images and labels ===
test_img_dir = Path("C:/Users/redoks/Documents/skripzii/data/test_ready/test/images")
test_label_dir = Path("C:/Users/redoks/Documents/skripzii/data/test_ready/test/labels")

image_paths = list(test_img_dir.glob("*.jpg"))

confmat = ConfusionMatrix(nc=len(class_names))  # full 12 classes
start_time = time.time()

# === Inference & Evaluation ===
for img_path in tqdm(image_paths, desc="Running inference"):
    result = model(img_path, conf=0.489, iou=0.5, imgsz=640, device='cpu', cache=False, verbose=False)[0]
    preds = result.boxes

    # Load and filter GT
    label_path = test_label_dir / (img_path.stem + ".txt")
    if not label_path.exists():
        continue

    gt_raw = []
    with open(label_path) as f:
        for line in f:
            parts = list(map(float, line.strip().split()))
            cls_id = int(parts[0])
            if cls_id in selected_classes:
                gt_raw.append(parts)

    if not gt_raw:
        continue

    gt_raw = torch.tensor(gt_raw)
    gt_cls = gt_raw[:, 0]
    gt_boxes_xyxy = xywh2xyxy(gt_raw[:, 1:])  # convert normalized xywh to xyxy

    # Filter prediction by selected classes
    keep = [i for i, cls in enumerate(preds.cls) if int(cls.item()) in selected_classes]
    if keep:
        cls_pred = preds.cls[keep].unsqueeze(1)
        boxes_pred = preds.xyxy[keep]
        conf_pred = preds.conf[keep].unsqueeze(1)
        pred_combined = torch.cat([cls_pred, boxes_pred, conf_pred], dim=1).cpu()
    else:
        pred_combined = torch.zeros((0, 6))

    # === Use separate GT boxes and GT classes ===
    confmat.process_batch(pred_combined, gt_boxes_xyxy, gt_cls)

# === Compute results manually ===
end_time = time.time()
confmat_matrix = confmat.matrix  # [num_classes x num_classes] confusion matrix

# Optionally print matrix
print("\nConfusion Matrix:")
# print(confmat_matrix)

# === Calculate simple stats (Precision, Recall, etc) ===
tp = confmat_matrix.diagonal()
fp = confmat_matrix.sum(0) - tp
fn = confmat_matrix.sum(1) - tp
precision = tp / (tp + fp + 1e-9)
recall = tp / (tp + fn + 1e-9)

print("\n=== Evaluation on Classes [0, 4, 9, 10] ===")
for i in selected_classes:
    print(f"Class {i} ({class_names[i]}): Precision {precision[i]:.4f}, Recall {recall[i]:.4f}")
print(f"Inference Time: {(end_time - start_time)/len(image_paths)*1000:.2f} ms/image")
print(f"Total Time:     {end_time - start_time:.2f} seconds")


Running inference: 100%|██████████| 18/18 [00:05<00:00,  3.06it/s]


Confusion Matrix:
[[          0           0           0           0           0           0           0           0           0           0           0           0          37]
 [          0           0           0           0           0           0           0           0           0           0           0           0           0]
 [          0           0           0           0           0           0           0           0           0           0           0           0           0]
 [          0           0           0           0           0           0           0           0           0           0           0           0           0]
 [          0           0           0           0           0           0           0           0           0           0           0           0           0]
 [          0           0           0           0           0           0           0           0           0           0           0           0           0]
 [          0           0  




# terminal inference

In [19]:
from ultralytics import YOLO
import time
from pathlib import Path
from tqdm import tqdm

# === Load full model (trained with 12 classes) ===
model_path = "C:\\Users\\redoks\\Documents\\skripzii\\products6k\\YOLOv9\\YOLOv9s\\batch8_lr0.01\\weights\\best.pt"
model = YOLO(model_path)

# === Path to your updated data.yaml (should contain nc: 12 and full names list) ===
data_yaml = "C:/Users/redoks/Documents/skripzii/data/test_ready/data.yaml"

# === Measure Inference Time over Test Images ===
test_img_dir = Path("C:/Users/redoks/Documents/skripzii/data/test_ready/test/images")
image_paths = list(test_img_dir.glob("*.jpg"))

inference_times = []
for img_path in tqdm(image_paths, desc="Running inference"):
    start = time.time()
    _ = model(img_path, conf=0.489, iou=0.5, imgsz=640, device='cpu', cache=False, verbose=False)
    inference_times.append(time.time() - start)

avg_time_ms = (sum(inference_times) / len(inference_times)) * 1000
print(f"Average Inference Time: {avg_time_ms:.2f} ms/image")

# === Compute mAP Metrics via Built-In Evaluation ===
# This runs the evaluation on the 12-class setup as described in your data.yaml.
val_results = model.val(data=data_yaml, split="test", iou=0.5, verbose=True)

# Extract and display the mAP metrics.
print("\n=== mAP Metrics ===")
print(f"mAP@0.5:        {val_results.box.map50:.4f}")
print(f"mAP@0.5:0.95:   {val_results.box.map:.4f}")


Running inference: 100%|██████████| 18/18 [00:06<00:00,  2.80it/s]

Average Inference Time: 351.20 ms/image
Ultralytics 8.3.107  Python-3.12.0 torch-2.6.0+cpu CPU (Intel Core(TM) i5-8265U 1.60GHz)



[34m[1mval: [0mScanning C:\Users\redoks\Documents\skripzii\data\test_ready\test\labels.cache... 18 images, 0 backgrounds, 0 corrupt: 100%|██████████| 18/18 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:04<00:00,  2.30s/it]


                   all         18        104      0.917      0.131      0.148     0.0273
               Bicycle         12         21      0.669      0.524      0.491     0.0991
                  Boat          6         12          1          0     0.0997    0.00997
                Bottle          6         18          1          0          0          0
                   Bus         11         53          1          0          0          0
Speed: 2.1ms preprocess, 228.9ms inference, 0.0ms loss, 1.6ms postprocess per image
Results saved to [1mruns\detect\val9[0m

=== mAP Metrics ===
mAP@0.5:        0.1477
mAP@0.5:0.95:   0.0273
