# Baseline model implementation using the pretrained YOLOv8-small

This notebook demonstrates video loading and frame extraction. The extracted frames are passed to YOLOv8, and processed videos are written out with the cricket ball marked wherever the model detects it.

Installing open-cv for the image and video processing

In [1]:
!pip install opencv-python



creating a dataframe containing the basic details of all the videos

In [8]:
import cv2
import pandas as pd 

# Collect one record per raw video and build the DataFrame once at the end;
# growing a DataFrame with pd.concat inside the loop copies it on every pass.
records = []

for i in range(1, 15):
    video_path = f"../cricket-ball-tracker/data/raw_videos/{i}.mp4"

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Raise instead of calling exit(): exit() targets the interpreter /
            # kernel itself, whereas an exception stops just this cell and
            # names the file that failed.
            raise FileNotFoundError(f"Error: Could not open video: {video_path}")

        # Fetch properties (OpenCV reports all of them as floats)
        width  = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        fps    = cap.get(cv2.CAP_PROP_FPS)
        frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    finally:
        # Always free the decoder handle, even when the open check fails.
        cap.release()

    # Guard against a reported FPS of 0 to avoid a division by zero.
    duration = frames / fps if fps > 0 else 0
    records.append({"width": width, "height": height, "fps": fps, "frame_count": frames, "duration_sec": duration})

df = pd.DataFrame(records, columns=["width", "height", "fps", "frame_count", "duration_sec"])


### defining a function to preprocess the videos and convert them to same height, width and fps

In [None]:
import cv2

def preprocess_video(input_path, output_path, target_width=1920, target_height=1080, target_fps=40):
    """Re-encode a video at a fixed resolution and a fixed container frame rate.

    Every frame of ``input_path`` is resized to ``(target_width, target_height)``
    and written to ``output_path`` with the ``mp4v`` codec.

    NOTE: frames are copied one-to-one. ``target_fps`` only sets the playback
    rate stored in the output file; no frames are dropped or duplicated. The
    frame count is therefore preserved, but the duration changes whenever the
    source frame rate differs from ``target_fps``.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the video to write (parent folder is created).
        target_width: Output frame width in pixels.
        target_height: Output frame height in pixels.
        target_fps: Frame rate written to the output file.

    Raises:
        IOError: If the input video or the output writer cannot be opened.
    """
    import os  # local import: the defining cell only imports cv2

    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open input video: {input_path}")

    # VideoWriter does not create missing folders; make sure the target exists.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Use target FPS
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, target_fps, (target_width, target_height))
    if not out.isOpened():
        cap.release()
        out.release()
        raise IOError(f"Could not open output video for writing: {output_path}")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Resize to target resolution
            frame = cv2.resize(frame, (target_width, target_height))

            out.write(frame)
    finally:
        # Release both handles even if resizing/writing fails part-way.
        cap.release()
        out.release()

    print("Preprocessing complete! Saved to:", output_path)




### running the preprocessing function on all the raw videos to convert them to the same height, width and fps

In [13]:
# Normalise every raw clip (1.mp4 ... 14.mp4) to 1920x1080 at 40 fps.
for i in range(1, 15):
    raw_video = f"../cricket-ball-tracker/data/raw_videos/{i}.mp4"
    processed_video = f"../cricket-ball-tracker/data/processed_videos/{i}.mp4"
    preprocess_video(
        raw_video,
        processed_video,
        target_width=1920,
        target_height=1080,
        target_fps=40,
    )

Preprocessing complete! Saved to: ../cricket-ball-tracker/data/processed_videos/1.mp4
Preprocessing complete! Saved to: ../cricket-ball-tracker/data/processed_videos/2.mp4
Preprocessing complete! Saved to: ../cricket-ball-tracker/data/processed_videos/3.mp4
Preprocessing complete! Saved to: ../cricket-ball-tracker/data/processed_videos/4.mp4
Preprocessing complete! Saved to: ../cricket-ball-tracker/data/processed_videos/5.mp4
Preprocessing complete! Saved to: ../cricket-ball-tracker/data/processed_videos/6.mp4
Preprocessing complete! Saved to: ../cricket-ball-tracker/data/processed_videos/7.mp4
Preprocessing complete! Saved to: ../cricket-ball-tracker/data/processed_videos/8.mp4
Preprocessing complete! Saved to: ../cricket-ball-tracker/data/processed_videos/9.mp4
Preprocessing complete! Saved to: ../cricket-ball-tracker/data/processed_videos/10.mp4
Preprocessing complete! Saved to: ../cricket-ball-tracker/data/processed_videos/11.mp4
Preprocessing complete! Saved to: ../cricket-ball-tr

Creating a dataframe of the processed videos, which now all share the same width, height and fps

In [None]:
# Collect one record per processed video and build the DataFrame once at the
# end; growing a DataFrame with pd.concat inside the loop copies it each pass.
processed_records = []
for i in range(1, 15):
    video_path = f"../cricket-ball-tracker/data/processed_videos/{i}.mp4"

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Raise instead of calling exit(): exit() targets the interpreter /
            # kernel itself, whereas an exception stops just this cell and
            # names the file that failed.
            raise FileNotFoundError(f"Error: Could not open video: {video_path}")

        # Fetch properties (OpenCV reports all of them as floats)
        width  = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        fps    = cap.get(cv2.CAP_PROP_FPS)
        frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    finally:
        # Always free the decoder handle, even when the open check fails.
        cap.release()

    # Guard against a reported FPS of 0 to avoid a division by zero.
    duration = frames / fps if fps > 0 else 0
    processed_records.append({"width": width, "height": height, "fps": fps, "frame_count": frames, "duration_sec": duration})

df_processed = pd.DataFrame(processed_records, columns=["width", "height", "fps", "frame_count", "duration_sec"])

In [15]:
# Properties of the raw videos (width, height, fps, frame count, duration) before preprocessing.
df

Unnamed: 0,width,height,fps,frame_count,duration_sec
0,1920.0,1080.0,25.0,31.0,1.24
1,2560.0,1600.0,57.077922,293.0,5.133333
2,2560.0,1600.0,56.956522,262.0,4.6
3,2560.0,1600.0,56.382979,265.0,4.7
4,2560.0,1600.0,57.411765,244.0,4.25
5,2560.0,1440.0,38.201058,361.0,9.45
6,2560.0,1416.0,43.404255,238.0,5.483333
7,2560.0,1440.0,42.222222,209.0,4.95
8,2560.0,1440.0,40.942928,275.0,6.716667
9,2560.0,1440.0,42.526316,202.0,4.75


In [16]:
# Properties of the processed videos; width, height and fps should now be identical across rows.
df_processed 

Unnamed: 0,width,height,fps,frame_count,duration_sec
0,1920.0,1080.0,40.0,31.0,0.775
1,1920.0,1080.0,40.0,293.0,7.325
2,1920.0,1080.0,40.0,262.0,6.55
3,1920.0,1080.0,40.0,265.0,6.625
4,1920.0,1080.0,40.0,243.0,6.075
5,1920.0,1080.0,40.0,361.0,9.025
6,1920.0,1080.0,40.0,237.0,5.925
7,1920.0,1080.0,40.0,209.0,5.225
8,1920.0,1080.0,40.0,274.0,6.85
9,1920.0,1080.0,40.0,201.0,5.025


## Frame Extraction

In [None]:
import cv2
import os

def extract_frames(video_path, output_folder):
    """Dump every frame of a video to ``output_folder`` as numbered JPEG files.

    Frames are saved as ``frame_000000.jpg``, ``frame_000001.jpg``, ... The
    zero-padded index keeps lexicographic file order equal to frame order.
    Files already present in ``output_folder`` are not removed, only
    overwritten when their names collide.

    Args:
        video_path: Path of the video to read.
        output_folder: Folder that receives the JPEG frames (created if missing).

    Returns:
        The number of frames written.

    Raises:
        IOError: If the video cannot be opened or a frame cannot be written.
    """
    os.makedirs(output_folder, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video: {video_path}")

    frame_index = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Save
            filename = os.path.join(output_folder, f"frame_{frame_index:06d}.jpg")
            # imwrite reports failure through its return value, not an exception.
            if not cv2.imwrite(filename, frame):
                raise IOError(f"Could not write frame: {filename}")

            frame_index += 1
    finally:
        cap.release()

    # The original message was a plain string containing "{i}", a name that is
    # not in scope here; report the actual file name instead.
    print(f"Frames extracted for {os.path.basename(video_path)}:", frame_index)
    return frame_index




In [20]:
# Split each processed clip (1.mp4 ... 14.mp4) into its own folder of JPEG frames.
for i in range(1, 15):
    processed_video = f"../cricket-ball-tracker/data/processed_videos/{i}.mp4"
    frames_dir = f"../cricket-ball-tracker/data/frames/video_{i}"
    extract_frames(processed_video, frames_dir)

Frames extracted: 31
Frames extracted: 293
Frames extracted: 262
Frames extracted: 265
Frames extracted: 243
Frames extracted: 361
Frames extracted: 237
Frames extracted: 209
Frames extracted: 274
Frames extracted: 201
Frames extracted: 177
Frames extracted: 138
Frames extracted: 178
Frames extracted: 196


# installing ultralytics for YOLO

In [21]:
!pip install ultralytics


Collecting ultralytics
  Downloading ultralytics-8.3.233-py3-none-any.whl.metadata (37 kB)
Collecting matplotlib>=3.3.0 (from ultralytics)
  Downloading matplotlib-3.10.7-cp310-cp310-win_amd64.whl.metadata (11 kB)
Collecting pillow>=7.1.2 (from ultralytics)
  Downloading pillow-12.0.0-cp310-cp310-win_amd64.whl.metadata (9.0 kB)
Collecting pyyaml>=5.3.1 (from ultralytics)
  Downloading pyyaml-6.0.3-cp310-cp310-win_amd64.whl.metadata (2.4 kB)
Collecting torch>=1.8.0 (from ultralytics)
  Downloading torch-2.9.1-cp310-cp310-win_amd64.whl.metadata (30 kB)
Collecting torchvision>=0.9.0 (from ultralytics)
  Downloading torchvision-0.24.1-cp310-cp310-win_amd64.whl.metadata (5.9 kB)
Collecting polars>=0.20.0 (from ultralytics)
  Downloading polars-1.35.2-py3-none-any.whl.metadata (10 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Collecting contourpy>=1.0.1 (from matplotlib>=3.3.0->ultralytics)
  Downloading con

# defining a function to detect the ball
The pretrained YOLOv8 model predicts the 80 COCO classes; of these,
`sports ball` is class 32, which is the class used here to detect the cricket ball.

In [22]:
from ultralytics import YOLO
# Module-level detector shared by detect(); the weights file is downloaded on first use if it is not present locally.
model = YOLO("yolov8s.pt")   # pre-trained YOLOv8-small model

def detect(frame, conf_threshold=0.25, target_class=32):
    """Run the module-level YOLO ``model`` on one frame and keep one class.

    Args:
        frame: Image to run detection on (as returned by ``cv2.imread``).
            ``None`` is treated as "nothing detected".
        conf_threshold: Detections must score strictly above this value.
        target_class: Class id to keep; 32 is ``sports ball`` in COCO.

    Returns:
        ``(bboxes, confs)`` where ``bboxes`` is a list of
        ``[x1, y1, x2, y2]`` integer pixel boxes and ``confs`` the matching
        confidence scores, in the order the model returned them.
    """
    # cv2.imread returns None for unreadable files. Do not forward that to
    # predict(): NOTE(review) Ultralytics appears to fall back to its bundled
    # sample images when the source is None, which would yield detections that
    # have nothing to do with this video - confirm for the installed version.
    if frame is None:
        return [], []

    # Forward the threshold so values below the predictor's own default
    # confidence cut-off still take effect.
    results = model.predict(frame, conf=conf_threshold, verbose=False)[0]

    bboxes = []
    confs = []

    for box in results.boxes:
        cls = int(box.cls)
        conf = float(box.conf)

        if cls == target_class and conf > conf_threshold:
            x1, y1, x2, y2 = box.xyxy[0].tolist()
            bboxes.append([int(x1), int(y1), int(x2), int(y2)])
            confs.append(conf)

    return bboxes, confs


Creating new Ultralytics Settings v0.0.6 file  
View Ultralytics Settings with 'yolo settings' or at 'C:\Users\KRISHNA\AppData\Roaming\Ultralytics\settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt to 'yolov8s.pt': 100% ━━━━━━━━━━━━ 21.5MB 5.9MB/s 3.7s3.6s<0.1s


defined a simple Tracker class

In [23]:
import numpy as np
class SimpleTracker:
    """Minimal nearest-centroid tracker.

    Each track stores its last known centroid and an ``age`` counter (number
    of ``update`` calls since it was last matched, plus one). Detections are
    matched to tracks by Euclidean distance; unmatched detections start new
    tracks and tracks whose age exceeds ``max_age`` are dropped.

    Args:
        max_age: A track is removed once its age grows beyond this value.
        max_distance: A detection can only be matched to a track when their
            distance is strictly below this value (pixels).
    """

    def __init__(self, max_age=10, max_distance=50):
        self.next_id = 0
        self.tracks = {}
        self.max_age = max_age
        self.max_distance = max_distance

    def update(self, detections):
        """Match ``detections`` to tracks and return the current track list.

        Args:
            detections: Sequence of ``(x, y)`` centroids for the current frame.

        Returns:
            A list of ``(track_id, centroid)`` for every live track, including
            tracks that were not matched in this call but have not expired.
        """
        detections = [tuple(det) for det in detections]
        assigned = {}  # detection index -> track id

        if len(self.tracks) > 0 and len(detections) > 0:
            track_ids = list(self.tracks.keys())
            track_points = np.array([self.tracks[t]['centroid'] for t in track_ids], dtype=float)
            det_points = np.array(detections, dtype=float)

            # Euclidean distance between every track and every detection
            cost = np.linalg.norm(track_points[:, None, :] - det_points[None, :, :], axis=2)

            # Walk the pairs from closest to farthest and accept a pair only if
            # both its track and its detection are still free. Letting every
            # track pick its own nearest detection independently would allow
            # several tracks to collapse onto the same detection.
            matched_tracks = set()
            n_dets = cost.shape[1]
            for flat_index in np.argsort(cost, axis=None, kind="stable"):
                row, col = divmod(int(flat_index), n_dets)
                if cost[row, col] >= self.max_distance:
                    break  # all remaining pairs are at least this far apart
                if row in matched_tracks or col in assigned:
                    continue
                tid = track_ids[row]
                # Store the caller's own tuple (not a NumPy row) so centroids
                # keep the element types that were passed in.
                self.tracks[tid]['centroid'] = detections[col]
                self.tracks[tid]['age'] = 0
                matched_tracks.add(row)
                assigned[col] = tid

        # Create new track for unassigned detections
        for det_index, det in enumerate(detections):
            if det_index not in assigned:
                tid = self.next_id
                self.next_id += 1
                self.tracks[tid] = {'centroid': det, 'age': 0}

        # Age every track (matched ones restart from 0) and remove stale ones
        for tid in list(self.tracks.keys()):
            self.tracks[tid]['age'] += 1
            if self.tracks[tid]['age'] > self.max_age:
                del self.tracks[tid]

        return [(tid, data['centroid']) for tid, data in self.tracks.items()]


In [24]:
import cv2
import os
import csv
import numpy as np
from collections import deque

def process_frames(
    frames_folder,
    output_video_path,
    output_csv_path,
    fps=30
):
    """Detect and track the ball over a folder of frames.

    Reads every ``.jpg`` in ``frames_folder`` in sorted name order, runs
    ``detect`` on each frame, feeds the box centroids to a ``SimpleTracker``
    and writes two outputs:

    * an annotated video (track markers, track ids and the trajectory so far);
    * a CSV with one row per frame:
      ``frame_index, x_centroid, y_centroid, visibility_flag``. Frames without
      a detection are written as ``0, 0, 0``.

    Args:
        frames_folder: Folder containing the ``.jpg`` frames.
        output_video_path: Path of the annotated video to write.
        output_csv_path: Path of the CSV to write.
        fps: Frame rate written to the output video.

    Raises:
        IOError: If the first frame cannot be read or the video writer cannot
            be opened.
    """
    # Plain name sort; this matches frame order as long as the file names are
    # zero-padded (e.g. frame_000012.jpg).
    frame_files = sorted(
        f for f in os.listdir(frames_folder) if f.endswith(".jpg")
    )

    if len(frame_files) == 0:
        print("ERROR: No frames found in folder:", frames_folder)
        return

    # Read first frame to get size
    first_frame_path = os.path.join(frames_folder, frame_files[0])
    first_frame = cv2.imread(first_frame_path)
    if first_frame is None:
        raise IOError(f"Could not read frame: {first_frame_path}")
    H, W = first_frame.shape[:2]

    # Neither VideoWriter nor open() creates missing folders.
    for target_path in (output_video_path, output_csv_path):
        parent = os.path.dirname(target_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (W, H))
    if not out.isOpened():
        out.release()
        raise IOError(f"Could not open output video for writing: {output_video_path}")

    # Tracker + trajectory
    tracker = SimpleTracker()
    trajectory = deque(maxlen=3000)

    try:
        # The with-block closes the CSV even if detection or drawing fails.
        with open(output_csv_path, "w", newline="") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(["frame_index", "x_centroid", "y_centroid", "visibility_flag"])

            for frame_idx, fname in enumerate(frame_files):
                frame = cv2.imread(os.path.join(frames_folder, fname))
                if frame is None:
                    # Keep the CSV aligned with the frame numbering, but there
                    # is no image to annotate or add to the video.
                    print("WARNING: Skipping unreadable frame:", fname)
                    writer.writerow([frame_idx, 0, 0, 0])
                    continue

                # Detect
                bboxes, confs = detect(frame)

                centroids = [
                    (int((x1 + x2) / 2), int((y1 + y2) / 2))
                    for (x1, y1, x2, y2) in bboxes
                ]

                # Update tracker
                tracks = tracker.update(centroids)

                # Write CSV
                if len(centroids) == 0:
                    writer.writerow([frame_idx, 0, 0, 0])
                else:
                    # Report the most confident detection; on ties this is the
                    # first one, i.e. the detection the original code used.
                    best = max(range(len(confs)), key=confs.__getitem__)
                    cx, cy = centroids[best]
                    writer.writerow([frame_idx, cx, cy, 1])
                    trajectory.append((cx, cy))

                # Draw every live track; coordinates are coerced to plain ints
                # because the tracker may hand back NumPy scalars.
                for tid, (cx, cy) in tracks:
                    px, py = int(cx), int(cy)
                    cv2.circle(frame, (px, py), 8, (0, 0, 255), -1)
                    cv2.putText(frame, f"{tid}", (px + 10, py),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1)

                # Draw trajectory
                for i in range(1, len(trajectory)):
                    cv2.line(frame, trajectory[i-1], trajectory[i], (0, 255, 0), 2)

                out.write(frame)
    finally:
        # Finalise the video file even when processing stops early.
        out.release()

    print("Processing complete!")
    print("Video saved at:", output_video_path)
    print("CSV saved at:", output_csv_path)


This cell writes the annotated output videos and the per-frame CSV files

In [None]:
# Run detection + tracking on the extracted frames of every video.
for i in range(1, 15):
    process_frames(
        frames_folder=f"../cricket-ball-tracker/data/frames/video_{i}",
        output_video_path=f"../cricket-ball-tracker/data/output_videos/output_{i}.mp4",
        output_csv_path=f"../cricket-ball-tracker/data/output_csvs/output_{i}.csv",
        # The frames come from the processed videos, which were written at
        # 40 fps; writing the output at 30 fps would play it back slower.
        fps=40,
    )

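### Explanation
The cell above performs the following operations for each of the 14 videos:

- reads the extracted frames from `data/frames/video_{i}`,
- runs the YOLOv8 detector on every frame and tracks the detected ball centroids with `SimpleTracker`,
- writes an annotated video to `data/output_videos/output_{i}.mp4` and a per-frame CSV (`frame_index`, `x_centroid`, `y_centroid`, `visibility_flag`) to `data/output_csvs/output_{i}.csv`.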