# Extract files

In [None]:
import tarfile

# Path to the .tar.xz archive to unpack
file_path = "/content/challenge_videos.tar.xz"

# Open the .tar.xz file for reading; the context manager closes it afterwards
with tarfile.open(file_path, "r:xz") as tar:
    # Extract all contents of the archive to the current working directory.
    # Prefer the "data" extraction filter, which refuses members that would
    # be written outside the destination (absolute paths, "..", unsafe
    # links). `tarfile.data_filter` only exists on interpreters that support
    # the `filter` argument, so its presence is used as the feature check.
    if hasattr(tarfile, "data_filter"):
        tar.extractall(filter="data")
    else:
        # NOTE(review): this interpreter has no extraction filters, so the
        # archive is unpacked unfiltered -- only use with trusted archives.
        tar.extractall()

print(f"Successfully decompressed '{file_path}'")


In [None]:
!pip install ultralytics

# Create the detector object for evaluation

In [6]:
import cv2
import time
import torch
import os
import torchvision
from torchvision.models.detection import SSD300_VGG16_Weights
import torchvision.transforms as transforms
from ultralytics import YOLO
from tqdm import tqdm

class ObjectDetector:
    """Thin wrapper that exposes a detection model through ``predict``."""

    def __init__(self, model):
        """Keep a reference to ``model``, the callable used for inference."""
        self.model = model

    def predict(self, image):
        """Call the wrapped model on ``image`` and return its raw output.

        The model is invoked as ``model(image, verbose=False)``; the result
        is handed back without any post-processing.
        """
        return self.model(image, verbose=False)

# Function to evaluate model speed on a video
def evaluate_model_speed(video_path, object_detector):
    """Measure a detector's average per-frame inference time on one video.

    Every frame is resized to 640x640, converted from BGR to RGB, turned into
    a batched tensor via ``transforms.ToTensor`` and handed to
    ``object_detector.predict``. Only the ``predict`` call is timed; decoding
    and preprocessing are excluded from the measurement.

    Args:
        video_path: Path of the video file, passed to ``cv2.VideoCapture``.
        object_detector: Object exposing ``predict(image)``.

    Returns:
        ``[avg_inference_time, avg_fps]``: mean seconds per processed frame
        and its reciprocal.

    Raises:
        ValueError: If the video cannot be opened or no frame can be read.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {video_path!r}")

        # Frame count as reported by the container metadata; it bounds the
        # loop below but is not trusted for the average.
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print(frame_count)

        detector = object_detector
        transform = transforms.Compose([transforms.ToTensor()])
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        use_cuda = device.type == 'cuda'

        total_time = 0.0
        processed_frames = 0
        for _ in range(frame_count):
            ret, frame = cap.read()
            if not ret:
                # Decoding stopped before the reported frame count.
                break

            frame = cv2.resize(frame, (640, 640))
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Transform the image to a tensor and add a batch dimension
            image = transform(rgb_frame).to(device).unsqueeze(0)

            # CUDA work may be queued asynchronously, so synchronise on both
            # sides of the timed region to attribute GPU time to this frame.
            if use_cuda:
                torch.cuda.synchronize()
            start_time = time.perf_counter()
            detector.predict(image)
            if use_cuda:
                torch.cuda.synchronize()
            total_time += time.perf_counter() - start_time
            processed_frames += 1
    finally:
        # Release the capture even if preprocessing or inference raised.
        cap.release()

    if processed_frames == 0:
        raise ValueError(f"No frames could be read from video: {video_path!r}")

    # Average over the frames actually processed, not the metadata count,
    # so a truncated video does not deflate the per-frame time.
    avg_inference_time = total_time / processed_frames
    avg_fps = 1 / avg_inference_time if avg_inference_time > 0 else float('inf')

    print(f"Video: {video_path}")
    print(f"Average Inference Time per Frame: {avg_inference_time:.4f} seconds")
    print(f"Average FPS: {avg_fps:.2f}")
    print()

    return [avg_inference_time, avg_fps]



# Comparison of the YOLOv5 and YOLOv8 models

In [None]:
# Directory holding the extracted benchmark videos
video_dir = "challenge_videos"

# os.listdir returns entries in arbitrary order; sorting fixes the order so
# the per-video result lists line up between models and between runs.
video_paths = sorted(os.listdir(video_dir))

# Detection models to benchmark, keyed by the name used in `results`
models = {
    'yolov5': YOLO('yolov5su.pt'),
    'yolov8': YOLO('yolov8s.pt'),
}
# NOTE: an SSD300-VGG16 entry ('ssd') was sketched here earlier but is
# disabled; only the two YOLO models are evaluated.

# results[name] = [per-video average inference times, per-video average FPS]
results = {}
for model, loaded_model in models.items():
    print(f"Inference with {model}")
    detector = ObjectDetector(loaded_model)
    inference = []
    avg_fps = []
    # Evaluate model speed on each video
    for video_path in tqdm(video_paths):
        path = os.path.join(video_dir, video_path)
        print(f'Inference for {video_path}')
        inf, fps = evaluate_model_speed(path, detector)
        inference.append(inf)
        avg_fps.append(fps)
    results[model] = [inference, avg_fps]


In [None]:
import numpy as np
# results[name] holds [per-video inference times, per-video FPS] for one
# model, with one entry per video. Both models were run on the same video
# list, so each mean is taken over the whole list; slicing the two models
# differently would compare different videos (or average an empty list).
for metric_index, metric_label in ((0, 'Inference average time'), (1, 'FPS')):
    for model_key, model_label in (('yolov5', 'YOLOv5'), ('yolov8', 'YOLOv8')):
        print(
            f'{metric_label} for test videos using {model_label}: ',
            np.mean(results[model_key][metric_index]),
        )