In [1]:
!python -m pip install -q torchvision
!python -m pip install -q pillow
!python -m pip install -q numpy
!python -m pip install -q pandas
!python -m pip install -q opencv-python
!python -m pip install -q loguru
!python -m pip install -q pydantic

In [5]:
import time

import torch
from torchvision.models.detection import RetinaNet, retinanet_resnet50_fpn_v2
from pydantic import BaseModel
from typing import Protocol, TypeAlias, TypedDict
from numpy import ndarray
from loguru import logger

from utils.video import read_video, read_vid_batch


In [6]:
weights = "DEFAULT"
backend = "inductor"
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [7]:
model = retinanet_resnet50_fpn_v2(weights=weights).to(device)
model = torch.compile(model, backend=backend)
model.eval()

OptimizedModule(
  (_orig_mod): RetinaNet(
    (backbone): BackboneWithFPN(
      (body): IntermediateLayerGetter(
        (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
        (layer1): Sequential(
          (0): Bottleneck(
            (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (bn3): BatchNorm2d(256, eps=1e-05,

In [8]:
from utils.protocols import Detection
from utils.utils import convert_model_detection
    

def pre_process_image(image_array: ndarray) -> torch.Tensor:
    image = image_array.transpose((-1, 0, 1))
    image = torch.from_numpy(image)
    image = image.to(device)
    image = image / 255
    image = image.unsqueeze(axis=0)
    return image


def detect(image_array: ndarray) -> list[Detection]:
    image = pre_process_image(image_array)
    with torch.no_grad():
        detections = model(image)
        detections = convert_model_detection(detections[0])
        return detections



In [9]:

from utils.protocols import Retina
from datetime import datetime
from utils.utils import convert_model_detection, get_gpu_name, get_file_name
import torch

MODEL="retina-net"
BASE_DIR="experiments/retina"

def run_retina_net(frames):
    results = {}
    for frame_id, frame in frames.items():
        with torch.no_grad():
            outputs = model(frame)
            results[frame_id] = outputs[0]
    return results

def process_video(vid_file, frames=[]):
    start_time = datetime.now()
    
    frames_ = {}
    start = time.time()
    for frame_id, frame in read_video(vid_file):
        frames_[frame_id] = pre_process_image(frame)

    pre_processing_time = time.time() - start

    n_frames = len(frames_)
    logger.info(f"nframes = {n_frames}")
    
    start = time.time()
    outputs = run_retina_net(frames_)
    inference_time = time.time() - start
    
    results = {}
    start = time.time()
    for frame_id, output in outputs.items():    
        results[frame_id] = convert_model_detection(output)
    post_processing_time = time.time() - start

    end_time = datetime.now()
    exp =  Retina(
        model=MODEL,
        gpu=get_gpu_name(),
        video_file=vid_file,
        frames=None if not frames else frames,
        n_frames=n_frames,
        
        pre_processing_time=pre_processing_time,
        inference_time=inference_time,
        post_processing_time=post_processing_time,
        video_processing_time=(end_time-start_time).seconds,
       
        start_time=start_time.isoformat(),
        end_time=end_time.isoformat(),
        record_file=get_file_name(BASE_DIR, start, MODEL, vid_file),
        
        data=results,        
    )
    exp.save()
    exp.log()
    torch.cuda.empty_cache()
    return exp


In [10]:
results = []
for i in range(5):
    vid_hd = process_video("data/720.mp4")
    results.append(vid_hd)
    vid_fhd = process_video("data/1080.mp4")
    results.append(vid_fhd)

[32m2024-06-12 00:53:42.439[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_video[0m:[36m30[0m - [1mnframes = 3[0m
[32m2024-06-12 00:54:23.885[0m | [1mINFO    [0m | [36mutils.protocols[0m:[36mlog[0m:[36m113[0m - [1mdata/720.mp4 | frames=3 | model_fps=0.07241458026752826 | inference_time=41.4281210899353 | preprocess_time=1.323056936264038[0m
[32m2024-06-12 00:54:27.090[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_video[0m:[36m30[0m - [1mnframes = 3[0m
[32m2024-06-12 00:55:07.393[0m | [1mINFO    [0m | [36mutils.protocols[0m:[36mlog[0m:[36m113[0m - [1mdata/1080.mp4 | frames=3 | model_fps=0.07446268300776261 | inference_time=40.288636922836304 | preprocess_time=2.794055938720703[0m
[32m2024-06-12 00:55:08.389[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_video[0m:[36m30[0m - [1mnframes = 3[0m
[32m2024-06-12 00:55:11.728[0m | [1mINFO    [0m | [36mutils.protocols[0m:[36mlog[0m:[36m113[0m - [1mdata/720.mp4 | fr

In [11]:
columns = results[0].columns
rows = [result.row for result in results]

In [12]:
import pandas as pd
df = pd.DataFrame(rows, columns=columns)
df.head(100)

Unnamed: 0,model,gpu,video_file,batch_size,n_frames,pre_processing_fps,inference_fps,post_processing_fps,video_fps,pre_processing_time,inference_time,post_processing_time,video_processing_time,start_time,end_time,record_file,data
0,retina-net,cpu,data/720.mp4,1,3,2.267476,0.072415,588.729331,0.071429,1.323057,41.428121,0.005096,42,2024-06-12T00:53:40.929685,2024-06-12T00:54:23.875040,experiments/retina/exp-retina-net-cpu-720.mp4-...,"{0: [box=[419.129150390625, 96.44957733154297,..."
1,retina-net,cpu,data/1080.mp4,1,3,1.073708,0.074463,958.478976,0.069767,2.794056,40.288637,0.00313,43,2024-06-12T00:54:23.887274,2024-06-12T00:55:07.383764,experiments/retina/exp-retina-net-cpu-1080.mp4...,"{0: [box=[640.0203857421875, 146.5866394042968..."
2,retina-net,cpu,data/720.mp4,1,3,3.276616,0.901504,1161.320904,0.75,0.915579,3.327771,0.002583,4,2024-06-12T00:55:07.397182,2024-06-12T00:55:11.721005,experiments/retina/exp-retina-net-cpu-720.mp4-...,"{0: [box=[419.1293029785156, 96.44957733154297..."
3,retina-net,cpu,data/1080.mp4,1,3,1.158893,0.877508,1012.464757,0.5,2.588678,3.418774,0.002963,6,2024-06-12T00:55:11.730021,2024-06-12T00:55:18.163033,experiments/retina/exp-retina-net-cpu-1080.mp4...,"{0: [box=[640.0203857421875, 146.5866394042968..."
4,retina-net,cpu,data/720.mp4,1,3,3.45147,0.903409,1221.997863,0.75,0.869195,3.320754,0.002455,4,2024-06-12T00:55:18.172756,2024-06-12T00:55:22.413838,experiments/retina/exp-retina-net-cpu-720.mp4-...,"{0: [box=[419.1293029785156, 96.44957733154297..."
5,retina-net,cpu,data/1080.mp4,1,3,1.138395,0.867528,1055.967774,0.5,2.635288,3.4581,0.002841,6,2024-06-12T00:55:22.422790,2024-06-12T00:55:28.942555,experiments/retina/exp-retina-net-cpu-1080.mp4...,"{0: [box=[640.0203857421875, 146.5866394042968..."
6,retina-net,cpu,data/720.mp4,1,3,3.445143,0.890307,1216.092781,0.75,0.870791,3.369626,0.002467,4,2024-06-12T00:55:28.951853,2024-06-12T00:55:33.242124,experiments/retina/exp-retina-net-cpu-720.mp4-...,"{0: [box=[419.1293029785156, 96.44957733154297..."
7,retina-net,cpu,data/1080.mp4,1,3,1.184184,0.892731,989.767325,0.5,2.533391,3.360475,0.003031,6,2024-06-12T00:55:33.251127,2024-06-12T00:55:39.554939,experiments/retina/exp-retina-net-cpu-1080.mp4...,"{0: [box=[640.0203857421875, 146.5866394042968..."
8,retina-net,cpu,data/720.mp4,1,3,3.496369,0.831782,1192.806143,0.75,0.858033,3.606714,0.002515,4,2024-06-12T00:55:39.564561,2024-06-12T00:55:44.105620,experiments/retina/exp-retina-net-cpu-720.mp4-...,"{0: [box=[419.1293029785156, 96.44957733154297..."
9,retina-net,cpu,data/1080.mp4,1,3,1.163623,0.720034,1314.279507,0.428571,2.578154,4.166468,0.002283,7,2024-06-12T00:55:44.166896,2024-06-12T00:55:51.331281,experiments/retina/exp-retina-net-cpu-1080.mp4...,"{0: [box=[640.0203857421875, 146.5866394042968..."


In [15]:
from datetime import datetime
now = datetime.now()
csv_file = f"zz-{MODEL}-{get_gpu_name().replace(' ','_')}-{now.day}-{now.hour}-{now.minute}.csv"
df.to_csv(csv_file)