In [1]:
!python -m pip install  torchvision pillow numpy opencv-python loguru pydantic pandas

[0m

In [3]:
import time

import torch
from torchvision.models.detection import RetinaNet, retinanet_resnet50_fpn_v2
from pydantic import BaseModel
from typing import Protocol, TypeAlias, TypedDict
from numpy import ndarray
from loguru import logger

from utils.video import read_video, read_vid_batch


In [4]:
# Pretrained-weights selector handed to retinanet_resnet50_fpn_v2 in the next cell.
weights = "DEFAULT"
# Backend name handed to torch.compile in the next cell.
backend = "inductor"
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [5]:
# Build RetinaNet (ResNet-50 FPN v2) with the configured weights and move it to `device`.
model = retinanet_resnet50_fpn_v2(weights=weights).to(device)
# Wrap the model with torch.compile using the configured backend.
model = torch.compile(model, backend=backend)
# Switch to evaluation mode; eval() is the last expression, so the cell displays the module repr.
model.eval()

OptimizedModule(
  (_orig_mod): RetinaNet(
    (backbone): BackboneWithFPN(
      (body): IntermediateLayerGetter(
        (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
        (layer1): Sequential(
          (0): Bottleneck(
            (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (bn3): BatchNorm2d(256, eps=1e-05,

In [6]:
from utils.protocols import Detection


# Type aliases for frame indices and bounding boxes.
FrameId: TypeAlias = int
BoundingBox: TypeAlias = list[float]

# NOTE(review): neither constant is referenced in the cells shown here; presumably
# they belong to the post-processing step that is commented out in detect() — confirm.
SCORE_THRESHOLD: float = 0.5
PERSON_LABEL: int = 1
    


def pre_process_image(image_array: ndarray) -> torch.Tensor:
    """Turn one frame into a model-ready batch tensor on ``device``.

    Args:
        image_array: Frame as a NumPy array with three axes; the last axis is
            moved to the front (presumably HWC -> CHW — confirm against
            ``read_video``).

    Returns:
        Tensor on ``device`` whose values are divided by 255 and which has a
        leading batch axis of size 1.
    """
    image = image_array.transpose((-1, 0, 1))
    image = torch.from_numpy(image)
    # Transfer before dividing so the division runs on the target device.
    image = image.to(device)
    image = image / 255
    image = image.unsqueeze(dim=0)
    return image

def detect(image_array: ndarray) -> list[Detection]:
    """Run the global ``model`` on a single frame and return its converted detections.

    The frame is pre-processed into a one-image batch, passed through the
    model with gradient tracking disabled, and the first (only) entry of the
    model output is handed to ``convert_model_detection``.
    """
    batch = pre_process_image(image_array)
    with torch.no_grad():
        raw_outputs = model(batch)
        # The batch holds a single image, so only element 0 is converted.
        return convert_model_detection(raw_outputs[0])



In [7]:

from utils.protocols import Retina
from datetime import datetime
from utils.utils import convert_model_detection, get_gpu_name, get_file_name
import torch

# Identifier passed to get_file_name() in process_video and embedded in the exported CSV file name.
MODEL="retina-net"
# Base directory argument passed to get_file_name() in process_video.
BASE_DIR="experiments/retina"

def process_video(vid_file, frames=None):
    """Run detection on every frame of a video and record the run as a ``Retina`` experiment.

    Args:
        vid_file: Path of the video, forwarded to ``read_video``.
        frames: Optional collection of frame ids. When non-empty, its length is
            reported as ``n_frames`` and it is stored on the experiment.
            NOTE(review): it is not used to select which frames are read —
            confirm whether ``read_video`` should receive it.

    Returns:
        The ``Retina`` experiment after it has been saved and logged.
    """
    start_time = datetime.now().isoformat()
    # The epoch timestamp is only used to name the result file; the duration is
    # measured with a monotonic clock so system clock adjustments cannot skew it.
    start = time.time()
    tick = time.perf_counter()
    results = {}
    # Sentinel so an empty video yields n_frames == 0 instead of a NameError.
    frame_id = -1
    for frame_id, frame in read_video(vid_file):
        results[frame_id] = detect(frame)
    elapsed = time.perf_counter() - tick
    end_time = datetime.now().isoformat()
    n_frames = len(frames) if frames else frame_id + 1

    exp = Retina(
        model="retinanet",
        gpu=get_gpu_name(),
        filename=get_file_name(BASE_DIR, start, MODEL, vid_file),
        file=vid_file,
        frames=frames if frames else None,
        n_frames=n_frames,
        processing_time=elapsed,
        # Guard against a zero-length interval on coarse timers.
        fps=n_frames / elapsed if elapsed > 0 else 0.0,
        data=results,
        start_time=start_time,
        end_time=end_time
    )
    exp.save()
    exp.log()
    # Release cached GPU memory between runs.
    torch.cuda.empty_cache()
    return exp


In [8]:
# Benchmark both clips five times, alternating between them, and keep every experiment.
# The first passes are much slower in the logged output of this cell (presumably
# torch.compile warm-up), so they should not be read as steady-state throughput.
results = []
for i in range(5):
    vid_hd = process_video("data/720.mp4")
    results.append(vid_hd)
    vid_fhd = process_video("data/1080.mp4")
    results.append(vid_fhd)

[32m2024-06-11 12:40:03.664[0m | [1mINFO    [0m | [36mutils.protocols[0m:[36mlog[0m:[36m64[0m - [1mdata/720.mp4 | frames=283 | delta=40.57912349700928 | fps=6.974029392745691[0m
[32m2024-06-11 12:40:33.349[0m | [1mINFO    [0m | [36mutils.protocols[0m:[36mlog[0m:[36m64[0m - [1mdata/1080.mp4 | frames=283 | delta=28.730128526687622 | fps=9.850286598513447[0m
[32m2024-06-11 12:40:43.633[0m | [1mINFO    [0m | [36mutils.protocols[0m:[36mlog[0m:[36m64[0m - [1mdata/720.mp4 | frames=283 | delta=9.305566310882568 | fps=30.41190514853893[0m
[32m2024-06-11 12:40:51.074[0m | [1mINFO    [0m | [36mutils.protocols[0m:[36mlog[0m:[36m64[0m - [1mdata/1080.mp4 | frames=283 | delta=6.501656532287598 | fps=43.527368539787645[0m
[32m2024-06-11 12:40:58.398[0m | [1mINFO    [0m | [36mutils.protocols[0m:[36mlog[0m:[36m64[0m - [1mdata/720.mp4 | frames=283 | delta=6.348702430725098 | fps=44.57603787356561[0m
[32m2024-06-11 12:41:05.949[0m | [1mINFO   

In [9]:
# Column names are taken from the first experiment; each experiment contributes one row.
columns = results[0].columns
rows = [result.row for result in results]

In [10]:
import pandas as pd

# One row per benchmark run; head(100) exceeds the number of runs, so every row is displayed.
df = pd.DataFrame(rows, columns=columns)
df.head(100)

Unnamed: 0,model,gpu,file,batch_size,n_frames,processing_time,fps,start_time,end_time,result_file,data
0,retinanet,NVIDIA L40,data/720.mp4,1,283,40.579123,6.974029,2024-06-11T12:39:22.068273,2024-06-11T12:40:02.647409,experiments/retina/17181095620682838-retina-ne...,"{0: [box=[419.4405517578125, 96.95218658447266..."
1,retinanet,NVIDIA L40,data/1080.mp4,1,283,28.730129,9.850287,2024-06-11T12:40:03.671040,2024-06-11T12:40:32.401185,experiments/retina/17181096036710525-retina-ne...,"{0: [box=[638.3297729492188, 142.3682403564453..."
2,retinanet,NVIDIA L40,data/720.mp4,1,283,9.305566,30.411905,2024-06-11T12:40:33.356546,2024-06-11T12:40:42.662134,experiments/retina/17181096333565607-retina-ne...,"{0: [box=[419.4794921875, 96.98332214355469, 6..."
3,retinanet,NVIDIA L40,data/1080.mp4,1,283,6.501657,43.527369,2024-06-11T12:40:43.640390,2024-06-11T12:40:50.142067,experiments/retina/17181096436404006-retina-ne...,"{0: [box=[638.3297729492188, 142.3682556152343..."
4,retinanet,NVIDIA L40,data/720.mp4,1,283,6.348702,44.576038,2024-06-11T12:40:51.082059,2024-06-11T12:40:57.430781,experiments/retina/17181096510820744-retina-ne...,"{0: [box=[419.4794921875, 96.98332214355469, 6..."
5,retinanet,NVIDIA L40,data/1080.mp4,1,283,6.601366,42.869918,2024-06-11T12:40:58.405432,2024-06-11T12:41:05.006814,experiments/retina/17181096584054437-retina-ne...,"{0: [box=[638.3297729492188, 142.3682556152343..."
6,retinanet,NVIDIA L40,data/720.mp4,1,283,6.51129,43.462972,2024-06-11T12:41:05.955898,2024-06-11T12:41:12.467209,experiments/retina/171810966595591-retina-net-...,"{0: [box=[419.4794921875, 96.98332214355469, 6..."
7,retinanet,NVIDIA L40,data/1080.mp4,1,283,6.058604,46.710432,2024-06-11T12:41:13.441636,2024-06-11T12:41:19.500263,experiments/retina/17181096734416528-retina-ne...,"{0: [box=[638.3297729492188, 142.3682556152343..."
8,retinanet,NVIDIA L40,data/720.mp4,1,283,6.545049,43.23879,2024-06-11T12:41:20.445067,2024-06-11T12:41:26.990136,experiments/retina/17181096804450834-retina-ne...,"{0: [box=[419.4794921875, 96.98332214355469, 6..."
9,retinanet,NVIDIA L40,data/1080.mp4,1,283,6.188423,45.730551,2024-06-11T12:41:28.006594,2024-06-11T12:41:34.195039,experiments/retina/17181096880066106-retina-ne...,"{0: [box=[638.3297729492188, 142.3682556152343..."


In [11]:
from datetime import datetime
now = datetime.now()
# `now.min` is the class constant datetime.min (0001-01-01 00:00:00), not the
# current minute; `now.minute` gives the intended day-hour-minute suffix and
# keeps spaces and colons out of the file name.
csv_file = f"__{MODEL}-{get_gpu_name().replace(' ','_')}-{now.day}-{now.hour}-{now.minute}.csv"
df.to_csv(csv_file)