In [10]:
import cv2 
import time
import torch
import tqdm 
from custom_utils import * 
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F

device = torch.device('cuda:4' if torch.cuda.is_available() else 'cpu')

video_name = 'after_detection'
input_video = '/home/yoojinoh/Others/PR/PedDetect-Data/2954065-hd_1920_1080_30fps.mp4'
output_video = f'/home/yoojinoh/Others/PR/PedDetect-Data/{video_name}'

In [11]:
def build_model(num_classes):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 
    return model

def load_model(checkpoint_path, num_classes, device):
    model = build_model(num_classes)
    model.load_state_dict(torch.load(checkpoint_path, map_location=device)['model_state_dict'])
    model.to(device)

    model.eval() # Eval mode

    return model

def process_frame(frame, model, device, iou_thresh=0.3, confidence_threshold=0.5):
    image = F.to_tensor(frame).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(image)[0]

    output = apply_nms(output, iou_thresh)
    output = filter_boxes_by_score(output, confidence_threshold)

    boxes = output["boxes"].cpu().numpy()
    labels = output["labels"].cpu().numpy()
    scores = output["scores"].cpu().numpy()

    return boxes, labels, scores



In [12]:
def check_label(label):
    if label == 1:
            class_name = 'person'
    else:
        class_name = ''
    return class_name

def draw_boxes_on_frame(frame, boxes, labels, scores, thr = 0.5, save_path = None):
#    image = cv2.imread(image_path) 
    for box, label, score in zip(boxes, labels, scores):
        print(score)
        if score >= thr:
            x1, y1, x2, y2 = map(int, box) # box.astype(int)
            class_name = check_label(label)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
            cv2.putText(frame, f'{class_name} : {score:.2f}', (x1, y1- 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
    return frame


In [13]:
def detect_frame(model, img_frame, confidence_threshold, score_threshold):
    boxes, labels, scores = process_frame(img_frame, model, device, confidence_threshold=confidence_threshold)
    img_frame = draw_boxes_on_frame(img_frame, boxes, labels, scores, threshold=score_threshold)
    return img_frame

def detect_video(model, input_path, output_path, confidence_threshold=0.5, score_threshold=0.5):
    # Set VideoCapture, VideoWriter
    cap = cv2.VideoCapture(input_path)
    codec = cv2.VideoWriter_fourcc(*'XVID') # format of video writer
    video_size = (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))) 
    video_fps = cap.get(cv2.CAP_PROP_FPS)

    video_writer = cv2.VideoWriter(output_video, codec, video_fps, video_size)
    frame_cnt = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print(f'Total number of frame : {frame_cnt}')

    while True:
        hasFrame, img_frame = cap.read()
        if not hasFrame:
            print(f'Processed all frames')
            break 
            
        img_frame = detect_frame(model, img_frame, confidence_threshold, score_threshold)
        video_writer.write(img_frame)
    
    video_writer.realse()
    cap.release()


In [14]:
model_path = '/home/yoojinoh/Others/PR/ATRIDA_prom5_AIproject/Pedestrian-Detection/outputs/best_fasterrcnn_e6s0.7686803706057437l0.10222071961704958.pth'
num_classes = 2 

model = load_model(model_path, num_classes, device)
detect_video(model, input_video, output_video, device)



Total number of frame : 884


[ WARN:0@973.383] global cap.cpp:643 open VIDEOIO(CV_IMAGES): raised OpenCV exception:

OpenCV(4.10.0) /io/opencv/modules/videoio/src/cap_images.cpp:430: error: (-215:Assertion failed) !filename_pattern.empty() in function 'open'




TypeError: '>' not supported between instances of 'Tensor' and 'torch.device'