In [1]:
import sys, getopt, os
import subprocess

def extract_keyframes(inFile='video10.mp4', oString='video10/out'):
    """
    Extract the I-frames (keyframes) of a video as numbered PNG images using ffmpeg.

    :param inFile: path of the input video. Defaults to 'video10.mp4'.
    :param oString: output prefix; frames are written to ``oString + '%05d.png'``.
                    Defaults to 'video10/out'.
    :return: the exit status returned by ``subprocess.call`` for the ffmpeg command
             (0 means ffmpeg reported success).
    """
    print('Input file is ',inFile)
    print('oString is ',oString)

    # Make sure the target folder exists before ffmpeg tries to write into it;
    # otherwise a fresh checkout fails because 'video10/' is missing.
    outDir = os.path.dirname(oString)
    if outDir:
        os.makedirs(outDir, exist_ok=True)

    outFile = oString + '%05d.png'

    # select=... keeps only I-frames; '-vsync vfr' stops ffmpeg from duplicating
    # frames to fill the gaps left by the dropped ones.
    cmd = ['ffmpeg','-i', inFile,'-f', 'image2','-vf',
               "select='eq(pict_type,PICT_TYPE_I)'",'-vsync','vfr',outFile]
    print(cmd)

    returnCode = subprocess.call(cmd)
    if returnCode != 0:
        # Surface failures instead of silently continuing with an empty frame folder.
        print('ffmpeg exited with status', returnCode)
    return returnCode
extract_keyframes()

Input file is  video10.mp4
oString is  video10/out
['ffmpeg', '-i', 'video10.mp4', '-f', 'image2', '-vf', "select='eq(pict_type,PICT_TYPE_I)'", '-vsync', 'vfr', 'video10/out%05d.png']


In [2]:
"""
Code adapted from https://towardsdatascience.com/implementing-real-time-object-detection-system-using-pytorch-and-opencv-70bac41148f7
"""

# import required libraries
import torch
import numpy as np
import cv2
import csv
import sys
from time import time
import glob
import pandas as pd

class ObjectDetection:
    """
    Runs a YOLOv5 model (loaded through torch.hub) over a folder of image frames,
    saves the detections of every frame to an Excel file and previews the
    annotated frames in an OpenCV window.
    """


    def __init__(self, out_file, width, height, logging_interval,
                 frames_glob="video10/*.png", excel_prefix="video4"):
        """
        Loads the model and stores the run configuration.
        :param out_file: output file name; stored on the instance but not used by the methods below.
        :param width: frame width, stored as int (not used by the methods below).
        :param height: frame height, stored as int (not used by the methods below).
        :param logging_interval: stored as int (not used by the methods below).
        :param frames_glob: glob pattern of the frame images processed by __call__.
        :param excel_prefix: prefix of the per-frame Excel files; frame i is written to
                             excel_prefix + str(i) + '.xlsx'.
        """
        self.model = self.load_model()
        self.classes = self.model.names
        self.out_file = out_file
        self.device = 'cpu'
        self.width = int(width)
        self.height = int(height)
        self.logging_interval = int(logging_interval)
        self.frames_glob = frames_glob
        self.excel_prefix = excel_prefix

    def get_video_from_device(self):
        """
        Creates a new video capture object for the hard-coded file "test.mp4".
        Not used by __call__, which reads image files instead.
        :return: opencv2 video capture object.
        """
        return cv2.VideoCapture("test.mp4")

    def load_model(self):
        """
        Loads the pretrained YOLOv5s model from pytorch hub.
        :return: Trained Pytorch model.
        """
        model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
        return model

    def score_frame(self, frame, i):
        """
        Scores a single frame with the model and saves its detections to an Excel file.
        :param frame: input frame in numpy/list/tuple format.
        :param i: frame index, used to build the Excel file name.
        :return: Labels and Coordinates of objects detected by model in the frame.
        """
        self.model.to(self.device)
        # NOTE(review): frames come from cv2.imread (BGR); the YOLOv5 hub examples
        # convert to RGB before inference - confirm whether that is wanted here.
        results = self.model([frame])

        # NOTE(review): with the default prefix this writes 'video4<i>.xlsx' into the
        # working directory, not into a 'video4/' folder - pass
        # excel_prefix='video4/...' if a folder is intended.
        outString = self.excel_prefix + str(i) + '.xlsx'
        results.pandas().xyxy[0].to_excel(outString)

        # Each row of xyxyn[0]: normalised x1, y1, x2, y2, confidence, class index.
        # .cpu() is a no-op on CPU and keeps .numpy() valid if the device changes.
        detections = results.xyxyn[0].cpu()
        labels, cord = detections[:, -1].numpy(), detections[:, :-1].numpy()
        return labels, cord

    def class_to_label(self, x):
        """
        For a given label value, return corresponding string label.
        :param x: numeric label
        :return: corresponding string label
        """
        return self.classes[int(x)]

    def plot_boxes(self, results, frame):
        """
        Draws the bounding box and label of every detection with confidence >= 0.2 onto the frame.
        :param results: (labels, coordinates) as returned by score_frame.
        :param frame: Frame which has been scored; it is drawn on in place.
        :return: The annotated frame and the total number of detections (drawn or not).
        """
        labels, cord = results
        n = len(labels)
        x_shape, y_shape = frame.shape[1], frame.shape[0]
        bgr = (0, 255, 0)
        for i in range(n):
            row = cord[i]
            if row[4] >= 0.2:
                # Coordinates are normalised to [0, 1]; scale them to pixels.
                x1, y1, x2, y2 = int(row[0]*x_shape), int(row[1]*y_shape), int(row[2]*x_shape), int(row[3]*y_shape)
                cv2.rectangle(frame, (x1, y1), (x2, y2), bgr, 2)
                cv2.putText(frame, self.class_to_label(labels[i]), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.9, bgr, 2)

        return frame, n

    def __call__(self):
        """
        Scores every image matching frames_glob (in sorted order), saves its detections
        and shows the annotated frame. Press 'q' in the preview window to stop early.
        :return: void
        """
        print(self.device)
        filenames = sorted(glob.glob(self.frames_glob))

        try:
            # Read frames one at a time so memory use does not grow with the number of images.
            for i, filename in enumerate(filenames):
                frame = cv2.imread(filename)
                if frame is None:
                    # cv2.imread signals an unreadable file by returning None.
                    print('Skipping unreadable image', filename)
                    continue
                results = self.score_frame(frame, i)
                frame, _ = self.plot_boxes(results, frame)
                cv2.imshow('object_detect', frame) # view the annotated images
                if cv2.waitKey(10) & 0xFF == ord('q'): # close gracefully
                    break
        finally:
            # Always close the preview window: on 'q', after the last frame, or on error.
            cv2.destroyAllWindows()


In [3]:
# Build the detector (this loads the YOLOv5 model) and run it over the frames.
a = ObjectDetection(
    out_file="test1.avi",
    width=720,
    height=1024,
    logging_interval=1,
)
a()

Using cache found in C:\Users\91947/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2021-11-23 torch 1.10.0 CUDA:0 (NVIDIA GeForce MX450, 2048MiB)

Fusing layers... 
Model Summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


cpu


In [18]:
import glob
import re
import sys

import pandas as pd

# Build one row per frame holding the number of confident detections of each class,
# then save the table to 'final_video4.xlsx'.

def _natural_key(path):
    """Sort key comparing digit runs numerically, so '...10.xlsx' sorts after '...9.xlsx'."""
    return [int(part) if part.isdigit() else part for part in re.split(r'(\d+)', path)]


# A plain lexicographic sort puts e.g. '10' before '2', which would assign frame ids
# out of frame order when the file names carry unpadded numbers.
file_excel = sorted(glob.glob("video4/*.xlsx"), key=_natural_key)

records = []
for frame_id, file in enumerate(file_excel):
    detections = pd.read_excel(file)
    confident = detections[detections.confidence > 0.6]
    # Number of confident detections per class name in this frame.
    counts = confident.groupby('name')['class'].count().to_dict()
    counts['frame id'] = frame_id
    records.append(counts)

# Build the table once instead of concatenating inside the loop (quadratic).
# Classes absent from a frame stay NaN, as before.
if records:
    df2 = pd.DataFrame(records).set_index('frame id')
else:
    df2 = pd.DataFrame()

print(df2)

df2.to_excel('final_video4.xlsx')

          car
frame id     
0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
...       ...
103       NaN
104       NaN
105       NaN
106       NaN
107       NaN

[108 rows x 1 columns]
