In [2]:
import pandas as pd
import cv2
import imutils
import json 
import urllib.request

## Get XML File

In [3]:
# Location of the frontal-face Haar cascade definition in the OpenCV repository.
url = "https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml"
# Use a context manager so the HTTP connection is closed as soon as the body
# has been read, instead of lingering until garbage collection.
with urllib.request.urlopen(url) as response:
    xml_bytes = response.read()

# Load the classifier
# FILE_STORAGE_MEMORY tells FileStorage that the first argument is the XML
# text itself rather than a filename.
xml_string = xml_bytes.decode('utf-8')
fs = cv2.FileStorage(xml_string, cv2.FILE_STORAGE_READ | cv2.FILE_STORAGE_MEMORY)

## Detect Face

In [4]:
# Function to detect faces in the frames of a video
def extract_faces(filepath, start_time=0, end_time=None, start_frame=0, end_frame=None, fps=None):
  """Detect frontal faces in a video and collect their bounding boxes.

  Each processed frame is resized to a width of 500 px, converted to
  grayscale and passed to a Haar cascade read from the notebook-level ``fs``
  FileStorage. Box coordinates therefore refer to the resized frame, not to
  the original resolution.

  Args:
    filepath: Path of the video file to read.
    start_time: Start of the window in seconds (default 0).
    end_time: End of the window in seconds; ``None`` means "until the end".
    start_frame: Index of the first frame to process (default 0).
    end_frame: Index of the last frame to process, inclusive; ``None``
      means "until the end".
    fps: Frame rate used to convert the time bounds into frame indices.
      When ``None`` it is read from the video.

  A time bound ``t`` is mapped to frame ``int(t * fps)``. When both a time
  bound and a frame bound are given, the tighter of the two applies.

  Returns:
    A 6-tuple ``(start_time, end_time, start_frame, end_frame, fps, df)``.
    The first four values are the arguments as passed, ``fps`` is the frame
    rate that was used, and ``df`` is a DataFrame with one row per detected
    face and the columns ``frame, x, y, w, h``. ``frame`` is the frame index
    counted from the beginning of the video. If the video cannot be read the
    DataFrame is empty.

  Raises:
    ValueError: If a time bound is requested but no positive frame rate is
      available to convert it.
  """
  columns = ['frame', 'x', 'y', 'w', 'h']
  rows = []

  # Load face detector
  detector = cv2.CascadeClassifier()
  detector.read(fs.getFirstTopLevelNode())

  # Path to video file
  vid_obj = cv2.VideoCapture(filepath)
  try:
    if fps is None:
      fps = vid_obj.get(cv2.CAP_PROP_FPS)

    # Resolve the requested window into inclusive frame indices.
    first_frame = start_frame or 0
    last_frame = end_frame
    if start_time or end_time is not None:
      if not fps or fps <= 0:
        raise ValueError("a positive fps is required to apply start_time/end_time")
      first_frame = max(first_frame, int((start_time or 0) * fps))
      if end_time is not None:
        time_limit = int(end_time * fps)
        last_frame = time_limit if last_frame is None else min(last_frame, time_limit)

    frame_count = 0
    while last_frame is None or frame_count <= last_frame:
      if frame_count < first_frame:
        # grab() advances to the next frame without decoding it, which keeps
        # frame indices exact while skipping the part before the window.
        if not vid_obj.grab():
          break
        frame_count += 1
        continue

      # Read the next video frame
      success, image = vid_obj.read()
      if not success:
        break

      # Resize image and convert it to grayscale
      image = imutils.resize(image, width=500)
      gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

      # detect faces in image using the haar cascade face detector
      rects = detector.detectMultiScale(image=gray,
                                        scaleFactor=1.1,
                                        minNeighbors=9,
                                        minSize=(30, 30),
                                        flags=cv2.CASCADE_SCALE_IMAGE)

      # store x, y, w, h of each rect as plain ints, one row per face
      for (x, y, w, h) in rects:
        rows.append({'frame': frame_count, 'x': int(x), 'y': int(y), 'w': int(w), 'h': int(h)})

      frame_count += 1
  finally:
    # Always free the capture handle, even if detection raised.
    vid_obj.release()

  # Passing the column list keeps the schema stable when no face was found.
  return start_time, end_time, start_frame, end_frame, fps, pd.DataFrame(rows, columns=columns)

In [5]:
# Path of the video to analyse; 'INSERT_PATH' looks like a placeholder to be
# replaced with a real file path before running.
video_path = 'INSERT_PATH'
# extract_faces returns a 6-tuple; only its last element, the detections
# DataFrame, is used in this cell.
a, b, c, d, e, video_df = extract_faces(video_path)
# Show the first three detection rows as the cell output.
video_df.head(3)

Unnamed: 0,frame,x,y,w,h
0,0,196,351,150,150
1,1,194,350,153,153
2,2,192,349,155,155


## Fit the bounding boxes

In [6]:
video_obj = cv2.VideoCapture(video_path)
fps = video_obj.get(cv2.CAP_PROP_FPS)
# Detection ran on frames resized to 500 px wide (see extract_faces), so the
# boxes must be scaled back up to the original frame width before drawing.
scale_factor = int(video_obj.get(cv2.CAP_PROP_FRAME_WIDTH)) / 500

try:
    for index, row in video_df.iterrows():
        # read the frame from the video
        video_obj.set(cv2.CAP_PROP_POS_FRAMES, int(row['frame']))
        ret, frame = video_obj.read()
        if not ret:
            # The frame could not be decoded; drawing on None would raise.
            continue

        x, y, w, h = (int(row[name] * scale_factor) for name in ('x', 'y', 'w', 'h'))
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        cv2.imshow("Frame with bounding box", frame)
        # Press "q" in the preview window to stop instead of interrupting the kernel.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video object and close all windows, even when the loop is
    # interrupted or raises.
    video_obj.release()
    cv2.destroyAllWindows()

KeyboardInterrupt: 

: 

## JSON Converter

In [55]:
import json 
from datetime import datetime

def json_converter(filename, video_metadata):
    """Serialize the detection results of one video into a JSON string.

    Args:
        filename: Value stored under "VideoInformation".
        video_metadata: 6-item sequence
            ``(start_time, end_time, start_frame, end_frame, fps, video_df)``.
            ``video_df`` must provide the columns ``frame, x, y, w, h``.

    Returns:
        A JSON document (``str``) holding the file name, today's date and a
        "VideoMetadata" object. Its "FrameData" maps ``"FrameNumber<frame>"``
        to ``{"FrameCoordinates": [x, y, w, h]}`` with the coordinates
        rendered as strings.

    Note:
        Entries are keyed by frame number only, so if ``video_df`` holds
        several rows for the same frame, the last row replaces earlier ones.
    """
    # Stamp the document with the current local date.
    today = datetime.now().strftime("%Y-%m-%d")

    start_time, end_time, start_frame, end_frame, fps, detections = video_metadata

    # One entry per frame number; coordinates are stringified.
    frame_data = {
        "FrameNumber" + str(record['frame']): {
            "FrameCoordinates": [str(record[field]) for field in ('x', 'y', 'w', 'h')]
        }
        for _, record in detections.iterrows()
    }

    payload = {"VideoInformation": filename, "CreationDate": today}
    payload["VideoMetadata"] = {
        "StartTime": start_time,
        "EndTime": end_time,
        "StartFrame": start_frame,
        "EndFrame": end_frame,
        "Fps": fps,
        "FrameData": frame_data,
    }
    return json.dumps(payload)

## Main Function 

In [None]:
def face_detector(filename, filepath, start_time=0, end_time=None, start_frame=0, end_frame=None, fps=None):
    """Run face detection on a video and return the result as a JSON string.

    Args:
        filename: Name recorded in the JSON output.
        filepath: Path of the video file handed to ``extract_faces``.
        start_time, end_time, start_frame, end_frame, fps: Forwarded
            positionally, in this order, to ``extract_faces`` after
            ``filepath``.

    Returns:
        The string produced by ``json_converter`` for the extracted metadata.

    Note:
        This requires an ``extract_faces`` that accepts
        ``(filepath, start_time, end_time, start_frame, end_frame, fps)``.
    """
    return json_converter(
        filename,
        extract_faces(filepath, start_time, end_time, start_frame, end_frame, fps),
    )