<a href="https://colab.research.google.com/github/peterfo/jetson-surveillance-processor/blob/master/Lab2_Jetson_Nano_v3_darknet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Prepare the environment

In [0]:
# Configuration constants for the whole notebook

# Passed on to the face models as the device selection (see model preparation)
USE_GPU = True

# Master switch for face detection and verification
DO_FACE_DETECTION_VERIFICATION = False

# --- Object detection ---
# Earlier model selectors, no longer used:
#USE_RETINANET = 0
#USE_YOLOV3 = 1
#USE_TINYYOLOV3 = 2
#MODEL_TO_USE = USE_RETINANET
# Desired framerate of the result
RESULTING_FRAMERATE = 15
# Detection threshold in percent (divided by 100.0 before it is given to darknet)
OBJDET_MIN_PERCENTAGE_PROBABILITY = 30
#OBJDET_JETSON_MODEL_TO_USE = "ssd-inception-v2"
OBJDET_JETSON_MODEL_TO_USE = "ssd-mobilenet-v2"
# Only every OBJDET_VIDEO_FRAME_SKIP_RATE-th video frame is run through object detection
OBJDET_VIDEO_FRAME_SKIP_RATE = 3
OBJDET_OVERLAY_FLAG = "--overlay=box,labels,conf"
# Video frames are written to this file before being handed to the detector
OBJDET_TEMP_IMAGE_FILENAME = "/tmp/ramdisk/temp_frame_img.jpg"

# --- darknet files (the last three are joined onto DARKNET_PATH) ---
DARKNET_PATH = "../git/darknet"
DARKNET_CFG = "cfg/yolov3.cfg"
DARKNET_WEIGHTS = "yolov3.weights"
DARKNET_META = "cfg/coco.data"

# --- Face detection ---
# Only every 5th video frame is searched for faces; with FPS=15 that is
# 3 frames per second
FACEDET_FRAME_SKIP_RATE = 5

# Access rights for newly created directories
ACCESS_RIGHTS = 0o755

# --- Directory layout ---
# Root directories (CHANGE AS NEEDED)
#INPUT_ROOT = "./input/"            # The input file tree we will process recursively
#OUTPUT_ROOT = "./output/"          # Where we will put the predicted images & videos
#PROCESSED_ROOT = "./processed/"    # Where we will move the files after processing
# Sub-directories inserted directly below the processed root
PROCESSED_POS_OBJDET_SUBROOT = "pos_objdet"  # object(s) of interest detected
PROCESSED_NEG_OBJDET_SUBROOT = "neg_objdet"  # no object of interest detected
PROCESSED_ERROR_SUBROOT = "error"            # an error occurred during detection

# The trees we process: one [input, output, processed] triple per camera directory
_OVAK_ROOT = "/mnt/samba/ovak/"
_CAMERA_DIRS = ("inne_uppe", "inne_nere", "inne_garage", "ute_fram", "ute_bak")
PROCESSING_TREES = [
  [_OVAK_ROOT + camera + "/",
   _OVAK_ROOT + "output/" + camera + "/",
   _OVAK_ROOT + "processed/" + camera + "/"]
  for camera in _CAMERA_DIRS
]

LOGFILE_NAME = "/mnt/samba/ovak/log.txt"
CSVFILE_NAME = "/mnt/samba/ovak/log.csv"

In [0]:
# Module-level state

# Highest detection probability per object class for a video; these are the
# variables that clear_all_global_detection_variables() sets back to 0.0
person_max_prob_in_video = cat_max_prob_in_video = car_max_prob_in_video = 0.0
truck_max_prob_in_video = motorcycle_max_prob_in_video = bicycle_max_prob_in_video = 0.0

# Placeholders for the reference face embeddings; the real values are
# calculated in the main flow
if DO_FACE_DETECTION_VERIFICATION:
  p_face_embedding = m_face_embedding = s_face_embedding = l_face_embedding = None

In [0]:
# Check if we are in Colab: IN_COLAB is True when the google.colab module is
# already loaded in this interpreter (it is looked up in sys.modules, not imported)
import sys
IN_COLAB = 'google.colab' in sys.modules
print("IN_COLAB:", IN_COLAB)

In [0]:
# Import what we need
import sys
import os
import errno
import datetime
import shutil
import time
from matplotlib import pyplot
#import urllib
#import urllib.request
import cv2
import numpy as np
#from google.colab import files

#import jetson.inference
#import jetson.utils
import darknet

# The face detection/verification packages are only imported when that
# feature is switched on
if DO_FACE_DETECTION_VERIFICATION:
  import insightface
  import mxnet
  # Print the number of GPUs reported by mxnet and the memory info of each one
  mxnet_num_gpus = mxnet.context.num_gpus()
  print("mxnet.num_gpus() =", mxnet_num_gpus)
  for i in range(mxnet_num_gpus):
    print("mxnet.gpu_memory_info(", i, ") =", mxnet.context.gpu_memory_info(i))

## Define functions

In [0]:
# Define a function to downscale an image (if needed), preserving aspect ratio
def downscale_image(img, max_x, max_y):
  """Return img shrunk to fit inside max_x by max_y pixels, keeping the aspect ratio.

  Images that already fit are never enlarged.

  Args:
    img: Image array whose shape starts with (height, width).
    max_x: Maximum allowed width in pixels.
    max_y: Maximum allowed height in pixels.

  Returns:
    The very same img object when no downscaling is needed, otherwise a new
    image resized with cv2.INTER_AREA interpolation.
  """
  height, width = img.shape[:2]
  # x is the horizontal axis, so max_x limits the width (shape[1]) and max_y
  # limits the height (shape[0])
  downscale_ratio = min(max_x / width, max_y / height)
  if downscale_ratio >= 1:
    return img
  # cv2.resize expects (width, height); never let a side collapse to 0 pixels
  dim = (max(1, int(width * downscale_ratio)), max(1, int(height * downscale_ratio)))
  return cv2.resize(img, dim, interpolation = cv2.INTER_AREA)

In [0]:
# Define a function to do face detection on an image and optionally save the
# result to a new image file.
# Return the faces, landmarks detected
def detect_faces_and_save_results_img(input_image, model, output_filename = None):
  """Detect faces in an image and optionally save an annotated copy of it.

  Args:
    input_image: The image to search; it is downscaled to at most 1280x1280
      before detection and is never modified.
    model: Detector object providing detect(img, threshold=..., scale=...).
    output_filename: If not None, the (possibly downscaled) image with a green
      rectangle around every detected face is written to this file.

  Returns:
    The (faces, landmarks) pair exactly as returned by model.detect.
  """
  # Downscale the image if needed
  img = downscale_image(input_image, 1280, 1280)  # Max x size = 1280, max y size = 1280
  # Do face detection with threshold 0.5 and no additional scaling
  faces, landmarks = model.detect(img, threshold=0.5, scale=1.0)
  if output_filename is not None:
    # Draw on a copy: downscale_image hands back the caller's own array when no
    # resize is needed, and the caller may crop faces from that array afterwards
    annotated = img.copy()
    if faces is not None:
      for face in faces:
        # The first four values of a face row are used as the box corners
        # (x1, y1, x2, y2); convert them to plain ints for drawing
        x1, y1, x2, y2 = [int(value) for value in face[:4]]
        cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.imwrite(output_filename, annotated)
  return faces, landmarks

In [0]:
# Define a function to do face detection on an image file and optionally save the
# result to a new image file.
# Return the faces, landmarks detected
def detect_faces_and_save_results(input_filename, model, output_filename = None):
  """Load input_filename with cv2 and run detect_faces_and_save_results_img on it.

  Returns the (faces, landmarks) pair produced by that function.
  """
  return detect_faces_and_save_results_img(cv2.imread(input_filename), model, output_filename)

In [0]:
# Define a function to copy out all detected faces from an image
# Returns the detected face images in a list
# Uses an InsightFace utility function to align, normalize and crop the face 
# images so that they are ready for embedding vector calculation
def get_all_faces_in_image_img(input_image, faces, landmarks):
  """Return one aligned face crop per detected face, as a list.

  The image is first downscaled to at most 1280x1280; each crop is then
  produced by insightface's norm_crop from the landmarks of the matching face.
  An empty list is returned when faces is None.
  """
  scaled = downscale_image(input_image, 1280, 1280)  # Max x size = 1280, max y size = 1280
  if faces is None:
    return []
  return [insightface.utils.face_align.norm_crop(scaled, landmarks[idx])
          for idx in range(faces.shape[0])]

In [0]:
# Define a function to copy out all detected faces from an image file
# Returns the detected face images in a list
def get_all_faces_in_image(input_filename, faces, landmarks):
  """Load input_filename with cv2 and return its face crops as a list.

  The work is delegated to get_all_faces_in_image_img.
  """
  return get_all_faces_in_image_img(cv2.imread(input_filename), faces, landmarks)

In [0]:
# Define a function to calculate the cosine similarity between two vectors (embeddings)
def cosine_similarity(vec1, vec2):
  """Return the dot product of vec1 and vec2 divided by the product of their norms."""
  norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
  dot_product = np.dot(vec1, vec2)
  return dot_product / norm_product

In [0]:
# Define a function to create a directory, even > 1 level down
def makedirs(pathname):
  """Create pathname, including missing parent directories, with mode ACCESS_RIGHTS.

  An already existing path is silently accepted; every other OSError is
  passed on to the caller.
  """
  try:
    os.makedirs(pathname, ACCESS_RIGHTS)
  except OSError as err:
    already_exists = err.errno == errno.EEXIST
    if not already_exists:
      raise

In [0]:
# Define a function to clear all global detection flags
def clear_all_global_detection_variables():
  """Set every module-level *_max_prob_in_video variable back to 0.0."""
  global person_max_prob_in_video, cat_max_prob_in_video, car_max_prob_in_video
  global truck_max_prob_in_video, motorcycle_max_prob_in_video, bicycle_max_prob_in_video
  person_max_prob_in_video = cat_max_prob_in_video = car_max_prob_in_video = 0.0
  truck_max_prob_in_video = motorcycle_max_prob_in_video = bicycle_max_prob_in_video = 0.0

In [0]:
#
# Define a function to process one image file
# 
def process_image_file(input_root, output_root, processed_root, input_dir, filename):
  """Run object detection (and optionally face verification) on one image file.

  The file is analysed with darknet, moved below processed_root and one result
  row is appended to the CSV file. If darknet raises, the error is written to
  the log file and the image is moved to the error sub-tree instead.

  Args:
    input_root: Root of the input tree; input_dir must start with it.
    output_root: Root below which the mirrored output directory is created.
    processed_root: Root below which the file is moved after processing.
    input_dir: Directory that contains the file.
    filename: Name of the file inside input_dir. Names that do not end in
      ".jpg" or ".jpeg" are ignored.

  Returns:
    None.
  """
  if not filename.endswith((".jpg", ".jpeg")):
    return

  # Split out the "middle part" of the full input file name, i.e. the part of
  # input_dir that comes after input_root
  before_sep, _, after_sep = input_dir.partition(input_root)
  assert before_sep == ""  # Should always be ""

  # Prepare the output_dir
  output_dir = os.path.join(output_root, after_sep)
  makedirs(output_dir)

  # First detect objects in the image
  input_filename = os.path.join(input_dir, filename)
  print("Processing file:", input_filename, "... output_dir:", output_dir)
  try:
    detections = darknet.performDetect(imagePath = input_filename,
                                       thresh = OBJDET_MIN_PERCENTAGE_PROBABILITY / 100.0,
                                       configPath = os.path.join(DARKNET_PATH, DARKNET_CFG),
                                       weightPath = os.path.join(DARKNET_PATH, DARKNET_WEIGHTS),
                                       metaPath = os.path.join(DARKNET_PATH, DARKNET_META),
                                       showImage = False,
                                       makeImageOnly = False,
                                       initOnly = False)
  except Exception as exc:
    print("  ERROR: Caught exception from darknet.performDetect: ", type(exc), exc.args)
    # Each log line is newline-terminated so that later entries start on a fresh line
    with open(LOGFILE_NAME, "a+") as log_file:
      log_file.write("ERROR: Caught exception when processing file %s:\n" % (input_filename))
      log_file.write("  Exception type: %s args: %s\n" % (type(exc), exc.args))
    dest_dir = os.path.join(processed_root, PROCESSED_ERROR_SUBROOT, after_sep)
    print("  Destination directory for move: ", dest_dir)
    makedirs(dest_dir)
    shutil.move(input_filename, dest_dir)
    return

  if detections is None:
    # Nothing to evaluate; the file is left where it is
    return

  # Get the object detection probabilities per class of interest.
  # We work with max probabilities since there might be more than one object of
  # each type in each image, each with a different probability.
  max_prob = {"person": 0.0, "cat": 0.0, "car": 0.0,
              "truck": 0.0, "motorcycle": 0.0, "bicycle": 0.0}
  # NOTE(review): the coco.names file shipped with darknet spells this class
  # "motorbike"; accept both spellings - confirm against the names file in use
  label_aliases = {"motorbike": "motorcycle"}
  object_of_interest_in_image = False

  for detection in detections:
    # Each detection is indexed as (name, probability, bounding box)
    det_name = label_aliases.get(detection[0], detection[0])
    det_prob = detection[1]
    if det_name in max_prob:
      max_prob[det_name] = max(max_prob[det_name], det_prob)
      object_of_interest_in_image = True

  print("  ObjDet: person: %.4f cat: %.4f car: %.4f truck: %.4f motorcycle: %.4f bicycle: %.4f" %
        (max_prob["person"], max_prob["cat"], max_prob["car"],
         max_prob["truck"], max_prob["motorcycle"], max_prob["bicycle"]))

  # -1.0 is written to the CSV row when no face was compared
  p_max_similarity = -1.0
  m_max_similarity = -1.0
  s_max_similarity = -1.0
  l_max_similarity = -1.0

  # If person(s) are present, detect faces in the image
  if max_prob["person"] > 0.0 and DO_FACE_DETECTION_VERIFICATION:
    output_filename = os.path.join(output_dir, "predicted-faces-"+filename)
    faces, landmarks = detect_faces_and_save_results(input_filename, rf_model, output_filename)
    # For each face found, calculate embedding vector and similarity to each reference face
    if faces is not None:
      face_images = get_all_faces_in_image(input_filename, faces, landmarks)
      for face_image in face_images:
        face_embedding = af_model.get_embedding(face_image).flatten()
        p_max_similarity = max(p_max_similarity, cosine_similarity(p_face_embedding, face_embedding))
        m_max_similarity = max(m_max_similarity, cosine_similarity(m_face_embedding, face_embedding))
        s_max_similarity = max(s_max_similarity, cosine_similarity(s_face_embedding, face_embedding))
        l_max_similarity = max(l_max_similarity, cosine_similarity(l_face_embedding, face_embedding))
        if IN_COLAB:
          pyplot.imshow(face_image)
          pyplot.show()
      print("  FaceVer: max similarity scores: p: %.4f m: %.4f s: %.4f l: %.4f" %
            (p_max_similarity, m_max_similarity, s_max_similarity, l_max_similarity))

  # Move the file to the proper destination
  if object_of_interest_in_image:
    # Object found, move accordingly
    dest_dir = os.path.join(processed_root, PROCESSED_POS_OBJDET_SUBROOT, after_sep)
  else:
    # Object not found, move accordingly
    dest_dir = os.path.join(processed_root, PROCESSED_NEG_OBJDET_SUBROOT, after_sep)
  print("  Destination directory for move: ", dest_dir)
  makedirs(dest_dir)
  shutil.move(input_filename, dest_dir)
  moved_filename = os.path.join(dest_dir, filename)

  # Write a row to the CSV file, format:
  # "FileDate;FileTime;FileType;ODPerson;ODCat;ODCar;ODTruck;ODMotorcycle;ODBicycle;FV_P;FV_M;FV_S;FV_L;FileName\n"
  # Date and time are taken from the modification time of the moved file
  file_mod_time = time.localtime(os.path.getmtime(moved_filename))
  file_date = time.strftime("%Y-%m-%d", file_mod_time)
  file_time = time.strftime("%H:%M:%S", file_mod_time)
  with open(CSVFILE_NAME, "a+") as csv_file:
    csv_file.write("%s;%s;%s;%.4f;%.4f;%.4f;%.4f;%.4f;%.4f;%.4f;%.4f;%.4f;%.4f;%s\n" %
        (file_date, file_time, "image", max_prob["person"], max_prob["cat"],
         max_prob["car"], max_prob["truck"], max_prob["motorcycle"],
         max_prob["bicycle"], p_max_similarity, m_max_similarity, s_max_similarity,
         l_max_similarity, moved_filename))

  return

In [0]:
#
# Define a function to process one video file
# Do object and face detection and save 
# the results in an output directory. Then calculate embedding vector for each
# found face, and the similarity to each reference face, print the results
# 
def process_video_file(input_root, output_root, processed_root, input_dir, filename):
  """Run object detection (and optionally face verification) on one video file.

  Every OBJDET_VIDEO_FRAME_SKIP_RATE-th frame is written to
  OBJDET_TEMP_IMAGE_FILENAME and analysed with darknet. When face verification
  is enabled and a person was detected, the video is read a second time and
  every FACEDET_FRAME_SKIP_RATE-th frame is searched for faces. Finally the
  file is moved below processed_root and one result row is appended to the CSV
  file. If darknet raises, the error is written to the log file and the video
  is moved to the error sub-tree instead.

  Args:
    input_root: Root of the input tree; input_dir must start with it.
    output_root: Root below which the mirrored output directory is created.
    processed_root: Root below which the file is moved after processing.
    input_dir: Directory that contains the file.
    filename: Name of the file inside input_dir. Names that do not end in
      ".mp4" are ignored.

  Returns:
    None.
  """
  if not filename.endswith(".mp4"):
    return

  # Split out the "middle part" of the full input file name, i.e. the part of
  # input_dir that comes after input_root
  before_sep, _, after_sep = input_dir.partition(input_root)
  assert before_sep == ""  # Should always be ""

  # Prepare the output_dir
  output_dir = os.path.join(output_root, after_sep)
  makedirs(output_dir)

  # Per-class maximum detection probability over all analysed frames
  max_prob = {"person": 0.0, "cat": 0.0, "car": 0.0,
              "truck": 0.0, "motorcycle": 0.0, "bicycle": 0.0}
  # NOTE(review): the coco.names file shipped with darknet spells this class
  # "motorbike"; accept both spellings - confirm against the names file in use
  label_aliases = {"motorbike": "motorcycle"}
  object_of_interest_in_video = False

  # First detect objects in the video
  input_filename = os.path.join(input_dir, filename)
  print("Processing video file:", input_filename, "... output_dir:", output_dir)

  # Open the video file
  cap = cv2.VideoCapture()
  cap.open(input_filename)

  # We optimize the run-time performance by not processing each frame of the video
  detection_error = None
  frame_counter = 0
  try:
    while True:
      (rv, cv2_im) = cap.read()   # cv2_im is a valid image if and only if rv is true
      if not rv:                  # we reached the end of video file
        break

      if (frame_counter % OBJDET_VIDEO_FRAME_SKIP_RATE) == 0:
        # Process this frame: write it to the temp image file and run the
        # detector on that file
        cv2.imwrite(OBJDET_TEMP_IMAGE_FILENAME, cv2_im)
        try:
          detections = darknet.performDetect(imagePath = OBJDET_TEMP_IMAGE_FILENAME,
                                             thresh = OBJDET_MIN_PERCENTAGE_PROBABILITY / 100.0,
                                             configPath = os.path.join(DARKNET_PATH, DARKNET_CFG),
                                             weightPath = os.path.join(DARKNET_PATH, DARKNET_WEIGHTS),
                                             metaPath = os.path.join(DARKNET_PATH, DARKNET_META),
                                             showImage = False,
                                             makeImageOnly = False,
                                             initOnly = False)
        except Exception as exc:
          # Stop reading; the error is reported once the capture is released
          detection_error = exc
          break

        for detection in detections:
          # Each detection is indexed as (name, probability, bounding box)
          det_name = label_aliases.get(detection[0], detection[0])
          det_prob = detection[1]
          if det_name in max_prob:
            max_prob[det_name] = max(max_prob[det_name], det_prob)
            object_of_interest_in_video = True
      frame_counter += 1
  finally:
    # Release the capture on every exit path, including unexpected exceptions
    cap.release()

  if detection_error is not None:
    print("  ERROR: Caught exception from darknet.performDetect: ", type(detection_error), detection_error.args)
    # Each log line is newline-terminated so that later entries start on a fresh line
    with open(LOGFILE_NAME, "a+") as log_file:
      log_file.write("ERROR: Caught exception when processing file %s:\n" % (input_filename))
      log_file.write("  Exception type: %s args: %s\n" % (type(detection_error), detection_error.args))
    dest_dir = os.path.join(processed_root, PROCESSED_ERROR_SUBROOT, after_sep)
    print("  Destination directory for move: ", dest_dir)
    makedirs(dest_dir)
    shutil.move(input_filename, dest_dir)
    return

  # All analysed frames have been folded into max_prob at this point
  print("  ObjDet: person: %.4f cat: %.4f car: %.4f truck: %.4f motorcycle: %.4f bicycle: %.4f" %
    (max_prob["person"], max_prob["cat"], max_prob["car"],
     max_prob["truck"], max_prob["motorcycle"], max_prob["bicycle"]))

  # -1.0 is written to the CSV row when no face was compared
  p_max_similarity = -1.0
  m_max_similarity = -1.0
  s_max_similarity = -1.0
  l_max_similarity = -1.0

  # If person(s) were detected, detect faces in the video frames
  if DO_FACE_DETECTION_VERIFICATION and max_prob["person"] > 0.0:
    # Read the video a second time from the start
    cap = cv2.VideoCapture()
    cap.open(input_filename)

    # We optimize the run-time performance by not processing each frame of the video
    frame_counter = 0
    try:
      while True:
        (rv, im) = cap.read()   # im is a valid image if and only if rv is true
        if not rv:              # we reached the end of video file
          break

        if (frame_counter % FACEDET_FRAME_SKIP_RATE) == 0:
          # Process this frame
          output_filename = os.path.join(output_dir, "predicted-faces-"+filename+"-frm"+str(frame_counter)+".jpg")
          faces, landmarks = detect_faces_and_save_results_img(im, rf_model, output_filename)
          # For each face found, calculate embedding vector and similarity to each reference face
          if faces is not None:
            face_images = get_all_faces_in_image_img(im, faces, landmarks)
            for face_image in face_images:
              face_embedding = af_model.get_embedding(face_image).flatten()
              p_max_similarity = max(p_max_similarity, cosine_similarity(p_face_embedding, face_embedding))
              m_max_similarity = max(m_max_similarity, cosine_similarity(m_face_embedding, face_embedding))
              s_max_similarity = max(s_max_similarity, cosine_similarity(s_face_embedding, face_embedding))
              l_max_similarity = max(l_max_similarity, cosine_similarity(l_face_embedding, face_embedding))
        frame_counter += 1
    finally:
      cap.release()

    # Print out the max similarity for this video
    print("  FaceVer: max similarity scores: p: %.4f m: %.4f s: %.4f l: %.4f" %
          (p_max_similarity, m_max_similarity, s_max_similarity, l_max_similarity))

  # Move the file to the proper destination
  if object_of_interest_in_video:
    # Object found, move accordingly
    dest_dir = os.path.join(processed_root, PROCESSED_POS_OBJDET_SUBROOT, after_sep)
  else:
    # Object not found, move accordingly
    dest_dir = os.path.join(processed_root, PROCESSED_NEG_OBJDET_SUBROOT, after_sep)
  print("  Destination directory for move: ", dest_dir)
  makedirs(dest_dir)
  shutil.move(input_filename, dest_dir)
  moved_filename = os.path.join(dest_dir, filename)

  # Write a row to the CSV file, format:
  # "FileDate;FileTime;FileType;ODPerson;ODCat;ODCar;ODTruck;ODMotorcycle;ODBicycle;FV_P;FV_M;FV_S;FV_L;FileName\n"
  # Date and time are taken from the modification time of the moved file
  file_mod_time = time.localtime(os.path.getmtime(moved_filename))
  file_date = time.strftime("%Y-%m-%d", file_mod_time)
  file_time = time.strftime("%H:%M:%S", file_mod_time)
  with open(CSVFILE_NAME, "a+") as csv_file:
    csv_file.write("%s;%s;%s;%.4f;%.4f;%.4f;%.4f;%.4f;%.4f;%.4f;%.4f;%.4f;%.4f;%s\n" %
        (file_date, file_time, "video", max_prob["person"], max_prob["cat"],
         max_prob["car"], max_prob["truck"], max_prob["motorcycle"],
         max_prob["bicycle"], p_max_similarity, m_max_similarity, s_max_similarity,
         l_max_similarity, moved_filename))

  return

In [0]:
# Define a function to process one file tree
def process_file_tree(input_root, output_root, processed_root):
  """Walk input_root and process every image and video file found in it.

  For each directory the sub-directories and files are listed on stdout, then
  all .jpg/.jpeg files are passed to process_image_file and after that all
  .mp4 files to process_video_file.

  Returns:
    A tuple (number of image files handled, number of video files handled).
  """
  image_total = 0
  video_total = 0
  for dir_name, subdir_list, file_list in os.walk(input_root):
    print("Found directory:", dir_name)
    print("Found subdirs:")
    for subdirname in subdir_list:
      print("\t%s" % subdirname)
    print("Found files:")
    for filename in file_list:
      print("\t%s" % filename)

    print("Processing image files...")
    image_names = [name for name in file_list if name.endswith((".jpg", ".jpeg"))]
    for image_name in image_names:
      process_image_file(input_root, output_root, processed_root, dir_name, image_name)
      image_total += 1

    print("Processing video files...")
    video_names = [name for name in file_list if name.endswith(".mp4")]
    for video_name in video_names:
      process_video_file(input_root, output_root, processed_root, dir_name, video_name)
      video_total += 1
  return image_total, video_total

## Main program flow

### Preparation & initialization

In [0]:
# Prepare for Object Detection by loading the Object Detection model
# The second argument should look like this:
#   ['./PF-detectnet-console.py', '--network=ssd-inception-v2', 'images/peds_0.jpg', 'peds_0_output.jpg']
#argv = [sys.argv[0], "--network="+OBJDET_JETSON_MODEL_TO_USE, "dummy.jpg", "dummy-output.jpg"]
# NOTE: the jetson.inference initialisation below is wrapped in a string
# literal and therefore never executed; the jetson imports are commented out
# and detection is done through darknet.performDetect instead.
"""
argv = [sys.argv[0], "--network="+OBJDET_JETSON_MODEL_TO_USE, "--threshold="+str(OBJDET_MIN_PERCENTAGE_PROBABILITY / 100.0)]
print("argv:", argv)
objectDetector = jetson.inference.detectNet(OBJDET_JETSON_MODEL_TO_USE, 
                                            argv,
                                            OBJDET_MIN_PERCENTAGE_PROBABILITY / 100.0)
"""

In [0]:
# Get RetinaFace model by name from the insightface model zoo.
# rf_model is the model handed to the face detection functions.
if DO_FACE_DETECTION_VERIFICATION:
  rf_model = insightface.model_zoo.get_model('retinaface_r50_v1')

In [0]:
# Get ArcFace model by name from the insightface model zoo.
# af_model is the model used to calculate face embedding vectors.
if DO_FACE_DETECTION_VERIFICATION:
  af_model = insightface.model_zoo.get_model('arcface_r100_v1')

In [0]:
# Prepare both face models on the selected device (GPU or CPU) so that they can
# detect faces and compute embeddings for all incoming images.
# The nms threshold of the detector is set to 0.4.
if DO_FACE_DETECTION_VERIFICATION:
  # Context id 0 is used when USE_GPU is set, -1 otherwise
  CTX_ID = 0 if USE_GPU else -1

  rf_model.prepare(ctx_id = CTX_ID, nms=0.4)
  af_model.prepare(ctx_id = CTX_ID)

In [0]:
# Reference images have fixed names, detect the reference faces.
# Each call also writes the image with the detected face(s) outlined to the
# matching "predicted-*.jpg" file. The file names are relative, so they are
# resolved against the current working directory.
if DO_FACE_DETECTION_VERIFICATION:
  p_faces, p_landmarks = detect_faces_and_save_results("p.jpg", rf_model, "predicted-p.jpg")
  m_faces, m_landmarks = detect_faces_and_save_results("m.jpg", rf_model, "predicted-m.jpg")
  s_faces, s_landmarks = detect_faces_and_save_results("s.jpg", rf_model, "predicted-s.jpg")
  l_faces, l_landmarks = detect_faces_and_save_results("l.jpg", rf_model, "predicted-l.jpg")

In [0]:
# Crop the detected faces out of every reference image, one list per reference
if DO_FACE_DETECTION_VERIFICATION:
  p_face_images = get_all_faces_in_image("p.jpg", p_faces, p_landmarks)
  m_face_images = get_all_faces_in_image("m.jpg", m_faces, m_landmarks)
  s_face_images = get_all_faces_in_image("s.jpg", s_faces, s_landmarks)
  l_face_images = get_all_faces_in_image("l.jpg", l_faces, l_landmarks)

  # Every reference image is expected to contain exactly one face
  assert [len(reference_faces) for reference_faces in
          (p_face_images, m_face_images, s_face_images, l_face_images)] == [1, 1, 1, 1]

In [0]:
# Save the faces as new image files (for inspection).
# Only the first (and expected only) face crop of each reference is written.
if DO_FACE_DETECTION_VERIFICATION:
  cv2.imwrite("detected-face-p.jpg", p_face_images[0])
  cv2.imwrite("detected-face-m.jpg", m_face_images[0])
  cv2.imwrite("detected-face-s.jpg", s_face_images[0])
  cv2.imwrite("detected-face-l.jpg", l_face_images[0])

In [0]:
# Calculate the 512D embedding vectors for the reference (known) faces.
# The result of get_embedding is flattened to a 1-D vector; these vectors are
# what the faces found in images and videos are compared against with
# cosine_similarity.
if DO_FACE_DETECTION_VERIFICATION:
  p_face_embedding = af_model.get_embedding(p_face_images[0]).flatten()
  m_face_embedding = af_model.get_embedding(m_face_images[0]).flatten()
  s_face_embedding = af_model.get_embedding(s_face_images[0]).flatten()
  l_face_embedding = af_model.get_embedding(l_face_images[0]).flatten()

### Main processing loop

In [0]:
# Main processing loop: repeatedly process every tree listed in
# PROCESSING_TREES, writing the start/end time and the number of processed
# files to the log file for every pass. The loop never terminates on its own.

# Create the CSV file with its heading row if it does not exist yet
if not os.path.isfile(CSVFILE_NAME):
  with open(CSVFILE_NAME, "a+") as csv_file:
    csv_file.write("FileDate;FileTime;FileType;ODPerson;ODCat;ODCar;ODTruck;ODMotorcycle;ODBicycle;FV_P;FV_M;FV_S;FV_L;FileName\n")


while True:
  try:
    # Get the start time
    start_time = datetime.datetime.now()

    # Write the separator and the start time to the log file
    with open(LOGFILE_NAME, "a+") as log_file:
      log_file.write("---------------------------------\n")
      log_file.write("Start processing at: %s\n" % (start_time))

    # Initialize counters
    num_images_processed = 0
    num_videos_processed = 0

    # Loop over the defined file structures to process, and do the processing
    for input_root, output_root, processed_root in PROCESSING_TREES:
      num_images, num_videos = process_file_tree(input_root, output_root, processed_root)
      num_images_processed += num_images
      num_videos_processed += num_videos

    # Get the end time and calculate the processing time
    end_time = datetime.datetime.now()
    processing_time = end_time - start_time

    # Write the end time and the totals to the log file
    with open(LOGFILE_NAME, "a+") as log_file:
      log_file.write("End processing at: %s\n" % (end_time))
      log_file.write("Processed %d images and %d videos in: %s\n" % (num_images_processed, num_videos_processed, processing_time))

    # Sleep for a minute if nothing was processed
    if num_images_processed == 0 and num_videos_processed == 0:
      time.sleep(60)
  except Exception as exc:
    # Top-level boundary: report the problem and retry after a pause instead of
    # ending the loop
    print("ERROR: Caught exception from main processing loop: ", type(exc), exc.args, file=sys.stderr)
    print("       Sleeping for 5 minutes before resuming...", file=sys.stderr)
    time.sleep(5 * 60)