<a href="https://colab.research.google.com/github/oddmanru/pills_detector/blob/main/pills_detector_github.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Pill Bottle Detector  
This notebook applies a custom object detector, trained with the YOLOv5 engine, to identify pills in video clips. The trained detector was converted from .pt to .onnx format with a conversion script. 

### Mount MyDrive  
Mounting Google Drive is a convenient way to copy the files/scripts stored in the cloud 
to the local host. Any other cloud storage can be used with this code as well. 

### Duplicate the cv2 4.7.0-dev   
Colab needs a recompiled
version of OpenCV for the DNN backend to run on CUDA devices. Once it has been built, it is enough to copy the compiled core file into the working directory each time the code needs to run. 
Note that the Python version currently running in the Colab container is 3.8. 

In [None]:
# !cp ../cv2.cpython-38-x86_64-linux-gnu.so .

In [None]:
# !python --version

In [None]:
import cv2
cv2.__version__

'4.7.0-dev'

### Load the video clips   
Copy the video clips to run detection on into the yolov5_pills directory

In [None]:
!mkdir /content/yolov5_pills

mkdir: cannot create directory ‘/content/yolov5_pills’: File exists


In [None]:
# Copy the video clip that contains the pills to be identified
!cp ../pills_2.mp4 /content/yolov5_pills

### Prepare the packages needed

In [None]:
import os
#Append a new path on colab so that it can import the helper modules 
os.sys.path.append("../custom_modules")
from cv_utils.delta_timer import Delta_timer
from unpack_utils.unpack_objects import UnpackObjects
from imutils.video import FPS
import numpy as np
import imutils
import time
import os
import logging

Enable logger 

In [None]:
# Module-level logger named after the running module (the variable __name__,
# not the string literal "__name__")
logger = logging.getLogger(__name__)
logging.basicConfig()

### Load classes  
Load the classes.txt which contains the classes that this model can identify

In [None]:
# Load the class labels, one label per line
classPath = "../classes.txt"
# The context manager closes the file handle as soon as it has been read
with open(classPath, 'r') as labels_file:
  CLASSES = labels_file.read().strip().split('\n') # Return a list
print(f"[testing] CLASSES length: {len(CLASSES)}")

# Create one random colour per class (one row of three uint8 values each)
COLORS = np.random.randint(0, 255, size=(len(CLASSES), 3), dtype='uint8')

[testing] CLASSES length: 2


### Load pre-trained object detection model  
Load the trained custom object detector that identifies pills. This pills.onnx was derived from the pretrained yolov5s.pt. 

In [None]:
# Load the YOLOv5 custom object detection model (ONNX) with OpenCV's DNN module
print("[INFO] loading the object detection model...")
pills_path = "../pills.onnx"

if not os.path.isfile(pills_path):
  # Fail here with a clear error: without the network object, the detection
  # loop would only fail later with a NameError on yoloV5_pills
  print("[ALERT] model does not exist...")
  raise FileNotFoundError(pills_path)

print('[INFO] loading YOLOv5 model from disk...')
yoloV5_pills = cv2.dnn.readNet(pills_path)
# Select the CUDA backend and target for inference with the YOLOv5 model
yoloV5_pills.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
yoloV5_pills.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)


[INFO] loading the object detection model...
[INFO] loading YOLOv5 model from disk...


In [None]:
def blob_yoloV5(frame):
  """Pad a BGR frame to a square RGB canvas and build the DNN input blob.

  The frame is converted from BGR to RGB and pasted into the top-left
  corner of a black square whose side equals the larger of the frame's
  height and width. ``cv2.dnn.blobFromImage`` then scales the canvas by
  1/255 and resizes it to 640x640 without cropping.

  Returns:
    A tuple ``(canvas_height, canvas_width, blob)`` where the first two
    items are the dimensions of the padded square canvas.
  """
  frame_h, frame_w = frame.shape[:2]
  side = max(frame_h, frame_w)

  # Black square canvas; the RGB frame fills its top-left corner
  canvas = np.zeros((side, side, 3), dtype=np.uint8)
  canvas[:frame_h, :frame_w] = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

  # The canvas is already RGB, so no channel swap is requested
  blob = cv2.dnn.blobFromImage(
    canvas, 1.0 / 255, (640, 640), swapRB=False, crop=False
  )

  canvas_h, canvas_w = canvas.shape[:2]
  return canvas_h, canvas_w, blob


### Load Video clip

In [None]:
# Open the input clip and choose where the annotated video will be written
video_path = "/content/yolov5_pills/pills_2.mp4"
camera = cv2.VideoCapture(video_path)
if not camera.isOpened():
  # Without this check a bad path only shows up later, as an empty run
  raise IOError(f"[ALERT] cannot open video: {video_path}")

# The writer starts as None and is created inside the detection loop
writer = None
out_path = "/content/yolov5_pills/pills_2_done.mp4"

### Initialize variables and timers

In [None]:
# Helper object built from the class labels; its unpack_numpy() is used to
# turn the raw network output into boxes, confidences and class ids
yolov5_unpack = UnpackObjects(CLASSES)

# One Delta_timer per pipeline stage plus one for the whole iteration.
# The generator builds a separate instance for every name, in listed order.
(
  fReader_timer,
  blob_timer,
  yolo_timer,
  bb_finding_timer,
  nms_timer,
  deepSort_timer,
  tracking_timer,
  tk_index_timer,
  index_timer,
  writing_timer,
  whole_timer,
) = (Delta_timer() for _ in range(11))

In [None]:
# Frame counter and frame-rate estimator used around the detection loop
frame_cnt = 0
fps = FPS().start()

### Main function

In [None]:
# Detection loop: read a frame, run the YOLOv5 network, apply NMS, draw the
# surviving boxes and append the annotated frame to the output video.
while True:
  whole_timer.start()

  fReader_timer.start()
  (grabbed, frame) = camera.read()
  fReader_timer.stop()
  fReader_timer.update()

  # Stop at the end of the clip or when a frame could not be read
  if not grabbed or frame is None:
    break

  # Resize so that the longer side of the frame is 1024 px
  h, w = frame.shape[:2]
  if h > w:
    frame = imutils.resize(frame, height=1024)
  else:
    frame = imutils.resize(frame, width=1024)

  # Create the video writer once, sized to the first resized frame
  if out_path is not None and writer is None:
    print("[video_writer] initializing video wirter...")
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    print(f"[video_writer] {out_path}")
    writer = cv2.VideoWriter(out_path, fourcc, 30, (frame.shape[1], frame.shape[0]), True)

  # Prepare the network input. blob_yoloV5 returns the height and width of
  # the padded square image together with the blob (it performs the
  # BGR -> RGB conversion itself).
  blob_timer.start()
  resized_height, resized_width, blob = blob_yoloV5(frame)
  yoloV5_pills.setInput(blob)
  blob_timer.stop()
  blob_timer.update()

  # Scale factors between the padded square image and the 640x640 blob.
  # NOTE(review): these ratios are computed but never applied to the boxes
  # below; confirm whether UnpackObjects.unpack_numpy already returns boxes
  # in frame coordinates.
  width_ratio = resized_width / 640
  height_ratio = resized_height / 640

  # Make the inference
  yolo_timer.start()
  layerOutputs = yoloV5_pills.forward()
  yolo_timer.stop()
  yolo_timer.update()

  # Take the detections of the first (only) image in the batch.
  # The original note gave the output shape as 1 x 25200 x 85 - TODO confirm
  # for this model.
  detected_objects = layerOutputs[0]

  bb_finding_timer.start()
  boxes, confidences, classIDs = yolov5_unpack.unpack_numpy(detected_objects)
  bb_finding_timer.stop()
  bb_finding_timer.update()

  # Apply non-maxima suppression to drop weak, overlapping bounding boxes.
  # NMSBoxes takes the boxes, their scores, a score threshold and an NMS
  # (overlap) threshold; both thresholds are 0.45 here.
  nms_timer.start()
  idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.45, 0.45)
  nms_timer.stop()
  nms_timer.update()

  # Labels of the detections kept for the current frame. The list has to
  # exist before the append below, which otherwise raises NameError.
  labels = []

  if len(idxs) > 0:
    index_timer.start()

    # Draw every detection that survived NMS
    for i, idx in enumerate(idxs.flatten()):
      # Top-left corner of the box, as read from boxes[idx]
      (tlX, tlY) = (boxes[idx][0], boxes[idx][1])
      # Width and height of the box; named so they do not overwrite the
      # frame's own h and w
      (box_w, box_h) = (boxes[idx][2], boxes[idx][3])
      # Pick the class colour and convert it to a list of Python ints
      color = [int(c) for c in COLORS[classIDs[idx]]]

      # Bottom-right corner of the box
      brX = tlX + box_w
      brY = tlY + box_h

      label = CLASSES[classIDs[idx]]
      labels.append(label)

      # Draw the bounding box
      cv2.rectangle(frame, (tlX, tlY), (brX, brY), (0,255,0), 1)
      # Draw the filled title box (half the box width) and the label text
      t_width = int((brX - tlX)/2)
      cv2.rectangle(frame, (tlX, tlY-30), (tlX+t_width, tlY-5), color, -1)
      cv2.putText(frame, f"OD_{label}: #{i}", (tlX, tlY-10), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (255,255,255), 1)

    index_timer.stop()
    index_timer.update()

  # Write the annotated frame; the writer is None when out_path is None
  writing_timer.start()
  if writer is not None:
    writer.write(frame)
  writing_timer.stop()
  writing_timer.update()

  fps.update()

  whole_timer.stop()
  whole_timer.update()

IndentationError: ignored

In [None]:
# Stop the frame-rate estimator before its elapsed time and FPS are reported
fps.stop()

### Create reports

In [None]:
# Summary of the run as measured by the frame-rate estimator
print(f"[INFO] elapsed time: {fps.elapsed():.2f}")
print(f"[INFO] approx FPS: {fps.fps():.2f}")
print("----------------------------")

# Release the video resources. The writer is only created inside the
# detection loop, so it can still be None (no output path, or no frame read).
print("[INFO] cleaning up...")
if writer is not None:
  writer.release()
camera.release()

# Report on the timers that the detection loop actually runs; the
# DeepSORT/tracking timers are not used by that loop and are not reported.
fReader_report = fReader_timer.report("reading frame")
blob_report = blob_timer.report("blob transformer")
yolo_report = yolo_timer.report("yolo inference")
bb_finding_report = bb_finding_timer.report("unpack yolo")
nms_report = nms_timer.report("NMS")
index_report = index_timer.report("yolo indexing")
writing_report = writing_timer.report("writing frame")
whole_report = whole_timer.report("whole time")