<a href="https://colab.research.google.com/github/theoh-io/ReID-Colab/blob/main/OO_Yolo%2BReID.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Using the webcam in Colab

This notebook follows the tutorial on using a webcam with Colab: https://github.com/theAIGuysCode/colab-webcam/blob/main/colab_webcam.ipynb

In [None]:
# Mount Google Drive at /content/drive; the project folder and the ReID
# weights used further down in this notebook are read from there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Shell escape: print the current working directory.
!pwd

/content


In [None]:
# Change into the project folder on the mounted Drive.
# NOTE(review): presumably this is where yolo_detector.py and ReID.py live, so
# that the imports further down resolve - confirm.
%cd /content/drive/MyDrive/Colab Notebooks/Yolo+ReID
# Uncomment to step back up one directory:
#%cd ..

/content/drive/MyDrive/Colab Notebooks/Yolo+ReID


In [None]:
# import dependencies
from IPython.display import display, Javascript, Image
from google.colab.output import eval_js
from base64 import b64decode, b64encode
import cv2
import numpy as np
import PIL
import io
import html
import time

In [None]:
# function to convert the JavaScript object into an OpenCV image
def js_to_image(js_reply):
  """
  Turn a frame received from the browser into an OpenCV image.

  Params:
          js_reply: data-URL style string, "<header>,<base64 image data>",
                    as produced by the webcam capture in JavaScript
  Returns:
          img: OpenCV BGR image
  """
  # the base64 payload is the second comma-separated field of the string
  encoded_payload = js_reply.split(',')[1]
  # view the decoded bytes as a flat uint8 buffer, which is what imdecode expects
  raw_buffer = np.frombuffer(b64decode(encoded_payload), dtype=np.uint8)
  # IMREAD_COLOR (== 1) requests a 3-channel BGR image
  return cv2.imdecode(raw_buffer, flags=cv2.IMREAD_COLOR)

# function to convert OpenCV Rectangle bounding box image into base64 byte string to be overlayed on video stream
def bbox_to_bytes(bbox_array):
  """
  Encode an overlay array as a PNG data URL for the browser.

  Params:
          bbox_array: Numpy array (pixels) containing rectangle to overlay on video stream;
                      it is handed to PIL with mode 'RGBA'.
  Returns:
          str: 'data:image/png;base64,...' string holding the PNG-encoded overlay
  """
  # Import the submodule explicitly: a bare `import PIL` does not load
  # `PIL.Image`, so `PIL.Image.fromarray` only works if some other module
  # happened to import it first.
  from PIL import Image as PILImage

  png_buffer = io.BytesIO()
  # format bbox into png for return
  PILImage.fromarray(bbox_array, 'RGBA').save(png_buffer, format='png')
  # base64 output is pure ASCII, so it can be decoded straight to text
  encoded_png = b64encode(png_buffer.getvalue()).decode('ascii')

  return 'data:image/png;base64,{}'.format(encoded_png)

In [None]:
# JavaScript to properly create our live video stream using our webcam as input
def video_stream():
  """
  Install the browser-side webcam streaming code in the current cell output.

  Displays a Javascript object that defines `stream_frame(label, imgData)`,
  the function that `video_frame` calls through `eval_js`. On its first call
  `stream_frame` builds the page elements (status label, <video> fed by
  getUserMedia, an <img> overlay positioned over the video, and a red
  instruction line) and starts an animation-frame loop that captures the
  video into a 640x480 canvas.

  Each `stream_frame` call updates the label/overlay when they are non-empty
  and resolves to an object with the keys 'create', 'show', 'capture'
  (elapsed milliseconds) and 'img' (JPEG data URL, quality 0.8).
  Clicking the video, the overlay or the instruction text requests a stop;
  `stream_frame` then stops the camera, removes the elements and resolves to
  the empty string.

  Returns:
          None
  """
  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;

    var pendingResolve = null;
    var shutdown = false;

    // Stop the camera track and drop every element created by createDom().
    function removeDom() {
       stream.getVideoTracks()[0].stop();
       video.remove();
       div.remove();
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }

    // Runs once per animation frame until a stop is requested; hands the
    // current video frame to whichever stream_frame() call is waiting.
    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480);
          result = captureCanvas.toDataURL('image/jpeg', 0.8)
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }

    // Build the page elements and start the camera; a no-op after the first call.
    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);

      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Status:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);

      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);

      const instruction = document.createElement('div');
      instruction.innerHTML =
          '<span style="color: red; font-weight: bold;">' +
          'When finished, click here or on the video to stop this demo</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };

      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640; //video.videoWidth;
      captureCanvas.height = 480; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);

      return stream;
    }
    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();

      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }

      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }

      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      if (shutdown) {
        // Stop was requested while this capture was pending, so there is no
        // usable frame to return. Tear down and report the end of the stream
        // exactly like the check at the top of this function; returning an
        // object with an empty 'img' would leave the camera running.
        removeDom();
        shutdown = false;
        return '';
      }

      return {'create': preShow - preCreate,
              'show': preCapture - preShow,
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

  display(js)
  
def video_frame(label, bbox):
  """
  Push a status label and an overlay to the browser and fetch the next frame.

  Params:
          label: text shown next to "Status:" above the video ('' keeps the current text)
          bbox: overlay image as a data-URL string ('' keeps the current overlay)
  Returns:
          whatever the JavaScript `stream_frame` call resolves to: an object
          with the captured frame under 'img', or '' once the stream was stopped
  """
  import json  # local import so this function does not depend on another cell

  # json.dumps produces a quoted, fully escaped string literal, so quotes,
  # backslashes or newlines in the arguments can no longer break (or inject
  # code into) the JavaScript call. Plain text yields the same call as before.
  js_call = 'stream_frame({}, {})'.format(json.dumps(str(label)), json.dumps(str(bbox)))
  return eval_js(js_call)

In [None]:
import yolo_detector
from yolo_detector import YoloDetector
import ReID
from ReID import ReID_Tracker
import PIL
from PIL import Image
import torch

def _box_corners(box):
    """Return integer pixel corners (left, top, right, bottom) of a centre-format box.

    `box` is indexed as (centre_x, centre_y, width, height), which is how the
    detector output is used in this cell. Image rows grow downwards, so `top`
    is the smaller y value.
    """
    half_w, half_h = box[2] / 2, box[3] / 2
    return (int(box[0] - half_w), int(box[1] - half_h),
            int(box[0] + half_w), int(box[1] + half_h))


def _crop_detection(frame, box):
    """Cut `box` out of `frame`, clamped to the frame; None when nothing is left.

    Without clamping, a box reaching past the left/top edge gives a negative
    slice start, which numpy counts from the far edge and which produces an
    empty or wrong crop.
    """
    frame_h, frame_w = frame.shape[:2]
    left, top, right, bottom = _box_corners(box)
    left, top = max(left, 0), max(top, 0)
    right, bottom = min(right, frame_w), min(bottom, frame_h)
    if right <= left or bottom <= top:
        return None
    return np.array(frame[top:bottom, left:right])


# start streaming video from webcam
video_stream()
# label for video
label_html = 'Capturing...'
# overlay (PNG data URL) drawn over the video; empty until a frame has been processed
overlay_bytes = ''
count = 0

# Instantiate detector and tracker
detector = YoloDetector()
tracker = ReID_Tracker()
path = '/content/drive/MyDrive/Colab Notebooks/loading/best_model.pth.tar'
tracker.load_pretrained(path)


while True:
    js_reply = video_frame(label_html, overlay_bytes)
    # An empty reply, or a reply without image data, means the stream was stopped.
    if not js_reply or not js_reply["img"]:
        break

    # convert JS response to OpenCV Image
    img = js_to_image(js_reply["img"])
    if img is None:
        # cv2.imdecode returns None for data it cannot decode; wait for the next frame
        continue

    # create transparent overlay for bounding box, same size as the captured frame
    frame_h, frame_w = img.shape[:2]
    bbox_array = np.zeros([frame_h, frame_w, 4], dtype=np.uint8)

    ############
    # Detector #
    ############
    # boxes: np array with one (centre_x, centre_y, width, height) row per detection
    # NOTE(review): `label` is compared to True as before; its exact type is
    # defined in yolo_detector - confirm.
    boxes, label = detector.predict_multiple(img)

    ###########################################
    #   Image Cropping and preprocessing      #
    ###########################################
    crops = []
    kept_rows = []  # rows of `boxes` that produced a usable crop
    if label == True:
        for row in range(boxes.shape[0]):
            crop_bgr = _crop_detection(img, boxes[row])
            if crop_bgr is None:
                continue
            # the preprocessing needs a PIL image; PIL is RGB while OpenCV is BGR
            crop_pil = Image.fromarray(cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB))
            crops.append(torch.unsqueeze(tracker.image_preprocessing(crop_pil), 0))
            kept_rows.append(row)

    if not crops:
        # Nothing to track in this frame: do not reuse crops from an earlier
        # frame, and send a fully transparent overlay so the old box disappears.
        print("no detection")
        overlay_bytes = bbox_to_bytes(bbox_array)
        continue

    tensor_img = torch.cat(crops)

    ############
    # Tracking #
    ############
    # index (within the batch of crops) of the detection matching the target
    idx = tracker.embedding_comparator(tensor_img)
    # select the bbox corresponding to that crop
    target_box = boxes[kept_rows][idx]
    print(target_box)

    # draw the selected box on the overlay
    left, top, right, bottom = _box_corners(target_box)
    bbox_array = cv2.rectangle(bbox_array, (left, top), (right, bottom), (255, 0, 0), 2)

    # make exactly the drawn pixels opaque
    bbox_array[:, :, 3] = (bbox_array.max(axis=2) > 0).astype(np.uint8) * 255
    # convert overlay of bbox into bytes so the next frame gets the new overlay
    overlay_bytes = bbox_to_bytes(bbox_array)

<IPython.core.display.Javascript object>

Downloading: "https://github.com/ultralytics/yolov5/archive/master.zip" to /root/.cache/torch/hub/master.zip
Downloading https://ultralytics.com/assets/Arial.ttf to /root/.config/Ultralytics/Arial.ttf...
[31m[1mrequirements:[0m PyYAML>=5.3.1 not found and is required by YOLOv5, attempting auto-update...
Collecting PyYAML>=5.3.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
Installing collected packages: PyYAML
  Attempting uninstall: PyYAML
    Found existing installation: PyYAML 3.13
    Uninstalling PyYAML-3.13:
      Successfully uninstalled PyYAML-3.13
Successfully installed PyYAML-6.0

[31m[1mrequirements:[0m 1 package updated per /root/.cache/torch/hub/ultralytics_yolov5_master/requirements.txt
[31m[1mrequirements:[0m ⚠️ [1mRestart runtime or rerun command for updates to take effect[0m

YOLOv5 🚀 2022-4-11 torch 1.10.0+cu111 CUDA:0 (Tesla P100-PCIE-16GB, 16281MiB)

Fusing layers... 
YOL

  0%|          | 0.00/97.8M [00:00<?, ?B/s]

[     320.18      330.73      420.79      297.13]
[     316.98      330.68      406.32      297.87]
[     313.71      329.79      429.73      299.44]
[     316.69      329.48      435.02      300.08]
[     321.74      329.23      419.19      298.69]
[     319.31      330.44      423.33      299.06]
[      320.9      330.14      423.71      298.41]
[     319.06      329.52      422.66      297.85]
[     319.57      330.53      421.39      297.95]
[     319.67      330.21      419.23      297.02]
[     320.63      329.86      421.46      297.54]
[     294.69      332.47      413.88      295.06]
[     255.48       339.8      409.69      278.41]
[     224.56       345.4      424.62      259.43]
[     225.02      346.39      425.33      256.59]
[     225.64      346.15      423.78      255.74]
[     236.32      338.11      395.36      283.78]
[     346.76      278.76         490      399.85]
[     291.86      361.24      268.84      236.02]
[      270.6       376.5      229.79      205.97]


TypeError: ignored

In [None]:
# Scratch check for picking the index of the smallest distance.
# Problem: after squeezing dist_list, min() ends up iterating over a 0-d tensor;
# without the squeeze the result is [0, 0] instead of just [0] and cannot be
# converted to int.
a = torch.tensor([[2, 3, 5]]).squeeze(0)
best_a = min(a).squeeze()
is_best = a == best_a
print(a, best_a, is_best, is_best.nonzero().squeeze(), sep='\n')



tensor([2, 3, 5])
tensor(2)
tensor([ True, False, False])
tensor(0)


In [None]:
# Scratch check: stacking a list of 4-element rows gives one 2-D array.
bbox_list = [[1, 1, 2, 3] for _ in range(3)]

a = np.vstack(bbox_list)
print(a)

[[1 1 2 3]
 [1 1 2 3]
 [1 1 2 3]]


In [None]:
# Scratch check: shape and element access on a 1-D array.
arr = np.array([1, 2, 3])
print(arr.shape, arr[1], sep='\n')

(3,)
2
