# People recognition for e-scooters

Real-time person detection and depth prediction on video.
 - MobileNet-SSD for object detection
 - https://github.com/lindawangg/monodepth2 for depth estimation

## Libraries

In [1]:
from __future__ import absolute_import, division, print_function
%matplotlib inline

import os
import numpy as np
import PIL.Image as pil
import matplotlib.pyplot as plt
import time 

import torch
from torchvision import transforms

import networks
from utils import download_model_if_doesnt_exist

import cv2

## Setting up depth network and loading weights

In [2]:
# Name of the pretrained depth model; fetched on demand by the helper below.
model_name = "mono_640x192"
download_model_if_doesnt_exist(model_name)

# Both weight files are looked up under models/<model_name>/.
encoder_path, depth_decoder_path = (
    os.path.join("models", model_name, weights_file)
    for weights_file in ("encoder.pth", "depth.pth")
)

# --- Encoder -----------------------------------------------------------------
encoder = networks.ResnetEncoder(18, False)
loaded_dict_enc = torch.load(encoder_path, map_location='cpu')
# Keep only the checkpoint entries whose keys exist in the encoder's own
# state dict (presumably the file also stores non-parameter entries -- TODO confirm).
filtered_dict_enc = {
    key: value
    for key, value in loaded_dict_enc.items()
    if key in encoder.state_dict()
}
encoder.load_state_dict(filtered_dict_enc)
encoder.eval()

# --- Depth decoder -----------------------------------------------------------
depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc, scales=range(4))
loaded_dict = torch.load(depth_decoder_path, map_location='cpu')
depth_decoder.load_state_dict(loaded_dict)
# Trailing semicolon keeps the notebook from echoing the module repr.
depth_decoder.eval();



## Depth prediction function

## Some settings for Mobilenet

In [3]:
# Object detector loaded through OpenCV's DNN module from a Caffe model:
# first the network description file, then the trained weights file.
# Both files are expected in the notebook's working directory.
net = cv2.dnn.readNetFromCaffe(
    'MobileNetSSD_deploy.prototxt.txt',
    'MobileNetSSD_deploy.caffemodel'
)
# Label names for the detector's class ids: the position in the list is the
# class id (index 15 is "person", which is the only class the notebook draws).
classes = [
    "background",
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow",
    "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
]

# The same labels as an id -> name mapping.
categories = dict(enumerate(classes))

## Speed transformation

In [4]:
def count_to_speed(count):
    """Map a head count to a speed value: the more people, the lower the speed.

    Args:
        count: Number of people (the notebook passes the per-frame person count).

    Returns:
        20 when count < 1, 15 when 1 <= count < 3, 10 when 3 <= count < 5 and
        5 when count >= 5. 25 is returned only if none of these ranges matches
        (e.g. count is NaN). The notebook displays the value as km/h.
    """
    if count < 1:
        return 20
    if 1 <= count < 3:
        return 15
    if 3 <= count < 5:
        return 10
    if count >= 5:
        return 5
    # Reached only when every comparison above is false (e.g. NaN).
    return 25

## Distance determination

In [5]:
# Distance tables, in meters, one entry per speed in `velocity_interp`.
# The names suggest one table per riding surface (wet / dry / stone) -- TODO confirm.
dist_wet = [0.46, 1.07, 1.78, 4.33, 7.3]
dist_dry = [0.3, 0.88, 2.03, 3.98, 6.13]
dist_stone = [0.25, 0.84, 1.87, 3.1, 5.6]
velocity_interp = [5, 10, 15, 20, 25]  # km/h

# Speed (km/h) at which the distances are evaluated.
velocity_now = 13

# Linearly interpolate each table at `velocity_now`. Every threshold must use
# its own table: previously all three were computed from `dist_wet`.
th_dist_wet = np.interp(velocity_now, velocity_interp, dist_wet)
th_dist_dry = np.interp(velocity_now, velocity_interp, dist_dry)
th_dist_stone = np.interp(velocity_now, velocity_interp, dist_stone)

## Objects detection function

In [9]:
def make_image(image, confidence_threshold=0.2):
    """Detect people in a frame and draw the detections plus a status overlay.

    The frame is resized to 300x300, passed through the module-level detector
    `net`, and every detection of class id 15 ("person" in `classes`) whose
    confidence exceeds `confidence_threshold` is outlined and labelled.
    Three status lines are then written near the bottom-right corner: the
    number of people, the speed returned by `count_to_speed`, and the
    module-level `th_dist_dry` value.

    Depth is not estimated here: no depth overlay is drawn.

    Args:
        image: Frame as a NumPy array whose first two dimensions are height
            and width. The notebook's capture loop passes an RGB frame.
            The array is modified in place.
        confidence_threshold: Minimum detector confidence (exclusive) for a
            detection to be counted and drawn. Defaults to 0.2.

    Returns:
        The same `image` array, annotated.
    """
    person_class_id = 15  # index of "person" in `classes`
    color = (71, 40, 252)
    font = cv2.FONT_HERSHEY_SIMPLEX

    frame_height, frame_width = image.shape[:2]

    # NOTE(review): blobFromImage is used with its default channel order (no
    # R/B swap) while the capture loop passes RGB frames -- confirm which
    # channel order this Caffe model expects.
    blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5)
    net.setInput(blob)
    detections = net.forward()

    # Box coordinates come out relative to the frame size; this scales them to pixels.
    box_scale = np.array([frame_width, frame_height, frame_width, frame_height])

    count = 0
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if not confidence > confidence_threshold:
            continue
        idx = int(detections[0, 0, i, 1])
        if idx != person_class_id:
            continue
        count += 1

        box = detections[0, 0, i, 3:7] * box_scale
        # Plain Python ints: some OpenCV builds reject NumPy scalars as point coordinates.
        startX, startY, endX, endY = box.astype("int").tolist()
        cv2.rectangle(image, (startX, startY), (endX, endY), color, 2)

        label = "{}: {:.2f}%".format(classes[idx], confidence * 100)
        # Put the label above the box unless that would be too close to the top edge.
        y_label = startY - 15 if startY - 15 > 15 else startY + 15
        cv2.putText(image, label, (startX, y_label), font, 0.5, color, 2)

    # The recommendation depends only on the final count, so compute it once.
    speed = count_to_speed(count)

    # Anchor the status lines to the bottom-right corner of the frame
    # (equals the former fixed position (360, 475) on a 640x480 frame).
    text_x = max(0, frame_width - 280)
    base_y = frame_height - 5
    line_step = 15
    cv2.putText(image, "number of people: {}".format(count),
                (text_x, base_y - 2 * line_step), font, 0.5, color, 2)
    cv2.putText(image, "recomended speed, km/h: {}".format(speed),
                (text_x, base_y - line_step), font, 0.5, color, 2)
    cv2.putText(image, "current braking distance, m: {}".format(th_dist_dry),
                (text_x, base_y), font, 0.5, color, 2)

    return image

## Main loop

In [12]:
# Capture frames from camera index 1, annotate them and show them until the
# '0' key is pressed or the camera stops delivering frames.
cap = cv2.VideoCapture(1)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame was returned (camera disconnected / stream ended);
            # `frame` is not usable, so stop instead of crashing in cvtColor.
            break

        # OpenCV delivers BGR frames; make_image is fed RGB.
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = make_image(image)

        # imshow interprets 3-channel images as BGR, so convert back for
        # display; otherwise red and blue appear swapped on screen.
        cv2.imshow("Output", cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

        if cv2.waitKey(10) & 0xFF == ord('0'):
            break
finally:
    # Always free the camera and close the window, even if a frame fails to process.
    cap.release()
    cv2.destroyAllWindows()