In [1]:
from jetbot import ObjectDetector

# Construct the object detector from the 'ssd_mobilenet_v2_coco.engine' file.
model = ObjectDetector('ssd_mobilenet_v2_coco.engine')

In [2]:
from jetbot import Camera

# Obtain the camera through Camera.instance, passing width=300 and height=300.
camera = Camera.instance(width=300, height=300)

In [4]:
# Run the detector on the camera's current value and print the raw result.
# The printed output is a nested list: detections[i][j] is a dict with
# 'label', 'confidence' and 'bbox' keys.
detections = model(camera.value)

print(detections)

[[{'label': 62, 'confidence': 0.9213576316833496, 'bbox': [0.07901328802108765, 0.16707581281661987, 0.6022751331329346, 0.9849607348442078]}, {'label': 62, 'confidence': 0.36658975481987, 'bbox': [0.055794745683670044, 0.4926317632198334, 0.5795766115188599, 0.9740325212478638]}]]


In [6]:
# Pick a single detection out of the nested result: the first index selects
# the image, the second selects the object within that image.
image_number = 0
object_number = 0

print(detections[image_number][object_number])

{'label': 62, 'confidence': 0.9213576316833496, 'bbox': [0.07901328802108765, 0.16707581281661987, 0.6022751331329346, 0.9849607348442078]}


In [7]:
import torch
import torchvision
import torch.nn.functional as F
import cv2
import numpy as np

# Per-channel mean / standard deviation, scaled by 255 so they apply to raw
# 0-255 pixel values.
# NOTE(review): these look like the usual ImageNet statistics -- confirm.
mean = np.array([0.485, 0.456, 0.406]) * 255.0
stdev = np.array([0.229, 0.224, 0.225]) * 255.0

# Transform applied by `preprocess` to each frame tensor.
normalize = torchvision.transforms.Normalize(mean=mean, std=stdev)

# Device that preprocessed tensors are moved to.
device = torch.device('cuda')

def preprocess(camera_value):
    """Convert a camera frame into a normalized, batched tensor on `device`.

    The frame is resized to 224x224, converted from BGR to RGB, transposed so
    the channel axis comes first, cast to float, passed through the
    module-level `normalize` transform, moved to the module-level `device`,
    and given a leading batch axis.

    Args:
        camera_value: image array accepted by `cv2.resize` and `cv2.cvtColor`
            (presumably an HxWx3 BGR frame from the camera -- confirm).

    Returns:
        A float tensor on `device` with a leading axis of size 1.
    """
    resized = cv2.resize(camera_value, (224, 224))
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    channels_first = rgb.transpose((2, 0, 1))  # move axis 2 (channels) to the front
    normalized = normalize(torch.from_numpy(channels_first).float())
    on_device = normalized.to(device)
    return on_device[None, ...]  # prepend the batch axis

In [None]:
# `widgets` and `display` are used below but are not imported anywhere in the
# visible notebook, so on a fresh kernel this cell raises NameError. Import
# them here so the cell is self-contained.
import ipywidgets.widgets as widgets
from IPython.display import display

# JPEG image view plus an integer field labelled 'tracked label'.
image_widget = widgets.Image(format='jpeg', width=300, height=300)
label_widget = widgets.IntText(value=1, description='tracked label')

# Show the image above the label field.
display(widgets.VBox([
    image_widget,
    label_widget
]))

# Pixel dimensions used by `execute` to scale bbox coordinates when drawing.
width = int(image_widget.width)
height = int(image_widget.height)

def detection_center(detection):
    """Return the midpoint of a detection's bbox, offset by -0.5 on each axis.

    bbox[0] and bbox[2] are averaged for x, bbox[1] and bbox[3] for y. The 0.5
    offset puts (0, 0) at the image center when bbox coordinates are
    normalized to the 0..1 range (as in the sample detector output).

    Args:
        detection: dict with a 'bbox' entry indexable at positions 0..3.

    Returns:
        Tuple (center_x, center_y).
    """
    box = detection['bbox']
    x_mid = (box[0] + box[2]) / 2.0
    y_mid = (box[1] + box[3]) / 2.0
    return (x_mid - 0.5, y_mid - 0.5)
    
def norm(vec):
    """Return the Euclidean length of a 2D vector (reads vec[0] and vec[1])."""
    x, y = vec[0], vec[1]
    squared_length = x**2 + y**2
    return np.sqrt(squared_length)

def closest_detection(detections):
    """Return the detection whose center is nearest to the image center.

    Args:
        detections: iterable of detection dicts accepted by `detection_center`.

    Returns:
        The detection with the smallest `norm(detection_center(det))`, the
        earliest one on ties, or None when `detections` is empty.
    """
    # min() returns the first minimal element, so ties resolve to the earliest
    # detection, and each detection's distance is computed exactly once.
    return min(
        detections,
        key=lambda det: norm(detection_center(det)),
        default=None,
    )
        
def execute(change):
    """Detect objects in a frame, outline the tracked one, refresh the widget.

    Args:
        change: dict whose 'new' entry is the frame to process. The frame is
            drawn on in place before being encoded.
    """
    frame = change['new']

    # Run the detector; only the first entry of its result is used.
    results = model(frame)

    # Keep detections labelled 77 (cell phone, per the original author's note).
    # NOTE(review): the label is hard-coded; `label_widget` is never read here.
    matches = [candidate for candidate in results[0] if candidate['label'] == 77]

    if matches:
        print(matches)

    # Outline the match nearest the image center, if there is one.
    target = closest_detection(matches)
    if target is not None:
        box = target['bbox']
        # Scale the bbox corners by the widget's pixel size.
        corner_a = (int(width * box[0]), int(height * box[1]))
        corner_b = (int(width * box[2]), int(height * box[3]))
        cv2.rectangle(frame, corner_a, corner_b, (0, 255, 0), 5)

    # Push the (possibly annotated) frame to the image widget as JPEG bytes.
    image_widget.value = bgr8_to_jpeg(frame)
    
# Run the pipeline once on the camera's current value.
# NOTE(review): `execute` calls `bgr8_to_jpeg`, which is only defined in a
# later cell of this notebook, so a fresh top-to-bottom run fails here with
# NameError unless that helper was defined beforehand.
execute({'new': camera.value})

In [8]:
def bgr8_to_jpeg(value, quality=10):
    """Encode an image array as JPEG and return the encoded bytes.

    Args:
        value: image passed to `cv2.imencode` (presumably an 8-bit BGR array,
            going by the function name -- confirm).
        quality: currently unused.
            NOTE(review): this is never forwarded to `cv2.imencode`, so the
            encoder's default parameters apply regardless of the value given.

    Returns:
        The JPEG-encoded image as a `bytes` object.
    """
    _status, encoded = cv2.imencode('.jpg', value)
    return bytes(encoded)

In [17]:
# `widgets` and `display` are used below but are not imported anywhere in the
# visible notebook, so on a fresh kernel this cell raises NameError. Import
# them here so the cell is self-contained.
import ipywidgets.widgets as widgets
from IPython.display import display

# JPEG image view plus an integer field labelled 'tracked label'.
image_widget = widgets.Image(format='jpeg', width=300, height=300)
label_widget = widgets.IntText(value=1, description='tracked label')

# Show the image above the label field.
display(widgets.VBox([
    image_widget,
    label_widget
]))

# Pixel dimensions used by `execute` to scale bbox coordinates when drawing.
width = int(image_widget.width)
height = int(image_widget.height)

def detection_center(detection):
    """Return the midpoint of a detection's bbox, offset by -0.5 on each axis.

    bbox[0] and bbox[2] are averaged for x, bbox[1] and bbox[3] for y. The 0.5
    offset puts (0, 0) at the image center when bbox coordinates are
    normalized to the 0..1 range (as in the sample detector output).

    Args:
        detection: dict with a 'bbox' entry indexable at positions 0..3.

    Returns:
        Tuple (center_x, center_y).
    """
    box = detection['bbox']
    x_mid = (box[0] + box[2]) / 2.0
    y_mid = (box[1] + box[3]) / 2.0
    return (x_mid - 0.5, y_mid - 0.5)
    
def norm(vec):
    """Return the Euclidean length of a 2D vector (reads vec[0] and vec[1])."""
    x, y = vec[0], vec[1]
    squared_length = x**2 + y**2
    return np.sqrt(squared_length)

def closest_detection(detections):
    """Return the detection whose center is nearest to the image center.

    Args:
        detections: iterable of detection dicts accepted by `detection_center`.

    Returns:
        The detection with the smallest `norm(detection_center(det))`, the
        earliest one on ties, or None when `detections` is empty.
    """
    # min() returns the first minimal element, so ties resolve to the earliest
    # detection, and each detection's distance is computed exactly once.
    return min(
        detections,
        key=lambda det: norm(detection_center(det)),
        default=None,
    )
        
def execute(change):
    """Detect objects in a frame, outline the tracked one, refresh the widget.

    Args:
        change: dict whose 'new' entry is the frame to process. The frame is
            drawn on in place before being encoded.
    """
    frame = change['new']

    # Run the detector; only the first entry of its result is used.
    results = model(frame)

    # Keep detections labelled 77 (cell phone, per the original author's note).
    # NOTE(review): the label is hard-coded; `label_widget` is never read here.
    matches = [candidate for candidate in results[0] if candidate['label'] == 77]

    if matches:
        print(matches)

    # Outline the match nearest the image center, if there is one.
    target = closest_detection(matches)
    if target is not None:
        box = target['bbox']
        # Scale the bbox corners by the widget's pixel size.
        corner_a = (int(width * box[0]), int(height * box[1]))
        corner_b = (int(width * box[2]), int(height * box[3]))
        cv2.rectangle(frame, corner_a, corner_b, (0, 255, 0), 5)

    # Push the (possibly annotated) frame to the image widget as JPEG bytes.
    image_widget.value = bgr8_to_jpeg(frame)
    
# Run the pipeline once on the camera's current value so the widget shows a frame.
execute({'new': camera.value})

VBox(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C…

In [19]:
# Remove any observers already attached to the camera, then register `execute`
# to be called for changes to the camera's 'value'.
camera.unobserve_all()
camera.observe(execute, names='value')

In [11]:
# Detach every observer from the camera, including the `execute` callback
# registered above.
camera.unobserve_all()