In [1]:
#import necessary modules
import torch
import torchvision
from torch import nn
from torchvision import models

#prepare model for loading
# Build only the VGG16 architecture. Every parameter is overwritten by the
# checkpoint loaded right after this cell, so requesting the ImageNet weights
# (pretrained=True) would just trigger a large, pointless download.
vgg_model = models.vgg16()
num_features = vgg_model.classifier[0].in_features
# Replacement classifier head: flattened conv features -> 8 hidden units ->
# 2 classes, emitted as log-probabilities.
new_top = nn.Sequential(
    nn.Linear(num_features, 8),
    nn.ReLU(),
    nn.Linear(8, 2),
    nn.LogSoftmax(dim=1),
)
vgg_model.classifier = new_top

In [2]:
#load model
# map_location='cpu' deserializes the checkpoint into host memory regardless of
# the device it was saved from; the model itself is still on the CPU here, so
# this avoids a transient second copy of the weights on the GPU.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
vgg_model.load_state_dict(torch.load('my_fruit_vgg', map_location='cpu'))

<All keys matched successfully>

In [3]:
#use cuda
device = torch.device('cuda')
# Move the weights to the GPU and switch to inference mode. eval() makes sure
# any dropout / batch-norm layers behave deterministically; it is harmless for
# layers that have no train/eval distinction.
vgg_model = vgg_model.to(device).eval()

In [4]:
#preprocess the camera image: frames are converted from BGR to RGB because the model was trained on RGB images
import cv2
import numpy as np

# Per-channel mean / standard deviation, scaled by 255 so they can be applied
# directly to pixel values in the 0-255 range.
mean = np.array([0.485, 0.456, 0.406]) * 255.0
stdev = np.array([0.229, 0.224, 0.225]) * 255.0

# Channel-wise (x - mean) / stdev transform used by preprocess().
normalize = torchvision.transforms.Normalize(mean=mean, std=stdev)

def preprocess(camera_value):
    """Turn a BGR camera frame into a normalized, batched tensor on `device`.

    Steps: BGR -> RGB, channels moved to the front (axes (2, 0, 1)), cast to
    float32, per-channel normalization with the module-level `normalize`,
    transfer to `device`, and a leading batch axis of size 1.

    Args:
        camera_value: 3-channel image array as delivered by the camera
            (must be accepted by cv2.cvtColor with COLOR_BGR2RGB).

    Returns:
        torch.Tensor with one extra leading dimension, located on `device`.
    """
    rgb_frame = cv2.cvtColor(camera_value, cv2.COLOR_BGR2RGB)
    channels_first = torch.from_numpy(rgb_frame.transpose((2, 0, 1))).float()
    on_device = normalize(channels_first).to(device)
    return on_device.unsqueeze(0)

In [5]:
#load the camera instance
import traitlets
from IPython.display import display
import ipywidgets
from jetbot import Camera, bgr8_to_jpeg

# Camera frames are requested at 224x224; preprocess() performs no resizing,
# so this is the resolution the classifier receives.
camera = Camera.instance(height=224, width=224)

# JPEG widget used to show the live (later: annotated) frames in the notebook.
image_widget = ipywidgets.Image(width=400, height=400, format='jpeg')

# One-way link: every new camera frame is JPEG-encoded and pushed to the widget.
camera_link = traitlets.dlink(
    source=(camera, 'value'),
    target=(image_widget, 'value'),
    transform=bgr8_to_jpeg,
)

display(image_widget)

Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x02\x01\x0…

In [6]:
from jetbot import Robot

# Handle to the JetBot robot object.
# NOTE(review): `robot` is not referenced by any other cell visible in this
# notebook - confirm it is still needed.
robot = Robot()

In [7]:
import cv2
import imutils
import datetime
import torch.nn.functional as F
import matplotlib.pyplot as plt
# Time at which motion was last detected. Starts at "now", so the scene is
# treated as having just moved when the notebook starts.
lastMovtionCaptured = datetime.datetime.now()

# Running-average background frame used as the reference for motion detection.
# Stays None until fruitDetect() has seen its first frame; each new frame is
# compared against it to find the regions that changed.
avg = None

# Fruit detection function
def fruitDetect(imgInput, min_area=30, quiet_seconds=0.5,
                apple_threshold=0.99, orange_threshold=0.01):
    """Label a frame "Apple found" / "Orange found" once the scene is still.

    Motion is detected by comparing a blurred grayscale copy of the frame with
    the running-average background stored in the module-level `avg`. The
    classifier is consulted, and a label drawn, only when no motion has been
    seen for at least `quiet_seconds`; this keeps the label from flickering
    while something is moving in front of the camera.

    Args:
        imgInput: BGR image array from the camera. It is annotated in place.
        min_area: Minimum contour area for a changed region to count as
            motion. Smaller values are more sensitive but pick up more noise.
        quiet_seconds: How long the scene must have been motion-free before a
            label is drawn (fractions of a second are honoured).
        apple_threshold: Class-0 probability above which "Apple found" is drawn.
        orange_threshold: Class-0 probability below which "Orange found" is drawn.

    Returns:
        The same `imgInput` array, with a label drawn on it when applicable.

    Side effects:
        Updates the globals `avg` (background model) and `lastMovtionCaptured`
        (time of the most recent detected motion).
    """
    global avg, lastMovtionCaptured

    # Get the current timestamp.
    timestamp = datetime.datetime.now()

    # Grayscale + Gaussian blur: cheaper to compare and less sensitive to noise.
    gray = cv2.cvtColor(imgInput, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (21, 21), 0)

    # First frame: use it as the initial background and return it unannotated.
    if avg is None:
        avg = gray.copy().astype("float")
        return imgInput

    # Fold the new frame into the background, then diff the frame against it.
    cv2.accumulateWeighted(gray, avg, 0.5)
    frameDelta = cv2.absdiff(gray, cv2.convertScaleAbs(avg))

    # Binary mask of the changed pixels, dilated to merge nearby regions.
    thresh = cv2.threshold(frameDelta, 5, 255, cv2.THRESH_BINARY)[1]
    thresh = cv2.dilate(thresh, None, iterations=2)

    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)

    # Any sufficiently large changed region counts as motion in this frame.
    if any(cv2.contourArea(c) >= min_area for c in cnts):
        lastMovtionCaptured = timestamp

    # total_seconds() keeps the fractional part; timedelta.seconds is a whole
    # number, which would silently turn a 0.5 s wait into a 1 s wait.
    if (timestamp - lastMovtionCaptured).total_seconds() < quiet_seconds:
        return imgInput

    # The prediction is only needed when a label may be drawn, so the network
    # is skipped while the scene is moving. no_grad() avoids building an
    # autograd graph for every frame.
    with torch.no_grad():
        y = vgg_model(preprocess(imgInput))
        # The classifier head ends in LogSoftmax; softmax is shift-invariant,
        # so applying it to the log-probabilities yields the probabilities.
        y = F.softmax(y, dim=1)
    prob_apple = float(y.flatten()[0])

    if prob_apple > apple_threshold:
        cv2.putText(imgInput,"Apple found",(10,30), cv2.FONT_HERSHEY_SIMPLEX, 0.5,(128,255,0),1,cv2.LINE_AA)
    elif prob_apple < orange_threshold:
        cv2.putText(imgInput,"Orange found",(10,30), cv2.FONT_HERSHEY_SIMPLEX, 0.5,(0,128,255),1,cv2.LINE_AA)

    # Return to the processed frame.
    return imgInput

In [8]:
#this runs the function
def execute(change):
    """Camera observer callback: annotate the new frame and show it.

    Args:
        change: traitlets-style change dict; only change['new'] (the latest
            camera frame) is read.
    """
    annotated_frame = fruitDetect(change['new'])
    image_widget.value = bgr8_to_jpeg(annotated_frame)
    
# Run once on the current frame so the pipeline is exercised immediately
# (on a fresh kernel this first call also seeds the motion-detection background).
execute({'new': camera.value})
# Remove every observer currently attached to the camera before attaching ours.
# NOTE(review): this presumably also detaches the camera -> widget dlink, so raw
# frames stop overwriting the annotated ones - confirm.
camera.unobserve_all()
# From now on, call execute() whenever the camera's `value` trait changes.
camera.observe(execute, names='value')

In [22]:
#run this to stop updating
# Detaches execute() from the camera's `value` trait so frames are no longer processed.
camera.unobserve(execute, names='value')

In [23]:
#stop camera instance
# Run this last, after the observer has been detached.
camera.stop()