In [1]:
from aiy.vision.inference import CameraInference
from aiy.vision.models import object_detection
from aiy.vision.streaming.server import StreamingServer
from aiy.vision.streaming import svg
from aiy.leds import Leds, Color
from gpiozero import Servo
from aiy.pins import PIN_A

from picamera import PiCamera
from IPython.display import Image, display, clear_output

import contextlib
import time

### Animal Detector

The network we use now can detect people, dogs and cats. Remember: if your joy detector is still running, you need to turn it off first using the command

```
sudo systemctl stop joy_detection_demo.service
```

I added some code here that is called to create the overlay - basically the box around the objects and the labels above. You can customize it and/or add information you want to overlay on the camera feed.

In [15]:
def svg_overlay(objects, frame_size):
    """Build the SVG overlay (boxes and labels) drawn over the camera stream.

    For every detected object a rounded, lightly filled white rectangle is
    drawn around its bounding box and the class name is written in red just
    above the box.

    Args:
        objects: iterable of detections. Each item must expose
            ``bounding_box`` as an ``(x, y, w, h)`` tuple and ``kind`` as an
            integer index into the label list below.
        frame_size: ``(width, height)`` of the frame the boxes refer to.

    Returns:
        The SVG document converted to a string with ``str()``.
    """
    labels = ['Background', 'Person', 'Cat', 'Dog']
    font_size = 50
    label_gap = 12  # distance between the label baseline and the box top

    width, height = frame_size
    doc = svg.Svg(width=width, height=height)

    for obj in objects:
        x, y, w, h = obj.bounding_box
        doc.add(svg.Rect(x=int(x), y=int(y), width=int(w), height=int(h), rx=10, ry=10,
                         fill_opacity=0.1, style='fill:white;stroke:white;stroke-width:4px'))

        # An unexpected class index must not raise IndexError (or silently
        # wrap around for negative values) and take the stream down.
        kind = obj.kind
        label = labels[kind] if 0 <= kind < len(labels) else 'Unknown'

        # The text's y coordinate is its baseline. For boxes that touch the
        # top of the frame, y - label_gap would put the label partly or fully
        # outside the visible area, so keep at least one font height of room.
        label_y = max(y - label_gap, font_size)
        doc.add(svg.Text(label, x=x, y=label_y, fill='red', font_size=font_size))

    return str(doc)

#### Main loop

Here is our main loop, based on the code we used last time. Look at the comments to see what was changed. Basically, we removed the parts that saved the picture and adjusted the servo value to the joy score each frame. We also added the streaming back in, so while this cell runs, you can connect to http://orcspi-vis.local:4664 and see the stream.

In [19]:
# Minimum score a detection needs in order to be reported and drawn.
SCORE_THRESHOLD = 0.5

with contextlib.ExitStack() as stack:
    leds   = stack.enter_context(Leds())
    camera = stack.enter_context(PiCamera(sensor_mode=4, resolution=(820, 616)))

    # This starts and runs the streaming of the camera
    server = stack.enter_context(StreamingServer(camera))

    print("Loading model - hold on ..")

    # Do inference on VisionBonnet
    with CameraInference(object_detection.model()) as inference:
        try:
            for result in inference.run():
                # Green LED shows that inference results are arriving
                leds.update(Leds.rgb_on(Color.GREEN))
                objects = object_detection.get_objects(result, SCORE_THRESHOLD, (0, 0))

                # This sends the overlay (boxes) to add to the camera stream
                server.send_overlay(svg_overlay(objects, (result.width, result.height)))

                # clear_output(wait=True) only clears once new output arrives,
                # so always print something - otherwise the detections of an
                # earlier frame stay on screen while nothing is detected.
                clear_output(wait=True)
                if objects:
                    for i, obj in enumerate(objects):
                        print('Object #%d: %s' % (i, obj))
                else:
                    print('No objects detected')

        except KeyboardInterrupt:
            # Interrupting the kernel is the intended way to stop the loop
            print("Interrupted ..")
        finally:
            # Switch the LED off on every exit path, including errors
            leds.update(Leds.rgb_off())

    print("Done")

Loading model - hold on ..
Object #0: kind=PERSON(1), score=0.506103, bbox=(298, 188, 1319, 1036)
Object #0: kind=PERSON(1), score=0.506103, bbox=(299, 192, 1311, 1037)
Object #0: kind=PERSON(1), score=0.523177, bbox=(300, 191, 1317, 1037)
Object #0: kind=PERSON(1), score=0.578210, bbox=(182, 207, 1434, 1009)
Object #0: kind=PERSON(1), score=0.550366, bbox=(292, 198, 1321, 1029)
Object #0: kind=PERSON(1), score=0.558086, bbox=(286, 191, 1327, 1033)
Object #0: kind=PERSON(1), score=0.503418, bbox=(177, 207, 1429, 1008)
Object #0: kind=PERSON(1), score=0.537284, bbox=(190, 211, 1422, 1005)
Object #0: kind=PERSON(1), score=0.511961, bbox=(330, 194, 1291, 1037)
Object #0: kind=PERSON(1), score=0.573081, bbox=(184, 212, 1430, 1005)
Object #0: kind=PERSON(1), score=0.583500, bbox=(184, 210, 1438, 1007)
Object #0: kind=PERSON(1), score=0.652241, bbox=(206, 216, 1415, 1002)
Object #0: kind=PERSON(1), score=0.653459, bbox=(207, 223, 1414, 995)
Object #0: kind=PERSON(1), score=0.690037, bbox=(20