# Visual Wake Word Example



### Import Necessary Libraries

This cell imports the required libraries: Gradio for the interface, OpenCV for image processing, `cnn2snn` and the Akida library for selecting the Akida version and running the model, NumPy for data handling, `time` for measuring inference speed, and Plotly for visualization.

In [None]:
import gradio as gr
import cv2

from cnn2snn import set_akida_version, AkidaVersion
import akida

import numpy as np

import time

import plotly.graph_objects as go

### Gauge Creation Function

Defines a function using Plotly to create a gauge visualization for metrics such as frames per second during image classification.

In [None]:
def create_gauge(value, max_value=30):
    """Build a Plotly gauge figure that displays ``value``.

    Args:
        value: Number shown by the indicator (the caller in this file passes
            a frames-per-second figure).
        max_value: Upper end of the gauge axis. Defaults to 30, the range
            that used to be hard-coded, so existing callers are unaffected.

    Returns:
        A ``plotly.graph_objects.Figure`` of size 400x300 holding a single
        ``gauge+number`` indicator whose axis spans ``[0, max_value]``.
    """
    fig = go.Figure(go.Indicator(
        mode="gauge+number",
        value=value,
        gauge={'axis': {'range': [0, max_value]}},
        # The indicator fills the whole figure area.
        domain={'x': [0, 1], 'y': [0, 1]},
    ))
    fig.update_layout(width=400, height=300)
    return fig

### Softmax Function for Arrays

Implements a softmax function to convert model outputs into probability distributions.

In [None]:
# Softmax for an array of values
def softmaxArray(values):
    """Return the softmax of ``values`` as a flat 1-D array.

    The input is flattened first, so an array of any shape (for example
    ``(1, 1, 1, x)``) yields ``x`` probabilities that sum to 1.

    Args:
        values: Array-like of raw scores (logits).

    Returns:
        1-D ``numpy.ndarray`` of ``float64`` probabilities, or an empty
        array when ``values`` contains no elements.
    """
    # Work in float64: integer inputs would otherwise wrap around in the
    # subtraction below, and narrower floats overflow sooner inside exp().
    scores = np.asarray(values, dtype=np.float64).ravel()
    if scores.size == 0:
        return scores
    # Shifting by the maximum does not change the result mathematically but
    # keeps exp() from overflowing to inf, which would turn the output to NaN.
    exp_scores = np.exp(scores - scores.max())
    return exp_scores / exp_scores.sum()

### Image Configuration and Output Decoding

Sets up image parameters and label names, and includes a function to preprocess images and decode predictions into readable labels.

In [None]:
# Input geometry used by decodeOutput(): each frame is resized to
# image_x by image_y and reshaped with image_z as the last dimension.
image_x = 96
image_y = 96
image_z = 3
# Class names; decodeOutput() pairs labels[i] with the i-th prediction.
labels = ["no person", "person"]
def decodeOutput(inp):
    """Classify one image with ``akida_model`` and time the inference.

    Args:
        inp: Image array accepted by ``cv2.resize``. After resizing it is
            reshaped to ``(-1, image_x, image_y, image_z)``.

    Returns:
        Tuple ``(confidences, fps)``: ``confidences`` maps each entry of
        ``labels`` to its softmax probability as a plain ``float``; ``fps``
        is the reciprocal of the time spent in ``predict`` plus softmax, or
        0 when no time elapsed.

    Raises:
        IndexError: If the model yields more values than there are labels.
    """
    frame = cv2.resize(inp, (image_x, image_y))
    frame = frame.reshape((-1, image_x, image_y, image_z))

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short intervals; time.time() can be coarse or jump when the
    # system clock is adjusted.
    timer_start = time.perf_counter()
    predictions = softmaxArray(akida_model.predict(frame))
    frame_time = time.perf_counter() - timer_start
    fps = 1 / frame_time if frame_time > 0 else 0

    # Convert NumPy scalars to built-in floats so the mapping holds plain
    # Python values.
    confidences = {labels[i]: float(p) for i, p in enumerate(predictions)}

    return confidences, fps

### Image Classification Wrapper

Defines a function that classifies an image and returns the class confidences together with a gauge visualization of the processing speed.

In [None]:
def classify_image(inp):
    """Run ``decodeOutput`` on ``inp`` and pair its result with a speed gauge.

    Returns a tuple of the confidences produced by ``decodeOutput`` and the
    figure built by ``create_gauge`` from the frames-per-second value
    rounded to two decimals.
    """
    scores, frames_per_second = decodeOutput(inp)
    speed_gauge = create_gauge(round(frames_per_second, 2))
    return scores, speed_gauge

### Load Pre-trained Model

Uses `load_model` from `akida_models` to load a pre-trained quantized model for visual wake word detection, then prints the model summary.

In [None]:
# load_model reads a serialized model file from the given path.
from akida_models.model_io import load_model
# Module-level model object; decodeOutput() calls akida_model.predict() on it.
# NOTE(review): the file name refers to a "tenn_spatiotemporal_eye" model,
# while this notebook is titled Visual Wake Word and uses person / no person
# labels — confirm this is the intended checkpoint.
akida_model = load_model("models/tenn_spatiotemporal_eye_buffer_i8_w8_a8.fbz")
# Display a summary of the loaded model.
akida_model.summary()

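Map the `akida_model` onto the first available Akida device. If no device is found, or if mapping fails, the model runs on the CPU instead.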

In [None]:
# Try to map the model onto the first Akida device and fall back to the CPU
# when no device is present or mapping fails. `mappedDevice` records the
# outcome (the device version, or "CPU") for display in the interface.
with set_akida_version(AkidaVersion.v2):
    devices = akida.devices()
    if devices:
        print(f'Available devices: {[dev.desc for dev in devices]}')
        device = devices[0]
        print(device.version)
        try:
            akida_model.map(device)
        except Exception as e:
            # Deliberate best-effort fallback, but report the underlying
            # error so a failure that is not a compatibility problem is
            # not silently hidden.
            print("Model not compatible with FPGA. Running on CPU.")
            print(f"Mapping error: {e!r}")
            mappedDevice = "CPU"
        else:
            print(f"Mapping to Akida device {device.desc}.")
            mappedDevice = device.version
    else:
        print("No Akida devices found, running on CPU.")
        mappedDevice = "CPU"

### Gradio Interface Setup

Creates a Gradio interface that captures webcam frames, displays which device the model is mapped to, and streams each frame through the Akida model for classification. The interface shows live predictions and the frame processing speed.

In [None]:
# Base Gradio theme with small text, spacing and corner radius.
theme = gr.themes.Base(
    text_size="sm",
    spacing_size="sm",
    radius_size="sm",
)

# Page layout: a header, then one row with the webcam input on the left and
# the device name, predicted label and speed gauge on the right.
# NOTE(review): delete_cache is presumably (cleanup frequency, maximum file
# age) in seconds — confirm against the Gradio Blocks documentation.
with gr.Blocks(
    title="Brainchip",
    fill_width=True,
    fill_height=True,
    delete_cache=[180, 600],
    theme=theme
) as demo:
    gr.Markdown("""
        <h1 style="text-align: center;">Akida Cloud</h1>
        <br>
        """)
    with gr.Row():
        gr.Markdown("## Image Classification")
    with gr.Row():
        with gr.Column():
            # Webcam is the only source; frames reach the callback as NumPy arrays.
            input_img = gr.Image(sources=["webcam"], type="numpy")
        with gr.Column():
            # mappedDevice is set by the device-mapping cell (device version or "CPU").
            gr.Markdown(f"""Device: {mappedDevice}""")
            # NOTE(review): num_top_classes is 3 although `labels` has two entries.
            output_label = gr.Label(num_top_classes=3)
            plot = gr.Plot(label="Frames per second")
        # Each streamed frame is passed to classify_image; its two return values
        # fill the label and the plot. `dep` is not used again in this file.
        # NOTE(review): time_limit / stream_every appear to be in seconds — confirm.
        dep = input_img.stream(classify_image, [input_img], [output_label, plot],
                                time_limit=30, stream_every=0.1, concurrency_limit=30)        


# Launch the interface only when this file is run directly, not when imported.
if __name__ == "__main__":
    demo.launch()