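# Sound event detection web app

This notebook wraps the `panns_inference` sound event detection model in a Gradio web app: upload an audio file and it plots the five most probable sound events over time.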

<img src="https://github.com/retkowsky/sound-event-detection/blob/main/SED.png?raw=true" width=800>

In [1]:
# %pip install panns_inference

In [2]:
# %pip install gradio

In [3]:
import datetime
import gradio as gr
import librosa
import matplotlib.pyplot as plt
import numpy as np
import os
import panns_inference
import plotly.graph_objects as go
import plotly.io as pio
import sys
import torch

from PIL import Image
from panns_inference import AudioTagging, SoundEventDetection, labels

In [4]:
# Display the Python interpreter version this notebook was run with
sys.version

'3.10.10 (main, Mar 21 2023, 18:45:11) [GCC 11.2.0]'

In [5]:
# Record the local date and time of this run (day-month-year hours:minutes:seconds)
print("Today:", datetime.datetime.today().strftime('%d-%b-%Y %H:%M:%S'))

Today: 26-Jul-2023 09:27:27


In [6]:
def get_device() -> str:
    """
    Get device type - either 'cuda' if GPU is available or 'cpu' otherwise.

    Prints a one-line status message ("GPU is available" or "No GPU") as a
    side effect.

    Returns:
        str: "cuda" when torch.cuda.is_available() is true, otherwise "cpu".
    """
    # torch is already imported in the notebook's import cell, so it is not
    # re-imported here.
    if torch.cuda.is_available():
        print("GPU is available")
        return "cuda"

    print("No GPU")
    return "cpu"

# Resolve the device once at module level; sed_function reads this global
device = get_device()

No GPU


## Gradio webapp

In [7]:
def sed_function(audio_file: str) -> Image.Image:
    """
    Plot sound events for the gradio webapp.

    The clip is loaded as 32 kHz mono audio, run through the
    SoundEventDetection model, and the five classes with the highest peak
    framewise score are drawn as filled line traces over time.

    Side effects:
        Writes "sed.jpg" and "sed.html" to the current working directory on
        every call, overwriting any previous files.

    Parameters:
        audio_file (str): Path to the audio file.

    Returns:
        PIL.Image.Image: Image with the sound events plot.

    Raises:
        gr.Error: If no audio file was provided.
    """
    # Gradio passes None when the user submits without a recording/upload;
    # fail with a readable message instead of an opaque loader error.
    if not audio_file:
        raise gr.Error("Please provide an audio file first.")

    # Load audio file
    (audio, _) = librosa.core.load(audio_file, sr=32000, mono=True)

    # Sound Event Detection (model is built once and reused across calls)
    framewise_output = _get_sed_model(device).inference(audio[None, :])[0]
    n_frames = len(framewise_output)

    # Rank classes by their peak score over all frames and keep the best ones
    top_k = 5
    classwise_output = np.max(framewise_output, axis=0)
    idxes = np.argsort(classwise_output)[::-1][:top_k]
    ix_to_lb = dict(enumerate(labels))

    # Create the plot
    fig = go.Figure()
    frame_axis = list(range(n_frames))  # shared x values for every trace

    for idx in idxes:
        fig.add_trace(
            go.Scatter(
                x=frame_axis,
                y=framewise_output[:, idx],
                mode="lines",
                fill="tozeroy",
                name=ix_to_lb[idx],
            )
        )

    # Choose a tick spacing (in frames) based on the length of the output
    if n_frames > 10000:
        space = 1000
    elif n_frames > 5000:
        space = 500
    else:
        space = 100

    # Tick labels convert frame indices to seconds.
    # NOTE(review): assumes the model emits 100 frames per second - confirm
    # against the panns_inference configuration.
    frames_per_second = 100
    tickvals = list(range(0, n_frames, space))
    ticktext = [frame / frames_per_second for frame in tickvals]

    # Update the plot layout
    fig.update_layout(
        title="Sound event detection",
        xaxis_title="Seconds",
        yaxis_title="Probability",
        showlegend=True,
        legend_title="Events",
        xaxis=dict(
            tickmode="array",
            tickvals=tickvals,
            ticktext=ticktext,
        ),
        yaxis=dict(range=[0, 1], rangemode="tozero"),
    )

    output_file = "sed.jpg"
    fig.write_image(output_file)
    img = Image.open(output_file)
    # Image.open is lazy: read the pixel data now so the returned image no
    # longer depends on sed.jpg, which the next call overwrites.
    img.load()

    htmlfile = "sed.html"
    fig.write_html(htmlfile)

    return img


# Loaded detectors keyed by device name, so the checkpoint is loaded once per
# kernel session instead of once per web request.
_SED_MODELS = {}


def _get_sed_model(device_name: str):
    """Return the SoundEventDetection model for device_name, creating it on first use."""
    # NOTE(review): assumes inference() does not mutate model state between
    # calls, so one instance can be shared - confirm against panns_inference.
    if device_name not in _SED_MODELS:
        _SED_MODELS[device_name] = SoundEventDetection(
            checkpoint_path=None,
            device=device_name,
            interpolate_mode="nearest",
        )
    return _SED_MODELS[device_name]

In [8]:
# Page header: project logo rendered as centered HTML in the description slot
logo = "https://github.com/retkowsky/sound-event-detection/blob/main/SED.png?raw=true"
logo_image = f"<center> <img src= {logo} width=600px></center>"
title = "Sound Event Detection"

# Sample clips offered as one-click examples (relative paths)
examples = ["audio/call.wav", "audio/city.wav", "audio/phone.wav"]

# The audio input hands sed_function a file path; the output shows its PIL image
inputs = gr.Audio(type="filepath", label="Your sound file")
outputs = gr.Image(type="pil", label="Sound Event Detection")

sed_webapp = gr.Interface(
    fn=sed_function,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=logo_image,
    examples=examples,
    theme="abidlabs/Lime",  # https://huggingface.co/spaces/gradio/theme-gallery
)

# share=True also requests a temporary public URL in addition to the local one
sed_webapp.launch(share=True)

Downloading (…)_schema%401.0.0.json:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://51798e0bfc95168300.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)






Checkpoint path: /home/azureuser/panns_data/Cnn14_DecisionLevelMax.pth
Using CPU.
