# Whistle Detection with Continuous Kernel Convolutional Networks

In [None]:
# NOTE(review): wildcard import; load_config and the *Config types used below
# are presumably provided by it, since nothing else in this cell imports them.
from config import *
import os
from whistlenet.core.utils import project_root
import warnings
# Suppress every Python warning for the remainder of the session.
warnings.filterwarnings("ignore")

# Load <project root>/config/whistle_config.yaml and split it into the
# per-component sections that the following cells consume.
config_path = os.path.join(project_root(), "config","whistle_config.yaml")
config: Config = load_config(config_path)
torch_config: TorchConfig = config.torch
dataset_config: DatasetConfig = config.dataset
trainer_config: TrainerConfig = config.trainer
baseline_config: BaselineConfig = config.baseline
whistlenet_config: WhistlenetConfig = config.whistlenet

from config.enums import Optimizer

# Print every member of Optimizer (imported from config.enums) for reference.
print(list(Optimizer))

In [2]:
import torch
import numpy as np
# Seed both the PyTorch and the NumPy global random number generators with
# the seed taken from the torch section of the configuration.
torch.manual_seed(torch_config.seed)
np.random.seed(torch_config.seed)

## Dataset Preparation

In [None]:
from whistlenet.data import WhistleDataset
from whistlenet.core.utils import plot, NUM_FREQS
from whistlenet.core.utils.audio import SampleType

# Build the dataset from its configuration section and call its summary helper.
dataset = WhistleDataset(dataset_config)
dataset.summarize()
# Reshape the training split to (number of samples, NUM_FREQS) and hand it to
# plot() together with the training labels.
# NOTE(review): SampleType is presumably used by plot() to name the label
# classes; confirm against whistlenet.core.utils.plot.
reshaped = dataset.train_data.reshape((dataset.train_data.data.shape[0], NUM_FREQS))
plot(reshaped, dataset.train_data.labels, SampleType)

In [None]:
from whistlenet.core.utils import Audio, project_root
# projroot is reused by the ONNX export cell to build the output path.
projroot = project_root()
# Load the "KronosTest_RC22" recording and its labels from the train_cut
# folders under <project root>/data/whistle; `audio` is reused by the
# inference cells further down.
audio = Audio(name="KronosTest_RC22", datapath=f'{projroot}/data/whistle/raw/train_cut', labelpath=f'{projroot}/data/whistle/labels/train_cut')
# NOTE(review): presumably renders a frequency plot of the recording; confirm
# against Audio.freq_plot.
audio.freq_plot()

## Model Training

In [None]:
# Baseline is imported but not used in this cell.
from whistlenet.models import WhistleNet, Baseline
from whistlenet.core import LightningTrainer

# Build a WhistleNet with one input and one output channel, then train it on
# the dataset prepared above using the trainer section of the configuration.
# `model` is reused by the inference and ONNX export cells.
model = WhistleNet(in_channels=1, out_channels=1, config=whistlenet_config)
trainer = LightningTrainer(trainer_config)
trainer.fit(model,dataset)

## Inference on Test Audio

In [None]:
import torch
import time
import lightning as L

# Labels of the test recording; the per-frame loop reads them as
# audiolabels[0, frame].
audiolabels = audio.get_labels()
# Optional: uncomment to run inference with weights restored from a saved
# checkpoint instead of the model trained in this session.
# checkpoint_path = os.path.join(trainer_config.ckpt_path, "epoch=7-step=14376.ckpt")
# model = WhistleNet.load_from_checkpoint(checkpoint_path, in_channels = 1, out_channels = 1, config=whistlenet_config)

# torch.inference_mode() only disables autograd tracking; it does not change
# the behaviour of layers such as dropout or batch normalisation. Put the
# model into evaluation mode explicitly so inference is deterministic and
# does not use training-time layer behaviour.
model.eval()

def normalize(data: torch.Tensor, epsilon: float = 1e-6) -> torch.Tensor:
    """Min-max scale ``data`` so its values span approximately [0, 1].

    The minimum and maximum are taken over the whole tensor (not per row or
    per channel). The input tensor is not modified.

    Args:
        data: Tensor to rescale.
        epsilon: Small constant added to the value range so that a constant
            tensor (max == min) is divided by ``epsilon`` rather than zero.

    Returns:
        A new tensor with the same shape as ``data``. The smallest element
        maps to 0 and the largest to just under 1; a constant tensor maps to
        all zeros.
    """
    # Named `lowest`/`highest` rather than `min`/`max` so the builtins are
    # not shadowed inside the function.
    lowest = data.min()
    highest = data.max()
    return (data - lowest) / (highest - lowest + epsilon)

def classify(window: torch.Tensor, threshold: float = 0.5) -> int:
    """Run the global ``model`` on one window and print/return the decision.

    Args:
        window: Input tensor passed to ``model`` unchanged. The model output
            must contain exactly one element, because it is read with
            ``.item()``.
        threshold: Confidence strictly above this value is classified as 1.
            Defaults to 0.5.

    Returns:
        1 if the model confidence exceeds ``threshold``, otherwise 0.
    """
    with torch.inference_mode():
        # perf_counter is monotonic and high resolution, so it is the right
        # clock for short intervals. The timer stops right after the forward
        # pass so the reported latency does not include console output.
        start = time.perf_counter()
        confidence = model(window).item()
        elapsed = time.perf_counter() - start
        print(f"Confidence: {confidence:.4f}")
        prediction = 1 if confidence > threshold else 0
        print(f"Prediction: {prediction}, elapsed: {elapsed:.4f}")
        return prediction

In [None]:
# Classify the test recording one frame at a time, printing the frame index,
# its timestamp and its label next to the model's output.
for i in range(audio.frames):
    # Take frame i from index 0 of the first axis of audio.S and reshape it to
    # (1, 1, NUM_FREQS) before converting it to a tensor.
    # NOTE(review): presumably (batch, channel, frequency) to match the
    # in_channels=1 model; confirm against WhistleNet's expected input.
    window = torch.from_numpy(audio.S[0,:,i].reshape(1,1,NUM_FREQS))
    # Each frame is min-max scaled on its own, independently of other frames.
    window = normalize(window)
    print(f"frame {i}, time {audio.frame2time(i):.2f}")
    print(f"label: {audiolabels[0,i]}")
    # classify() prints the confidence and prediction; its return value is
    # not used here.
    classify(window)
    print()

## Export to ONNX

In [8]:
import os

# Destination of the exported graph: <project root>/models/<model name>/whistle.onnx
file = f"{projroot}/models/{model.name}/whistle.onnx"
# The exporter writes straight to this path and fails if the parent directory
# is missing, so create it first (no-op when it already exists).
os.makedirs(os.path.dirname(file), exist_ok=True)

# Trace the model with the first element of its example input and write the
# ONNX graph, naming the graph input "input" and the graph output "output".
# NOTE(review): model.example_input[0] is presumably a single sample tensor;
# confirm against WhistleNet.example_input.
torch.onnx.export(
    model,
    model.example_input[0],
    f=file,
    input_names=["input"],
    output_names=["output"],
)