In [14]:
import os
import sys

import torch

# The repo root must be on sys.path BEFORE any project import; otherwise
# `backend` is not importable on a fresh kernel started from /notebooks.
# Go up one directory from /notebooks into repo root
repo_root = os.path.abspath("..")
if repo_root not in sys.path:
    sys.path.append(repo_root)

from backend.vgg_model import VGGSmall  # noqa: E402 - needs repo_root on sys.path

# Prefer the GPU when one is available; everything below follows `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

MODEL_PATH = "../model/vgg_frog_model.pth"   # <<--- REQUIRED

# Restore the trained weights into a fresh network. `weights_only=True`
# restricts unpickling to tensors/plain containers, which is all a state_dict
# needs, so a tampered checkpoint cannot execute arbitrary code.
model = VGGSmall()
model.load_state_dict(
    torch.load(MODEL_PATH, map_location=device, weights_only=True)
)
model = model.to(device)
model.eval()  # inference mode

print("PYTHONPATH:", sys.path)

Using device: cpu
PYTHONPATH: ['c:\\Users\\prest\\.conda\\envs\\EchelonAcoustics312\\python312.zip', 'c:\\Users\\prest\\.conda\\envs\\EchelonAcoustics312\\DLLs', 'c:\\Users\\prest\\.conda\\envs\\EchelonAcoustics312\\Lib', 'c:\\Users\\prest\\.conda\\envs\\EchelonAcoustics312', '', 'c:\\Users\\prest\\.conda\\envs\\EchelonAcoustics312\\Lib\\site-packages', 'c:\\Users\\prest\\.conda\\envs\\EchelonAcoustics312\\Lib\\site-packages\\win32', 'c:\\Users\\prest\\.conda\\envs\\EchelonAcoustics312\\Lib\\site-packages\\win32\\lib', 'c:\\Users\\prest\\.conda\\envs\\EchelonAcoustics312\\Lib\\site-packages\\Pythonwin', 'c:\\Users\\prest\\OneDrive\\SAIT\\EchelonAcoustics']


In [15]:
import os
import torch
import librosa
import numpy as np
import json
import matplotlib.pyplot as plt
from backend.vgg_model import VGGSmall

# --- Paths (relative to the notebook's working directory) ---
# The model weights and the label mapping sit in the same directory.
MODEL_DIR = "../model"
MODEL_FILE = MODEL_DIR + "/vgg_frog_model.pth"
LABEL_MAP_FILE = MODEL_DIR + "/label_mapping.json"
TEST_DIR = "../data/frog_test_clips/"

# --- Audio / feature settings ---
# TARGET_SR: sample rate (Hz) clips are resampled to when loaded.
# N_MELS:    number of mel bands in the spectrogram.
TARGET_SR, N_MELS = 22050, 128

#### Load Model + Labels

In [16]:
# Build the network and restore the trained weights. The checkpoint is loaded
# onto `device` (defined in the setup cell) and the model is moved there too,
# because predict() sends its input tensor to `device`; leaving the model on
# the CPU would raise a device-mismatch error on a CUDA machine.
model = VGGSmall()
model.load_state_dict(
    torch.load(MODEL_FILE, map_location=device, weights_only=True)
)
model = model.to(device)
model.eval()  # inference mode

# label_map is indexed with the predicted class index as a string key
# (see predict(): label_map[str(index)]).
with open(LABEL_MAP_FILE, "r", encoding="utf-8") as f:
    label_map = json.load(f)

#### Run Inference on Test Clips

In [17]:
def predict(path):
    """Classify a single audio clip.

    The clip is loaded at ``TARGET_SR``, cut to its first 5 seconds, turned
    into an ``N_MELS``-band mel spectrogram in dB (relative to the clip's own
    maximum), and padded/cropped to 128 time frames before being fed to the
    global ``model`` as a ``(1, 1, N_MELS, 128)`` float tensor.

    Args:
        path: Path to an audio file readable by ``librosa.load``.

    Returns:
        tuple[str, float]: ``(label, confidence)`` where ``label`` is
        ``label_map[str(class_index)]`` and ``confidence`` is the softmax
        probability of that class.

    Raises:
        KeyError: if the predicted class index is missing from ``label_map``.
    """
    y, sr = librosa.load(path, sr=TARGET_SR)
    y = y[:sr * 5]  # trim to 5 seconds (sr is the rate we just resampled to)

    # Create mel spectrogram
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=N_MELS)
    S_dB = librosa.power_to_db(S, ref=np.max)

    # --- CRITICAL FIX: match training spectrogram size (128x128) ---
    FIXED_FRAMES = 128
    current_frames = S_dB.shape[1]

    if current_frames < FIXED_FRAMES:
        pad = FIXED_FRAMES - current_frames
        # NOTE(review): constant padding fills with 0.0, which on this dB scale
        # (ref=np.max) equals the spectrogram's peak value rather than silence.
        # Left unchanged here; confirm it matches how training clips were padded.
        S_dB = np.pad(S_dB, ((0, 0), (0, pad)), mode="constant")
    else:
        S_dB = S_dB[:, :FIXED_FRAMES]

    # Send the input to whichever device the model's weights actually live on,
    # so inference works no matter which cell last (re)loaded the model.
    model_device = next(model.parameters()).device

    # Add batch and channel axes: (n_mels, frames) -> (1, 1, n_mels, frames)
    batch = torch.tensor(S_dB).unsqueeze(0).unsqueeze(0).float().to(model_device)

    with torch.no_grad():
        logits = model(batch)
        probs = torch.softmax(logits, dim=1)
        conf, pred = torch.max(probs, dim=1)

    return label_map[str(int(pred.item()))], float(conf.item())

#### Test All Files

In [18]:
# Classify every clip in TEST_DIR and print "<file>: <label> (<confidence>)".
# sorted() makes the report order reproducible (os.listdir order is arbitrary);
# non-file entries such as sub-directories are skipped because predict()
# expects a path to an audio file.
for clip_name in sorted(os.listdir(TEST_DIR)):
    clip_path = os.path.join(TEST_DIR, clip_name)
    if not os.path.isfile(clip_path):
        continue
    species, conf = predict(clip_path)
    print(f"{clip_name}: {species} ({conf:.3f})")

bufo_boreas_western_toad.wav: boreal_toad (1.000)
bufo_cognatus_great_plains_toad.wav: no_frog (0.996)
bufo_hemiophrys_canadian_toad.wav: boreal_toad (0.555)
rana_pipiens_northern_leopard_frog.wav: chorus_frog (1.000)
rana_sylvatica_wood_frog.wav: boreal_toad (1.000)
