Refs:

https://gpmainpa.wordpress.com/2021/06/17/o-sauim-de-maos-douradas-muda-sua-vocalizacao/

https://www.kaggle.com/models/google/bird-vocalization-classifier

https://research.google/blog/whistles-songs-boings-and-biotwangs-recognizing-whale-vocalizations-with-ai/

https://research.google/blog/in-search-of-a-generalizable-method-for-source-free-domain-adaptation/

https://github.com/google-research/perch/blob/main/embed_audio.ipynb

https://arxiv.org/abs/2312.07439

https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=c72bb6c83db08e17e2736bc1d2cf439fe1f71905

https://colab.research.google.com/github/climatechange-ai-tutorials/bioacoustic-monitoring/blob/main/Agile_Modeling_for_Bioacoustic_Monitoring.ipynb

https://arxiv.org/pdf/2505.03071v4

https://arxiv.org/pdf/2212.09058v1

https://www.frontiersin.org/journals/bird-science/articles/10.3389/fbirs.2024.1380636/full

https://arxiv.org/pdf/2508.04665v1

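Applications of machine learning to ecological modelling (2021) — local copy: file:///home/juan/Research/Articles/Ecological%20Informatics/2021%20-%20Applications%20of%20machine%20learning%20to%20ecological%20modelling.pdf (this path only resolves on the author's machine; TODO: replace with a DOI or public URL)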


In [None]:
import shutil
import os

# Delete any previously extracted 'embedding_vectors' tree (recursively);
# nothing happens when the path is absent.
stale_dir_present = os.path.exists('embedding_vectors')
if stale_dir_present:
    shutil.rmtree('embedding_vectors')
    print("Folder 'embedding_vectors' removed.")

In [None]:
import csv
import pandas as pd
import librosa
import gdown
import zipfile
from math import log, exp, comb
import scipy
from scipy import signal
from joblib import dump, load
from sklearn.svm import OneClassSVM
from scipy.special import expit  # sigmoid
from sklearn.model_selection import train_test_split
from sklearn.metrics import (make_scorer, f1_score, accuracy_score, confusion_matrix,
                             roc_auc_score, roc_curve, auc, precision_score,
                             recall_score, average_precision_score, precision_recall_curve)
from sklearn.utils import shuffle
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler # for PCA
from tqdm.notebook import trange, tqdm
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
import numpy as np

Auxiliary functions


In [None]:
def metrics_calculation(y_test, y_pred, decision_scores, label="Anomaly", f1_average="weighted"):
    """Plot the confusion matrix and report binary metrics for one evaluation set.

    Label convention: 1 = Sauim (positive class), 0 = the contrasting class
    named by ``label``.

    Side effects: draws and shows a heatmap, saves it as
    ``ConfusionMatrix_<label>.pdf`` in the working directory, and prints a
    one-line metric summary.

    Parameters
    ----------
    y_test : array-like of {0, 1}
        Ground-truth labels.
    y_pred : array-like of {0, 1}
        Hard predictions, same length as ``y_test``.
    decision_scores : array-like of float
        Continuous scores, larger = more likely class 1; used for the ROC curve.
    label : str, default "Anomaly"
        Display name of class 0; used for the tick labels and the file name.
    f1_average : str, default "weighted"
        Forwarded to ``f1_score(average=...)``. The default keeps the value this
        notebook has always printed. Pass ``"binary"`` to get the F1 of the
        Sauim class only.

    Returns
    -------
    fpr, tpr : ndarray
        ROC curve coordinates as returned by ``roc_curve``.
    roc_auc : float
        Area under that ROC curve.
    cm : ndarray of shape (2, 2)
        Confusion matrix, rows = actual, columns = predicted, order [0, 1].
    """
    # Pin the class order so the matrix is always 2x2 and lines up with the two
    # tick labels, even if one class is missing from y_test/y_pred.
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

    # Confusion-matrix heatmap
    fig, ax = plt.subplots(figsize=(4.2, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', annot_kws={"size": 18}, cbar=False,
                xticklabels=[label, 'Sauim'], yticklabels=[label, 'Sauim'], ax=ax)
    ax.tick_params(axis='both', labelsize=16)
    fig.savefig("ConfusionMatrix_" + label + ".pdf", format='pdf', dpi=300, bbox_inches='tight')
    plt.show()

    # Accuracy can be misleading when the two classes are imbalanced.
    accuracy = accuracy_score(y_test, y_pred)

    # NOTE: scikit-learn ignores pos_label unless average='binary', so with the
    # default 'weighted' this is the support-weighted mean F1 over both classes,
    # not the harmonic mean of the precision/recall reported below.
    f1 = f1_score(y_test, y_pred, pos_label=1, average=f1_average)

    # Precision and recall of the Sauim (positive) class
    precision = precision_score(y_test, y_pred, pos_label=1)
    recall = recall_score(y_test, y_pred, pos_label=1)

    # ROC curve over the continuous scores; class 1 (Sauim) is the positive class
    fpr, tpr, thresholds = roc_curve(y_test, decision_scores, pos_label=1)
    roc_auc = auc(fpr, tpr)

    print(f"Acc: {accuracy:.2f}, Prec: {precision:.2f}, Rec: {recall:.2f}, F1: {f1:.2f}, roc_auc: {roc_auc:.2f}")
    return fpr, tpr, roc_auc, cm

Automatically download the necessary files

In [None]:
!wget https://github.com/juancolonna/Sauim/raw/main/embedding_vectors.zip -O embedding_vectors.zip

# Open and extract files
with zipfile.ZipFile('embedding_vectors.zip', 'r') as zip_ref:
    zip_ref.extractall('.')

print(f'Files extracted')

filtered_files = True

# The filtered and unfiltered arrays differ only by a '_filtered' suffix in the
# file name, so one pattern covers both variants.
file_suffix = '_filtered' if filtered_files else ''

(sauim_vectors,
 anurans_vectors,
 background_vectors,
 birds_vectors,
 anthrophony_vectors,
 geophony_vectors) = (
    np.load(f'embedding_vectors/{group}_vectors{file_suffix}.npy')
    for group in ('sauim', 'anurans', 'background', 'birds', 'anthrophony', 'geophony')
)

In [None]:
# Framing constants: emulate a sliding window of 5 s length moved in 1 s steps
sr = 32000                    # samples per second
step = 1                      # step between consecutive windows, in seconds
window_len = int(sr * 5)      # window length in samples
hop_len = int(sr * step)      # step length in samples

One-class classification

In [None]:
# Every Sauim embedding belongs to the "normal" class of the one-class problem
X_train = np.array(sauim_vectors)

# Hold out 30% of the Sauim vectors; only the remaining 70% are used for fitting
X_train_normal, X_test_normal = train_test_split(X_train, test_size=0.3, random_state=42)

# Label convention: 1 = normal data (Sauim), 0 = everything else
y_train_normal = np.ones(X_train_normal.shape[0], dtype=int)
y_test_normal = np.ones(X_test_normal.shape[0], dtype=int)

# Background evaluation set: held-out Sauim vectors followed by the background vectors
X_test_background_combined = np.concatenate([X_test_normal, background_vectors])
y_test_background_combined = np.concatenate(
    [y_test_normal, np.zeros(background_vectors.shape[0], dtype=int)]
)

print(f"Shape of X_train_normal (used for fitting): {X_train_normal.shape}")
print(f"Shape of X_test_normal (part of test set): {X_test_normal.shape}")
print(f"Shape of X_test_combined (combined test set): {X_test_background_combined.shape}")

One-class classifier train

In [None]:
# --- 3. Instantiate, fit and persist the One-Class SVM ---
# Hyper-parameters:
#   nu     : upper bound on the fraction of training errors (outliers) and lower
#            bound on the fraction of support vectors; a common range is 0.01 to 0.1.
#   kernel : 'rbf'
#   gamma  : 'auto' means 1 / n_features
# nu values tried so far: with filter 0.022, 0.0345; without filter 0.07.
# These may need tuning with GridSearchCV/RandomizedSearchCV if performance isn't good.
if filtered_files:
    ocsvm_nu, ocsvm_path = 0.022, "ocsvm_filtered.joblib"
else:
    ocsvm_nu, ocsvm_path = 0.07, "ocsvm.joblib"

clf = OneClassSVM(kernel='rbf', nu=ocsvm_nu, gamma='auto')
clf.fit(X_train_normal)

# Save the fitted model
dump(clf, ocsvm_path)

## Score on background noise

The .predict() method returns -1 for outliers/anomalies and 1 for inliers/normal points.

The .decision_function() method returns the signed distance to the hyperplane.

Positive values indicate inliers, negative values indicate outliers.

In [None]:
# Continuous decision values and hard inlier/outlier calls for the background set
decision_scores = clf.decision_function(X_test_background_combined)
predictions = clf.predict(X_test_background_combined)

# Re-encode the OCSVM output to the label convention of y_test:
# inlier (+1) -> 1 (Sauim), outlier (-1) -> 0
y_pred = (predictions != -1).astype(int)

# --- 6. Evaluate the Model ---
fpr1, tpr1, roc_auc_background, cm1 = metrics_calculation(
    y_test_background_combined, y_pred, decision_scores, label="Background")

Permutation Importance (global)
Avalia a queda de desempenho quando embaralha cada feature. Fácil, rápido e robusto.

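O que medir? Use AUC-ROC/PR sobre `y_test_background_combined` usando `decision_function(X)` (maior = mais parecido com sauim, que é o rótulo 1). Se o rótulo positivo fosse a anomalia, o escore a usar seria `-decision_function(X)` (maior = mais anômalo).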

Prós: Simples, interpreta globalmente.

Contras: Requer rótulos (mesmo que sintéticos) e é global (não explica um ponto específico).

In [None]:
# Disabled experiment: permutation importance of each embedding dimension,
# measured as the mean drop in ROC AUC on the background test set after
# shuffling that single column (n_repeats shuffles per column).
# NOTE: the score below is the raw decision_function, so higher = more
# inlier-like (Sauim, label 1), which matches the 1 = Sauim labels passed to
# roc_auc_score.

# def anomaly_score(X):  # higher = more inlier-like (positive decision values are inliers)
#     return (clf.decision_function(X)).ravel()

# base_scores = anomaly_score(X_test_background_combined)
# base_auc = roc_auc_score(y_test_background_combined, base_scores)

# print(f"Baseline AUC: {base_auc:.2f}")

# rng = np.random.RandomState(42)
# n_repeats = 10
# importances = []

# for j in range(X_test_background_combined.shape[1]):
#     auc_drops = []
#     for _ in range(n_repeats):
#         Xp = X_test_background_combined.copy()
#         Xp[:, j] = shuffle(Xp[:, j], random_state=rng)
#         auc_p = roc_auc_score(y_test_background_combined, anomaly_score(Xp))
#         auc_drops.append(base_auc - auc_p)
#     importances.append(np.mean(auc_drops))

# importances = np.array(importances)
# rank = np.argsort(importances)[::-1]
# for k in rank[:10]:
#     print(f"feat {k}: ΔAUC = {importances[k]:.4f}")


## Anuran comparison

In [None]:
# Evaluation set: held-out Sauim vectors (label 1) followed by anuran vectors (label 0)
y_test_anuran_anomaly = np.zeros(anurans_vectors.shape[0], dtype=int)
X_test_anuran_combined = np.concatenate([X_test_normal, anurans_vectors])
y_test_anuran_combined = np.concatenate([y_test_normal, y_test_anuran_anomaly]).astype(int)

print(f"Shape of X_test_anuran_combined (combined test set): {X_test_anuran_combined.shape}")

decision_scores = clf.decision_function(X_test_anuran_combined)
predictions = clf.predict(X_test_anuran_combined)

# OCSVM outlier (-1) -> 0, inlier -> 1 (Sauim)
y_pred = (predictions != -1).astype(int)

fpr2, tpr2, roc_auc_anuran, cm2 = metrics_calculation(
    y_test_anuran_combined, y_pred, decision_scores, label="Anurans")

## Bird comparison

In [None]:
# Evaluation set: held-out Sauim vectors (label 1) followed by bird vectors (label 0)
y_test_birds_anomaly = np.zeros(birds_vectors.shape[0], dtype=int)
X_test_birds_combined = np.concatenate([X_test_normal, birds_vectors])
y_test_birds_combined = np.concatenate([y_test_normal, y_test_birds_anomaly]).astype(int)

print(f"Shape of X_test_birds_combined (combined test set): {X_test_birds_combined.shape}")

decision_scores = clf.decision_function(X_test_birds_combined)
predictions = clf.predict(X_test_birds_combined)

# OCSVM outlier (-1) -> 0, inlier -> 1 (Sauim)
y_pred = (predictions != -1).astype(int)

fpr3, tpr3, roc_auc_birds, cm3 = metrics_calculation(
    y_test_birds_combined, y_pred, decision_scores, label="Birds")

### Anthrophony

In [None]:
# Evaluation set: held-out Sauim vectors (label 1) followed by anthrophony vectors (label 0)
y_test_anthrophony_anomaly = np.zeros(anthrophony_vectors.shape[0], dtype=int)
X_test_anthrophony_combined = np.concatenate([X_test_normal, anthrophony_vectors])
y_test_anthrophony_combined = np.concatenate([y_test_normal, y_test_anthrophony_anomaly]).astype(int)

print(f"Shape of X_test_anthrophony_combined (combined test set): {X_test_anthrophony_combined.shape}")

decision_scores = clf.decision_function(X_test_anthrophony_combined)
predictions = clf.predict(X_test_anthrophony_combined)

# OCSVM outlier (-1) -> 0, inlier -> 1 (Sauim)
y_pred = (predictions != -1).astype(int)

fpr4, tpr4, roc_auc_anthrophony, cm4 = metrics_calculation(
    y_test_anthrophony_combined, y_pred, decision_scores, label="Anthrophony")

### Geophony

In [None]:
# Evaluation set: held-out Sauim vectors (label 1) followed by geophony vectors (label 0)
y_test_geophony_anomaly = np.zeros(geophony_vectors.shape[0], dtype=int)
X_test_geophony_combined = np.concatenate([X_test_normal, geophony_vectors])
y_test_geophony_combined = np.concatenate([y_test_normal, y_test_geophony_anomaly]).astype(int)

print(f"Shape of X_test_geophony_combined (combined test set): {X_test_geophony_combined.shape}")

decision_scores = clf.decision_function(X_test_geophony_combined)
predictions = clf.predict(X_test_geophony_combined)

# OCSVM outlier (-1) -> 0, inlier -> 1 (Sauim)
y_pred = (predictions != -1).astype(int)

fpr5, tpr5, roc_auc_geophony, cm5 = metrics_calculation(
    y_test_geophony_combined, y_pred, decision_scores, label="Geophony")

## Comparison across all datasets

In [None]:
# Disabled: earlier pyplot-only version of the ROC comparison (no zoom inset),
# which saved to "ROC_curves.pdf". The Geophony line now references fpr5/tpr5;
# it previously re-plotted the Anthrophony curve (fpr4/tpr4) under the Geophony label.
# plt.figure(figsize=(5.4, 5))
# plt.plot(fpr1, tpr1, lw=1.5, label='Background (AUC = %0.2f)' % roc_auc_background)
# plt.plot(fpr2, tpr2, lw=1.5, label='Anurans (AUC = %0.2f)' % roc_auc_anuran)
# plt.plot(fpr3, tpr3, lw=1.5, label='Birds (AUC = %0.2f)' % roc_auc_birds)
# plt.plot(fpr4, tpr4, lw=1.5, label='Anthrophony (AUC = %0.2f)' % roc_auc_anthrophony)
# plt.plot(fpr5, tpr5, lw=1.5, label='Geophony (AUC = %0.2f)' % roc_auc_geophony)
# plt.plot([0, 1], [0, 1], color='navy', lw=1.5, linestyle='--')
# plt.xlim([-0.01, 1.0])
# plt.ylim([0.0, 1.01])
# plt.legend(loc="lower right", fontsize=10)
# plt.ylabel('True Positive Rate', fontsize=12)
# plt.xlabel('False Positive Rate', fontsize=12)
# plt.xticks(fontsize=12)
# plt.yticks(fontsize=12)  # rotation=0 keeps them horizontal
# plt.grid(True, linestyle='--', alpha=0.7)
# plt.savefig("ROC_curves.pdf", format='pdf', dpi=300, bbox_inches='tight')
# plt.show()

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset

# ROC comparison of the one-class model against every contrasting sound class,
# with a zoomed inset on the top-left corner where the curves overlap.
fig, ax = plt.subplots(figsize=(5.4, 5))

# --- main curves ---
roc_results = [
    ('Background', fpr1, tpr1, roc_auc_background),
    ('Anurans', fpr2, tpr2, roc_auc_anuran),
    ('Birds', fpr3, tpr3, roc_auc_birds),
    ('Anthrophony', fpr4, tpr4, roc_auc_anthrophony),
    ('Geophony', fpr5, tpr5, roc_auc_geophony),
]
for curve_name, curve_fpr, curve_tpr, curve_auc in roc_results:
    ax.plot(curve_fpr, curve_tpr, lw=1.5, label=f'{curve_name} (AUC = {curve_auc:0.2f})')
ax.plot([0, 1], [0, 1], color='navy', lw=1.5, linestyle='--')  # chance diagonal

ax.set_xlim(-0.01, 1.0)
ax.set_ylim(0.0, 1.01)
ax.legend(loc="lower right", fontsize=10)
ax.set_ylabel('True Positive Rate', fontsize=12)
ax.set_xlabel('False Positive Rate', fontsize=12)
ax.tick_params(labelsize=12)
ax.grid(True, linestyle='--', alpha=0.7)
ax.set_axisbelow(True)  # keep the grid behind the curves

# --- inset (zoom) ---
# Inset placed at the top centre of the main axes
axins = zoomed_inset_axes(ax, zoom=3.5, loc='upper center', borderpad=1.0)
axins.set_facecolor('none')  # transparent, so the inset does not wash out the main plot
# NOTE(review): this call targets the MAIN axes and lowers its grid alpha from
# 0.7 to 0.4; kept as in the original figure -- confirm it is intentional.
ax.grid(alpha=0.4)
# Re-plot the SAME curves, in the same order, so the colours match the main axes
for _, curve_fpr, curve_tpr, _ in roc_results:
    axins.plot(curve_fpr, curve_tpr, lw=1.2)
axins.plot([0, 1], [0, 1], lw=1.0, linestyle='--')

# Region to zoom into: the upper-LEFT corner of ROC space (FPR ~ 0, TPR ~ 1);
# adjust to the data if needed
x1, x2 = -0.01, 0.07
y1, y2 = 0.93, 1.01
axins.set_xlim(x1, x2)
axins.set_ylim(y1, y2)

# Inset appearance: no grid and no tick labels
axins.grid(False)
axins.tick_params(labelsize=9)
axins.set_xticklabels([])
axins.set_yticklabels([])

# Connect the zoomed rectangle to the inset through their left-hand corners
# (matplotlib corner codes: 2 = upper left, 3 = lower left)
mark_inset(ax, axins, loc1=2, loc2=3, fc="none", ec="0.5", lw=1.5, linestyle='--')

# Name the output after the data variant actually loaded, like the saved models,
# so an unfiltered run does not overwrite the filtered figure.
roc_suffix = "_filtered" if filtered_files else ""
plt.savefig(f"ROC_curves{roc_suffix}.pdf", format='pdf', dpi=300, bbox_inches='tight')
plt.show()


## PCA

In [None]:
# 2-D PCA view of every embedding group (Sauim plus the five contrasting classes).
# Each entry: (legend label, embedding matrix, scatter style). The order fixes
# both the row layout of `data_points` and the drawing order of the layers.
pca_groups = [
    ('Sauim calls', X_train,
     dict(alpha=0.8, s=50, color='blue', edgecolor='w')),
    ('Background forest', background_vectors,
     dict(alpha=0.6, s=100, color='red', marker='X', edgecolor='black')),
    ('Anurans', anurans_vectors,
     dict(alpha=0.6, s=100, color='green', marker='*', edgecolor='black')),
    ('Birds', birds_vectors,
     dict(alpha=0.6, s=60, color='black', marker='o', edgecolor='black')),
    ('Anthrophony', anthrophony_vectors,
     dict(alpha=0.6, s=60, color='orange', marker='.', edgecolor='black')),
    ('Geophony', geophony_vectors,
     dict(alpha=0.6, s=60, color='pink', marker='^', edgecolor='black')),
]
data_points = np.concatenate([group_vectors for _, group_vectors, _ in pca_groups])

# Standardise every feature before PCA
scaler = StandardScaler()
data_points_scaled = scaler.fit_transform(data_points)

# Reduce to 2 components for 2-D plotting. random_state pins the result when
# scikit-learn's 'auto' solver selects the randomized SVD.
pca = PCA(n_components=2, random_state=42)
X_train_pca = pca.fit_transform(data_points_scaled)  # rows follow the order of pca_groups

plt.figure(figsize=(10, 8), dpi=100)
sns.set_style("whitegrid")

# One scatter layer per group, slicing the stacked projection by running row offset
row_start = 0
for group_label, group_vectors, scatter_style in pca_groups:
    row_stop = row_start + group_vectors.shape[0]
    plt.scatter(X_train_pca[row_start:row_stop, 0], X_train_pca[row_start:row_stop, 1],
                label=group_label, **scatter_style)
    row_start = row_stop

plt.xlabel(f'PC 1 (explained variance ratio {pca.explained_variance_ratio_[0]*100:.2f}%)', fontsize=18)
plt.ylabel(f'PC 2 (explained variance ratio {pca.explained_variance_ratio_[1]*100:.2f}%)', fontsize=18)
plt.legend(loc='lower right', fontsize=14)
plt.grid(True, linestyle='--', alpha=0.7)
plt.axhline(0, color='gray', linewidth=0.8)
plt.axvline(0, color='gray', linewidth=0.8)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)

# Name the output after the data variant actually loaded, like the saved models,
# so an unfiltered run does not overwrite the filtered figure.
pca_suffix = "_filtered" if filtered_files else ""
plt.savefig(f"PCA{pca_suffix}.pdf", format='pdf', dpi=300, bbox_inches='tight')
plt.show()

# --- Additional PCA information ---
print("Variância explicada por cada componente principal:")
print(pca.explained_variance_ratio_)
print(f"Variância acumulada explicada pelas 2 primeiras componentes: {pca.explained_variance_ratio_.sum()*100:.2f}%")

*   inside normal region (f(x) > 0)
*   on boundary (f(x) == 0)
*   outside/anomaly (f(x) < 0)

In [None]:
from matplotlib.lines import Line2D

# Decision value of every Sauim embedding (training and held-out rows alike):
# f(x) > 0 inside the learned region, f(x) == 0 on the boundary, f(x) < 0 outside.
Z = clf.decision_function(X_train)

# Project through the same pipeline the PCA was fitted with (standardise, then
# rotate). The PCA was fitted on StandardScaler output, so feeding it raw
# vectors would place the points in a different coordinate system than the
# PCA figure.
X_pca = pca.transform(scaler.transform(X_train))

# Labels: 1 = inside, 0 = boundary, -1 = outside
labels = np.where(Z > 0, 1, np.where(Z < 0, -1, 0))

# Three colours indexed by label + 1: [-1, 0, 1] -> [red, black, green]
cmap = ListedColormap(['red', 'black', 'green'])
color_index = labels + 1  # -1 -> 0, 0 -> 1, 1 -> 2

outlier_ids = np.flatnonzero(labels == -1)
print(f"Outliers: {outlier_ids.size}")

plt.figure(figsize=(10, 8), dpi=100)
# vmin/vmax pin the index -> colour mapping. Without them matplotlib rescales to
# the values actually present, so a run with no outliers would draw every
# inlier in red.
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=color_index, cmap=cmap, vmin=0, vmax=2,
            s=60, edgecolor='k')

# Annotate each outlier (red point) with its row index in X_train
for i in outlier_ids:
    print(i)
    plt.text(X_pca[i, 0] + 0.01, X_pca[i, 1] + 0.01, str(i), fontsize=12, color='darkred')

plt.xlabel("PCA 1", fontsize=18)
plt.ylabel("PCA 2", fontsize=18)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)

# ---- Legend instead of a title ----
legend_elements = [
    Line2D([0], [0], marker='o', color='w', label='Outlier',
           markerfacecolor='red', markeredgecolor='k', markersize=8),
    Line2D([0], [0], marker='o', color='w', label='Normal',
           markerfacecolor='green', markeredgecolor='k', markersize=8),
]
plt.legend(handles=legend_elements, loc='best', frameon=True, fontsize=14)

# Name the output after the data variant actually loaded, like the saved models,
# so an unfiltered run does not overwrite the filtered figure.
detections_suffix = "_filtered" if filtered_files else ""
plt.savefig(f"PCA_detections{detections_suffix}.pdf", format='pdf', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Disabled experiment: sweep nu from 0.0100 to 0.0509 in steps of 0.0001 and
# report how many Sauim vectors fall outside the learned region for each value.
# NOTE: if re-enabled, the loop rebinds the global `clf`, so the model fitted
# in the training cell is replaced by the last one of the sweep.
# for i in [i/10000 for i in range(100, 510)]:
#     clf = OneClassSVM(kernel='rbf', nu=i, gamma='auto') # We might need to tune this with GridSearchCV/RandomizedSearchCV if performance isn't good.
#     clf.fit(X_train_normal)

#     Z = clf.decision_function(X_train)
#     X_pca = pca.transform(X_train)

#     # Labels: 1 = inside, 0 = boundary, -1 = outside
#     labels = np.where(Z > 0, 1, np.where(Z < 0, -1, 0))

#     # Define 3 colors: inside, boundary, outside
#     # For example: green (inside), black (boundary), red (outside)
#     cmap = ListedColormap(['red', 'black', 'green'])  # order matches [-1, 0, 1]

#     # Map labels to index in the colormap
#     color_index = labels + 1  # so: -1 → 0, 0 → 1, 1 → 2
#     print(f"i:{i}\t Outliers: {np.sum(labels == -1)}")