In [None]:
from google.colab import drive
import zipfile
import os

# Make Google Drive visible inside the Colab runtime.
drive.mount('/content/drive')

# Where the archive lives on Drive, and where it is unpacked locally.
zip_path = "/content/drive/MyDrive/whale.zip"  # <-- update if needed
extract_path = "/content/whale_dataset"

# Unpack every member of the archive into extract_path.
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

# Show only the first level of the extracted tree: os.walk yields the root
# entry first, so taking a single item avoids descending any further.
top_level = next(os.walk(extract_path), None)
if top_level is not None:
    root, dirs, files = top_level
    print(f"📁 {root}")
    for d in dirs:
        print("  ├─", d)


Mounted at /content/drive
📁 /content/whale_dataset
  ├─ test
  ├─ train
  ├─ val


In [None]:
# Install the PANNs inference library
!pip install panns-inference librosa torchaudio --quiet

# Load AudioTagging model from panns-inference
import torch
from panns_inference import AudioTagging

at = AudioTagging(checkpoint_path=None, device='cuda' if torch.cuda.is_available() else 'cpu')


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m47.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m30.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m34.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import librosa
import numpy as np

def extract_embedding(file_path, at_model, sr=32000, min_duration=1.0):
    """Return the embedding that ``at_model`` produces for one audio file.

    The file is loaded with librosa as mono audio resampled to ``sr``. Clips
    shorter than ``min_duration`` seconds are right-padded with zeros up to
    that duration before inference.

    Args:
        file_path: Path of the audio file to load.
        at_model: Object exposing ``inference(batch)`` that returns a pair
            whose second element is the embedding.
        sr: Sample rate the audio is loaded at.
        min_duration: Minimum clip length in seconds fed to the model.

    Returns:
        The embedding with all length-1 axes removed (``.squeeze()``).
    """
    audio, _loaded_rate = librosa.load(file_path, sr=sr, mono=True)

    # Number of samples still missing to reach the minimum duration.
    shortfall = int(sr * min_duration) - len(audio)
    if shortfall > 0:
        # Append trailing silence so the clip reaches the minimum length.
        audio = np.pad(audio, (0, shortfall), mode='constant')

    # The model expects a leading batch axis.
    batch = audio[None, :]
    _first_output, features = at_model.inference(batch)
    return features.squeeze()



In [None]:
import os
import glob

def process_dataset(folder_path, at_model, class_to_idx=None):
    """Embed every ``*.wav`` file under ``folder_path``, labelled by class folder.

    ``folder_path`` is expected to contain one sub-directory per class. Only
    directories count as classes: plain files and hidden directories (names
    starting with ``.``, e.g. ``.ipynb_checkpoints``) are ignored so they
    cannot shift the label indices.

    Args:
        folder_path: Directory holding one sub-directory per class.
        at_model: Model object forwarded unchanged to ``extract_embedding``.
        class_to_idx: Optional existing ``{class_name: index}`` mapping to
            reuse (for example the mapping built from the training split), so
            that all splits share identical label indices. When omitted, the
            mapping is built from the sorted class directory names.

    Returns:
        A tuple ``(X, y, class_to_idx)`` where ``X`` is the array of
        embeddings, ``y`` the array of integer labels in the same order, and
        ``class_to_idx`` the mapping that was used.

    Raises:
        ValueError: If ``class_to_idx`` is given and ``folder_path`` contains
            a class directory that is missing from it.
    """
    class_dirs = sorted(
        entry
        for entry in os.listdir(folder_path)
        if not entry.startswith(".")
        and os.path.isdir(os.path.join(folder_path, entry))
    )

    if class_to_idx is None:
        class_to_idx = {cls: idx for idx, cls in enumerate(class_dirs)}
    else:
        unknown = [cls for cls in class_dirs if cls not in class_to_idx]
        if unknown:
            raise ValueError(
                f"Class folders {unknown} in {folder_path!r} are missing from class_to_idx"
            )

    X, y = [], []
    for cls in class_dirs:
        class_folder = os.path.join(folder_path, cls)
        # glob returns files in filesystem order; sort so that the sample
        # order (and anything trained on it) is reproducible across runs.
        for wav_file in sorted(glob.glob(os.path.join(class_folder, "*.wav"))):
            X.append(extract_embedding(wav_file, at_model))
            y.append(class_to_idx[cls])

    return np.array(X), np.array(y), class_to_idx


In [None]:
train_dir = "/content/whale_dataset/train"
val_dir = "/content/whale_dataset/val"
test_dir = "/content/whale_dataset/test"

X_train, y_train, class_map = process_dataset(train_dir, at)
X_val, y_val, val_class_map = process_dataset(val_dir, at)
X_test, y_test, test_class_map = process_dataset(test_dir, at)

# Every split derives its label indices from its own folder listing. If the
# listings differ (missing class, stray folder), the integer labels of that
# split no longer mean the same thing as the training labels, and every metric
# computed later would be silently wrong. Fail loudly instead.
for split_name, split_map in (("val", val_class_map), ("test", test_class_map)):
    if split_map != class_map:
        raise ValueError(
            f"Class mapping of the {split_name} split {split_map} "
            f"differs from the training mapping {class_map}"
        )

print("✅ Embeddings extracted")
print("Train:", X_train.shape)
print("Val:", X_val.shape)
print("Test:", X_test.shape)


✅ Embeddings extracted
Train: (154, 2048)
Val: (34, 2048)
Test: (35, 2048)


In [None]:
!pip install xgboost --quiet

from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report

xgb = XGBClassifier(
    n_estimators=200,
    max_depth=5,
    learning_rate=0.1,
    subsample=0.9,
    colsample_bytree=0.8,
    use_label_encoder=False,
    eval_metric='mlogloss',
    random_state=42
)

xgb.fit(X_train, y_train)

# Validation
y_val_pred = xgb.predict(X_val)
print("Validation Accuracy:", accuracy_score(y_val, y_val_pred))


Parameters: { "use_label_encoder" } are not used.



Validation Accuracy: 0.9117647058823529


In [None]:
# Test
y_test_pred = xgb.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_test_pred))

# Class names ordered by their label index, plus the matching index list.
target_names = sorted(class_map, key=class_map.get)
label_ids = [class_map[name] for name in target_names]

# Passing `labels` explicitly keeps the report aligned with `target_names`
# even when some class never occurs in y_test / y_test_pred; without it,
# classification_report raises on the resulting length mismatch.
print(classification_report(y_test, y_test_pred, labels=label_ids, target_names=target_names))



Test Accuracy: 0.8857142857142857
                precision    recall  f1-score   support

common-dolphin       1.00      0.86      0.92         7
humpback-whale       0.70      1.00      0.82         7
  killer-whale       1.00      0.50      0.67         6
   pilot-whale       0.88      1.00      0.93         7
   sperm-whale       1.00      1.00      1.00         8

      accuracy                           0.89        35
     macro avg       0.92      0.87      0.87        35
  weighted avg       0.91      0.89      0.88        35



In [None]:
import joblib
# Serialize the trained classifier and the class-name -> index mapping to the
# current working directory so they can be reloaded together later.
model_artifact = "whale_xgb_model.pkl"
class_map_artifact = "class_map.pkl"

joblib.dump(xgb, model_artifact)
joblib.dump(class_map, class_map_artifact)

['class_map.pkl']

In [None]:
from google.colab import files
# Hand both saved artifacts to the browser, model first, then the class map.
for artifact in ("whale_xgb_model.pkl", "class_map.pkl"):
    files.download(artifact)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>