<a href="https://colab.research.google.com/github/noahdanieldsouza/PAM-classification/blob/main/run_classifier_on_embeddings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torchaudio numpy

In [None]:
!pip install git+https://github.com/google-research/perch-hoplite.git@782acd0e409eb27df51a695de4cb6608dae0db25

In [None]:
# Mount Google Drive at /content/drive; force_remount=True remounts it even if
# it is already mounted.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Recursively copy the DB folder from Drive into /content, so that it is
# available locally as /content/DB.
!cp -r  /content/drive/MyDrive/classifier_training/DB /content

In [None]:
import os

from matplotlib import pyplot as plt
import numpy as np

from perch_hoplite.agile import audio_loader
from perch_hoplite.agile import classifier
from perch_hoplite.agile import classifier_data
from perch_hoplite.agile import embedding_display
from perch_hoplite.agile import source_info
from perch_hoplite.db  import brutalism
from perch_hoplite.db import score_functions
from perch_hoplite.db  import search_results
from perch_hoplite.db import sqlite_usearch_impl
from perch_hoplite.zoo import model_configs

In [None]:
#@title Load model and connect to database. { vertical-output: true }
# The form title is kept as the very first line of the cell, which is where
# Colab form titles are conventionally placed.

# Mount Drive without force_remount: when Drive is already mounted this is a
# no-op instead of a slow unmount/remount. Pass force_remount=True manually if
# the Drive contents look stale.
from google.colab import drive
drive.mount('/content/drive')

#@markdown Location of database containing audio embeddings.
db_path = '/content/DB'  #@param {type:'string'}
#@markdown Identifier (eg, name) to attach to labels produced during validation.
annotator_id = 'linnaeus'  #@param {type:'string'}

# Open the embeddings database and read the configs stored in its metadata.
db = sqlite_usearch_impl.SQLiteUsearchDB.create(db_path)
db_model_config = db.get_metadata('model_config')
embed_config = db.get_metadata('audio_sources')

# Rebuild the embedding model and the audio source description from those
# stored configs.
model_class = model_configs.get_model_class(db_model_config.model_key)
embedding_model = model_class.from_config(db_model_config.model_config)
audio_sources = source_info.AudioSources.from_config_dict(embed_config)

# Use the model's own window size when it exposes one; otherwise fall back
# to 5 seconds.
window_size_s = getattr(embedding_model, 'window_size_s', 5.0)
audio_filepath_loader = audio_loader.make_filepath_loader(
    audio_sources=audio_sources,
    window_size_s=window_size_s,
    sample_rate_hz=embedding_model.sample_rate,
)

# Quick summary of what the database contains.
print("Embeddings in DB:", len(db.get_embedding_ids()))
print(db.get_classes())

In [None]:

# Report, for every dataset in the database, how many embeddings it holds.
for dataset in db.get_dataset_names():
  print(f"\nDataset '{dataset}':")
  # source_id=None: do not restrict the lookup to a single source.
  dataset_embedding_ids = db.get_embeddings_by_source(dataset, source_id=None)
  num_embeddings = dataset_embedding_ids.shape[0]
  print('\tnum embeddings: ', num_embeddings)

In [None]:
#@title Load model and connect to database. { vertical-output: true }
# The form title is kept as the very first line of the cell, which is where
# Colab form titles are conventionally placed.

# The database below lives on Drive, so Drive must be mounted. Mounting without
# force_remount is a no-op when Drive is already mounted; pass
# force_remount=True manually if the Drive contents look stale.
from google.colab import drive
drive.mount('/content/drive')

#@markdown Location of database containing audio embeddings.
db_path = '/content/drive/MyDrive/sept26/DB'  #@param {type:'string'}
#@markdown Identifier (eg, name) to attach to labels produced during validation.
annotator_id = 'linnaeus'  #@param {type:'string'}

# NOTE: this rebinds `db` and `db_model_config` to the Drive-hosted database.
# `embedding_model`, `audio_sources` and `audio_filepath_loader` are NOT rebuilt
# here, so they still come from whichever database was loaded previously.
db = sqlite_usearch_impl.SQLiteUsearchDB.create(db_path)
db_model_config = db.get_metadata('model_config')


In [None]:

import os
import torch
import torchaudio
import numpy as np
import csv
import gc
import tempfile

# Both the database and the classifier file used below live on Drive. Mounting
# without force_remount is a no-op when Drive is already mounted, avoiding a
# slow unmount/remount on every run; pass force_remount=True manually if the
# Drive contents look stale.
from google.colab import drive
drive.mount('/content/drive')

from perch_hoplite.agile import colab_utils, embed, source_info
from perch_hoplite.db import sqlite_usearch_impl
from perch_hoplite.zoo import model_configs
from perch_hoplite.agile.classifier import LinearClassifier

# --- Paths ---
# Database of precomputed embeddings to classify, and the saved classifier.
db_path = '/content/drive/MyDrive/sept26/DB'
classifier_path = '/content/drive/MyDrive/full_labeled_fish/DB/agile_classifier_v2.pt'


# --- Load DB + model ---
db = sqlite_usearch_impl.SQLiteUsearchDB.create(db_path)
db_model_config = db.get_metadata('model_config')
model_class = model_configs.get_model_class(db_model_config['model_key'])
# NOTE(review): the embedding model is instantiated here but is not used when
# scoring the already-stored embeddings in this cell — confirm whether a later
# cell needs it before dropping this (potentially expensive) load.
embedding_model = model_class.from_config(db_model_config['model_config'])
embedding_ids = db.get_embedding_ids()
print(f"✅ loaded {len(embedding_ids)} embeddings")

# Sanity check: fetch and show one stored embedding. Guarded so that an empty
# database produces a clear message instead of an IndexError. The id is kept in
# a descriptive name rather than `id`, which would shadow the Python builtin.
if len(embedding_ids) > 0:
    first_embedding_id = embedding_ids[0]
    vector = db.get_embedding(first_embedding_id)
    print(f"✅ vector {vector}")
else:
    print("⚠️ database contains no embeddings; nothing to classify")

# --- Load classifier ---
# NOTE: the name `classifier` is also used by the notebook's import cell for
# the perch_hoplite.agile.classifier module; from here on it refers to the
# loaded LinearClassifier instance instead.
classifier = LinearClassifier.load(classifier_path)
class_names = classifier.classes
print("✅ Loaded classifier with classes:", class_names)

# Minimum top-class probability a prediction needs in order to be kept.
CONFIDENCE_THRESHOLD = 0.6
# Emit one progress line per this many embeddings (instead of one per
# embedding, which floods the cell output). Set to 0 to disable.
PROGRESS_EVERY = 1000


def _softmax(logits):
    """Return softmax probabilities over all elements of `logits`.

    The maximum logit is subtracted before exponentiating. This leaves the
    result mathematically unchanged but prevents `np.exp` from overflowing to
    `inf` (and the division from yielding `nan`) when logits are large.
    """
    logits = np.asarray(logits)
    exp_shifted = np.exp(logits - np.max(logits))
    return exp_shifted / np.sum(exp_shifted)


def classify_embeddings(db, classifier, embedding_ids, class_names,
                        threshold=CONFIDENCE_THRESHOLD,
                        progress_every=PROGRESS_EVERY):
    """Score stored embeddings and keep the confident predictions.

    Args:
        db: database object providing `get_embedding(id)` and
            `get_embedding_source(id)`.
        classifier: callable mapping one embedding vector to class logits.
        embedding_ids: iterable (with `len`) of embedding ids to score.
        class_names: sequence of labels, indexed by the argmax of the logits.
        threshold: a prediction is kept only if its top probability is
            strictly greater than this value.
        progress_every: print a progress line every N embeddings; 0 disables.

    Returns:
        A list of `[source, predicted_label, confidence]` entries, one per
        embedding whose top probability exceeded `threshold`.
    """
    # NOTE(review): softmax treats the classes as mutually exclusive. If the
    # classifier was trained with independent per-class outputs, a per-class
    # sigmoid may be the intended confidence — confirm against training setup.
    kept = []
    total = len(embedding_ids)
    for position, emb_id in enumerate(embedding_ids, start=1):
        if progress_every and position % progress_every == 0:
            print(f"processed {position}/{total} embeddings")

        vector = db.get_embedding(emb_id)
        probs = _softmax(classifier(vector))
        pred_idx = np.argmax(probs)
        confidence = probs[pred_idx]

        if confidence > threshold:
            pred_label = class_names[pred_idx]
            # Look the source up once and reuse it for both the log line and
            # the stored record.
            source = db.get_embedding_source(emb_id)
            print(f"✅ Predicted class: {pred_label} with confidence {confidence:.4f}")
            print(source)
            kept.append([source, pred_label, confidence])
    return kept


predictions = classify_embeddings(db, classifier, embedding_ids, class_names)



In [None]:
# Order the kept predictions in place, most confident first. Each entry is
# [source, label, confidence], so index 2 is the confidence.
predictions.sort(reverse=True, key=lambda row: row[2])

# Print every prediction except the ones labelled "boat".
for source, label, score in predictions:
    if label == "boat":
        continue
    print(source.source_id, label, score)
