# RunModel\n\nThis notebook embeds all WAV files in a folder into a Hoplite DB, loads a previously trained AGILE linear classifier, and writes an inference CSV.\n

In [None]:
# @title Imports\nfrom etils import epath\nimport os\n\nfrom perch_hoplite.agile import audio_loader\nfrom perch_hoplite.agile import classifier\nfrom perch_hoplite.agile import colab_utils\nfrom perch_hoplite.agile import embed\nfrom perch_hoplite.agile import source_info\nfrom perch_hoplite.agile.classifier import LinearClassifier\nfrom perch_hoplite.zoo import model_configs\n

In [None]:
# @title Configuration { vertical-output: true }

# -----------------------------
# PATHS (Local vs Colab)
# -----------------------------

# Root folder that holds the audio, classifier, DB and results sub-folders.
# Set the AGILE_BASE_PATH environment variable to point the notebook at a
# different location without editing this cell. When the variable is unset or
# empty, the original local default below is used, so existing runs behave
# exactly as before.
_default_base_agile_path = '/Users/Rodrigo/Library/CloudStorage/GoogleDrive-royanedel@marfutura.org/Mi unidad/Agile'
base_agile_path = epath.Path(
    os.environ.get('AGILE_BASE_PATH') or _default_base_agile_path
)

# For running in Colab
# from google.colab import drive
# drive.mount('/content/drive')
# base_agile_path = epath.Path('/content/drive/Shareddrives/MAR FUTURA/Agile')

# -----------------------------
# USER SETTINGS
# -----------------------------

# Folder containing audio to classify.
input_audio_dir = str(base_agile_path / 'Data2')  # @param {type:'string'}
dataset_fileglob = '*.[wW][aA][vV]'  # @param {type:'string'}
dataset_name = 'RunDataset'  # @param {type:'string'}

# Where to store the embedding DB for this run. Use a new folder to keep runs isolated.
db_path = str(base_agile_path / 'RunDB')  # @param {type:'string'}

# Saved classifier created in CreateModel.ipynb (LinearClassifier.save).
classifier_path = str(base_agile_path / 'Data' / 'agile_classifier_v2.pt')  # @param {type:'string'}

# Output CSV path.
output_csv_filepath = str(base_agile_path / 'RunResults' / 'inference.csv')  # @param {type:'string'}

# Embedding model choice MUST match how you embedded when you trained the classifier.
model_choice = 'perch_8'  #@param['perch_v2','perch_8', 'humpback', 'multispecies_whale', 'surfperch', 'birdnet_V2.3']

# Optional sharding (keep consistent with training if possible).
use_file_sharding = True  # @param {type:'boolean'}
shard_length_in_seconds = 5  # @param {type:'number'}

# Inference threshold. Higher => fewer detections.
logit_threshold = 2  # @param
labels = None  # @param

# Create the output folder and the DB folder up front so later cells can
# write to them without further checks.
epath.Path(output_csv_filepath).parent.mkdir(parents=True, exist_ok=True)
epath.Path(db_path).mkdir(parents=True, exist_ok=True)

# Describes which files to embed and how to split them.
# NOTE(review): target_sample_rate_hz=-2 looks like a sentinel rather than a
# real rate (presumably "use the embedding model's sample rate") -- confirm
# against AudioSourceConfig before changing it.
audio_glob = source_info.AudioSourceConfig(
    dataset_name=dataset_name,
    base_path=input_audio_dir,
    file_glob=dataset_fileglob,
    min_audio_len_s=1.0,
    target_sample_rate_hz=-2,
    # Sharding is disabled by passing None.
    shard_len_s=float(shard_length_in_seconds) if use_file_sharding else None,
)

# Bundle the audio source, DB location and embedding model into one config
# object that the next cell consumes.
configs = colab_utils.load_configs(
    source_info.AudioSources((audio_glob,)),
    db_path,
    model_config_key=model_choice,
    db_key='sqlite_usearch',
)

# Correcting the model handle for surfperch
if model_choice == 'surfperch':
  configs.model_config.model_config.tfhub_path = 'google/surfperch/1'

# Echo the resolved paths so a misconfigured base path is visible immediately.
print('input_audio_dir:', input_audio_dir)
print('db_path:', db_path)
print('classifier_path:', classifier_path)
print('output_csv_filepath:', output_csv_filepath)

In [None]:
#@title Embed folder, load classifier, and run inference { vertical-output: true }

# 0) Fail fast: load the trained classifier BEFORE the expensive embedding
#    step, so a wrong or unreadable classifier_path is reported immediately
#    instead of after the whole folder has been embedded.
if not epath.Path(classifier_path).exists():
  raise FileNotFoundError(
      f'Classifier file not found: {classifier_path}. '
      'Check classifier_path in the Configuration cell.'
  )
linear_classifier = LinearClassifier.load(classifier_path)

# 1) Connect/create DB
db = configs.db_config.load_db()
print('Initialized DB located at', configs.db_config.db_config.db_path)

# 2) Embed all files in the folder
print(f'Embedding dataset: {audio_glob.dataset_name}')
worker = embed.EmbedWorker(
    audio_sources=configs.audio_sources_config,
    db=db,
    model_config=configs.model_config,
)
worker.process_all(target_dataset_name=audio_glob.dataset_name)
print('Embedding complete, total embeddings:', db.count_embeddings())

# 3) Load embedding model (needed for audio loader in some workflows; kept for parity)
# NOTE(review): the loader built here is discarded (assigned to `_`) and
# nothing below reads embedding_model, so this step only costs a model load.
# Consider removing it if no later cell needs it.
db_model_config = db.get_metadata('model_config')
embed_config = db.get_metadata('audio_sources')
model_class = model_configs.get_model_class(db_model_config.model_key)
embedding_model = model_class.from_config(db_model_config.model_config)
audio_sources = source_info.AudioSources.from_config_dict(embed_config)
# Fall back to 5.0 when the model object does not define window_size_s.
window_size_s = getattr(embedding_model, 'window_size_s', 5.0)
_ = audio_loader.make_filepath_loader(
    audio_sources=audio_sources,
    window_size_s=window_size_s,
    sample_rate_hz=embedding_model.sample_rate,
)

# 4) Run the already-loaded classifier over the DB and write the inference CSV
classifier.write_inference_csv(
    linear_classifier,
    db,
    output_csv_filepath,
    logit_threshold,
    labels=labels,
)
print('Done. Wrote:', output_csv_filepath)