# Check what the example code does in a megadetector env
https://pypi.org/project/megadetector/

In [1]:
# Hush ye annoyances
import warnings
warnings.filterwarnings('ignore')
print("=== Warnings Ignored ===")



In [2]:
# Single dependency library
!pip install --quiet megadetector > /dev/null 2>&1 # Put thy annoyances elsewhere
print("=== MegaDetector Installed ===")

=== MegaDetector Installed ===


In [6]:
# Data Handling
import pandas as pd
import numpy as np

# IO - getting files and images from MongoDB and S3
from pymongo import MongoClient
from kaggle_secrets import UserSecretsClient
from pathlib import Path

import os
import re
import shutil
import json
import time

# Megadetector
from megadetector.utils import url_utils, path_utils
from megadetector.visualization import visualization_utils as vis_utils
from megadetector.detection.run_detector_batch import load_and_run_detector_batch, write_results_to_file

# Pretty Views
from IPython.display import display
from IPython.display import JSON

print("=== Libraries Loaded ===")

=== Libraries Loaded ===


In [23]:
# Get the stored mongo uri secret
user_secrets = UserSecretsClient()
mongo_uri = user_secrets.get_secret("MONGO_URI")

# Connect to the MongoDB client
client = MongoClient(mongo_uri)
 
# Access the database and collection
db = client['test']
collection = db['cameratrapmedias'] 
 
# Query the collection to retrieve records with image URLs, metadata, and the first index of 'relativePath'
data = list(collection.aggregate([
    {
        '$project': {
            '_id': 0,
            'mediaID': 1,
            'publicURL': 1
        }
    },
    # { '$limit': 150 }
]))
 
# Convert the data to a pandas DataFrame for exploration
df = pd.DataFrame(data)

# Export the small array to a CSV file for preview
df.to_csv('ur_test_medias.csv', index=False)

# Preview df
display(df.info(), df.describe().loc['unique'])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   mediaID    150 non-null    object
 1   publicURL  150 non-null    object
dtypes: object(2)
memory usage: 2.5+ KB


None

mediaID      150
publicURL    150
Name: unique, dtype: object

In [24]:
# planning to save all to working directory images
image_folder = Path('images')
image_folder.mkdir(parents=True, exist_ok=True)

# step 1: Create the mapping {url: target_file}
url_to_target_file = {
    row['publicURL']: str(image_folder / f"{row['mediaID']}.jpg")
    for _, row in df.iterrows()
}

# Step 2: Call the downloader
results = url_utils.parallel_download_urls(
    url_to_target_file,
    verbose=False,
    overwrite=False,
    n_workers=20,
    pool_type='thread'
)

# Step 3 (optional): inspect results
results_df = pd.DataFrame(results)
results_df['status'].value_counts()

Pool closed and joined for parallel URL downloads


status
success    149
skipped      1
Name: count, dtype: int64

In [26]:
# Try running from command line
os.environ["WANDB_MODE"] = "disabled"
!python -m megadetector.detection.run_detector_batch \
    MDV5A \
    "/kaggle/working/images" \
    "/kaggle/working/test_output.json" \
    --output_relative_filenames \
    --threshold 0.1 \
    --checkpoint_frequency 10000 \
    --checkpoint_path "/kaggle/working/md_checkpoint.json" \
    --quiet \
    --previous_results_file "/kaggle/working/test_output.json"
    # --resume_from_checkpoint /kaggle/working/md_checkpoint.json  # Uncomment if resuming

Bypassing download of already-downloaded file md_v5a.0.0.pt
Model v5a.0.0 available at /tmp/megadetector_models/md_v5a.0.0.pt
150 image files found in the input directory
Loaded previous results for 1 images from /kaggle/working/test_output.json
Based on previous results file, processing 149 of 150 images
The checkpoint file will be written to /kaggle/working/md_checkpoint.json
PyTorch reports 0 available CUDA devices
GPU available: False
2025-06-23 01:10:33.307123: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750641033.357290     349 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750641033.371963     349 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been regis