# Benchmarking MongoDB

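This notebook benchmarks MongoDB as a store for astronomical alerts: it starts MongoDB with Docker Compose, converts ZTF alerts from Avro files into documents, and times how long it takes to insert them. Measurements are collected in the `results` dictionary. The NED loading, cross-matching, and filtering steps are still placeholders.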

In [None]:
# Benchmark measurements are accumulated here, keyed by metric name
results = dict()

In [None]:
# Ensure MongoDB is running: from the parent directory, start the Docker
# Compose services in detached mode (-d), so the command returns while the
# containers keep running in the background.
! cd .. && docker compose up -d

In [None]:
# Ensure the database is empty
import pymongo

# No arguments: connect with pymongo's defaults (localhost, port 27017)
client = pymongo.MongoClient()
# Drop any "boom" database left over from a previous run so that every
# benchmark starts from the same empty state
client.drop_database("boom")

In [None]:
# Grab handles to the benchmark database and its two collections.
# MongoDB creates databases and collections lazily on first write, and no
# indexes are defined at this point.
database = client["boom"]
ztf_collection = database["ztf_alerts"]
ned_collection = database["ned_alerts"]

In [None]:
# Read the ZTF alerts from their Avro files and convert them into MongoDB
# documents (the actual insertion is done, and timed, in a later cell)
import glob
import fastavro
from tqdm.auto import tqdm

# glob.glob() returns paths in arbitrary order; sort them so the documents
# are always built (and later inserted) in the same order between runs
ztf_avro_fpaths = sorted(glob.glob("../data/ztf_public_20250614/*.avro"))

print(f"Found {len(ztf_avro_fpaths)} ZTF alerts")

print("Converting to documents")
documents = []
for alert_avro_fpath in tqdm(ztf_avro_fpaths):
    with open(alert_avro_fpath, "rb") as f:
        for alert in fastavro.reader(f):
            candidate = alert["candidate"]
            documents.append(
                {
                    "object_id": alert["objectId"],
                    "cand_id": alert["candid"],
                    "candidate": candidate,
                    # Coordinates are a GeoJSON object: [longitude, latitude].
                    # MongoDB only accepts longitudes in [-180, 180], while RA
                    # runs over [0, 360), so RA is shifted by 180 degrees.
                    # The unshifted value stays available as candidate["ra"].
                    "coordinates": {
                        "type": "Point",
                        "coordinates": [
                            candidate["ra"] - 180.0,
                            candidate["dec"],
                        ],
                    },
                }
            )

In [None]:
# Insert all the alerts into the database and record how long it takes
import time

print("Inserting ZTF alerts into the database")

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock, which can be adjusted
# (e.g. by NTP) in the middle of a measurement.
# NOTE: insert_many() adds an "_id" to each dict in place, so re-running this
# cell without rebuilding `documents` first fails with duplicate-key errors.
t0 = time.perf_counter()
ztf_collection.insert_many(documents)
t1 = time.perf_counter()

# Elapsed time of the bulk insert, in seconds
results["ztf_alerts_insert_time_s"] = t1 - t0

In [None]:
# TODO: Load NED alerts into the database (not implemented yet)

In [None]:
# TODO: Run cross-matching (not implemented yet)

In [None]:
# TODO: Run through some filters (not implemented yet)

In [None]:
# Shut down Docker containers: from the parent directory, stop and remove
# the containers that were started with `docker compose up` at the top of
# this notebook
! cd .. && docker compose down