# Babamul streaming example

In this notebook we'll read from Babamul streams to fetch alerts of interest
and save them locally,
e.g., to use in a machine learning pipeline.

In [None]:
# First, load secrets from a local .env file if present.
# NOTE(review): presumably these are the credentials the Babamul consumer
# needs further down — confirm which variables it actually reads.
import dotenv

dotenv.load_dotenv()

In [None]:
# Define some parameters
# Stop consuming once this many relevant alerts have been collected
limit = 5
# Passed to the alert consumer as its group ID; also used as the prefix of
# the output file name
group_id = "example-group"
# Stream topics the consumer subscribes to
topics = ["babamul.lsst.ztf-match.hosted", "babamul.ztf.lsst-match.hosted"]

In [None]:
import os
from datetime import UTC, datetime

import polars as pl

import babamul


def alert_is_relevant(
    alert: babamul.ZtfAlert | babamul.LsstAlert,
) -> tuple[bool, dict]:
    """Decide whether an alert passes this example's quality cuts.

    The alert is dumped to a plain dict and checked against a sequence of
    cuts; the first failing cut short-circuits the evaluation.

    Args:
        alert: Alert object exposing ``model_dump()``. The dumped dict must
            contain a ``"candidate"`` mapping with at least ``"jd"`` and
            ``"isdiffpos"``; ``"properties"`` is optional.

    Returns:
        ``(True, {})`` when every cut passes, otherwise ``(False, info)``
        where ``info`` holds a human-readable ``"reason"`` plus the value
        that triggered the rejection.

    Raises:
        KeyError: If ``"candidate"``, ``"jd"`` or ``"isdiffpos"`` is missing.
    """
    payload: dict = alert.model_dump()
    candidate = payload["candidate"]
    # Fields can be present but set to None (likely for optional model
    # fields — TODO confirm against the babamul schemas), so a ``dict.get``
    # default alone does not protect the comparisons below.
    properties = payload.get("properties") or {}
    # 1. Real vs Bogus (filter out low drb); a missing score counts as 0.0
    drb = candidate.get("drb")
    if drb is None:
        drb = 0.0
    if drb < 0.8:
        return False, {"reason": "Low drb", "drb": drb}
    # 2. Age of transient (reject if jd - jdstarthist > 30 days); without a
    # history start date the age is taken to be zero
    jd = candidate["jd"]
    jdstarthist = candidate.get("jdstarthist")
    if jdstarthist is None:
        jdstarthist = jd
    age = jd - jdstarthist
    if age > 30:
        return False, {"reason": "Old transient", "age_days": age}
    # 3. Not a moving object (ssdistnr is null, negative, or at least 12)
    ssdistnr = candidate.get("ssdistnr")
    if ssdistnr is not None and 0 <= ssdistnr < 12:
        return False, {"reason": "Possible asteroid", "ssdistnr": ssdistnr}
    # 4. Not a star
    is_star = properties.get("star", False)
    if is_star:
        return False, {"reason": "Stellar source", "is_star": is_star}
    # 5. Not near bright star
    near_brightstar = properties.get("near_brightstar", False)
    if near_brightstar:
        return False, {
            "reason": "Near bright star",
            "near_brightstar": near_brightstar,
        }
    # 6. Positive subtraction (only an explicit False is rejected)
    if candidate["isdiffpos"] is False:
        return False, {
            "reason": "Negative subtraction",
            "isdiffpos": candidate["isdiffpos"],
        }
    # 7. Stationary source (at least 2 detections with sufficient time
    # separation)
    is_stationary = properties.get("stationary", False)
    if not is_stationary:
        return False, {
            "reason": "Non-stationary source",
            "is_stationary": is_stationary,
        }
    return True, {}


# Pull alerts off the stream and keep the relevant ones until `limit` of
# them have been collected.
alerts = []
supported_alert_types = (babamul.ZtfAlert, babamul.LsstAlert)

with babamul.AlertConsumer(
    topics=topics, offset="latest", group_id=group_id, timeout=30
) as consumer:
    n = 0
    for alert in consumer:
        print("Checking alert type:", alert.__class__.__name__)
        # Skip unsupported alert types first, then anything failing the cuts
        keep = isinstance(alert, supported_alert_types) and (
            alert_is_relevant(alert)[0]
        )
        if not keep:
            continue
        print(f"Relevant alert: {alert.candid}")
        alerts.append(alert)
        n += 1
        if n >= limit:
            break

# Save the alerts we like to a local folder for later analysis.
# TODO: Decide on the layout: a single Parquet file per run (as done here),
# or a partitioned dataset
# TODO: Candidates should be flattened for easier querying?
# We could also simply save as JSON or Avro and use DuckDB for querying
alerts_dir = "data/alerts"
os.makedirs(alerts_dir, exist_ok=True)
now_timestamp = datetime.now(tz=UTC).timestamp()
# Build the path from alerts_dir so the file always lands in the directory
# created above.
fpath = os.path.join(alerts_dir, f"{group_id}-{now_timestamp}.parquet")
df = pl.DataFrame([alert.model_dump() for alert in alerts])
if alerts:
    df.write_parquet(fpath)
    print(f"Saved {len(alerts)} alerts to {fpath}")
else:
    # Nothing passed the filter; skip writing an empty, schema-less file
    print("No relevant alerts collected; nothing saved")
# TODO: Show an alert cutout

In [None]:
# TODO: Analyze all alerts from all time saved in the alerts directory