# Gaia-DP1 Cross-Matching for Classifications
#### Authors: Tatiana Acero-Cuellar, Siddharth Chaini

## 0. Imports

In [None]:
import astropy.units as u
from astropy.coordinates import SkyCoord
from astroquery.gaia import Gaia

import matplotlib.pyplot as plt
import lsst.daf.butler as dafButler
import astropy
import numpy as np
import os
import pandas as pd
import gc

import pandas as pd
import getpass
import matplotlib
from astropy.time import Time
from tqdm.auto import tqdm

import io
import re

In [None]:
# Open the "dp1" Butler repository, reading from the LSSTComCam/DP1 collection.
butler = dafButler.Butler("dp1", collections="LSSTComCam/DP1")

In [None]:
# Collect references to every "dia_source" dataset; each one is loaded and
# cross-matched individually further down.
# NOTE(review): limit=None presumably disables the default cap on the number
# of returned references -- confirm against the Butler.query_datasets docs.
obj_refs = butler.query_datasets("dia_source", limit=None)

In [None]:
# Cross-match every dia_source table against Gaia DR3 variability
# classifications (gaiadr3.vari_classifier_result) and write the matched
# detections, tagged with the Gaia source_id and best_class_name, to
# "gaia_matches.csv".
#
# For each dataset the Gaia archive is queried over the RA/Dec bounding box of
# the detections, then each detection is paired with its nearest Gaia source
# and kept if the on-sky separation is below `space_match_threshold`.

# Maximum on-sky separation for a detection to count as a match
# (loop-invariant, so it is defined once).
space_match_threshold = 1 * u.arcsec

# One DataFrame of matched detections per dataset; concatenated once at the
# end instead of re-copying a growing frame on every iteration.
matched_frames = []
total_sources = 0

for ref in tqdm(obj_refs):
    try:
        diasource = butler.get(ref).to_pandas()
        if diasource.empty:
            # No detections: the min/max below would be NaN and yield an
            # invalid ADQL query.
            continue

        min_exp_ra, max_exp_ra = diasource.ra.min(), diasource.ra.max()
        min_exp_dec, max_exp_dec = diasource.dec.min(), diasource.dec.max()

        query = (
            "SELECT cls.source_id, best_class_name, s.ref_epoch, s.ra, s.dec FROM gaiadr3.vari_classifier_result AS cls "
            "LEFT OUTER JOIN gaiadr3.gaia_source AS s ON s.source_id=cls.source_id "
            f"WHERE ra BETWEEN {min_exp_ra} AND {max_exp_ra} "
            f"AND dec BETWEEN {min_exp_dec} AND {max_exp_dec}"
        )
        # Launch the query asynchronously
        job = Gaia.launch_job_async(query=query)
        results = job.get_results().to_pandas()

        if results.empty:
            # Nothing to match against; nearest-neighbour matching cannot be
            # run on a zero-length catalog, so report and move on explicitly.
            print(
                f"0 of {len(diasource)} matched after applying spatial threshold"
            )
            continue

        catalog = SkyCoord(
            ra=results["ra"].to_numpy(), dec=results["dec"].to_numpy(), unit=u.deg
        )
        detections_cat = SkyCoord(
            ra=diasource.ra.to_numpy(), dec=diasource.dec.to_numpy(), unit=u.deg
        )

        # idx[k] is the position in `catalog` of the nearest Gaia source to
        # detection k; the 3D distance is not needed here.
        idx, sep2d, _ = detections_cat.match_to_catalog_sky(catalog)
        diasource_mask = np.asarray(sep2d < space_match_threshold)
        n_matched = int(diasource_mask.sum())
        print(
            f"{n_matched} of {len(detections_cat)} matched after applying spatial threshold"
        )

        total_sources += n_matched
        if n_matched == 0:
            continue

        # Gaia rows paired with each matched detection, in detection order.
        gaia_rows = results.iloc[idx[diasource_mask]]

        # Attach the Gaia columns to the matched rows only. Creating the
        # column on the full table would NaN-fill the unmatched rows and
        # force float64, which cannot represent 64-bit Gaia source_ids
        # exactly (the written IDs would be silently rounded).
        matched = diasource.loc[diasource_mask].copy()
        matched["id_match"] = gaia_rows["source_id"].to_numpy()
        matched["best_class_name"] = gaia_rows["best_class_name"].to_numpy()
        matched_frames.append(matched)

    except Exception as e:
        # Best-effort: one failing dataset (Butler read or Gaia query) must
        # not abort the whole run, but say which one was skipped.
        print(f"Skipping {ref}: {e}")

# pd.concat raises on an empty list, so fall back to an empty frame when
# nothing matched at all.
matches = pd.concat(matched_frames) if matched_frames else pd.DataFrame()

print(f"Total of matches {total_sources}")
matches.reset_index(drop=True).to_csv("gaia_matches.csv", index=False)