# Alerce alert import

Start by collating the three source tables (light curves, non-detections, and reference data) into a single table, with each set of data nested as lists under its object (`oid`).

Then we run the hipscat-import pipeline on the collated files.

In [None]:
import pandas as pd
import pickle
from tqdm import tqdm


import glob
import hipscat_import.pipeline as runner
from hipscat_import.catalog.arguments import ImportArguments


In [None]:
# Collate the raw alert pickles into one nested parquet file per chunk.
#
# For every chunk index the light-curve, non-detection and reference tables
# are unpickled, collapsed to a single row per ``oid`` (per-row values are
# gathered into lists), joined on ``oid`` and written under ``nested_pq/``.

RAW_ALERTS_DIR = "/data3/epyc/data3/hipscat/raw/alerts"
NUM_CHUNKS = 182

# Light-curve columns gathered into per-object lists. ``ra`` and ``dec`` are
# handled separately because they are averaged instead.
LIGHTCURVE_LIST_COLUMNS = [
    "candid", "mjd", "fid", "pid", "diffmaglim", "isdiffpos", "nid",
    "magpsf", "sigmapsf", "magap", "sigmagap", "distnr", "rb",
    "rbversion", "drb", "drbversion", "magapbig", "sigmagapbig", "rfid",
    "magpsf_corr", "sigmapsf_corr", "sigmapsf_corr_ext", "corrected",
    "dubious", "parent_candid", "has_stamp", "step_id_corr",
]
NONDET_LIST_COLUMNS = ["mjd", "fid", "diffmaglim"]
REF_LIST_COLUMNS = [
    "rfid", "candid", "fid", "rcid", "field", "magnr",
    "sigmagnr", "chinr", "sharpnr", "ranr", "decnr",
    "mjdstartref", "mjdendref", "nframesref",
]

# Aggregation specs are loop-invariant, so build them once. Dict order sets the
# output column order: mean position first, then the list columns.
LIGHTCURVE_AGG = {"ra": "mean", "dec": "mean", **dict.fromkeys(LIGHTCURVE_LIST_COLUMNS, list)}
NONDET_AGG = dict.fromkeys(NONDET_LIST_COLUMNS, list)
REF_AGG = dict.fromkeys(REF_LIST_COLUMNS, list)


def _load_nested(file_name, aggregations):
    """Unpickle one table and collapse it to a single row per ``oid``.

    Args:
        file_name: Path of the pickle file to read.
        aggregations: Mapping of column name to aggregation, handed unchanged
            to ``groupby("oid").agg``.

    Returns:
        The aggregated frame, indexed by ``oid``.
    """
    # NOTE: unpickling can execute arbitrary code stored in the file; only
    # point this at trusted files.
    with open(file_name, "rb") as pickle_file:
        # presumably a pandas DataFrame with an ``oid`` column — confirm
        table = pickle.load(pickle_file)
    return table.groupby("oid").agg(aggregations)


for index in tqdm(range(NUM_CHUNKS)):
    # NOTE(review): the doubled "<name>.pickle<name>.pickle" stem in the three
    # input paths looks like a paste slip; kept unchanged — confirm against
    # the file names actually on disk.
    lc_file_name = f"{RAW_ALERTS_DIR}/lcs_{index:04}.picklelcs_{index:04}.pickle"
    lightcurves = _load_nested(lc_file_name, LIGHTCURVE_AGG).rename(
        columns={"ra": "mean_ra", "dec": "mean_dec"}
    )

    nondet_file_name = f"{RAW_ALERTS_DIR}/nondet_{index:04}.picklenondet_{index:04}.pickle"
    nondet = _load_nested(nondet_file_name, NONDET_AGG)

    ref_file_name = f"{RAW_ALERTS_DIR}/ref_{index:04}.pickleref_{index:04}.pickle"
    ref_frame = _load_nested(ref_file_name, REF_AGG)

    # Both merges use pandas' default inner join, so only ``oid`` values
    # present in all three tables survive.
    # NOTE(review): confirm that dropping objects with no non-detections or no
    # reference rows is intended.
    with_nondet = pd.merge(lightcurves, nondet, on="oid", suffixes=("_lc", "_nondet"))

    # Reference columns that clash with the left side (``rfid``, ``candid``)
    # get a "_ref" suffix. The reference ``fid`` keeps its bare name because
    # the left side's ``fid`` columns were already suffixed by the first merge.
    agged = pd.merge(with_nondet, ref_frame, on="oid", suffixes=(None, "_ref"))

    # NOTE(review): the output is parquet despite the ".pickle" suffix; the
    # name is kept because the later import step globs "file_*".
    agged.to_parquet(f"{RAW_ALERTS_DIR}/nested_pq/file_{index:04}.pickle")

In [None]:
# Gather the nested files written by the collation step, in name order.
files = sorted(glob.glob("/data3/epyc/data3/hipscat/raw/alerts/nested_pq/file_*"))
print(f"found {len(files)} files")

# Settings for the import run. The position columns are the per-object mean
# coordinates computed during collation.
import_settings = {
    "output_artifact_name": "alerce_sample",
    "output_path": "/data3/epyc/data3/hipscat/test_catalogs/alerce/",
    "input_file_list": files,
    # The inputs are parquet even though their names end in ".pickle".
    "file_reader": "parquet",
    "ra_column": "mean_ra",
    "dec_column": "mean_dec",
    "pixel_threshold": 1_000_000,
    "highest_healpix_order": 6,
    "tmp_dir": "/data3/epyc/data3/hipscat/tmp/",
    "dask_tmp": "/data3/epyc/data3/hipscat/tmp/",
    "dask_n_workers": 10,
    "dask_threads_per_worker": 1,
    "completion_email_address": "delucchi@andrew.cmu.edu",
    "simple_progress_bar": True,
}

args = ImportArguments(**import_settings)
runner.pipeline(args)
