In [1]:
!git clone https://github.com/alercebroker/pipeline

Cloning into 'pipeline'...
remote: Enumerating objects: 93767, done.[K
remote: Counting objects: 100% (136/136), done.[K
remote: Compressing objects: 100% (87/87), done.[K
remote: Total 93767 (delta 64), reused 72 (delta 48), pack-reused 93631 (from 4)[K
Receiving objects: 100% (93767/93767), 163.36 MiB | 24.87 MiB/s, done.
Resolving deltas: 100% (74570/74570), done.
Updating files: 100% (2538/2538), done.


In [3]:
!pip install -e /kaggle/working/pipeline/lc_classifier

Obtaining file:///kaggle/working/pipeline/lc_classifier
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Processing ./pipeline/P4J (from lc-classifier==27.5.7a8)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Processing ./pipeline/mhps (from lc-classifier==27.5.7a8)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting astropy<7.0,>=6.1 (from lc-classifier==27.5.7a8)
  Downloading astropy-6.1.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting celerite2<0.4.0,>=0.3.1 (from lc-classifier==27.5.7a8)
 

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
import os
import glob
from tqdm.auto import tqdm

In [7]:
sys.path.append("/kaggle/working/pipeline/lc_classifier")
from lc_classifier.features.composites.elasticc import ElasticcFeatureExtractor
from lc_classifier.features.core.base import AstroObject

In [11]:
def get_detections(chosen_df, snid):
    detections_colnames = ["SNID", "MJD", "BAND", "FLUXCAL", "FLUXCALERR", "PHOTFLAG"]
    tot_pts = chosen_df["MJD"].shape[0]
    dat = []
    dat.append([snid] * (tot_pts))
    for col in detections_colnames[1:]:
        dat.append(chosen_df.loc[col])
    dat = np.array(dat).T
    detections = (
        pd.DataFrame(data=dat, columns=detections_colnames)
        .sort_values(by="MJD")
        .reset_index(drop=True)
    )
    return detections


def get_meta(chosen_df, snid):
    metacols = [
        "SNID",
        "RA",
        "DEC",
        "SNTYPE",
        "NOBS",
        "PTROBS_MIN",
        "PTROBS_MAX",
        "MWEBV",
        "MWEBV_ERR",
        "REDSHIFT_HELIO",
        "REDSHIFT_HELIO_ERR",
        "REDSHIFT_FINAL",
        "REDSHIFT_FINAL_ERR",
        "VPEC",
        "VPEC_ERR",
    ]

    names = chosen_df.loc[metacols].index.to_numpy()
    values = chosen_df.loc[metacols].to_numpy()
    names = np.append(names, "aid")
    values = np.append(values, np.nan)
    dat = np.stack([names, values]).T
    metadata = pd.DataFrame(data=dat, columns=["name", "value"])
    return metadata


def get_sniddata(df_parquet, snid):
    chosen_df = df_parquet[df_parquet["SNID"] == snid].iloc[0]
    detections = get_detections(chosen_df, snid)
    metadata = get_meta(chosen_df, snid)

    return detections, metadata

In [12]:
def alerce_lcobj(detections, metadata):
    # assert detections["SNID"].unique().shape[0] == 1
    detections.rename(
        columns={
            "MJD": "mjd",
            "BAND": "fid",
            "FLUXCAL": "brightness",
            "FLUXCALERR": "e_brightness",
        },
        inplace=True,
    )
    detections["candid"] = None
    detections["tid"] = "elasticc_telescope"
    detections["sid"] = "elasticc_survey"
    detections["pid"] = "elasticc_program"

    detections["ra"] = float(metadata[metadata["name"] == "RA"]["value"].values[0])
    detections["dec"] = float(metadata[metadata["name"] == "DEC"]["value"].values[0])
    detections["unit"] = "diff_flux"

    is_detected = detections["PHOTFLAG"] > 0
    detections.drop(columns=["PHOTFLAG"], inplace=True)
    forced_photometry = detections[~is_detected]
    detections = detections[is_detected]

    astro_object = AstroObject(
        detections=detections, forced_photometry=forced_photometry, metadata=metadata
    )

    return astro_object


def alerce_feature_listobjs(
    astro_objects, feature_extractor=ElasticcFeatureExtractor(), progress_bar=False
):
    # astro_objects is list of astro_object e.g. [astro_object1, astro_object2, etc.]
    feature_extractor.compute_features_batch(
        [elasticc_object], progress_bar=progress_bar
    )
    return astro_objects

In [9]:
# glob.glob("/global/cfs/cdirs/desc-td/ELASTICC2_parquet/*.parquet")

df = pd.read_parquet("/kaggle/input/elasticc2-parquet/Cepheid.parquet")
# dftrain = pd.read_parquet("/kaggle/input/elasticc2-train02-parquet/Cepheid.parquet")

In [10]:
snids = df["SNID"].to_numpy()

In [16]:
detections, metadata = get_sniddata(df, snids[2])

In [17]:
elasticc_object = alerce_lcobj(detections, metadata)

In [18]:
feature_extractor = ElasticcFeatureExtractor()
feature_extractor.compute_features_batch([elasticc_object], progress_bar=True)
print(elasticc_object.features)

  0%|          | 0/1 [00:00<?, ?it/s]


TypeError: loop of ufunc does not support argument 0 of type float which has no callable cos method