# ALeRCE classes

https://github.com/ZwickyTransientFacility/ztf-avro-alert

Transient events:
1. **SLSN:** Super Luminous Supernova
1. **SNII:** Supernova II
1. **SNIIb:** Supernova IIb
1. **SNIIn:** Supernova IIn
1. **SNIa:** Supernova Ia
1. **SNIbc:** Supernova Ibc

Variable events:
1. **LPV:** Long Period Variable
1. **Ceph:** Cepheid Variable Star
1. **RRL:** RR Lyrae Variable Star
1. **DSCT:** Delta Scuti Star
1. **EB/EW:** Eclipsing Binaries/Eclipsing W Ursae Majoris
1. **Periodic-Other:** Periodic-Other
1. **EA:** Eclipsing Algol (optional)
1. **RSCVn:** RS Canum Venaticorum (optional)

Stochastic events:
1. **Blazar:** Blazar
1. **QSO:** Quasi-Stellar Object
1. **AGN:** Active Galactic Nuclei
1. **YSO:** Young Stellar Object
1. **CV/Nova:** Cataclysmic Variable Star/Nova

Others (to remove):
1. **TDE:** Tidal disruption event
1. **ZZ:** ZZ Ceti Stars

# Column names
1. **oid:** object id
1. **classALeRCE:** object class name
1. **fid:** band index, g=1, r=2

In [None]:
import sys

# Make the parent and grandparent directories importable from this notebook.
sys.path.extend(['../', '../../'])

In [None]:
import numpy as np
import pandas as pd

# Dataset release to load ('alerceZTFv5.1' is the alternative release name).
survey_name = 'alerceZTFv7.1'

# Maps the generic field names used by this notebook to the dataset's column names.
df_index_names = dict(
    oid='oid',  # object id
    label='classALeRCE',  # object class name
    ra='ra',
    dec='dec',
    band='fid',  # band
    obs_day='mjd',  # days
    obs='magpsf_corr',  # observations
    obs_error='sigmapsf_corr',  # observation errors
)

### load files
load_root_dir = f'../data/{survey_name}'

# Labels table: report its columns and the name of its index.
labels_df = pd.read_parquet(f'{load_root_dir}/labels.parquet')
label_columns = list(labels_df.columns)
print(f'labels_df; columns={label_columns}; id={labels_df.index.name}')

# Features of the labeled objects: report the index name, then enumerate every column.
features_labeled_df = pd.read_parquet(f'{load_root_dir}/features_labeled.parquet')
print(f'features_labeled_df; id={features_labeled_df.index.name}')
for position, column in enumerate(features_labeled_df.columns):
    print(f'({position}) - {column}')

# Features of the non-labeled objects (loaded silently; no summary is printed).
features_nonlabeled_df = pd.read_parquet(f'{load_root_dir}/features_nonlabeled.parquet')

In [None]:
%load_ext autoreload
%autoreload 2
from fuzzytools.level_bars import LevelBar

# Count the labeled objects per class and render the populations with LevelBar.
label_column = df_index_names['label']
classes, counts = np.unique(labels_df[label_column].values, return_counts=True)
population_cdict = dict(zip(classes, counts))  # class name -> number of objects
print(LevelBar(population_cdict, ncols=60))

In [None]:
# DataFrame.info() prints the summary itself and returns None, so it is called
# directly; wrapping it in print() only added a stray "None" line to the output.
labels_df.info()
# Last expression of the cell: the notebook displays the first 20 rows.
labels_df[:20]

In [None]:
# DataFrame.info() prints the summary itself and returns None, so it is called
# directly; wrapping it in print() only added a stray "None" line to the output.
features_labeled_df.info()
# Last expression of the cell: the notebook displays the first 10 rows.
features_labeled_df[:10]

In [None]:
# DataFrame.info() prints the summary itself and returns None, so it is called
# directly; wrapping it in print() only added a stray "None" line to the output.
features_nonlabeled_df.info()
# Last expression of the cell: the notebook displays the first 10 rows.
features_nonlabeled_df[:10]

In [None]:
%load_ext autoreload
%autoreload 2
from mismatch import _C
from mismatch.utils import get_object_features
from dask import dataframe as dd

### example using dask
# Wrap each pandas frame in a dask dataframe (dask dataframes can be faster);
# all three are split into _C.N_DASK partitions.
features_labeled_ddf = dd.from_pandas(
    features_labeled_df,
    npartitions=_C.N_DASK,
)
features_nonlabeled_ddf = dd.from_pandas(
    features_nonlabeled_df,
    npartitions=_C.N_DASK,
)
labels_ddf = dd.from_pandas(
    labels_df,
    npartitions=_C.N_DASK,
)

In [None]:
### get all features from an object
obj_name = 'ZTF18abvpirg'  # from labeled-set
features, c, features_names = get_object_features(features_labeled_ddf, labels_ddf, obj_name)

# Pair every feature name with the value found at the same position.
fdict = {name: features[idx] for idx, name in enumerate(features_names)}

print(f'obj={obj_name}; class={c}; features={len(features)}')
for k, (key, value) in enumerate(fdict.items()):
    print(f'({k}); {key}={value}')

In [None]:
### get features per band from an object
obj_name = 'ZTF18abvpirg'  # from labeled-set
band = 1
features, c, features_names = get_object_features(
    features_labeled_ddf,
    labels_ddf,
    obj_name,
    band=band,
)

# Pair every feature name with the value found at the same position.
fdict = {name: features[idx] for idx, name in enumerate(features_names)}

print(f'obj={obj_name}; class={c}; features={len(features)}')
for k, (key, value) in enumerate(fdict.items()):
    print(f'({k}) {key}={value}')

In [None]:
%load_ext autoreload
%autoreload 2
from mismatch import _C
from mismatch.utils import get_object_features
from dask import dataframe as dd

### get the non-band-wise features from an object (requested with band=-1)
obj_name = 'ZTF18abvpirg'  # from labeled-set
band = -1
features, c, features_names = get_object_features(
    features_labeled_ddf,
    labels_ddf,
    obj_name,
    band=band,
)

# Pair every feature name with the value found at the same position.
fdict = {name: features[idx] for idx, name in enumerate(features_names)}

print(f'obj={obj_name}; class={c}; features={len(features)}')
for k, (key, value) in enumerate(fdict.items()):
    print(f'({k}); {key}={value}')

In [None]:
### get features from a non-labeled sample. returned class is None
obj_name = 'ZTF17aaacvqh'  # from non-labeled-set
features, c, features_names = get_object_features(
    features_nonlabeled_ddf,
    labels_ddf,
    obj_name,
    band=1,
)

# Pair every feature name with the value found at the same position.
fdict = {name: features[idx] for idx, name in enumerate(features_names)}

print(f'obj={obj_name}; class={c}; features={len(features)}')
for k, (key, value) in enumerate(fdict.items()):
    print(f'({k}); {key}={value}')