# ALeRCE classes

ZTF alert (Avro) schema reference: https://github.com/ZwickyTransientFacility/ztf-avro-alert

1. **AGN:** Active Galactic Nuclei
1. **Blazar:** Blazar
1. **CV/Nova:** Cataclysmic Variable Star/Nova
1. **Ceph:** Cepheid Variable Star
1. **DSCT:** Delta Scuti Star
1. **EA:** Eclipsing Algol
1. **EB/EW:** Eclipsing Binaries/Eclipsing W Ursae Majoris
1. **LPV:** Long Period Variable
1. **Periodic-Other:** Periodic-Other
1. **QSO:** Quasi-Stellar Object
1. **RRL:** RR Lyrae Variable Star
1. **RSCVn:** RS Canum Venaticorum
1. **SLSN:** Superluminous Supernova
1. **SNII:** Supernova II
1. **SNIIb:** Supernova IIb
1. **SNIIn:** Supernova IIn
1. **SNIa:** Supernova Ia
1. **SNIbc:** Supernova Ibc
1. **TDE:** Tidal disruption event (to remove)
1. **YSO:** Young Stellar Object
1. **ZZ:** ZZ Ceti Stars (to remove)

# Column names
1. **oid:** object id
1. **classALeRCE:** object class name
1. **fid:** band index, g=1, r=2

In [1]:
import sys
sys.path.append('../')
sys.path.append('../../')

In [2]:
import numpy as np
import pandas as pd

# alternative dataset name: 'alerceZTFv5.1'
survey_name = 'alerceZTFv7.1'  # use this dataset

# generic field name -> column name used in the survey dataframes
df_index_names = dict(
    oid='oid',  # object id
    label='classALeRCE',  # object class name
    ra='ra',
    dec='dec',
    band='fid',  # band
    obs_day='mjd',  # days
    obs='magpsf_corr',  # observations
    obs_error='sigmapsf_corr',  # observation errors
)

### load files
# every parquet file of the selected dataset lives under this directory
load_root_dir = f'../data/{survey_name}'

# labels table: report its columns and the name of its index
labels_df = pd.read_parquet(f'{load_root_dir}/labels.parquet')
print(f'labels_df; columns={list(labels_df.columns)}; id={labels_df.index.name}')

# train features: report the index name, then enumerate every feature column
features_train_df = pd.read_parquet(f'{load_root_dir}/features_train.parquet')
print(f'features_train_df; id={features_train_df.index.name}')
for k, c in enumerate(features_train_df.columns):
    print(f'({k}) - {c}')

# test features: loaded silently (the summary print below is kept disabled)
features_test_df = pd.read_parquet(f'{load_root_dir}/features_test.parquet')
#print(f'features_test_df - columns: {list(features_test_df.columns)} - id: {features_test_df.index.name}')

labels_df; columns=['classALeRCE', 'ra', 'dec', 'period', 'source', 'id_source', 'class_source', 'separation_arcsec']; id=oid
features_train_df; id=oid
(0) - Amplitude_1
(1) - Amplitude_2
(2) - AndersonDarling_1
(3) - AndersonDarling_2
(4) - Autocor_length_1
(5) - Autocor_length_2
(6) - Beyond1Std_1
(7) - Beyond1Std_2
(8) - Con_1
(9) - Con_2
(10) - Eta_e_1
(11) - Eta_e_2
(12) - ExcessVar_1
(13) - ExcessVar_2
(14) - GP_DRW_sigma_1
(15) - GP_DRW_sigma_2
(16) - GP_DRW_tau_1
(17) - GP_DRW_tau_2
(18) - Gskew_1
(19) - Gskew_2
(20) - Harmonics_mag_1_1
(21) - Harmonics_mag_1_2
(22) - Harmonics_mag_2_1
(23) - Harmonics_mag_2_2
(24) - Harmonics_mag_3_1
(25) - Harmonics_mag_3_2
(26) - Harmonics_mag_4_1
(27) - Harmonics_mag_4_2
(28) - Harmonics_mag_5_1
(29) - Harmonics_mag_5_2
(30) - Harmonics_mag_6_1
(31) - Harmonics_mag_6_2
(32) - Harmonics_mag_7_1
(33) - Harmonics_mag_7_2
(34) - Harmonics_mse_1
(35) - Harmonics_mse_2
(36) - Harmonics_phase_2_1
(37) - Harmonics_phase_2_2
(38) - Harmonics_phase_3

In [3]:
%load_ext autoreload
%autoreload 2
from fuzzytools.level_bars import LevelBar

# count how many labeled objects fall into each class
label_column = df_index_names['label']
classes, counts = np.unique(labels_df[label_column].values, return_counts=True)
# class name -> number of objects; np.unique returns both arrays aligned
population_cdict = dict(zip(classes, counts))
# NOTE(review): LevelBar presumably renders the counts as text bars; confirm in fuzzytools
print(LevelBar(population_cdict, ncols=60))

ModuleNotFoundError: No module named 'mismatch.level_bars'

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() only appended a stray "None" line.
labels_df.info()
# preview the first 20 rows (last expression, so the notebook displays it)
labels_df.head(20)

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() only appended a stray "None" line.
features_train_df.info()
# preview the first 10 rows (last expression, so the notebook displays it)
features_train_df.head(10)

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() only appended a stray "None" line.
features_test_df.info()
# preview the first 10 rows (last expression, so the notebook displays it)
features_test_df.head(10)

In [None]:
%load_ext autoreload
%autoreload 2
from mismatch import _C
from mismatch.utils import get_object_features
from dask import dataframe as dd

### example using dask
# wrap each pandas frame in a dask dataframe (dask dataframe can be faster);
# all three use the partition count configured in _C.N_DASK
features_train_ddf, features_test_ddf, labels_ddf = (
    dd.from_pandas(frame, npartitions=_C.N_DASK)
    for frame in (features_train_df, features_test_df, labels_df)
)

In [None]:
### look up every feature of a single object
obj_name = 'ZTF18abvpirg'  # from train
features, c, features_names = get_object_features(
    features_train_ddf,
    labels_ddf,
    obj_name,
)
# feature name -> value found at the same position in `features`
fdict = {name: features[pos] for pos, name in enumerate(features_names)}
print(f'obj={obj_name}; class={c}; features={len(features)}')
# one numbered line per feature
for k, key in enumerate(fdict):
    print(f'({k}); {key}={fdict[key]}')

In [None]:
### look up the features of a single band for one object
obj_name = 'ZTF18abvpirg'  # from train
band = 1
features, c, features_names = get_object_features(
    features_train_ddf,
    labels_ddf,
    obj_name,
    band=band,
)
# feature name -> value found at the same position in `features`
fdict = {name: features[pos] for pos, name in enumerate(features_names)}
print(f'obj={obj_name}; class={c}; features={len(features)}')
# one numbered line per feature
for k, key in enumerate(fdict):
    print(f'({k}) {key}={fdict[key]}')

In [None]:
%load_ext autoreload
%autoreload 2
from mismatch import _C
from mismatch.utils import get_object_features
from dask import dataframe as dd

### get the non-band-wise features (requested here with band=-1)
obj_name = 'ZTF18abvpirg'  # from train
band = -1
features, c, features_names = get_object_features(
    features_train_ddf,
    labels_ddf,
    obj_name,
    band=band,
)
# feature name -> value found at the same position in `features`
fdict = {name: features[pos] for pos, name in enumerate(features_names)}
print(f'obj={obj_name}; class={c}; features={len(features)}')
# one numbered line per feature
for k, key in enumerate(fdict):
    print(f'({k}); {key}={fdict[key]}')

In [None]:
### features of a non-labeled sample: the returned class is None
obj_name = 'ZTF17aaacvqh'  # from test
features, c, features_names = get_object_features(
    features_test_ddf,
    labels_ddf,
    obj_name,
    band=1,
)
# feature name -> value found at the same position in `features`
fdict = {name: features[pos] for pos, name in enumerate(features_names)}
print(f'obj={obj_name}; class={c}; features={len(features)}')
# one numbered line per feature
for k, key in enumerate(fdict):
    print(f'({k}); {key}={fdict[key]}')