# ALeRCE classes

https://github.com/ZwickyTransientFacility/ztf-avro-alert

1. **AGN:** Active Galactic Nuclei
1. **Blazar:** Blazar
1. **CV/Nova:** Cataclysmic Variable Star/Nova
1. **Ceph:** Cepheid Variable Star
1. **DSCT:** Delta Scuti Star
1. **EA:** Eclipsing Algol
1. **EB/EW:** Eclipsing Binaries/Eclipsing W Ursae Majoris
1. **LPV:** Long Period Variable
1. **Periodic-Other:** Periodic-Other
1. **QSO:** Quasi-Stellar Object
1. **RRL:** RR Lyrae Variable Star
1. **RSCVn:** RS Canum Venaticorum
1. **SLSN:** Superluminous Supernova
1. **SNII:** Supernova II
1. **SNIIb:** Supernova IIb
1. **SNIIn:** Supernova IIn
1. **SNIa:** Supernova Ia
1. **SNIbc:** Supernova Ibc
1. **TDE:** Tidal Disruption Event (to remove)
1. **YSO:** Young Stellar Object
1. **ZZ:** ZZ Ceti Stars (to remove)

In [None]:
import sys

# Extend the import search path with two relative directories (the second one
# is the fuzzy-tools checkout). Alternative: just install the modules instead.
sys.path.extend([
    '../../',
    '../../../fuzzy-tools',
])

In [None]:
%load_ext autoreload
%autoreload 2
from lchandler.surveyexport.alerce_utils import process_df_labels, process_df_detections, keep_only_valid_objs
import numpy as np
import pandas as pd

# --- Survey location and processing switches ---
load_rootdir = '../../../surveys-data'
survey_name = 'alerceZTFv7.1'
uses_corr = False  # original note: False can only be used with SNe objects
clean_invalid_objs = True  # original note: deletes a lot of objects

# Observation columns: the "_corr" variants are selected when `uses_corr` is set.
if uses_corr:
    _obs_column, _obs_error_column = 'magpsf_corr', 'sigmapsf_corr'
else:
    _obs_column, _obs_error_column = 'magpsf', 'sigmapsf'

# Generic field name -> column name used in the survey dataframes.
df_index_names = dict(
    oid='oid',  # object id
    oid_det='objectId',  # object id
    label='classALeRCE',  # object class name
    ra='ra',
    dec='dec',
    band='fid',  # band
    obs_day='mjd',  # days
    obs=_obs_column,  # observations
    obs_error=_obs_error_column,  # observation errors
)

# Detection columns handed to `process_df_detections`, looked up from the
# mapping above so both always agree.
detections_cols = [
    df_index_names[field]
    for field in ('oid_det', 'band', 'obs_day', 'obs', 'obs_error')
]

### load files and processing
# All input files live under <load_rootdir>/<survey_name>.
survey_dir = f'{load_rootdir}/{survey_name}'

# Detections: read the parquet data, then run the project-specific processing,
# which also returns the object ids it handled (`det_objs`).
detections_df = pd.read_parquet(f'{survey_dir}/detections_with_xmatch')
detections_df, det_objs = process_df_detections(
    detections_df,
    df_index_names['oid_det'],
    df_index_names['oid'],
    detections_cols,
    uses_corr=uses_corr,
    clean_invalid_objs=clean_invalid_objs,
)
print(f'[detections_df] columns={list(detections_df.columns)} - id={detections_df.index.name}')

# Labels: read the cross-match CSV and process it against the detection objects.
labels_df = pd.read_csv(f'{survey_dir}/dfcrossmatches_prioritized_v7.0.1.csv')
labels_df, label_objs = process_df_labels(
    labels_df,
    df_index_names['oid'],
    det_objs,
)
print(f'[labels] columns={list(labels_df.columns)} - id={labels_df.index.name}')

# Outliers table, forwarded to the dictionary export later in the notebook.
outliers_df = pd.read_csv(f'{survey_dir}/outliers.csv')

### filter
# Keep only the objects present in both `det_objs` and `label_objs`.
det_obj_set = set(det_objs)
label_obj_set = set(label_objs)
valid_objs = list(det_obj_set & label_obj_set)
labels_df = keep_only_valid_objs(labels_df, valid_objs)
detections_df = keep_only_valid_objs(detections_df, valid_objs)

### print info
# Distinct class names remaining in the label column after filtering.
label_column = df_index_names['label']
classes = set(labels_df[label_column].values)
print('classes:', classes)

In [None]:
# Print a summary of the detections dataframe, then show a slice of its
# first 10 rows as the cell output.
detections_df.info()
detections_df[:10]

In [None]:
# Print a summary of the labels dataframe, then show a slice of its
# first 10 rows as the cell output.
labels_df.info()
labels_df[:10]

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
from lchandler.surveyexport.dictionary_creator import LightCurveDictionaryCreator

# Band name -> numeric band code.
# NOTE(review): presumably these codes match the values of the 'fid' column — confirm.
band_dictionary = dict(g=1, r=2)
# Options for the creator: the observation column holds magnitudes, not flux.
# NOTE(review): 48.6 is presumably the AB-magnitude zero point — confirm.
creator_options = dict(
    dataframe_obs_uses_flux=False,
    zero_point=48.6,
)
lcDictionaryCreator = LightCurveDictionaryCreator(
    survey_name,
    detections_df,
    labels_df,
    band_dictionary,
    df_index_names,
    **creator_options,
)
#print(lcDictionaryCreator)

# Plot the class distribution with the log-scale option enabled.
lcDictionaryCreator.plot_class_distribution(uses_log_scale=1)

In [None]:
# Print the object names returned for class 'SNIIb', followed by their count.
sniib_obj_names = lcDictionaryCreator.get_obj_names('SNIIb')
print(f'{sniib_obj_names} ({len(sniib_obj_names)}#)')

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
# Supernova class names.
DF_SN_LIST = ['SLSN', 'SNII', 'SNIIb', 'SNIIn', 'SNIa', 'SNIbc']

# Keyword arguments passed to `update_labels_df` for each supported mode.
# Every entry uses the same three keys, so selecting any mode always defines
# `kwargs`. Previously 'simple' and 'RRCeph' set unrelated variables and left
# `kwargs` undefined (or stale from an earlier run of this cell).
MODE_KWARGS = {
    # Merge every supernova class into a single 'SN' class.
    'simple': {
        'invalid_classes': [],
        'query_classes': [],
        'merge_classes_dict': {'SN': DF_SN_LIST},
    },
    # Query the supernova classes, merging the SNII subtypes into 'SNII*'.
    'onlySNe': {
        'invalid_classes': [],
        'query_classes': DF_SN_LIST,
        'merge_classes_dict': {'SNII*': ['SNII', 'SNIIb', 'SNIIn']},
    },
    # Same as 'onlySNe' but without SLSN among the queried classes.
    'onlySNe-SLSN': {
        'invalid_classes': [],
        'query_classes': [sn for sn in DF_SN_LIST if sn != 'SLSN'],
        'merge_classes_dict': {'SNII*': ['SNII', 'SNIIb', 'SNIIn']},
    },
    # Query the supernova classes, merging everything except SNIa into 'non-SNIa'.
    'onlySNIa': {
        'invalid_classes': [],
        'query_classes': DF_SN_LIST,
        'merge_classes_dict': {'non-SNIa': ['SLSN', 'SNII', 'SNIIb', 'SNIIn', 'SNIbc']},
    },
    # Query only RRL and Ceph, without merging.
    'RRCeph': {
        'invalid_classes': [],
        'query_classes': ['RRL', 'Ceph'],
        'merge_classes_dict': {},
    },
}

# Select one mode. 'raw' and 'transients' were previously offered here but
# have no configuration, so they are rejected below rather than failing later.
#mode = 'simple'
#mode = 'RRCeph'
mode = 'onlySNe'
#mode = 'onlySNe-SLSN'
#mode = 'onlySNIa'

if mode not in MODE_KWARGS:
    raise ValueError(f'unsupported mode {mode!r}; choose one of {sorted(MODE_KWARGS)}')
kwargs = MODE_KWARGS[mode]

# Apply the selected class configuration and plot the class distribution.
lcDictionaryCreator.update_labels_df(**kwargs)
lcDictionaryCreator.plot_class_distribution(
    figsize=(6,3),
    uses_log_scale=1,
    )

In [None]:
%load_ext autoreload
%autoreload 2

description = 'ZTF Alerce'
save_folder = '../../../surveys-save'

# Keyword arguments for the export; the selected `mode` is passed as an extra
# filename parameter.
# Optional arguments left disabled: to_export_bands=['g','r'], saves_dict=True
export_options = dict(
    band_names=list(band_dictionary),
    filename_extra_parameters={'mode': mode},
    outliers_df=outliers_df,
)
lcdataset = lcDictionaryCreator.export_dictionary(description, save_folder, **export_options)

In [None]:
# Print the string representation of the exported dataset.
print(lcdataset)

In [None]:
import fuzzytools.lists as lists
import matplotlib.pyplot as plt
from lchandler.plots.lc import plot_lightcurve

lcset = lcdataset['raw']

# NOTE(review): the random draw is immediately overridden by the fixed object
# name below; it is kept so the cell behaves exactly as before.
lcobj, lcobj_name = lcset.get_random_lcobj()
lcobj_name = 'ZTF20aadvaoi'
lcobj = lcset[lcobj_name].copy()

# One axis; every band of the object is drawn on it.
fig, ax = plt.subplots(1, 1, figsize=(12, 5), dpi=200)
for b in lcset.band_names:
    plot_lightcurve(ax, lcobj, b, f'{b} obs')

class_name = lcset.class_names[lcobj.y]
ax.set_title(f'set={lcset.survey}; obj={lcobj_name} [{class_name}]')
ax.set_xlabel('observation-time [days]')
ax.set_ylabel('observation [flux]')
ax.legend()
ax.grid(alpha=0.0)  # alpha=0.0 is passed, so the grid lines are fully transparent