# ALeRCE classes

ZTF alert schema (source of the detection column names used below): <https://github.com/ZwickyTransientFacility/ztf-avro-alert>

1. **AGN:** Active Galactic Nuclei
1. **Blazar:** Blazar
1. **CV/Nova:** Cataclysmic Variable Star/Nova
1. **Ceph:** Cepheid Variable Star
1. **DSCT:** Delta Scuti Star
1. **EA:** Eclipsing Algol
1. **EB/EW:** Eclipsing Binaries/Eclipsing W Ursae Majoris
1. **LPV:** Long Period Variable
1. **Periodic-Other:** Periodic-Other
1. **QSO:** Quasi-Stellar Object
1. **RRL:** RR Lyrae Variable Star
1. **RSCVn:** RS Canum Venaticorum
1. **SLSN:** Superluminous Supernova
1. **SNII:** Supernova II
1. **SNIIb:** Supernova IIb
1. **SNIIn:** Supernova IIn
1. **SNIa:** Supernova Ia
1. **SNIbc:** Supernova Ibc
1. **TDE:** Tidal disruption event (to remove)
1. **YSO:** Young Stellar Object
1. **ZZ:** ZZ Ceti Stars (to remove)

In [None]:
import sys

# Extend the import search path two directories up so the `vstars` imports in
# the following cells resolve (presumably the project root -- confirm).
# Guarded so that re-running this cell does not stack duplicate entries.
_project_root = '../../'
if _project_root not in sys.path:
    sys.path.append(_project_root)

In [None]:
%load_ext autoreload
%autoreload 2
from vstars.alerce_utils import process_df_labels, process_df_detections, keep_only_valid_objs
import numpy as np
import pandas as pd

# Location of the raw survey dumps and the survey release to load.
load_rootdir = '../../../../tesis/surveys_data'
survey_name = 'alerceZTFv7.1'

# Chooses between the corrected (`*_corr`) and raw PSF magnitude columns.
# NOTE(review): the original remark states that uses_corr=False can only be
# used with SNe objects, yet later cells select variable-star classes while
# this flag is False -- confirm this is intended.
uses_corr = False

# Maps the generic names used by the processing helpers to the column names
# found in the ALeRCE/ZTF tables.
df_index_names = {
    'oid': 'oid',  # object id
    'oid_det': 'objectId',  # object id as named in the detections table
    'label': 'classALeRCE',  # object class name
    'ra': 'ra',
    'dec': 'dec',
    'band': 'fid',  # band
    'obs_day': 'mjd',  # days
    'obs': 'magpsf_corr' if uses_corr else 'magpsf',  # observations
    'obs_error': 'sigmapsf_corr' if uses_corr else 'sigmapsf',  # observation errors
}

# Detection columns to keep, derived from the mapping above so the column
# names are declared in a single place.
detections_cols = [
    df_index_names['oid_det'],
    df_index_names['band'],
    df_index_names['obs_day'],
    df_index_names['obs'],
    df_index_names['obs_error'],
]

### load files and processing
# Detections: read the parquet dataset and hand it straight to the project
# helper, which returns the processed table together with `det_objs`
# (reused below when processing and filtering the labels).
detections_df, det_objs = process_df_detections(
    pd.read_parquet(f'{load_rootdir}/{survey_name}/detections_with_xmatch'),
    df_index_names['oid_det'],
    df_index_names['oid'],
    detections_cols,
    uses_corr=uses_corr,
)
print(f'detections_df - columns: {list(detections_df.columns)} - id: {detections_df.index.name}')

# Labels: read the cross-match CSV and process it against the detection objects.
labels_df, label_objs = process_df_labels(
    pd.read_csv(f'{load_rootdir}/{survey_name}/dfcrossmatches_prioritized_v7.0.1.csv'),
    df_index_names['oid'],
    det_objs,
)
print(f'labels - columns: {list(labels_df.columns)} - id: {labels_df.index.name}')

### filter
# Only objects that appear in both tables survive; sorting gives a stable order.
valid_objs = sorted(set(det_objs).intersection(label_objs))
labels_df = keep_only_valid_objs(labels_df, valid_objs)
detections_df = keep_only_valid_objs(detections_df, valid_objs)

### print info
# Distinct class names left after filtering, in sorted order.
classes = sorted({*labels_df[df_index_names['label']].values})
print('classes:', classes)

In [None]:
%load_ext autoreload
%autoreload 2
from vstars.level_bars import LevelBar

# Count the labelled objects per class and render the distribution with the
# project's LevelBar helper.
classes, counts = np.unique(
    labels_df[df_index_names['label']].values,
    return_counts=True,
)
population_cdict = dict(zip(classes, counts))
print(LevelBar(population_cdict, ncols=60))

In [None]:
# Print the detections table summary, then display its last 20 rows.
detections_df.info()
detections_df.iloc[-20:]

In [None]:
# Print the labels table summary, then display its first 20 rows.
labels_df.info()
labels_df.iloc[:20]

In [None]:
%load_ext autoreload
%autoreload 2
from vstars.alerce_utils import get_valid_classes_objs, keep_only_valid_objs
    
# Class labels to keep for the variable-star subset built in the next cells.
target_classes = [
    #'EA', # Eclipsing Algol (currently left out of the selection)
    'EB/EW', # Eclipsing Binaries / Eclipsing W Ursae Majoris
    'Ceph', # Cepheids
    'RRL', # RR Lyrae
    'DSCT', # Delta Scuti
    'LPV', # Long Period Variables
]
# NOTE(review): presumably returns the ids of the objects whose label is in
# target_classes -- confirm against vstars.alerce_utils.
valid_objs = get_valid_classes_objs(labels_df, df_index_names, target_classes)
# Peek at the first ten selected entries as a sanity check.
print(valid_objs[:10])

In [None]:
# Restrict the detections to the objects selected in `valid_objs`.
new_detections_df = keep_only_valid_objs(detections_df, valid_objs)
# DataFrame.info() writes its summary to stdout and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
new_detections_df.info()
# Display the first 50 rows of the filtered table.
new_detections_df[:50]

In [None]:
# Restrict the labels to the objects selected in `valid_objs`.
new_labels_df = keep_only_valid_objs(labels_df, valid_objs)
# DataFrame.info() writes its summary to stdout and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
new_labels_df.info()
# Display the first 50 rows of the filtered table.
new_labels_df[:50]

In [None]:
# Spot-check a single object in both filtered tables.
sample_oid = 'ZTF18aaavkyj'
print(new_labels_df.loc[sample_oid])
print(new_detections_df.loc[sample_oid])

In [None]:
import os

import pandas as pd

### save files
# Output names carry a `_noncorr` suffix when the raw (non-corrected)
# magnitudes were used.
extra_name = '' if uses_corr else '_noncorr'
save_root_dir = f'../../data/{survey_name}'
# to_parquet does not create missing parent directories, so make sure the
# destination exists before writing.
os.makedirs(save_root_dir, exist_ok=True)
new_labels_df.to_parquet(f'{save_root_dir}/labels_vs{extra_name}.parquet')
new_detections_df.to_parquet(f'{save_root_dir}/detections_vs{extra_name}.parquet')