# ALeRCE classes

ZTF alert schema (definitions of the detection columns used below): https://github.com/ZwickyTransientFacility/ztf-avro-alert

1. **AGN:** Active Galactic Nuclei
1. **Blazar:** Blazar
1. **CV/Nova:** Cataclysmic Variable Star/Nova
1. **Ceph:** Cepheid Variable Star
1. **DSCT:** Delta Scuti Star
1. **EA:** Eclipsing Algol
1. **EB/EW:** Eclipsing Binaries/Eclipsing W Ursae Majoris
1. **LPV:** Long Period Variable
1. **Periodic-Other:** Periodic-Other
1. **QSO:** Quasi-Stellar Object
1. **RRL:** RR Lyrae Variable Star
1. **RSCVn:** RS Canum Venaticorum
1. **SLSN:** Super Luminous Supernova
1. **SNII:** Supernova II
1. **SNIIb:** Supernova IIb
1. **SNIIn:** Supernova IIn
1. **SNIa:** Supernova Ia
1. **SNIbc:** Supernova Ibc
1. **TDE:** Tidal disruption event (to remove)
1. **YSO:** Young Stellar Object
1. **ZZ:** ZZ Ceti Stars (to remove)

In [1]:
import sys

# Add the directory two levels up to the import path (presumably where the
# `vstars` package lives -- confirm against the repository layout).
# Guarded so that re-running this cell does not keep appending duplicate entries.
if '../../' not in sys.path:
    sys.path.append('../../')

In [2]:
%load_ext autoreload
%autoreload 2
from vstars.alerce_utils import process_df_labels, process_df_detections, keep_only_valid_objs
import numpy as np
import pandas as pd

load_rootdir = '../../../../tesis/surveys_data'
survey_name = 'alerceZTFv7.1'
uses_corr = True # uses_corr=False only can be used with SNe objects
df_index_names = {
    'oid':'oid', # object id
    'oid_det':'objectId', # object id
    'label':'classALeRCE', # object class name
    'ra':'ra',
    'dec':'dec',
    'band':'fid', # band
    'obs_day':'mjd', # days
    'obs':'magpsf_corr' if uses_corr else 'magpsf', # observations
    'obs_error':'sigmapsf_corr' if uses_corr else 'sigmapsf', # observation errors
}
detections_cols = ['objectId', 'fid', 'mjd', df_index_names['obs'], df_index_names['obs_error']]

### load files and processing
detections_df = pd.read_parquet(f'{load_rootdir}/{survey_name}/detections_with_xmatch')
detections_df, det_objs = process_df_detections(detections_df, df_index_names['oid_det'], df_index_names['oid'], detections_cols, uses_corr=uses_corr)
print(f'detections_df - columns: {list(detections_df.columns)} - id: {detections_df.index.name}')

labels_df = pd.read_csv(f'{load_rootdir}/{survey_name}/dfcrossmatches_prioritized_v7.0.1.csv')
labels_df, label_objs = process_df_labels(labels_df, df_index_names['oid'], det_objs)
print(f'labels - columns: {list(labels_df.columns)} - id: {labels_df.index.name}')

### filter
valid_objs = list(set(det_objs) & set(label_objs))
labels_df = keep_only_valid_objs(labels_df, valid_objs)
detections_df = keep_only_valid_objs(detections_df, valid_objs)

### print info
classes = set(labels_df[df_index_names['label']].values)
print('classes:', classes)

detections_df - columns: ['fid', 'mjd', 'magpsf_corr', 'sigmapsf_corr'] - id: oid
labels - columns: ['classALeRCE', 'ra', 'dec', 'period', 'source', 'id_source', 'class_source', 'separation_arcsec'] - id: oid
classes: {'Ceph', 'RRL', 'SLSN', 'NLAGN', 'YSO', 'DSCT', 'ZZ', 'NLQSO', 'SNIIb', 'LPV', 'QSO', 'CV/Nova', 'SNII', 'SNIa', 'Blazar', 'SNIbc', 'Periodic-Other', 'TDE', 'RSCVn', 'SNIIn', 'EB/EW', 'AGN', 'EA'}


In [3]:
from vstars.level_bars import LevelBar

# autoreload is already enabled by the loading cell above; loading it again here
# only produced the "The autoreload extension is already loaded" warning.

# np.unique returns the distinct class names together with how often each one occurs.
classes, counts = np.unique(labels_df[df_index_names['label']].values, return_counts=True)
# class name -> number of labeled objects
population_cdict = dict(zip(classes, counts))
print(LevelBar(population_cdict, ncols=60))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
|█▍                                                        | AGN - 2,724/110,043 (2.48%)
|▌                                                         | Blazar - 987/110,043 (0.90%)
|▍                                                         | CV/Nova - 907/110,043 (0.82%)
|▎                                                         | Ceph - 665/110,043 (0.60%)
|▍                                                         | DSCT - 804/110,043 (0.73%)
|██▋                                                       | EA - 5,131/110,043 (4.66%)
|█████████████████▎                                        | EB/EW - 32,825/110,043 (29.83%)
|███████                                                   | LPV - 13,403/110,043 (12.18%)
|                                                          | NLAGN - 6/110,043 (0.01%)
|                                                          | NLQSO - 49/110,043 (0.04%)
|▏                 

In [4]:
# Summary (dtypes, entry count, memory usage) of the filtered detections table.
detections_df.info()
# Show the last 20 detection rows.
detections_df.tail(20)

<class 'pandas.core.frame.DataFrame'>
Index: 3785884 entries, ZTF17aaafyya to ZTF20abcxmfu
Data columns (total 4 columns):
fid              int64
mjd              float64
magpsf_corr      float64
sigmapsf_corr    float64
dtypes: float64(3), int64(1)
memory usage: 144.4+ MB


Unnamed: 0_level_0,fid,mjd,magpsf_corr,sigmapsf_corr
oid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ZTF20abcxmfu,2,59086.386597,18.835959,0.082238
ZTF20abcxmfu,2,59063.37816,18.837938,0.093653
ZTF20abcxmfu,2,59087.373218,18.735105,0.054769
ZTF20abcxmfu,2,59026.47537,18.59994,0.069426
ZTF20abcxmfu,2,59078.334687,18.791681,0.057921
ZTF20abcxmfu,2,59063.372917,18.729684,0.105329
ZTF20abcxmfu,2,59075.331447,18.804337,0.064222
ZTF20abcxmfu,2,59036.376898,18.645068,0.095453
ZTF20abcxmfu,2,59072.331921,18.73131,0.072716
ZTF20abcxmfu,2,59107.27588,18.874474,0.110856


In [5]:
# Summary (dtypes, non-null counts, memory usage) of the filtered labels table.
labels_df.info()
# Show the first 20 labeled objects.
labels_df.head(20)

<class 'pandas.core.frame.DataFrame'>
Index: 110043 entries, ZTF19aapcxhy to ZTF18abgqxlw
Data columns (total 8 columns):
classALeRCE          110043 non-null object
ra                   110043 non-null float64
dec                  110043 non-null float64
period               67457 non-null object
source               110043 non-null object
id_source            110043 non-null object
class_source         110043 non-null object
separation_arcsec    110043 non-null float64
dtypes: float64(3), object(5)
memory usage: 7.6+ MB


Unnamed: 0_level_0,classALeRCE,ra,dec,period,source,id_source,class_source,separation_arcsec
oid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ZTF19aapcxhy,AGN,154.202129,18.723076,,Oh2015,5.877420127343739e+17,AGN_galaxy_dominated,0.227455
ZTF18abtyspw,AGN,25.660298,0.087434,,Oh2015,5.880155092805878e+17,AGN_galaxy_dominated,0.141792
ZTF18abwtbad,AGN,51.846346,0.739559,,Oh2015,5.877315136939624e+17,AGN_galaxy_dominated,0.084636
ZTF18acvgdfy,AGN,134.407409,5.472596,,Oh2015,5.877327033915148e+17,AGN_galaxy_dominated,0.08719
ZTF18aadyxlg,AGN,125.577004,33.09112,,Oh2015,5.880133827239608e+17,AGN_galaxy_dominated,0.07357
ZTF19aapuscr,AGN,199.102327,-2.090396,,Oh2015,5.87724649802236e+17,AGN_galaxy_dominated,0.038667
ZTF19aanxuxz,AGN,212.636953,-2.82253,,Oh2015,5.877297769066006e+17,AGN_galaxy_dominated,0.181073
ZTF19aaohxwd,AGN,175.347895,21.936834,,Oh2015,5.877420610690746e+17,AGN_galaxy_dominated,0.150095
ZTF18aceqjzp,AGN,139.30376,37.075558,,Oh2015,5.882978641805313e+17,AGN_galaxy_dominated,0.456351
ZTF18accdsrj,AGN,146.293754,39.018848,,Oh2015,5.87735044693623e+17,AGN_galaxy_dominated,0.245466


In [6]:
from vstars.alerce_utils import get_valid_classes_objs, keep_only_valid_objs

# autoreload is already enabled by the loading cell above; loading it again here
# only produced the "The autoreload extension is already loaded" warning.

# Classes whose objects are kept for the exported dataset.
target_classes = [
    #'EA', # Eclipsing Algol (currently excluded)
    'EB/EW', # Eclipsing Binaries / W Ursae Majoris
    'Ceph', # Cepheids
    'RRL', # RR Lyrae
    'DSCT', # Delta Scuti
    'LPV', # Long Period Variables
]
# Object ids whose label belongs to one of the target classes.
valid_objs = get_valid_classes_objs(labels_df, df_index_names, target_classes)
print(valid_objs[:10])

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
['ZTF18abwwdsc', 'ZTF18aaakigd', 'ZTF18aaavkyj', 'ZTF18abwwdxw', 'ZTF19aaocniv', 'ZTF17aacemqz', 'ZTF18aaiyfjx', 'ZTF19aaczymt', 'ZTF18abvpirg', 'ZTF18abccnft']


In [7]:
# Keep only the detections of the objects selected for the target classes.
new_detections_df = keep_only_valid_objs(detections_df, valid_objs)
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() emitted a stray "None" line.
new_detections_df.info()
new_detections_df.head(50)

<class 'pandas.core.frame.DataFrame'>
Index: 2667429 entries, ZTF17aaafyya to ZTF20abceckn
Data columns (total 4 columns):
fid              int64
mjd              float64
magpsf_corr      float64
sigmapsf_corr    float64
dtypes: float64(3), int64(1)
memory usage: 101.8+ MB
None


Unnamed: 0_level_0,fid,mjd,magpsf_corr,sigmapsf_corr
oid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ZTF17aaafyya,1,58791.283368,15.262989,0.004987
ZTF17aaafyya,1,59081.470544,15.281165,0.007919
ZTF17aaafyya,1,59067.40397,15.245437,0.000493
ZTF17aaafyya,1,58793.249965,15.205705,0.010043
ZTF17aaafyya,1,58793.239028,15.228786,0.009911
ZTF17aaafyya,1,58747.324676,15.22664,0.009151
ZTF17aaafyya,1,58334.472708,15.233562,0.009774
ZTF17aaafyya,1,58677.470833,15.27661,0.009193
ZTF17aaafyya,1,58332.463079,15.212075,0.003076
ZTF17aaafyya,1,58831.202326,15.287754,0.005032


In [8]:
# Keep only the labels of the objects selected for the target classes.
new_labels_df = keep_only_valid_objs(labels_df, valid_objs)
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() emitted a stray "None" line.
new_labels_df.info()
new_labels_df.head(50)

<class 'pandas.core.frame.DataFrame'>
Index: 82237 entries, ZTF18abwwdsc to ZTF18abgqxlw
Data columns (total 8 columns):
classALeRCE          82237 non-null object
ra                   82237 non-null float64
dec                  82237 non-null float64
period               59854 non-null object
source               82237 non-null object
id_source            82237 non-null object
class_source         82237 non-null object
separation_arcsec    82237 non-null float64
dtypes: float64(3), object(5)
memory usage: 5.6+ MB
None


Unnamed: 0_level_0,classALeRCE,ra,dec,period,source,id_source,class_source,separation_arcsec
oid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ZTF18abwwdsc,Ceph,326.755845,-8.060673,2.18895,CRTSnorth,1007116010622.0,Cep-II,0.676851
ZTF18aaakigd,Ceph,65.440016,34.069809,2.1136,CRTSnorth,1135020009815.0,ACEP,0.390734
ZTF18aaavkyj,Ceph,192.885044,24.122642,1.09572,CRTSnorth,1123064031941.0,ACEP,0.29111
ZTF18abwwdxw,Ceph,324.865973,-17.296145,1.11714,CRTSnorth,1018112055304.0,ACEP,1.881608
ZTF19aaocniv,Ceph,248.580619,-16.015739,1.3049582,CRTSnorth,1015086048064.0,Cep-II,0.405458
ZTF17aacemqz,Ceph,117.977671,27.561363,16.2488,CRTSnorth,1126038065052.0,Cep-II,1.092943
ZTF18aaiyfjx,Ceph,245.46437,38.90959,1.04986,CRTSnorth,1138071060852.0,Cep-II,0.610218
ZTF19aaczymt,Ceph,228.810756,-14.901231,4.3680244,CRTSnorth,1015079060794.0,Cep-II,0.422664
ZTF18abvpirg,Ceph,351.9553,-9.388094,119.947,CRTSnorth,1009125035570.0,Cep-II,0.266861
ZTF18abccnft,Ceph,330.037982,9.169603,1.11004,CRTSnorth,1109117020606.0,ACEP,0.759991


In [10]:
# Spot check of a single object: its label row followed by all of its detections.
print(
    new_labels_df.loc['ZTF18aaavkyj'],
    new_detections_df.loc['ZTF18aaavkyj'],
    sep='\n',
)

classALeRCE                     Ceph
ra                           192.885
dec                          24.1226
period                       1.09572
source                     CRTSnorth
id_source            1123064031941.0
class_source                    ACEP
separation_arcsec            0.29111
Name: ZTF18aaavkyj, dtype: object
              fid           mjd  magpsf_corr  sigmapsf_corr
oid                                                        
ZTF18aaavkyj    1  58567.446736    15.051076       0.014877
ZTF18aaavkyj    1  58588.213599    14.874550       0.024162
ZTF18aaavkyj    2  58589.241782    14.827786       0.011289


In [11]:
import os
import pandas as pd

### save files
save_root_dir = f'../../data/{survey_name}'
# to_parquet does not create missing parent directories, so make sure the
# output directory exists before writing (no-op when it is already there).
os.makedirs(save_root_dir, exist_ok=True)
new_labels_df.to_parquet(f'{save_root_dir}/labels_vs.parquet')
new_detections_df.to_parquet(f'{save_root_dir}/detections_vs.parquet')