# ALeRCE classes

https://github.com/ZwickyTransientFacility/ztf-avro-alert

1. **AGN:** Active Galactic Nuclei
1. **Blazar:** Blazar
1. **CV/Nova:** Cataclysmic Variable Star/Nova
1. **Ceph:** Cepheid Variable Star
1. **DSCT:** Delta Scuti Star
1. **EA:** Eclipsing Algol
1. **EB/EW:** Eclipsing Binaries/Eclipsing W Ursa Majoris
1. **LPV:** Long Period Variable
1. **Periodic-Other:** Periodic-Other
1. **QSO:** Quasi-Stellar Object
1. **RRL:** RRLyrae Variable Star
1. **RSCVn:** RS Canum Venaticorum
1. **SLSN:** Super Luminous Supernova
1. **SNII:** Supernova II
1. **SNIIb:** Supernova IIb
1. **SNIIn:** Supernova IIn
1. **SNIa:** Supernova Ia
1. **SNIbc:** Supernova Ibc
1. **TDE:** Tidal disruption event (to remove)
1. **YSO:** Young Stellar Object
1. **ZZ:** ZZ Ceti Stars (to remove)

In [1]:
import numpy as np
import pandas as pd

def subset_df_columns(df, subset_cols):
    df_cols = list(df.columns)
    return df[[c for c in subset_cols if c in df_cols]]

def set_index(df, index_name):
    if not df.index.name is None and df.index.name==index_name:
        return df
    df_cols = list(df.columns)
    assert index_name in df_cols
    return df.set_index([index_name])

def df_to_float32(df):
    for c in df.columns:
        if df[c].dtype=='float64':
            df[c] = df[c].astype(np.float32)

In [2]:
import numpy as np
import pandas as pd

load_root_dir = '../../TESIS/surveys_data'
survey_name = 'alerceZTF_v7.1'
df_index_names = {
    'oid':'oid', # object id
    'oid_det':'objectId', # object id
    'label':'classALeRCE', # object class name
    'ra':'ra',
    'dec':'dec',
    'band':'fid', # band
    'obs_day':'mjd', # days
    'obs':'magpsf_corr', # observations
    'obs_error':'sigmapsf_corr', # observation errors
}
subset_columns_names = {
    'labels':['oid', 'classALeRCE', 'ra', 'dec'],
    'detections':['objectId', 'fid', 'mjd', 'magpsf_corr', 'sigmapsf_corr'],
}

### load files and processing
labels_df = pd.read_csv(f'{load_root_dir}/{survey_name}/dfcrossmatches_prioritized_v7.0.1.csv')
print(f'labels - columns: {list(labels_df.columns)} - id: {labels_df.index.name}')
#labels_df = subset_df_columns(labels_df, subset_columns_names['labels']) # sub sample columns
labels_df = set_index(labels_df, df_index_names['oid']) # set index

detections_df = pd.read_parquet(f'{load_root_dir}/{survey_name}/detections_with_xmatch')
print(f'detections_df - columns: {list(detections_df.columns)} - id: {detections_df.index.name}')
detections_df = subset_df_columns(detections_df, subset_columns_names['detections']) # sub sample columns
detections_df = set_index(detections_df, df_index_names['oid_det']) # set index
detections_df.index.rename(df_index_names['oid'], inplace=True)
df_to_float32(detections_df)

### print info
classes = np.unique(labels_df[df_index_names['label']].values)
print('classes:', classes)

labels - columns: ['oid', 'classALeRCE', 'ra', 'dec', 'period', 'source', 'id_source', 'class_source', 'separation_arcsec'] - id: None
detections_df - columns: ['fid', 'isdiffpos', 'sigmapsf', 'field', 'fwhm', 'dec', 'magpsf', 'rcid', 'ra', 'sky', 'rb', 'ssmagnr', 'distpsnr3', 'sgscore2', 'maggaiabright', 'distpsnr2', 'distpsnr1', 'maggaia', 'exptime', 'drb', 'sgscore3', 'neargaia', 'sgscore1', 'mjd', 'corrected', 'magpsf_corr', 'sigmapsf_corr', 'sigmapsf_corr_ext', 'dubious', 'has_stamp'] - id: objectId
classes: ['AGN' 'Blazar' 'CV/Nova' 'Ceph' 'DSCT' 'EA' 'EB/EW' 'LPV' 'NLAGN' 'NLQSO'
 'Periodic-Other' 'QSO' 'RRL' 'RSCVn' 'SLSN' 'SNII' 'SNIIb' 'SNIIn' 'SNIa'
 'SNIbc' 'TDE' 'YSO' 'ZZ']


In [1]:
labels_df.info()
labels_df[:10]

NameError: name 'labels_df' is not defined

In [4]:
print('detections_df:', detections_df.info())
print(f'id: {detections_df.index.name}')
detections_df[:10]

<class 'pandas.core.frame.DataFrame'>
Index: 26688137 entries, ZTF17aaaemke to ZTF20abfpkfh
Data columns (total 4 columns):
 #   Column         Dtype  
---  ------         -----  
 0   fid            int64  
 1   mjd            float32
 2   magpsf_corr    float32
 3   sigmapsf_corr  float32
dtypes: float32(3), int64(1)
memory usage: 712.7+ MB
detections_df: None
id: oid


Unnamed: 0_level_0,fid,mjd,magpsf_corr,sigmapsf_corr
oid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ZTF17aaaemke,1,58700.460938,16.478577,0.008541
ZTF17aaaemke,1,58754.289062,16.520813,100.0
ZTF17aaaemke,1,58763.378906,16.586325,0.02466
ZTF17aaaemke,1,58718.414062,16.350399,100.0
ZTF17aaaemke,1,58372.402344,16.530643,0.014738
ZTF17aaaemke,1,58679.445312,16.080139,100.0
ZTF17aaaemke,1,58510.128906,16.741459,0.026502
ZTF17aaaemke,1,58736.460938,16.580482,0.025537
ZTF17aaaemke,1,58789.207031,16.571587,100.0
ZTF17aaaemke,1,58482.128906,16.707977,0.000809


In [5]:
def get_valid_classes_ids(df, target_classes, df_index_names):
    new_df = df.reset_index()
    valid_oids = new_df.loc[new_df[df_index_names['label']].isin(target_classes)][df_index_names['oid']]
    return list(valid_oids.values)
    
target_classes = [
    #'EB/EW', 'EA', # Eclipsing Binaries
    'EB/EW', # Eclipsing Binaries
    'Ceph', # Cefeidas
    'RRL', # RR Lyrae
    'DSCT', # Delta Scuti
    'LPV', # Long Period Variables
]
valid_oids = get_valid_classes_ids(labels_df, target_classes, df_index_names)
print(valid_oids)

['ZTF18ackejdy', 'ZTF18aczbbdx', 'ZTF18acswqul', 'ZTF18abwjgyy', 'ZTF19aadqfhh', 'ZTF18acsncor', 'ZTF18aaasdsx', 'ZTF19aauwhxy', 'ZTF19aaesgnu', 'ZTF19aaedxeu', 'ZTF19ablzhan', 'ZTF18abwwdsc', 'ZTF18aaakigd', 'ZTF18aaavkyj', 'ZTF18abwwdxw', 'ZTF19aaocniv', 'ZTF17aacemqz', 'ZTF18aaiyfjx', 'ZTF19aaczymt', 'ZTF18abvpirg', 'ZTF18abccnft', 'ZTF18aazzfhx', 'ZTF18abvtdoa', 'ZTF18absvbuc', 'ZTF18abmxafi', 'ZTF18acpewdw', 'ZTF18abhqdkc', 'ZTF18acrmlma', 'ZTF18accipen', 'ZTF18abetcxj', 'ZTF19aafeivz', 'ZTF18aagwwiq', 'ZTF18adhatmc', 'ZTF17aadfivr', 'ZTF19acbihek', 'ZTF18abmnocu', 'ZTF18abnnbys', 'ZTF17aaadqhs', 'ZTF19aalddxp', 'ZTF18abtjmni', 'ZTF18aagrcvb', 'ZTF19acjntpb', 'ZTF17aabpwzy', 'ZTF17aacoobq', 'ZTF18abtpdzf', 'ZTF18actwqrc', 'ZTF18abomkrt', 'ZTF18abvfaju', 'ZTF18accfbxq', 'ZTF18abwerjv', 'ZTF18abxeian', 'ZTF18aaadqzu', 'ZTF17aaawdie', 'ZTF18acrulfg', 'ZTF18abvpmxu', 'ZTF18aaaatsz', 'ZTF18acszysu', 'ZTF19aaklsfm', 'ZTF18abgcnrf', 'ZTF18abmouca', 'ZTF18abnjutc', 'ZTF18acapcrd', 'ZTF19a

In [6]:
new_labels_df = labels_df[labels_df.index.isin(valid_oids)]
print(new_labels_df.info())
new_labels_df[:50]

<class 'pandas.core.frame.DataFrame'>
Index: 104649 entries, ZTF18ackejdy to ZTF18abgqxlw
Data columns (total 8 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   classALeRCE        104649 non-null  object 
 1   ra                 104649 non-null  float64
 2   dec                104649 non-null  float64
 3   period             78597 non-null   object 
 4   source             104649 non-null  object 
 5   id_source          104649 non-null  object 
 6   class_source       104649 non-null  object 
 7   separation_arcsec  104649 non-null  float64
dtypes: float64(3), object(5)
memory usage: 7.2+ MB
None


Unnamed: 0_level_0,classALeRCE,ra,dec,period,source,id_source,class_source,separation_arcsec
oid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ZTF18ackejdy,Ceph,354.975712,-9.083688,1.070942,CRTSnorth,1009126042551.0,ACEP,1.646404
ZTF18aczbbdx,Ceph,315.994366,4.286366,10.2204,CRTSnorth,1104113057592.0,Cep-II,1.703194
ZTF18acswqul,Ceph,315.99461,4.28677,10.2204,CRTSnorth,1104113057592.0,Cep-II,1.726474
ZTF18abwjgyy,Ceph,46.827247,31.046092,12.975054,CRTSnorth,1132015000747.0,Cep-II,0.889909
ZTF19aadqfhh,Ceph,63.379044,41.318778,2.31263,CRTSnorth,1140018080162.0,ACEP,1.35651
ZTF18acsncor,Ceph,111.727093,31.388196,93.7295,CRTSnorth,1132035011340.0,Cep-II,1.606843
ZTF18aaasdsx,Ceph,120.213171,39.555058,1.88394,CRTSnorth,1140034001926.0,Cep-II,0.719356
ZTF19aauwhxy,Ceph,214.990065,-3.835723,13.85,CRTSnorth,1004077045140.0,Cep-II,1.710969
ZTF19aaesgnu,Ceph,227.535605,-15.873556,1.7070289,CRTSnorth,1015079032004.0,Cep-II,0.150768
ZTF19aaedxeu,Ceph,253.172096,20.540109,7.56596,CRTSnorth,1121086028361.0,Cep-II,1.506175


In [7]:
new_detections_df = detections_df[detections_df.index.isin(valid_oids)]
print(new_detections_df.info())
new_detections_df[:50]

<class 'pandas.core.frame.DataFrame'>
Index: 20395950 entries, ZTF17aaaemke to ZTF20abefeou
Data columns (total 4 columns):
 #   Column         Dtype  
---  ------         -----  
 0   fid            int64  
 1   mjd            float32
 2   magpsf_corr    float32
 3   sigmapsf_corr  float32
dtypes: float32(3), int64(1)
memory usage: 544.6+ MB
None


Unnamed: 0_level_0,fid,mjd,magpsf_corr,sigmapsf_corr
oid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ZTF17aaaemke,1,58700.460938,16.478577,0.008541
ZTF17aaaemke,1,58754.289062,16.520813,100.0
ZTF17aaaemke,1,58763.378906,16.586325,0.02466
ZTF17aaaemke,1,58718.414062,16.350399,100.0
ZTF17aaaemke,1,58372.402344,16.530643,0.014738
ZTF17aaaemke,1,58679.445312,16.080139,100.0
ZTF17aaaemke,1,58510.128906,16.741459,0.026502
ZTF17aaaemke,1,58736.460938,16.580482,0.025537
ZTF17aaaemke,1,58789.207031,16.571587,100.0
ZTF17aaaemke,1,58482.128906,16.707977,0.000809


In [8]:
new_labels_df.loc['ZTF17aaaemke']

classALeRCE                    EB/EW
ra                            358.76
dec                         0.802021
period                      0.271886
source                     CRTSnorth
id_source            1101128014943.0
class_source                      EW
separation_arcsec           0.261602
Name: ZTF17aaaemke, dtype: object

In [9]:
import pandas as pd

### save files
save_root_dir = f'data/{survey_name}'
new_labels_df.to_csv(f'{save_root_dir}/labels_vs.csv')
new_detections_df.to_parquet(f'{save_root_dir}/detections_vs.parquet')