## Variables to set

In [1]:
#Variables to use (EDIT THIS TO CUSTOMIZE)
save_path = './' #Base location for the output folders Light_Curves and Padded_lc (keep the trailing '/')
delete_prior_files = True #Delete existing Light_Curves and Padded_lc folders before saving
cadence = 100 #Minimum cadence quasars to use 
selected_filters = 'ugriz' #Which filters to use, one letter per filter (choose from the letters ugriz)

## Import Statements

In [2]:
#Use the LSST_AGN_DC_Pull Script
from LSST_AGN_DC_Pull import get_all_data_quasars,Select_Cadence_and_Features,ReShape_Light_Curves,Pad_Light_Curves
import pandas as pd
import numpy as np
import os
from shutil import rmtree
from tqdm import tqdm
from time import time
import pickle

## Loading the Data from the Site

In [3]:
#Zenodo locations of the two tables used below:
# - the object table holds the attributes (the quasar classification and the redshift data needed later)
# - the forced source table holds the light curves
object_url = 'https://zenodo.org/records/6878414/files/ObjectTable.parquet'
forced_source_url = 'https://zenodo.org/records/6878414/files/ForcedSourceTable.parquet'

#Read both tables and report how long the loading took
start_time = time()
print('Loading LSST AGN Data from Site...')
object_df = pd.read_parquet(object_url)
fs_df = pd.read_parquet(forced_source_url)
print(f'Data Loaded in {time()-start_time}s')

Loading LSST AGN Data from Site...
Data Loaded in 384.4414412975311s


## Loading Data from Saved Folder (alternative to loading from the site)

In [None]:
#Root directory of the locally saved data challenge files
#NOTE: this is a machine-specific absolute path; edit it to point at your own copy
data_dir = "C:/Users/rajua/3rd Sem Masters/Thesis Research/6878414/"

#Full paths of the two parquet tables inside that directory
object_table_path = os.path.join(data_dir, 'ObjectTable.parquet')
forced_source_table_path = os.path.join(data_dir, 'ForcedSourceTable.parquet')

#Read the object table first, then the forced source table
object_df = pd.read_parquet(object_table_path)
fs_df = pd.read_parquet(forced_source_table_path)

## Processing Input Tables

In [4]:
#Saving all the quasars (rows whose class is 'Qso') and their attributes
quasars = object_df[object_df['class'] == 'Qso']

#Group the forced source table by objectId
fs_gp = fs_df.groupby('objectId')

#Dropping objects that have no periodic data at all (every column with 'Periodic' in its name is missing)
lc_cols = [col for col in object_df.columns if 'Periodic' in col]
td_objects = object_df.dropna(subset=lc_cols, how='all').copy()

#Get all the quasar data
#The name `Filter` used here before was never defined ahead of this cell (it only exists as a
#loop variable in the saving cell), so a fresh kernel raised a NameError on this line.
#NOTE(review): selected_filters is assumed to be what get_all_data_quasars expects as its
#fourth argument -- confirm against LSST_AGN_DC_Pull.
all_quasars_light_curves = get_all_data_quasars(quasars,td_objects,fs_gp,selected_filters)

Selecting Quasar Light Curves: 100%|████████████████████████████████████████████| 83130/83130 [04:25<00:00, 313.31it/s]


## Properties of Selected Cadence Quasars

In [5]:
#Filter the quasar light curves using the `cadence` threshold and the `selected_filters` set in the first cell
#(presumably keeps their magnitudes, errors, and observation times -- see Select_Cadence_and_Features to confirm)
selected_quasar_light_curves = Select_Cadence_and_Features(all_quasars_light_curves,cadence,selected_filters)

Filtering for Cadence and Features: 100%|██████████████████████████████████████| 39173/39173 [00:06<00:00, 6447.19it/s]


In [None]:
#Optional checkpoint: write the selected light curves to 'unprocessed_lcs.pickle'
#in the current working directory so the steps above do not have to be repeated
with open('unprocessed_lcs.pickle', 'wb') as pickle_file:
    pickle.dump(selected_quasar_light_curves, pickle_file)

In [None]:
#Optional: restore the selected light curves from the 'unprocessed_lcs.pickle' checkpoint
#(only unpickle files you created yourself -- unpickling can execute arbitrary code)
with open('unprocessed_lcs.pickle', 'rb') as pickle_file:
    selected_quasar_light_curves = pickle.load(pickle_file)

In [None]:
#Getting the id associated with each quasar (read from the first row of its light curve)
selected_quasar_ids = [light_curve.objectId.iloc[0] for light_curve in selected_quasar_light_curves]

#Getting the redshifts of these quasars
#Each id is cast to str before it is compared with the index of `quasars`
redshifts = []
for quasar_id in selected_quasar_ids:
    #.iloc[0] takes the first match by position; the earlier `.z[0]` depended on pandas
    #treating an integer key as a position on a non-integer index, which is deprecated
    z = quasars.loc[quasars.index == str(quasar_id), 'z'].iloc[0]
    redshifts.append(z)

#Save the id -> redshift table next to the light curve folders
redshifts_map = pd.DataFrame({'ID':selected_quasar_ids,'z':redshifts})
redshifts_map.to_csv(os.path.join(save_path,'Redshift_Map.csv'),index = False)

## Processing the labels and shape of data and Creating Padding to Homogenize Length

In [6]:
#Reshaping and homogenizing the light curves
#reshaped_curves is kept because both it and the padded version are written to disk in the saving cell
#(presumably Pad_Light_Curves pads every curve to a common length -- confirm in LSST_AGN_DC_Pull)
reshaped_curves = ReShape_Light_Curves(selected_quasar_light_curves)
processed_curves = Pad_Light_Curves(reshaped_curves)

Formatting Output: 100%|████████████████████████████████████████████████████████████| 997/997 [00:02<00:00, 408.04it/s]
Padding Light Curves: 997it [00:02, 385.98it/s]


## Saving Files

In [7]:
#Create the necessary folders (one sub-folder per filter), deleting old copies first if requested
for folder in ['Light_Curves','Padded_lc']:
    folder_path = os.path.join(save_path,folder)
    #Check for the old folder under save_path itself; os.listdir() with no argument
    #only looks at the current working directory, which may be a different place
    if delete_prior_files and os.path.isdir(folder_path):
        rmtree(folder_path)
    for Filter in selected_filters:
        #exist_ok=True lets this cell be re-run when delete_prior_files is False
        #instead of failing because the folder is already there
        os.makedirs(os.path.join(folder_path,Filter),exist_ok=True)

#Save the formatted light curves, one csv per quasar and filter, named after the quasar id
for i in tqdm(range(len(processed_curves)),desc = 'Saving Light Curves'):
    file_name = f'{str(selected_quasar_ids[i])}.csv'
    for Filter in selected_filters:
        #Light_Curves receives the reshaped curve, Padded_lc the padded one
        reshaped_curves[i][Filter].to_csv(os.path.join(save_path,'Light_Curves',Filter,file_name),index = False)
        processed_curves[i][Filter].to_csv(os.path.join(save_path,'Padded_lc',Filter,file_name),index = False)

print('Done..')

Saving Light Curves: 100%|███████████████████████████████████████████████████████████| 997/997 [00:11<00:00, 83.67it/s]
