In [1]:
# import packages and methods that will be used in this notebook

import healpy
import numpy as np
import GCRCatalogs
from astropy.coordinates import SkyCoord
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
import GCRCatalogs

# Step 1: Change the root_dir
# This can be done by setting the 'root_dir' in the configuration dictionary
# Replace '/your/new/root/dir' with the path to your new root directory
GCRCatalogs.set_root_dir('/nfs/turbo/lsa-regier/')
GCRCatalogs.get_root_dir()

'/nfs/turbo/lsa-regier/'

In [3]:
GCRCatalogs.get_public_catalog_names()


['desc_cosmodc2',
 'desc_dc2_run2.2i_dr6_object',
 'desc_dc2_run2.2i_dr6_object_with_truth_match',
 'desc_dc2_run2.2i_dr6_truth',
 'desc_dc2_run2.2i_truth_galaxy_summary',
 'desc_dc2_run2.2i_truth_sn_summary',
 'desc_dc2_run2.2i_truth_sn_variability',
 'desc_dc2_run2.2i_truth_star_summary',
 'desc_dc2_run2.2i_truth_star_variability']

In [4]:
truth_cat = GCRCatalogs.load_catalog('desc_dc2_run2.2i_dr6_truth')
truth_cat.list_all_quantities()

['cosmodc2_id',
 'truth_type',
 'mag_i',
 'flux_r',
 'ra',
 'match_sep',
 'mag_u',
 'flux_i',
 'is_nearest_neighbor',
 'rv',
 'flux_g',
 'mag_r',
 'match_objectId',
 'mag_y',
 'id_string',
 'host_galaxy',
 'flux_y',
 'flux_u',
 'id',
 'redshift',
 'av',
 'patch',
 'is_good_match',
 'dec',
 'mag_g',
 'is_unique_truth_entry',
 'cosmodc2_hp',
 'flux_z',
 'tract',
 'mag_z']

In [None]:
data = truth_cat.get_quantities([
    "id", "flux_u", "flux_g","flux_r","flux_i","flux_z","redshift"],filters=['truth_type == 2'])

In [38]:
import pandas as pd
df = pd.DataFrame(data)

In [36]:
df.describe()

Unnamed: 0,flux_u,id,flux_r,redshift,flux_i,flux_g,flux_z
count,4136988.0,4136988.0,4136988.0,4136988.0,4136988.0,4136988.0,4136988.0
mean,198194.1,28963130000.0,1014710.0,0.0,1234137.0,633081.8,1340739.0
std,6318576.0,13194210000.0,42259300.0,0.0,53182530.0,23896700.0,58471040.0
min,0.2066179,277362.0,17.73591,0.0,16.5943,2.402487,12.91049
25%,4.552544,31102090000.0,108.6661,0.0,269.747,37.71347,405.0174
50%,40.42743,31411760000.0,722.7829,0.0,1536.447,283.3616,2172.738
75%,906.8907,40741840000.0,8229.41,0.0,14652.83,4077.584,19497.0
max,2632770000.0,41021010000.0,24142550000.0,0.0,31689610000.0,12155550000.0,35464580000.0


In [26]:
# Your desired order of columns
desired_order = ["id", "flux_u", "flux_g","flux_r","flux_i","flux_z","redshift"]

# Reordering the columns according to the desired order
df_reordered = df[desired_order]

In [29]:
df_filtered = df_reordered[df_reordered['redshift'] != 0]

In [32]:
df_filtered

Unnamed: 0,id,flux_u,flux_g,flux_r,flux_i,flux_z,redshift


In [30]:
df_10000=df_filtered[:10000]

In [31]:
df_10000.to_csv('/home/../data/scratch/truth_data_10000.csv', index=False)

Or We can use pandas

In [16]:
import pandas as pd
# truth_catalog = GCRCatalogs.load_catalog('/home/../data/scratch/truth_tract2723.parquet')
# default way not working
df = pd.read_parquet('/home/../data/scratch/truth_tract2723.parquet')
df.head()

Unnamed: 0,id,id_string,host_galaxy,truth_type,ra,dec,redshift,flux_u,flux_g,flux_r,...,tract,patch,cosmodc2_hp,cosmodc2_id,mag_r,match_objectId,match_sep,is_good_match,is_nearest_neighbor,is_unique_truth_entry
0,10940305839,10940305839,-1,1,50.93795,-44.629795,1.050468,5678.708984,5577.517578,6334.502441,...,2723,6,10321,10940305839,21.895718,11975906419540343,0.118004,False,True,True
1,10937870093,10937870093,-1,1,50.91845,-44.628441,0.474819,146.518021,1341.131714,5984.994629,...,2723,6,10321,10937870093,21.95734,11975906419541206,0.059833,False,True,True
2,11563663598,11563663598,-1,1,50.622709,-44.633968,0.759036,134.074341,272.126617,766.903015,...,2723,16,10444,11563663598,24.188148,11976043858493441,0.205348,False,True,True
3,10938869183,10938869183,-1,1,50.622897,-44.629667,0.808502,932.008118,1224.280762,2598.827148,...,2723,16,10321,10938869183,22.863056,11976043858493443,0.154115,False,True,True
4,11564005688,11564005688,-1,1,50.442544,-44.639395,0.849298,35.554699,104.535225,462.301849,...,2723,16,10444,11564005688,24.737686,11976043858493737,0.173813,True,True,True


In [17]:
df.columns

Index(['id', 'id_string', 'host_galaxy', 'truth_type', 'ra', 'dec', 'redshift',
       'flux_u', 'flux_g', 'flux_r', 'flux_i', 'flux_z', 'flux_y', 'av', 'rv',
       'tract', 'patch', 'cosmodc2_hp', 'cosmodc2_id', 'mag_r',
       'match_objectId', 'match_sep', 'is_good_match', 'is_nearest_neighbor',
       'is_unique_truth_entry'],
      dtype='object')

In [20]:
df['truth_type'].unique()

array([1, 2, 3], dtype=int32)