In [1]:
from astropy.table import Table, join
import numpy as np

import time
from numpy.random import default_rng

import healpy as hp
import pandas as pd

import astropy.cosmology
from astropy.coordinates import SkyCoord
from astropy import units as u
from astropy.table import Table
from astropy.cosmology import Planck18
from astropy.io import fits

import sys
sys.path.insert(0,'../code')
%load_ext autoreload
%autoreload 2
import utils
import generate_random
import correlations
import masks
import maps

In [2]:
from matplotlib import pyplot as plt

# Parameter setting

Saving

In [3]:
quants = {}

In [4]:
fn_quants = '../data/quantities.txt'
save = True

Names

In [5]:
name_catalog = '\emph{{Gaia}}-\emph{{unWISE}} Quasar Catalog'
abbrv_catalog = 'Quaia'

name_gpurer = "\emph{{Gaia}} DR3 Quasars `Purer' Sample"
abbrv_gpurer = "\emph{{Gaia}} DR3 `Purer'"

Functions

In [6]:
def get_ndens_map(ras, decs, NSIDE=64):
    map_nqso, _ = maps.get_map(NSIDE, ras, decs, null_val=0)
    map_ndens = map_nqso/area_per_pixel
    map_ndens_masked = hp.ma(map_ndens)
    map_ndens_masked.mask = map_ndens==0
    return map_ndens_masked

Map settings

In [7]:
NSIDE = 64
area_per_pixel = hp.nside2pixarea(NSIDE, degrees=True)
print(f"Area per pixel: {area_per_pixel:.3f} deg")

Area per pixel: 0.839 deg


Other prelims

In [8]:
G_hi = 20.5
G_lo = 20.0

In [9]:
rng = np.random.default_rng(seed=42)

# Load datasets

### Gaia

In [10]:
fn_gall = '../data/gaia_candidates_plus.fits.gz'
tab_gall = utils.load_table(fn_gall)
print(len(tab_gall))

6649162


In [11]:
fn_gpurer_ids = '../data/gaia_purer_sourceids.fits'
tab_gpurer_ids = utils.load_table(fn_gpurer_ids)
tab_gpurer = join(tab_gall, tab_gpurer_ids, keys='source_id', join_type='inner', metadata_conflicts='silent')
print(len(tab_gpurer))

1942825


In [12]:
fn_gsup = '../data/gaia_candidates_superset.fits'
tab_gsup = utils.load_table(fn_gsup)
print(len(tab_gsup))

1518782


In [13]:
fn_gclean = '../data/gaia_candidates_clean.fits'
tab_gclean = utils.load_table(fn_gclean)
print(len(tab_gclean))

1414385


In [14]:
fn_gcatlo = f'../data/catalog_G{G_lo}.fits'
tab_gcatlo = utils.load_table(fn_gcatlo)
print(len(tab_gcatlo))

755850


In [15]:
fn_gcathi = f'../data/catalog_G{G_hi}.fits'
tab_gcathi = utils.load_table(fn_gcathi)
print(len(tab_gcathi))

1295502


### SDSS and contaminants

These are SDSS objects that have (any) Gaia source matches. There is unWISE data where available, but the below catalogs are not limited to unWISE matches.

In [16]:
tab_squasars = utils.load_table(f'../data/quasars_sdss_xgaia_xunwise_good_nodup.fits')
print(f"Number of SDSS quasars: {len(tab_squasars)}")

Number of SDSS quasars: 379698


In [17]:
tab_sstars = utils.load_table(f'../data/stars_sdss_xgaia_xunwise_good_nodup.fits')
print(f"Number of SDSS stars: {len(tab_sstars)}")

Number of SDSS stars: 683221


In [18]:
tab_sgals = utils.load_table(f'../data/galaxies_sdss_xgaia_xunwise_good_nodup.fits')
print(f"Number of SDSS galaxies: {len(tab_sgals)}")

Number of SDSS galaxies: 717059


In [19]:
tab_mcs = utils.load_table(f'../data/mcs_xgaia.fits')
print(f"Number of MC stars: {len(tab_mcs)}")

Number of MC stars: 1362378


SDSS quasars with zwarning=0; gaia and unWISE data where available

In [20]:
tab_sqall = utils.load_table(f'../data/quasars_sdss_xgaiaall_xunwiseall_good.fits')
print(f"Number of all SDSS quasars: {len(tab_sqall)}")

Number of all SDSS quasars: 638083


SDSS quasars x catalog clean, for SPZ redshifts

In [21]:
tab_sqclean = utils.load_table(f'../data/quasars_sdss_clean.fits')
print(f"Number of all SDSS quasars: {len(tab_sqclean)}")

Number of all SDSS quasars: 243206


Only objects in superset (gaia qso candidates with unWISE data and QSOC redshifts and G lim)

In [22]:
fn_labeled_sup = '../data/labeled_superset.fits'
tab_labeled_sup = utils.load_table(fn_labeled_sup)
print(f"Number of labeled Gaia quasar candidates for decontamination: {len(tab_labeled_sup)}")

Number of labeled Gaia quasar candidates for decontamination: 259641


In [23]:
fn_eboss = '../data/eBOSS_QSO_full_ALLdata-vDR16_changecolname.fits'
tab_eboss = Table.read(fn_eboss)
i_imatch = (tab_eboss['IMATCH']==1) | (tab_eboss['IMATCH']==2)
i_comp = (tab_eboss['COMP_BOSS']>0.5)
i_sect = (tab_eboss['sector_SSR']>0.5)
i_clust = i_imatch & i_comp & i_sect
i_zwarning0 = tab_eboss['ZWARNING']==0
tab_eboss = tab_eboss[i_clust & i_zwarning0]

print(f"Number of eboss quasars: {len(tab_eboss)}")

Number of eboss quasars: 409341


Redshifts

In [24]:
redshift_estimator_name = 'kNN'
save_tag_model = f'_K27_std'
fn_spz_labeled = f'../data/redshift_estimates/redshifts_spz_labeled_{redshift_estimator_name}{save_tag_model}.fits'
tab_spz_labeled = utils.load_table(fn_spz_labeled)
print(f"N = {len(tab_spz_labeled)}")

N = 243206


# Quantities

## Numbers of objects

In [25]:
quants['N_gall'] = f"{len(tab_gall):,}"
print(quants['N_gall'])

6,649,162


In [26]:
i_gall_wqsoc = np.isfinite(tab_gall['redshift_qsoc'])
quants['N_gall_wqsoc'] = f"{np.sum(i_gall_wqsoc):,}"
print(quants['N_gall_wqsoc'])

6,375,063


In [27]:
quants['N_gpurer'] = f"{len(tab_gpurer):,}"
print(quants['N_gpurer'])

1,942,825


In [28]:
i_gpurer_wqsoc = np.isfinite(tab_gpurer['redshift_qsoc'])
quants['N_gpurer_wqsoc'] = f"{np.sum(i_gpurer_wqsoc):,}"
print(quants['N_gpurer_wqsoc'])

1,729,625


In [29]:
quants['N_gsup'] = f"{len(tab_gsup):,}"
print(quants['N_gsup'])

1,518,782


In [30]:
quants['N_gclean'] = f"{len(tab_gclean):,}"
print(quants['N_gclean'])

1,414,385


In [31]:
quants['p_cut_gsup_gclean'] = f"{100*(len(tab_gsup) - len(tab_gclean))/len(tab_gsup):.0f}"
print(quants['p_cut_gsup_gclean'])

7


In [32]:
quants['N_gcatlo'] = f"{len(tab_gcatlo):,}"
print(quants['N_gcatlo'])

755,850


In [33]:
quants['N_gcathi'] = f"{len(tab_gcathi):,}"
print(quants['N_gcathi'])

1,295,502


In [34]:
i_squasars_unwise = np.isfinite(tab_squasars['mag_w1_vg']) & ~tab_squasars.mask['mag_w1_vg'] & \
                    np.isfinite(tab_squasars['mag_w2_vg']) & ~tab_squasars.mask['mag_w2_vg']
quants['N_squasars_unwise'] = f"{np.sum(i_squasars_unwise):,}"
print(quants['N_squasars_unwise'])

343,074


In [35]:
i_sq_sup = tab_labeled_sup['class']=='q'
quants['N_squasars_sup'] = f"{np.sum(i_sq_sup):,}"
print(quants['N_squasars_sup'])

246,122


In [36]:
quants['N_sqall'] = f"{len(tab_sqall):,}"
print(quants['N_sqall'])

638,083


In [37]:
quants['N_sqclean'] = f"{len(tab_sqclean):,}"
print(quants['N_sqclean'])

243,206


In [38]:
quants['N_eboss'] = f"{len(tab_eboss):,}"
print(quants['N_eboss'])

409,341


stars and gals

In [39]:
i_sgals_unwise = np.isfinite(tab_sgals['mag_w1_vg']) & ~tab_sgals.mask['mag_w1_vg'] & \
                 np.isfinite(tab_sgals['mag_w2_vg']) & ~tab_sgals.mask['mag_w2_vg']
quants['N_sgals_unwise'] = f"{np.sum(i_sgals_unwise):,}"
print(quants['N_sgals_unwise'])

600,897


In [40]:
i_sg_sup = tab_labeled_sup['class']=='g'
quants['N_sgals_sup'] = f"{np.sum(i_sg_sup):,}"
print(quants['N_sgals_sup'])

1,316


In [41]:
i_sstars_unwise = np.isfinite(tab_sstars['mag_w1_vg']) & ~tab_sstars.mask['mag_w1_vg'] & \
                  np.isfinite(tab_sstars['mag_w2_vg']) & ~tab_sstars.mask['mag_w2_vg']
quants['N_stars_unwise'] = f"{np.sum(i_sstars_unwise):,}"
print(quants['N_stars_unwise'])

482,080


In [42]:
i_ss_sup = tab_labeled_sup['class']=='s'
quants['N_sstars_sup'] = f"{np.sum(i_ss_sup):,}"
print(quants['N_sstars_sup'])

2,276


In [43]:
i_mcs_unwise = np.isfinite(tab_mcs['mag_w1_vg']) & ~tab_mcs.mask['mag_w1_vg'] & \
               np.isfinite(tab_mcs['mag_w2_vg']) & ~tab_mcs.mask['mag_w2_vg']
quants['N_mcs_unwise'] = f"{np.sum(i_mcs_unwise):,}"
print(quants['N_mcs_unwise'])

11,770


In [44]:
i_ms_sup = tab_labeled_sup['class']=='m'
quants['N_mcs_sup'] = f"{np.sum(i_ms_sup):,}"
print(quants['N_mcs_sup'])

9,927


## Redshift info

### general

In [45]:
i_zfinite = np.isfinite(tab_gall['redshift_qsoc'])
quants['z_med_gall'] = f"{np.median(tab_gall['redshift_qsoc'][i_zfinite]):.2f}"
print(quants['z_med_gall'])

1.67


  a.partition(kth, axis=axis, kind=kind, order=order)


In [46]:
quants['z_med_gcatlo'] = f"{np.median(tab_gcatlo['redshift_spz']):.2f}"
print(quants['z_med_gcatlo'])

1.45


In [47]:
zintermediate = 2.5
quants['zintermediate'] = zintermediate
i_above_zintermediate_gcatlo = tab_gcatlo['redshift_spz'] > zintermediate
frac_above_zintermediate_gcatlo = np.sum(i_above_zintermediate_gcatlo)/len(i_above_zintermediate_gcatlo)
quants['N_above_zintermediate_gcatlo'] = f'{np.sum(i_above_zintermediate_gcatlo):,}'
quants['p_above_zintermediate_gcatlo'] = f'{100*frac_above_zintermediate_gcatlo:.0f}'
print(quants['N_above_zintermediate_gcatlo'], quants['p_above_zintermediate_gcatlo'])

76,804 10


### dz checks vs sdss

In [48]:
rand_ints_labeled = tab_spz_labeled['rand_ints']
i_train, i_valid, i_test = utils.split_train_val_test(rand_ints_labeled,
                                 frac_train=0.7, frac_val=0.15, frac_test=0.15)

In [49]:
z_sdss_test = tab_spz_labeled['z_sdss'][i_test]
z_spzraw_test = tab_spz_labeled['redshift_spz_raw'][i_test]
z_spz_test = tab_spz_labeled['redshift_spz'][i_test]
z_gaia_test = tab_spz_labeled['redshift_qsoc'][i_test]

In [50]:
dz_spzraw_test = (z_spzraw_test - z_sdss_test)/(1+z_sdss_test)
dz_spz_test = (z_spz_test - z_sdss_test)/(1+z_sdss_test)
dz_gaia_test = (z_gaia_test - z_sdss_test)/(1+z_sdss_test)

In [51]:
dz_threshs = {'dzlo': 0.01, 'dzmid': 0.1, 'dzhi': 0.2}
G_maxs = {'Gbright': 19.0, 'Glo': 20.0, 'Ghi': 20.5}
dz_arrs = {'zspz': dz_spz_test, 'zgaia': dz_gaia_test}

In [52]:
# also save values to dict with full precision
frac_outliers_dict = {}

In [53]:
for G_name, G_max in G_maxs.items():
    quants[G_name] = G_max
    for dz_thresh_name, dz_thresh in dz_threshs.items():
        quants[dz_thresh_name] = dz_thresh
        for z_name, dz_arr in dz_arrs.items():
        
            i_G = tab_spz_labeled['phot_g_mean_mag'][i_test] < G_max
            i_outliers = np.abs(dz_arr[i_G]) > dz_thresh
            key = f'p_outliers_{z_name}_{dz_thresh_name}_{G_name}'
            frac = np.sum(i_outliers)/len(i_outliers)
            val = f"{100*frac:.0f}"
            print(key, val)
            quants[key] = val
            frac_outliers_dict[key] = frac

            i_acc = np.abs(dz_arr[i_G]) <= dz_thresh
            key = f'p_acc_{z_name}_{dz_thresh_name}_{G_name}'
            frac = np.sum(i_acc)/len(i_acc)
            val = f"{100*frac:.0f}"
            print(key, val)
            quants[key] = val

p_outliers_zspz_dzlo_Gbright 15
p_acc_zspz_dzlo_Gbright 85
p_outliers_zgaia_dzlo_Gbright 10
p_acc_zgaia_dzlo_Gbright 90
p_outliers_zspz_dzmid_Gbright 6
p_acc_zspz_dzmid_Gbright 94
p_outliers_zgaia_dzmid_Gbright 7
p_acc_zgaia_dzmid_Gbright 93
p_outliers_zspz_dzhi_Gbright 4
p_acc_zspz_dzhi_Gbright 96
p_outliers_zgaia_dzhi_Gbright 7
p_acc_zgaia_dzhi_Gbright 93
p_outliers_zspz_dzlo_Glo 27
p_acc_zspz_dzlo_Glo 73
p_outliers_zgaia_dzlo_Glo 25
p_acc_zgaia_dzlo_Glo 75
p_outliers_zspz_dzmid_Glo 10
p_acc_zspz_dzmid_Glo 90
p_outliers_zgaia_dzmid_Glo 19
p_acc_zgaia_dzmid_Glo 81
p_outliers_zspz_dzhi_Glo 6
p_acc_zspz_dzhi_Glo 94
p_outliers_zgaia_dzhi_Glo 18
p_acc_zgaia_dzhi_Glo 82
p_outliers_zspz_dzlo_Ghi 39
p_acc_zspz_dzlo_Ghi 61
p_outliers_zgaia_dzlo_Ghi 38
p_acc_zgaia_dzlo_Ghi 62
p_outliers_zspz_dzmid_Ghi 17
p_acc_zspz_dzmid_Ghi 83
p_outliers_zgaia_dzmid_Ghi 30
p_acc_zgaia_dzmid_Ghi 70
p_outliers_zspz_dzhi_Ghi 9
p_acc_zspz_dzhi_Ghi 91
p_outliers_zgaia_dzhi_Ghi 28
p_acc_zgaia_dzhi_Ghi 72


Get numbers for all Gaia candidates with SDSS redshifts:

In [54]:
tab_squasars.columns

<TableColumns names=('ra','dec','source_id','ra_sdss','dec_sdss','objid','z_sdss','phot_g_mean_mag','phot_bp_mean_mag','phot_rp_mean_mag','phot_bp_n_obs','phot_rp_n_obs','dec_unwise','mag_w1_vg','mag_w2_vg','ra_unwise','unwise_objid','dist_arcsec')>

In [55]:
tab_squasars_lite = tab_squasars.copy()
tab_squasars_lite.keep_columns(['source_id', 'z_sdss'])
tab_gall_lite = tab_gall.copy()
tab_gall_lite.keep_columns(['source_id', 'redshift_qsoc', 'phot_g_mean_mag', 'flags_qsoc'])

In [56]:
tab_gall_xsquasars = join(tab_gall_lite, tab_squasars_lite, keys='source_id', join_type='inner')
print(len(tab_gall_xsquasars))

326067


In [57]:
tab_gall_xsquasars = tab_gall_xsquasars[np.isfinite(tab_gall_xsquasars['redshift_qsoc'])]
print(len(tab_gall_xsquasars))

320149


In [58]:
z_sdss_gall = tab_gall_xsquasars['z_sdss']
z_gaia_gall = tab_gall_xsquasars['redshift_qsoc']

dz_gaia_gall = (z_gaia_gall - z_sdss_gall)/(1+z_sdss_gall)

In [59]:
z_name = 'zgaia'
for G_name, G_max in G_maxs.items():
    for dz_thresh_name, dz_thresh in dz_threshs.items():
        
        i_G = tab_gall_xsquasars['phot_g_mean_mag'] < G_max
        i_outliers = np.abs(dz_gaia_gall[i_G]) > dz_thresh
        key = f'p_outliers_gall_{z_name}_{dz_thresh_name}_{G_name}'
        frac = np.sum(i_outliers)/len(i_outliers)
        val = f"{100*frac:.0f}"
        print(key, val)
        quants[key] = val 
        frac_outliers_dict[key] = frac

        i_acc = np.abs(dz_gaia_gall[i_G]) <= dz_thresh
        key = f'p_acc_gall_{z_name}_{dz_thresh_name}_{G_name}'
        frac = np.sum(i_acc)/len(i_acc)
        val = f"{100*frac:.0f}"
        print(key, val)
        quants[key] = val

p_outliers_gall_zgaia_dzlo_Gbright 9
p_acc_gall_zgaia_dzlo_Gbright 91
p_outliers_gall_zgaia_dzmid_Gbright 7
p_acc_gall_zgaia_dzmid_Gbright 93
p_outliers_gall_zgaia_dzhi_Gbright 6
p_acc_gall_zgaia_dzhi_Gbright 94
p_outliers_gall_zgaia_dzlo_Glo 25
p_acc_gall_zgaia_dzlo_Glo 75
p_outliers_gall_zgaia_dzmid_Glo 19
p_acc_gall_zgaia_dzmid_Glo 81
p_outliers_gall_zgaia_dzhi_Glo 18
p_acc_gall_zgaia_dzhi_Glo 82
p_outliers_gall_zgaia_dzlo_Ghi 38
p_acc_gall_zgaia_dzlo_Ghi 62
p_outliers_gall_zgaia_dzmid_Ghi 30
p_acc_gall_zgaia_dzmid_Ghi 70
p_outliers_gall_zgaia_dzhi_Ghi 28
p_acc_gall_zgaia_dzhi_Ghi 72


Ends up being the same as our cleaned sample lol oop (rounded to a percent)

In [60]:
G_names = ['Glo', 'Ghi']
for G_name in G_names:
    for dz_thresh_name, dz_thresh in dz_threshs.items():
        factor = frac_outliers_dict[f'p_outliers_gall_zgaia_{dz_thresh_name}_{G_name}']/ \
                 frac_outliers_dict[f'p_outliers_zspz_{dz_thresh_name}_{G_name}']
        key = f'factor_reduction_outliers_{dz_thresh_name}_{G_name}'
        factor_rounded = round(factor*2)/2
        val = rf'${{\sim}}{factor_rounded:g}\times$'
        print(factor)
        print(key,':', val)
        quants[key] = val

0.9334183980787354
factor_reduction_outliers_dzlo_Glo : ${\sim}1\times$
1.8206339547196377
factor_reduction_outliers_dzmid_Glo : ${\sim}2\times$
3.051113068194445
factor_reduction_outliers_dzhi_Glo : ${\sim}3\times$
0.9713798263548138
factor_reduction_outliers_dzlo_Ghi : ${\sim}1\times$
1.783354861845533
factor_reduction_outliers_dzmid_Ghi : ${\sim}2\times$
3.1889830596917235
factor_reduction_outliers_dzhi_Ghi : ${\sim}3\times$


For all magnitudes, and sub-split for those with reliable redshifts:

In [61]:
idx_reliable_gall = (tab_gall['flags_qsoc']==0) | (tab_gall['flags_qsoc']==16) 
print(np.sum(idx_reliable_gall), len(idx_reliable_gall), np.sum(idx_reliable_gall)/len(idx_reliable_gall))

1302677 6649162 0.1959159665533792


In [62]:
quants['p_zreliable_gall'] = f'{100*np.sum(idx_reliable_gall)/len(idx_reliable_gall):.0f}'
print(quants['p_zreliable_gall'])

20


In [63]:
idx_reliable = (tab_gall_xsquasars['flags_qsoc']==0) | (tab_gall_xsquasars['flags_qsoc']==16) 
print(np.sum(idx_reliable), len(idx_reliable), np.sum(idx_reliable)/len(idx_reliable))

115901 320149 0.3620220584790207


In [64]:
for dz_thresh_name, dz_thresh in dz_threshs.items():
       
    i_acc = np.abs(dz_gaia_gall) <= dz_thresh
    key = f'p_acc_gall_{z_name}_{dz_thresh_name}'
    val = f"{100*np.sum(i_acc)/len(i_acc):.0f}"
    print(key, val)
    quants[key] = val
    
    i_acc = np.abs(dz_gaia_gall[idx_reliable]) <= dz_thresh
    key = f'p_acc_gall_zreliable_{z_name}_{dz_thresh_name}'
    val = f"{100*np.sum(i_acc)/len(i_acc):.0f}"
    print(key, val)
    quants[key] = val

p_acc_gall_zgaia_dzlo 53
p_acc_gall_zreliable_zgaia_dzlo 86
p_acc_gall_zgaia_dzmid 62
p_acc_gall_zreliable_zgaia_dzmid 91
p_acc_gall_zgaia_dzhi 65
p_acc_gall_zreliable_zgaia_dzhi 92


### Decontamination

In [65]:
i_makes_pmcut = utils.cut_pm_G(tab_gsup)
quants['N_removed_pmcut'] = np.sum(~i_makes_pmcut)
print(quants['N_removed_pmcut'])
frac_removed_pmcut = np.sum(~i_makes_pmcut)/len(i_makes_pmcut)
print(frac_removed_pmcut)
key = 'p_removed_pmcut'
val = f'{frac_removed_pmcut*100:.1f}'
print(key, val)
quants[key] = val

39470
0.025987929801643685
p_removed_pmcut 2.6


In [66]:
_, _, i_test = utils.split_train_val_test(tab_labeled_sup['rand_ints'], frac_train=0.7, frac_test=0.3, frac_val=0)

In [67]:
i_contam_labeled = (tab_labeled_sup['class'][i_test]=='s') | (tab_labeled_sup['class'][i_test]=='g')
print(np.sum(i_contam_labeled))

i_contam_gclean = np.isin(tab_labeled_sup['source_id'][i_test][i_contam_labeled], tab_gclean['source_id'])
print(np.sum(i_contam_gclean))
print(np.sum(i_contam_labeled)/np.sum(i_contam_gclean))

1119
264
4.238636363636363


In [68]:
key = 'factor_reduction_contaminants'
fac = np.sum(i_contam_labeled)/np.sum(i_contam_gclean)
fac_rounded = round(fac*2)/2
val = rf'${{\sim}}{fac_rounded:g}\times$'
print(fac)
print(key, val)
quants[key] = val

4.238636363636363
factor_reduction_contaminants ${\sim}4\times$


In [69]:
i_q_labeled = tab_labeled_sup['class']=='q'
print(np.sum(i_q_labeled))

i_q_gclean = np.isin(tab_labeled_sup['source_id'][i_q_labeled], tab_gclean['source_id'])
print(np.sum(i_q_gclean))

246122
243206


In [70]:
key = 'p_sqall_excluded_clean'
p = np.sum(i_q_labeled)/np.sum(i_q_gclean)-1
print(p)
val = f'{p*100:.1f}'
print(key, val)
quants[key] = val

0.011989835777077795
p_sqall_excluded_clean 1.2


### Area and volume

In [71]:
def v_shells_of_z(z_arr, z_bins, fsky, cosmo):
    
    v_at_z_bins = np.empty(len(z_bins), dtype=u.Quantity)
    for i in range(len(z_bins)):
        v = cosmo.comoving_volume(z_bins[i])
        v_at_z_bins[i] = v.to(u.Gpc**3)
    v_at_z_bins *= fsky
    v_shells = v_at_z_bins[1:] - v_at_z_bins[:-1] 

    return np.array(v_shells)

In [72]:
def ndens_of_z(z_arr, z_bins, fsky, cosmo):
    
    v_shells = v_shells_of_z(z_arr, z_bins, fsky, cosmo)
    ndens = []
    for i in range(len(z_bins)-1):
        N_inbin = np.sum((z_arr >= z_bins[i]) & (z_arr < z_bins[i+1]))
        ndens_inbin = N_inbin/v_shells[i]
        ndens.append(ndens_inbin)
    return ndens

In [73]:
# eqn 1.7.32 of https://arxiv.org/pdf/1606.00180.pdf
def volume_effective_Gpcperh(z_arr, z_bins, cosmo, fsky, P0):
    
    ndens_at_z_bins = ndens_of_z(z_arr, z_bins, fsky, cosmo)
    ndens_at_z_bins = np.array(ndens_at_z_bins, dtype=u.Quantity)

    nPs = np.array([n*P0 for n in ndens_at_z_bins])
    prefacs = (nPs /(1 + nPs))**2
    v_shells = v_shells_of_z(z_arr, z_bins, fsky, cosmo)    
    v_Gpc = np.sum(prefacs*v_shells)

    v_Gpc = v_Gpc.to(u.Gpc**3) # this should be just in Gpc
    v_Gpcperh = v_Gpc * cosmo.h**3 # (Gpc) -> (Gpc/h)^3, mult by h^3 
    print(f"Sky fraction is {fsky:.3f}")
    print(f"Effective volume is {v_Gpc:.3f} = {v_Gpcperh.value:.3f} (Gpc/h)^3")
    return v_Gpcperh.value

In [74]:
cosmo = Planck18
area_allsky = 41252.96125*(u.deg**2)

TODO: estimating p0 now, what should be doing?

In [75]:
fn_dustmap = f'../data/maps/map_dust_NSIDE{NSIDE}.npy'
map_dust = maps.get_dust_map(NSIDE=NSIDE, R=3.1, fn_map=fn_dustmap)

Dustmap already exists, loading from ../data/maps/map_dust_NSIDE64.npy


Gaia:

In [76]:
Av_hi = 0.5
npix_below_Avhi = np.sum(map_dust < Av_hi)
area_below_Avhi = area_per_pixel*u.deg**2 * npix_below_Avhi
quants['Avhi'] = Av_hi
area_below_Avhi_fmt = rf'{area_below_Avhi.value:.2f} deg$^2$'
quants['area_below_Avhi'] = area_below_Avhi_fmt
print(quants['area_below_Avhi'])

29154.54 deg$^2$


In [77]:
fsky_below_Avhi = area_below_Avhi/area_allsky
quants['fsky_below_Avhi'] = f'{fsky_below_Avhi:.2f}'
print(quants['fsky_below_Avhi'])

0.71


In [78]:
z_arr = tab_gcatlo['redshift_spz']
z_min, z_max = np.min(z_arr), np.max(z_arr)
z_bins = np.arange(0, z_max+0.1, 0.1)
P0 = 4e4 * u.Mpc**3 / cosmo.h**3 
#P0 = P0.to(u.Gpc**3)
#print(P0)
vol_gcatlo = volume_effective_Gpcperh(z_arr, z_bins, cosmo, fsky_below_Avhi, P0)
vol_gcatlo_fmt = rf'{vol_gcatlo:.2f} $(h^{{-1}}\,Gpc)^3$'
quants['volume_effective_gcatlo_below_Avhi'] = vol_gcatlo_fmt
print(quants['volume_effective_gcatlo_below_Avhi'])

Sky fraction is 0.707
Effective volume is 10.598 Gpc3 = 3.283 (Gpc/h)^3
3.28 $(h^{-1}\,Gpc)^3$


In [79]:
z_arr = tab_gcathi['redshift_spz']
z_min, z_max = np.min(z_arr), np.max(z_arr)
z_bins = np.arange(0, z_max+0.1, 0.1)
P0 = 4e4 * u.Mpc**3 / cosmo.h**3 
#P0 = P0.to(u.Gpc**3)
#print(P0)
vol_gcathi = volume_effective_Gpcperh(z_arr, z_bins, cosmo, fsky_below_Avhi, P0)
vol_gcathi_fmt = rf'{vol_gcathi:.2f} $(h^{{-1}}\,Gpc)^3$'
quants['volume_effective_gcathi_below_Avhi'] = vol_gcathi_fmt
print(quants['volume_effective_gcathi_below_Avhi'])

Sky fraction is 0.707
Effective volume is 25.356 Gpc3 = 7.854 (Gpc/h)^3
7.85 $(h^{-1}\,Gpc)^3$


SDSS:

In [80]:
# area via: # https://academic.oup.com/mnras/article/514/3/3396/6566355?login=true
area_sdss = 4808*(u.deg**2)

In [81]:
area_sdss_fmt = rf'{area_sdss.value:g} deg$^2$'
quants['area_sdss'] = area_sdss_fmt
print(quants['area_sdss'])

4808 deg$^2$


In [82]:
fsky_sdss = area_sdss/area_allsky
quants['fsky_sdss'] = f'{fsky_sdss:.2f}'
print(quants['fsky_sdss'])

0.12


In [83]:
z_arr = tab_sqall['z_sdss']
z_min, z_max = np.min(z_arr), np.max(z_arr)
z_bins = np.arange(0, z_max+0.1, 0.1)

In [84]:
z_arr = tab_sqall['z_sdss']
z_min, z_max = np.min(z_arr), np.max(z_arr)
z_bins = np.arange(0, z_max+0.1, 0.1)
P0 = 4e4 * u.Mpc**3 / cosmo.h**3 
vol_sdss = volume_effective_Gpcperh(z_arr, z_bins, cosmo, fsky_sdss, P0)
vol_sdss_fmt = rf'{vol_sdss:.2f} $(h^{{-1}}\,Gpc)^3$'
quants['volume_effective_sdss'] = vol_sdss_fmt
print(quants['volume_effective_sdss'])

Sky fraction is 0.117
Effective volume is 18.456 Gpc3 = 5.717 (Gpc/h)^3
5.72 $(h^{-1}\,Gpc)^3$


Gaia vs SDSS

In [85]:
fac_area = area_below_Avhi/area_sdss
fac_area_rounded = round(fac_area.value*2)/2
val = rf'${{\sim}}{fac_area_rounded:g}\times$'
quants['factor_area_belowAvhi_sdss'] = val
print(quants['factor_area_belowAvhi_sdss'])

${\sim}6\times$


In [86]:
fac_vol = vol_gcathi/vol_sdss
val = rf'${{\sim}}{fac_vol:.1f}\times$'
quants['factor_volume_effective_gcathi_sdss'] = val
print(quants['factor_volume_effective_gcathi_sdss'])

${\sim}1.4\times$


eBOSS

In [87]:
# area via: # https://academic.oup.com/mnras/article/514/3/3396/6566355?login=true
area_eboss = 4808*(u.deg**2)

In [88]:
area_eboss_fmt = rf'{area_eboss.value:g} deg$^2$'
quants['area_eboss'] = area_eboss_fmt
print(quants['area_eboss'])

4808 deg$^2$


In [89]:
fsky_eboss = area_eboss/area_allsky
quants['fsky_eboss'] = f'{fsky_eboss:.2f}'
print(quants['fsky_eboss'])

0.12


In [90]:
z_arr = tab_eboss['Z']
z_min, z_max = np.min(z_arr), np.max(z_arr)
z_bins = np.arange(0, z_max+0.1, 0.1)

In [91]:
P0 = 4e4 * u.Mpc**3 / cosmo.h**3 
vol_eboss = volume_effective_Gpcperh(z_arr, z_bins, cosmo, fsky_eboss, P0)
vol_eboss_fmt = rf'{vol_eboss:.2f} $(h^{{-1}}\,Gpc)^3$'
quants['volume_effective_eboss'] = vol_eboss_fmt
print(quants['volume_effective_eboss'])

Sky fraction is 0.117
Effective volume is 11.016 Gpc3 = 3.412 (Gpc/h)^3
3.41 $(h^{-1}\,Gpc)^3$


Gaia vs eBOSS

In [92]:
fac_area = area_below_Avhi/area_eboss
fac_area_rounded = round(fac_area.value*2)/2
val = rf'${{\sim}}{fac_area_rounded:g}\times$'
quants['factor_area_belowAvhi_eboss'] = val
print(quants['factor_area_belowAvhi_eboss'])

${\sim}6\times$


In [93]:
fac_vol = vol_gcathi/vol_sdss
val = rf'${{\sim}}{fac_vol:.1f}\times$'
quants['factor_volume_effective_gcathi_eboss'] = val
print(quants['factor_volume_effective_gcathi_eboss'])

${\sim}1.4\times$


## Selection function / residuals

In [94]:
#fn_rand_Glo = f'../data/randoms/randomQ_stardustm10mcs_NSIDE64_G{G_lo}_10x.fits'
fn_rand_Glo = f'../data/randoms/random_G{G_lo}_10x.fits'

tab_rand_Glo = Table.read(fn_rand_Glo)
map_rand_Glo = get_ndens_map(tab_rand_Glo['ra'], tab_rand_Glo['dec'])

map_gcatlo = get_ndens_map(tab_gcatlo['ra'], tab_gcatlo['dec'])

norm_factor = len(tab_gcatlo)/len(tab_rand_Glo)

rmse_frac = utils.compute_rmse(norm_factor*map_rand_Glo, map_gcatlo, fractional=True)
quants['rmse_fractional_residuals_Glo'] = f'{rmse_frac:.2f}'
print(quants['rmse_fractional_residuals_Glo'])

0.26


TODO: run when get sel func for G20.5

In [95]:
# fn_rand_Ghi = f'../data/randoms/randomQ_stardustm10mcs_NSIDE64_G{G_hi}_10x.fits'
fn_rand_Ghi = f'../data/randoms/random_G{G_hi}_10x.fits'

tab_rand_Ghi = Table.read(fn_rand_Ghi)
map_rand_Ghi = get_ndens_map(tab_rand_Ghi['ra'], tab_rand_Ghi['dec'])

map_gcathi = get_ndens_map(tab_gcathi['ra'], tab_gcathi['dec'])

norm_factor = len(tab_gcathi)/len(tab_rand_Ghi)

rmse_frac = utils.compute_rmse(norm_factor*map_rand_Ghi, map_gcathi, fractional=True)
quants['rmse_fractional_residuals_Ghi'] = f'{rmse_frac:.2f}'
print(quants['rmse_fractional_residuals_Ghi'])

0.28


In [96]:
tag_decontam = '_mag0.1_lm5_postpm'
fn_cuts = f'../data/color_cuts{tag_decontam}.txt'
cuts = np.genfromtxt(fn_cuts, delimiter=',', names=True)
cut_names = list(cuts.dtype.names)

In [97]:
color_label_dict = {'g_w1': 'G-W1', 
                    'w1_w2': 'W1-W2',
                    'bp_g': 'BP-G'}

In [98]:
color_names = list(cut_names.copy())
color_names.remove('intercept')

In [99]:
N_colors = len(color_names)
tol = 1e-8
cut_strs = []
for cut in cuts:
    cut_terms = []
    for cn in color_names:
        i = cut_names.index(cn)
        if abs(cut[i])<tol:
            continue
        if abs(cut[i]-1.0)<tol:
            cut_terms.append(f'({color_label_dict[cn]})')
        else:
            cut_terms.append(f'{cut[i]}\,({color_label_dict[cn]})')
    i_intercept = cut_names.index('intercept')
    # & symbols are for equation alignment
    cut_str = ' + '.join(cut_terms) + f' &> {cut[i_intercept]:g}'
    cut_strs.append(cut_str)

In [100]:
quants['color_cut_str'] = ' \\\\ '.join(cut_strs)
print(quants['color_cut_str'])

(G-W1) &> 2.15 \\ (W1-W2) &> 0.4 \\ (BP-G) &> -0.3 \\ (G-W1) + 1.2\,(W1-W2) &> 3.4


## Write dict to file

In [101]:
with open(fn_quants, 'w') as f:
    for key, val in quants.items():
        print(key, val)
        if save:
            f.write(f'{key} = {val}\n')
if save:
    print()
    print(f"Saved to {fn_quants}!")

N_gall 6,649,162
N_gall_wqsoc 6,375,063
N_gpurer 1,942,825
N_gpurer_wqsoc 1,729,625
N_gsup 1,518,782
N_gclean 1,414,385
p_cut_gsup_gclean 7
N_gcatlo 755,850
N_gcathi 1,295,502
N_squasars_unwise 343,074
N_squasars_sup 246,122
N_sqall 638,083
N_sqclean 243,206
N_eboss 409,341
N_sgals_unwise 600,897
N_sgals_sup 1,316
N_stars_unwise 482,080
N_sstars_sup 2,276
N_mcs_unwise 11,770
N_mcs_sup 9,927
z_med_gall 1.67
z_med_gcatlo 1.45
zintermediate 2.5
N_above_zintermediate_gcatlo 76,804
p_above_zintermediate_gcatlo 10
Gbright 19.0
dzlo 0.01
p_outliers_zspz_dzlo_Gbright 15
p_acc_zspz_dzlo_Gbright 85
p_outliers_zgaia_dzlo_Gbright 10
p_acc_zgaia_dzlo_Gbright 90
dzmid 0.1
p_outliers_zspz_dzmid_Gbright 6
p_acc_zspz_dzmid_Gbright 94
p_outliers_zgaia_dzmid_Gbright 7
p_acc_zgaia_dzmid_Gbright 93
dzhi 0.2
p_outliers_zspz_dzhi_Gbright 4
p_acc_zspz_dzhi_Gbright 96
p_outliers_zgaia_dzhi_Gbright 7
p_acc_zgaia_dzhi_Gbright 93
Glo 20.0
p_outliers_zspz_dzlo_Glo 27
p_acc_zspz_dzlo_Glo 73
p_outliers_zgaia_dzlo_Gl

# Table from public catalog

In [121]:
fn_public = f'../data/quaia_G20.5.fits'
tab_public = utils.load_table(fn_public)
columns = tab_public.columns
columns_list = list(columns)

In [122]:
columns_tex = [column.replace("_", "\\_") for column in columns]
# first \ escapes turning \t into tab lol
columns_show = [f'\\texttt{{{column_tex}}}' for column_tex in columns_tex]

In [123]:
symbols_show = [utils.label2symbol_dict[column] for column in columns]
units = [tab_public[column].info.unit for column in columns]
units_show = [utils.unit2latex_dict[str(unit)] if str(unit) in utils.unit2latex_dict else str(unit) for unit in units]
descriptions_show = [tab_public[column].info.description for column in columns]

In [124]:
idx_show = 0
values = [tab_public[column][idx_show] for column in columns]
values_show = [f'{values[i]:{utils.label2format_dict[columns_list[i]]}}' for i in range(len(columns))]

In [125]:
table_table = Table([columns_show, symbols_show, units_show, descriptions_show, values_show], 
                    names=('column name', 'symbol', 'units', 'description', 'example entry value'))

In [126]:
table_table

column name,symbol,units,description,example entry value
str29,str21,str8,str61,str16
\texttt{source\_id},,,\emph{Gaia} DR3 source identifier,6459630980096
\texttt{unwise\_objid},,,unWISE DR1 source identifier,0453p000o0014479
\texttt{redshift\_quaia},$z_\mathrm{Quaia}$,,spectrophotometric redshift estimate,0.416867
\texttt{redshift\_quaia\_err},,,$1\sigma$ uncertainty on spectrophotometric redshift estimate,0.062063
\texttt{ra},,deg,right ascension,44.910498
\texttt{dec},,deg,declination,0.189649
\texttt{l},,deg,galactic longitude,176.659434
\texttt{b},,deg,galactic latitude,-48.835164
\texttt{phot\_g\_mean\_mag},$G$,mag,\emph{Gaia} $G$-band mean magnitude,20.173105
\texttt{phot\_bp\_mean\_mag},$BP$,mag,\emph{Gaia} integrated $BP$ mean magnitude,20.200150


In [127]:
table_column_names = table_table.colnames

In [128]:
table_contents = '\\begin{tabular}'
table_contents += '{|'+'|'.join(['l' for _ in table_column_names]) + '|}\n'
table_contents += '\hline\n'
table_contents += ' & '.join(table_column_names) + ' \\\\\n'
table_contents += '\hline\n'
for i in range(len(table_table)):
    row_vals = [table_table[tcn][i] for tcn in table_column_names]
    row = ' & '.join(row_vals) + ' \\\\\n'
#     if i<len(table_table)-1:
#         row += ' \\\\ \n'
    print(row,end='') # end makes print not print an additional newline
    table_contents += row
table_contents += '\hline\n'
table_contents += '\end{tabular}'

\texttt{source\_id} &  &  & \emph{Gaia} DR3 source identifier & 6459630980096 \\
\texttt{unwise\_objid} &  &  & unWISE DR1 source identifier & 0453p000o0014479 \\
\texttt{redshift\_quaia} & $z_\mathrm{Quaia}$ &  & spectrophotometric redshift estimate & 0.416867 \\
\texttt{redshift\_quaia\_err} &  &  & $1\sigma$ uncertainty on spectrophotometric redshift estimate & 0.062063 \\
\texttt{ra} &  & deg & right ascension & 44.910498 \\
\texttt{dec} &  & deg & declination & 0.189649 \\
\texttt{l} &  & deg & galactic longitude & 176.659434 \\
\texttt{b} &  & deg & galactic latitude & -48.835164 \\
\texttt{phot\_g\_mean\_mag} & $G$ & mag & \emph{Gaia} $G$-band mean magnitude & 20.173105 \\
\texttt{phot\_bp\_mean\_mag} & $BP$ & mag & \emph{Gaia} integrated $BP$ mean magnitude & 20.200150 \\
\texttt{phot\_rp\_mean\_mag} & $RP$ & mag & \emph{Gaia} integrated $RP$ mean magnitude & 18.871586 \\
\texttt{mag\_w1\_vg} & $W1$ & mag & unWISE $W1$ magnitude & 14.774343 \\
\texttt{mag\_w2\_vg} & $W2$ & mag 

In [129]:
fn_table = '../data/catalog_table.txt'
with open(fn_table, 'w') as f:
    if save:
        f.write(table_contents)
if save:
    print(f"Saved to {fn_table}!")

Saved to ../data/catalog_table.txt!
