In [1]:
from astropy.table import Table, join
import numpy as np

import time
from numpy.random import default_rng

import healpy as hp
import pandas as pd

import astropy.cosmology
from astropy.coordinates import SkyCoord
from astropy import units as u
from astropy.table import Table
from astropy.cosmology import Planck18
from astropy.io import fits

import sys
sys.path.insert(0,'../code')
%load_ext autoreload
%autoreload 2
import utils
import generate_random
import correlations
import masks
import maps

In [2]:
from matplotlib import pyplot as plt

# Parameter setting

Saving

In [3]:
quants = {}

In [4]:
fn_quants = '../data/quantities.txt'
save = True

Names

In [5]:
name_catalog = '\emph{{Gaia}}-unWISE Quasmology Catalog'
abbrv_catalog = 'GW-Quasmo'

name_gpurer = '\emph{{Gaia}} Purer Sample'
abbrv_gpurer = '\emph{{Gaia}} Purer'

Functions

In [6]:
def get_ndens_map(ras, decs, NSIDE=64):
    map_nqso, _ = maps.get_map(NSIDE, ras, decs, null_val=0)
    map_ndens = map_nqso/area_per_pixel
    map_ndens_masked = hp.ma(map_ndens)
    map_ndens_masked.mask = map_ndens==0
    return map_ndens_masked

Map settings

In [7]:
NSIDE = 64
area_per_pixel = hp.nside2pixarea(NSIDE, degrees=True)
print(f"Area per pixel: {area_per_pixel:.3f} deg")

Area per pixel: 0.839 deg


Other prelims

In [8]:
G_hi = 20.5
G_lo = 20.0

In [9]:
rng = np.random.default_rng(seed=42)

# Load datasets

### Gaia

In [10]:
fn_gall = '../data/gaia_candidates.fits.gz'
tab_gall = utils.load_table(fn_gall)
print(len(tab_gall))

6649162


In [11]:
fn_gpurer_ids = '../data/gaia_purer_sourceids.fits'
tab_gpurer_ids = utils.load_table(fn_gpurer_ids)
tab_gpurer = join(tab_gall, tab_gpurer_ids, keys='source_id', join_type='inner', metadata_conflicts='silent')
print(len(tab_gpurer))

1942825


In [12]:
fn_gsup = '../data/gaia_candidates_superset.fits'
tab_gsup = utils.load_table(fn_gsup)
print(len(tab_gsup))

1518782


In [13]:
fn_gclean = '../data/gaia_candidates_clean.fits'
tab_gclean = utils.load_table(fn_gclean)
print(len(tab_gclean))

1414385


In [14]:
fn_gcatlo = f'../data/catalog_G{G_lo}.fits'
tab_gcatlo = utils.load_table(fn_gcatlo)
print(len(tab_gcatlo))

755850


In [15]:
fn_gcathi = f'../data/catalog_G{G_hi}.fits'
tab_gcathi = utils.load_table(fn_gcathi)
print(len(tab_gcathi))

1295502


### SDSS

These are SDSS objects that have (any) Gaia source matches. There is unWISE data where available, but the below catalogs are not limited to unWISE matches.

In [16]:
tab_squasars = utils.load_table(f'../data/quasars_sdss_xgaia_xunwise_good_nodup.fits')
print(f"Number of SDSS quasars: {len(tab_squasars)}")
print(f"Number with unWISE info:", np.sum(np.isfinite(tab_squasars['mag_w1_vg']) & ~tab_squasars.mask['mag_w1_vg']))

Number of SDSS quasars: 379698
Number with unWISE info: 350070


In [17]:
tab_sstars = utils.load_table(f'../data/stars_sdss_xgaia_xunwise_good_nodup.fits')
print(f"Number of SDSS stars: {len(tab_sstars)}")
print(f"Number with unWISE info:", np.sum(np.isfinite(tab_sstars['mag_w1_vg']) & ~tab_sstars.mask['mag_w1_vg']))

Number of SDSS stars: 683221
Number with unWISE info: 482080


In [18]:
tab_sgals = utils.load_table(f'../data/galaxies_sdss_xgaia_xunwise_good_nodup.fits')
print(f"Number of SDSS galaxies: {len(tab_sgals)}")
print(f"Number with unWISE info:", np.sum(np.isfinite(tab_sgals['mag_w1_vg']) & ~tab_sgals.mask['mag_w1_vg']))

Number of SDSS galaxies: 717059
Number with unWISE info: 600897


SDSS quasars with zwarning=0; gaia and unWISE data where available

In [19]:
tab_sqall = utils.load_table(f'../data/quasars_sdss_xgaiaall_xunwiseall_good.fits')
print(f"Number of all SDSS quasars: {len(tab_sqall)}")

Number of all SDSS quasars: 638083


Only objects in superset (gaia qso candidates with unWISE data and QSOC redshifts and G lim)

In [20]:
fn_labeled_sup = '../data/labeled_superset.fits'
tab_labeled_sup = utils.load_table(fn_labeled_sup)
print(f"Number of labeled Gaia quasar candidates for decontamination: {len(tab_labeled_sup)}")

Number of labeled Gaia quasar candidates for decontamination: 259641


Redshifts

In [21]:
redshift_estimator_name = 'kNN'
save_tag_model = f'_K27_std'
fn_spz_labeled = f'../data/redshift_estimates/redshifts_spz_labeled_{redshift_estimator_name}{save_tag_model}.fits'
tab_spz_labeled = utils.load_table(fn_spz_labeled)
print(f"N = {len(tab_spz_labeled)}")

N = 243206


# Quantities

## Numbers of objects

In [22]:
quants['N_gall'] = f"{len(tab_gall):,}"
print(quants['N_gall'])

6,649,162


In [23]:
i_gall_wqsoc = np.isfinite(tab_gall['redshift_qsoc'])
quants['N_gall_wqsoc'] = f"{np.sum(i_gall_wqsoc):,}"
print(quants['N_gall_wqsoc'])

6,375,063


In [24]:
quants['N_gpurer'] = f"{len(tab_gpurer):,}"
print(quants['N_gpurer'])

1,942,825


In [25]:
i_gpurer_wqsoc = np.isfinite(tab_gpurer['redshift_qsoc'])
quants['N_gpurer_wqsoc'] = f"{np.sum(i_gpurer_wqsoc):,}"
print(quants['N_gpurer_wqsoc'])

1,729,625


In [26]:
quants['N_gsup'] = f"{len(tab_gsup):,}"
print(quants['N_gsup'])

1,518,782


In [27]:
quants['N_gclean'] = f"{len(tab_gclean):,}"
print(quants['N_gclean'])

1,414,385


In [28]:
quants['p_cut_gsup_gclean'] = f"{100*(len(tab_gsup) - len(tab_gclean))/len(tab_gsup):.0f}"
print(quants['p_cut_gsup_gclean'])

7


In [29]:
quants['N_gcatlo'] = f"{len(tab_gcatlo):,}"
print(quants['N_gcatlo'])

755,850


In [30]:
quants['N_gcathi'] = f"{len(tab_gcathi):,}"
print(quants['N_gcathi'])

1,295,502


In [31]:
i_squasars_unwise = np.isfinite(tab_squasars['mag_w1_vg']) & ~tab_squasars.mask['mag_w1_vg'] & \
                    np.isfinite(tab_squasars['mag_w2_vg']) & ~tab_squasars.mask['mag_w2_vg']
quants['N_squasars_unwise'] = f"{np.sum(i_squasars_unwise):,}"
print(quants['N_squasars_unwise'])

343,074


In [32]:
i_sq_sup = tab_labeled_sup['class']=='q'
quants['N_squasars_sup'] = f"{np.sum(i_sq_sup):,}"
print(quants['N_squasars_sup'])

246,122


In [33]:
quants['N_sqall'] = f"{len(tab_sqall):,}"
print(quants['N_sqall'])

638,083


## Redshift info

### general

In [34]:
i_zfinite = np.isfinite(tab_gall['redshift_qsoc'])
quants['z_med_gall'] = f"{np.median(tab_gall['redshift_qsoc'][i_zfinite]):.2f}"
print(quants['z_med_gall'])

1.67


  a.partition(kth, axis=axis, kind=kind, order=order)


In [35]:
quants['z_med_gcatlo'] = f"{np.median(tab_gcatlo['redshift_spz']):.2f}"
print(quants['z_med_gcatlo'])

1.45


In [36]:
zintermediate = 2.5
quants['zintermediate'] = zintermediate
i_above_zintermediate_gcatlo = tab_gcatlo['redshift_spz'] > zintermediate
quants['N_above_zintermediate_gcatlo'] = np.sum(i_above_zintermediate_gcatlo)
quants['p_above_zintermediate_gcatlo'] = np.sum(i_above_zintermediate_gcatlo)/len(i_above_zintermediate_gcatlo)
print(quants['N_above_zintermediate_gcatlo'], quants['p_above_zintermediate_gcatlo'])

76804 0.10161275385327777


### dz checks vs sdss

In [37]:
rand_ints_labeled = tab_spz_labeled['rand_ints']
i_train, i_valid, i_test = utils.split_train_val_test(rand_ints_labeled,
                                 frac_train=0.7, frac_val=0.15, frac_test=0.15)

In [38]:
z_sdss_test = tab_spz_labeled['z_sdss'][i_test]
z_spzraw_test = tab_spz_labeled['redshift_spz_raw'][i_test]
z_spz_test = tab_spz_labeled['redshift_spz'][i_test]
z_gaia_test = tab_spz_labeled['redshift_qsoc'][i_test]

In [39]:
dz_spzraw_test = (z_spzraw_test - z_sdss_test)/(1+z_sdss_test)
dz_spz_test = (z_spz_test - z_sdss_test)/(1+z_sdss_test)
dz_gaia_test = (z_gaia_test - z_sdss_test)/(1+z_sdss_test)

In [40]:
dz_threshs = {'dzlo': 0.01, 'dzmid': 0.1, 'dzhi': 0.2}
G_maxs = {'Gbright': 19.0, 'Glo': 20.0, 'Ghi': 20.5}
dz_arrs = {'zspz': dz_spz_test, 'zgaia': dz_gaia_test}

In [41]:
for G_name, G_max in G_maxs.items():
    quants[G_name] = G_max
    for dz_thresh_name, dz_thresh in dz_threshs.items():
        quants[dz_thresh_name] = dz_thresh
        for z_name, dz_arr in dz_arrs.items():
        
            i_G = tab_spz_labeled['phot_g_mean_mag'][i_test] < G_max
            i_outliers = np.abs(dz_arr[i_G]) > dz_thresh
            key = f'p_outliers_{z_name}_{dz_thresh_name}_{G_name}'
            val = f"{100*np.sum(i_outliers)/len(i_outliers):.0f}"
            print(key, val)
            quants[key] = val
            
            i_acc = np.abs(dz_arr[i_G]) <= dz_thresh
            key = f'p_acc_{z_name}_{dz_thresh_name}_{G_name}'
            val = f"{100*np.sum(i_acc)/len(i_acc):.0f}"
            print(key, val)
            quants[key] = val

p_outliers_zspz_dzlo_Gbright 15
p_acc_zspz_dzlo_Gbright 85
p_outliers_zgaia_dzlo_Gbright 10
p_acc_zgaia_dzlo_Gbright 90
p_outliers_zspz_dzmid_Gbright 6
p_acc_zspz_dzmid_Gbright 94
p_outliers_zgaia_dzmid_Gbright 7
p_acc_zgaia_dzmid_Gbright 93
p_outliers_zspz_dzhi_Gbright 4
p_acc_zspz_dzhi_Gbright 96
p_outliers_zgaia_dzhi_Gbright 7
p_acc_zgaia_dzhi_Gbright 93
p_outliers_zspz_dzlo_Glo 27
p_acc_zspz_dzlo_Glo 73
p_outliers_zgaia_dzlo_Glo 25
p_acc_zgaia_dzlo_Glo 75
p_outliers_zspz_dzmid_Glo 10
p_acc_zspz_dzmid_Glo 90
p_outliers_zgaia_dzmid_Glo 19
p_acc_zgaia_dzmid_Glo 81
p_outliers_zspz_dzhi_Glo 6
p_acc_zspz_dzhi_Glo 94
p_outliers_zgaia_dzhi_Glo 18
p_acc_zgaia_dzhi_Glo 82
p_outliers_zspz_dzlo_Ghi 39
p_acc_zspz_dzlo_Ghi 61
p_outliers_zgaia_dzlo_Ghi 38
p_acc_zgaia_dzlo_Ghi 62
p_outliers_zspz_dzmid_Ghi 17
p_acc_zspz_dzmid_Ghi 83
p_outliers_zgaia_dzmid_Ghi 30
p_acc_zgaia_dzmid_Ghi 70
p_outliers_zspz_dzhi_Ghi 9
p_acc_zspz_dzhi_Ghi 91
p_outliers_zgaia_dzhi_Ghi 28
p_acc_zgaia_dzhi_Ghi 72


Get numbers for all Gaia candidates with SDSS redshifts:

In [42]:
tab_squasars.columns

<TableColumns names=('ra','dec','source_id','ra_sdss','dec_sdss','objid','z_sdss','phot_g_mean_mag','phot_bp_mean_mag','phot_rp_mean_mag','phot_bp_n_obs','phot_rp_n_obs','dec_unwise','mag_w1_vg','mag_w2_vg','ra_unwise','unwise_objid','dist_arcsec')>

In [43]:
tab_squasars_lite = tab_squasars.copy()
tab_squasars_lite.keep_columns(['source_id', 'z_sdss'])
tab_gall_lite = tab_gall.copy()
tab_gall_lite.keep_columns(['source_id', 'redshift_qsoc', 'phot_g_mean_mag'])

In [44]:
tab_gall_xsquasars = join(tab_gall_lite, tab_squasars_lite, keys='source_id', join_type='inner')
print(len(tab_gall_xsquasars))

326067


In [45]:
tab_gall_xsquasars = tab_gall_xsquasars[np.isfinite(tab_gall_xsquasars['redshift_qsoc'])]
print(len(tab_gall_xsquasars))

320149


In [46]:
z_sdss_gall = tab_gall_xsquasars['z_sdss']
z_gaia_gall = tab_gall_xsquasars['redshift_qsoc']

dz_gaia_gall = (z_gaia_gall - z_sdss_gall)/(1+z_sdss_gall)

In [47]:
z_name = 'zgaia'
for G_name, G_max in G_maxs.items():
    for dz_thresh_name, dz_thresh in dz_threshs.items():
        
        i_G = tab_gall_xsquasars['phot_g_mean_mag'] < G_max
        i_outliers = np.abs(dz_gaia_gall[i_G]) > dz_thresh
        key = f'p_outliers_gall_{z_name}_{dz_thresh_name}_{G_name}'
        val = f"{100*np.sum(i_outliers)/len(i_outliers):.0f}"
        print(key, val)
        quants[key] = val

p_outliers_gall_zgaia_dzlo_Gbright 9
p_outliers_gall_zgaia_dzmid_Gbright 7
p_outliers_gall_zgaia_dzhi_Gbright 6
p_outliers_gall_zgaia_dzlo_Glo 25
p_outliers_gall_zgaia_dzmid_Glo 19
p_outliers_gall_zgaia_dzhi_Glo 18
p_outliers_gall_zgaia_dzlo_Ghi 38
p_outliers_gall_zgaia_dzmid_Ghi 30
p_outliers_gall_zgaia_dzhi_Ghi 28


Ends up being the same as our cleaned sample lol oop (rounded to a percent)

In [48]:
G_name = 'Glo'
for dz_thresh_name, dz_thresh in dz_threshs.items():
    fac = float(quants[f'p_outliers_gall_zgaia_{dz_thresh_name}_{G_name}'])/ \
          float(quants[f'p_outliers_zspz_{dz_thresh_name}_{G_name}'])
    key = f'factor_reduction_outliers_{dz_thresh_name}_{G_name}'
    fac_rounded = round(fac*2)/2
    val = rf'${{\sim}}{fac_rounded:g}\times$'
    print(fac)
    print(key,':', val)
    quants[key] = val

0.9259259259259259
factor_reduction_outliers_dzlo_Glo : ${\sim}1\times$
1.9
factor_reduction_outliers_dzmid_Glo : ${\sim}2\times$
3.0
factor_reduction_outliers_dzhi_Glo : ${\sim}3\times$


### Decontamination

In [49]:
_, _, i_test = utils.split_train_val_test(tab_labeled_sup['rand_ints'], frac_train=0.7, frac_test=0.3, frac_val=0)

In [50]:
i_contam_labeled = (tab_labeled_sup['class'][i_test]=='s') | (tab_labeled_sup['class'][i_test]=='g')
print(np.sum(i_contam_labeled))

i_contam_gclean = np.isin(tab_labeled_sup['source_id'][i_test][i_contam_labeled], tab_gclean['source_id'])
print(np.sum(i_contam_gclean))
print(np.sum(i_contam_labeled)/np.sum(i_contam_gclean))

1119
264
4.238636363636363


In [51]:
key = 'factor_reduction_contaminants'
fac = np.sum(i_contam_labeled)/np.sum(i_contam_gclean)
fac_rounded = round(fac*2)/2
val = rf'${{\sim}}{fac_rounded:g}\times$'
print(fac)
print(key, val)
quants[key] = val

4.238636363636363
factor_reduction_contaminants ${\sim}4\times$


In [52]:
i_q_labeled = tab_labeled_sup['class']=='q'
print(np.sum(i_q_labeled))

i_q_gclean = np.isin(tab_labeled_sup['source_id'][i_q_labeled], tab_gclean['source_id'])
print(np.sum(i_q_gclean))

246122
243206


In [53]:
key = 'p_sqall_excluded_clean'
p = np.sum(i_q_labeled)/np.sum(i_q_gclean)-1
print(p)
val = f'{p*100:.1f}'
print(key, val)
quants[key] = val

0.011989835777077795
p_sqall_excluded_clean 1.2


### Area and volume

In [54]:
def v_shells_of_z(z_arr, z_bins, fsky, cosmo):
    
    v_at_z_bins = np.empty(len(z_bins), dtype=u.Quantity)
    for i in range(len(z_bins)):
        v = cosmo.comoving_volume(z_bins[i])
        v_at_z_bins[i] = v.to(u.Gpc**3)
    v_at_z_bins *= fsky
    v_shells = v_at_z_bins[1:] - v_at_z_bins[:-1] 

    return np.array(v_shells)

In [55]:
def ndens_of_z(z_arr, z_bins, fsky, cosmo):
    
    v_shells = v_shells_of_z(z_arr, z_bins, fsky, cosmo)
    ndens = []
    for i in range(len(z_bins)-1):
        N_inbin = np.sum((z_arr >= z_bins[i]) & (z_arr < z_bins[i+1]))
        ndens_inbin = N_inbin/v_shells[i]
        ndens.append(ndens_inbin)
    return ndens

In [56]:
# eqn 1.7.32 of https://arxiv.org/pdf/1606.00180.pdf
def volume_effective_Gpcperh(z_arr, z_bins, cosmo, fsky, P0):
    
    ndens_at_z_bins = ndens_of_z(z_arr, z_bins, fsky, cosmo)
    ndens_at_z_bins = np.array(ndens_at_z_bins, dtype=u.Quantity)

    nPs = np.array([n*P0 for n in ndens_at_z_bins])
    prefacs = (nPs /(1 + nPs))**2
    v_shells = v_shells_of_z(z_arr, z_bins, fsky, cosmo)    
    v_Gpc = np.sum(prefacs*v_shells)

    v_Gpc = v_Gpc.to(u.Gpc**3) # this should be just in Gpc
    v_Gpcperh = v_Gpc * cosmo.h**3 # (Gpc) -> (Gpc/h)^3, mult by h^3 
    print(f"Sky fraction is {fsky:.3f}")
    print(f"Effective volume is {v_Gpc:.3f} = {v_Gpcperh.value:.3f} (Gpc/h)^3")
    return v_Gpcperh.value

In [57]:
cosmo = Planck18
area_allsky = 41252.96125*(u.deg**2)

TODO: estimating p0 now, what should be doing?

In [58]:
fn_dustmap = f'../data/maps/map_dust_NSIDE{NSIDE}.npy'
map_dust = maps.get_dust_map(NSIDE=NSIDE, R=3.1, fn_map=fn_dustmap)

Dustmap already exists, loading from ../data/maps/map_dust_NSIDE64.npy


Gaia:

In [59]:
Av_hi = 0.5
npix_below_Avhi = np.sum(map_dust < Av_hi)
area_below_Avhi = area_per_pixel*u.deg**2 * npix_below_Avhi
quants['Avhi'] = Av_hi
area_below_Avhi_fmt = rf'{area_below_Avhi.value:.2f} deg$^2$'
quants['area_below_Avhi'] = area_below_Avhi_fmt
print(quants['area_below_Avhi'])

29154.54 deg$^2$


In [60]:
fsky_below_Avhi = area_below_Avhi/area_allsky
quants['fsky_below_Avhi'] = f'{fsky_below_Avhi:.2f}'
print(quants['fsky_below_Avhi'])

0.71


In [61]:
z_arr = tab_gcatlo['redshift_spz']
z_min, z_max = np.min(z_arr), np.max(z_arr)
z_bins = np.arange(0, z_max+0.1, 0.1)
P0 = 4e4 * u.Mpc**3 / cosmo.h**3 
#P0 = P0.to(u.Gpc**3)
#print(P0)
vol_gcatlo = volume_effective_Gpcperh(z_arr, z_bins, cosmo, fsky_below_Avhi, P0)
vol_gcatlo_fmt = rf'{vol_gcatlo:.2f} $(h^{{-1}}\,Gpc)^3$'
quants['volume_effective_gcatlo_below_Avhi'] = vol_gcatlo_fmt
print(quants['volume_effective_gcatlo_below_Avhi'])

Sky fraction is 0.707
Effective volume is 10.598 Gpc3 = 3.283 (Gpc/h)^3
3.28 $(h^{-1}\,Gpc)^3$


In [62]:
z_arr = tab_gcathi['redshift_spz']
z_min, z_max = np.min(z_arr), np.max(z_arr)
z_bins = np.arange(0, z_max+0.1, 0.1)
P0 = 4e4 * u.Mpc**3 / cosmo.h**3 
#P0 = P0.to(u.Gpc**3)
#print(P0)
vol_gcathi = volume_effective_Gpcperh(z_arr, z_bins, cosmo, fsky_below_Avhi, P0)
vol_gcathi_fmt = rf'{vol_gcathi:.2f} $(h^{{-1}}\,Gpc)^3$'
quants['volume_effective_gcathi_below_Avhi'] = vol_gcathi_fmt
print(quants['volume_effective_gcathi_below_Avhi'])

Sky fraction is 0.707
Effective volume is 25.356 Gpc3 = 7.854 (Gpc/h)^3
7.85 $(h^{-1}\,Gpc)^3$


SDSS:

In [63]:
# area via: # https://academic.oup.com/mnras/article/514/3/3396/6566355?login=true
area_sdss = 4808*(u.deg**2)

In [64]:
area_sdss_fmt = rf'{area_sdss.value:g} deg$^2$'
quants['area_sdss'] = area_sdss_fmt
print(quants['area_sdss'])

4808 deg$^2$


In [65]:
fsky_sdss = area_sdss/area_allsky
quants['fsky_sdss'] = f'{fsky_sdss:.2f}'
print(quants['fsky_sdss'])

0.12


In [66]:
z_arr = tab_sqall['z_sdss']
z_min, z_max = np.min(z_arr), np.max(z_arr)
z_bins = np.arange(0, z_max+0.1, 0.1)

In [67]:
z_arr = tab_sqall['z_sdss']
z_min, z_max = np.min(z_arr), np.max(z_arr)
z_bins = np.arange(0, z_max+0.1, 0.1)
P0 = 4e4 * u.Mpc**3 / cosmo.h**3 
vol_sdss = volume_effective_Gpcperh(z_arr, z_bins, cosmo, fsky_sdss, P0)
vol_sdss_fmt = rf'{vol_sdss:.2f} $(h^{{-1}}\,Gpc)^3$'
quants['volume_effective_sdss'] = vol_sdss_fmt
print(quants['volume_effective_sdss'])

Sky fraction is 0.117
Effective volume is 18.456 Gpc3 = 5.717 (Gpc/h)^3
5.72 $(h^{-1}\,Gpc)^3$


Gaia vs SDSS

In [68]:
fac_area = area_below_Avhi/area_sdss
fac_area_rounded = round(fac_area.value*2)/2
val = rf'${{\sim}}{fac_area_rounded:g}\times$'
quants['factor_area_belowAvhi_sdss'] = val
print(quants['factor_area_belowAvhi_sdss'])

${\sim}6\times$


In [69]:
fac_vol = vol_gcathi/vol_sdss
val = rf'${{\sim}}{fac_vol:.1f}\times$'
quants['factor_volume_effective_gcathi_sdss'] = val
print(quants['factor_volume_effective_gcathi_sdss'])

${\sim}1.4\times$


## Selection function / residuals

In [70]:
fn_rand_Glo = f'../data/randoms/randomQ_stardustm10mcs_NSIDE64_G{G_lo}_10x.fits'

tab_rand_Glo = Table.read(fn_rand_Glo)
map_rand_Glo = get_ndens_map(tab_rand_Glo['ra'], tab_rand_Glo['dec'])

map_gcatlo = get_ndens_map(tab_gcatlo['ra'], tab_gcatlo['dec'])

norm_factor = len(tab_gcatlo)/len(tab_rand_Glo)

rmse_frac = utils.compute_rmse(norm_factor*map_rand_Glo, map_gcatlo, fractional=True)
quants['rmse_fractional_residuals_Glo'] = f'{rmse_frac:.2f}'
print(quants['rmse_fractional_residuals_Glo'])

0.51


TODO: run when get sel func for G20.5

In [71]:
# fn_rand_Ghi = f'../data/randoms/randomQ_stardustm10mcs_NSIDE64_G{G_hi}_10x.fits'

# tab_rand_Ghi = Table.read(fn_rand_Ghi)
# map_rand_Ghi = get_ndens_map(tab_rand_Ghi['ra'], tab_rand_Ghi['dec'])

# map_gcathi = get_ndens_map(tab_gcathi['ra'], tab_gcathi['dec'])

# norm_factor = len(tab_gcathi)/len(tab_rand_Ghi)

# rmse_frac = utils.compute_rmse(norm_factor*map_rand_Ghi, map_gcathi, fractional=True)
# quants['rmse_fractional_residuals_Ghi'] = f'{rmse_frac:.2f}'
# print(quants['rmse_fractional_residuals_Ghi'])

In [91]:
tag_decontam = '_mag0.1_lm5_postpm'
fn_cuts = f'../data/color_cuts{tag_decontam}.txt'
cuts = np.genfromtxt(fn_cuts, delimiter=',', names=True)
cut_names = list(cuts.dtype.names)

In [107]:
color_label_dict = {'g_w1': 'G-W1', 
                    'w1_w2': 'W1-W2',
                    'bp_g': 'BP-G'}

In [93]:
color_names = list(cut_names.copy())
color_names.remove('intercept')

In [117]:
N_colors = len(color_names)
tol = 1e-8
cut_strs = []
for cut in cuts:
    cut_terms = []
    for cn in color_names:
        i = cut_names.index(cn)
        if abs(cut[i])<tol:
            continue
        if abs(cut[i]-1.0)<tol:
            cut_terms.append(f'({color_label_dict[cn]})')
        else:
            cut_terms.append(f'{cut[i]}\,({color_label_dict[cn]})')
    i_intercept = cut_names.index('intercept')
    # & symbols are for equation alignment
    cut_str = ' + '.join(cut_terms) + f' &> {cut[i_intercept]:g}'
    cut_strs.append(cut_str)

In [118]:
quants['color_cut_str'] = ' \\\\ '.join(cut_strs)
print(quants['color_cut_str'])

(G-W1) &> 2.15 \\ (W1-W2) &> 0.4 \\ (BP-G) &> -0.3 \\ (G-W1) + 1.2\,(W1-W2) &> 3.4


In [83]:
# straight lines
tol = 1e-8
color_names = ['g_w1', 'w1_w2', 'bp_g']
for cn in color_names:
    for cut in cuts:
        idxs_color = np.where(abs(cut-1.0)<tol)[0]
        if id

array([(1., 0. , 0.,  2.15), (0., 1. , 0.,  0.4 ), (0., 0. , 1., -0.3 ),
       (1., 1.2, 0.,  3.4 )],
      dtype=[('g_w1', '<f8'), ('w1_w2', '<f8'), ('bp_g', '<f8'), ('intercept', '<f8')])

# Write dict to file

In [72]:
with open(fn_quants, 'w') as f:
    for key, val in quants.items():
        print(key, val)
        if save:
            f.write(f'{key} = {val}\n')
if save:
    print()
    print(f"Saved to {fn_quants}!")

N_gall 6,649,162
N_gall_wqsoc 6,375,063
N_gpurer 1,942,825
N_gpurer_wqsoc 1,729,625
N_gsup 1,518,782
N_gclean 1,414,385
p_cut_gsup_gclean 7
N_gcatlo 755,850
N_gcathi 1,295,502
N_squasars_unwise 343,074
N_squasars_sup 246,122
N_sqall 638,083
z_med_gall 1.67
z_med_gcatlo 1.45
zintermediate 2.5
N_above_zintermediate_gcatlo 76804
p_above_zintermediate_gcatlo 0.10161275385327777
Gbright 19.0
dzlo 0.01
p_outliers_zspz_dzlo_Gbright 15
p_acc_zspz_dzlo_Gbright 85
p_outliers_zgaia_dzlo_Gbright 10
p_acc_zgaia_dzlo_Gbright 90
dzmid 0.1
p_outliers_zspz_dzmid_Gbright 6
p_acc_zspz_dzmid_Gbright 94
p_outliers_zgaia_dzmid_Gbright 7
p_acc_zgaia_dzmid_Gbright 93
dzhi 0.2
p_outliers_zspz_dzhi_Gbright 4
p_acc_zspz_dzhi_Gbright 96
p_outliers_zgaia_dzhi_Gbright 7
p_acc_zgaia_dzhi_Gbright 93
Glo 20.0
p_outliers_zspz_dzlo_Glo 27
p_acc_zspz_dzlo_Glo 73
p_outliers_zgaia_dzlo_Glo 25
p_acc_zgaia_dzlo_Glo 75
p_outliers_zspz_dzmid_Glo 10
p_acc_zspz_dzmid_Glo 90
p_outliers_zgaia_dzmid_Glo 19
p_acc_zgaia_dzmid_Glo 81


## Table showing catalog

In [73]:
tab_gcatlo.columns

<TableColumns names=('source_id','ra','dec','l','b','phot_g_mean_mag','phot_bp_mean_mag','phot_rp_mean_mag','phot_g_n_obs','phot_bp_n_obs','phot_rp_n_obs','redshift_qsoc','redshift_qsoc_lower','redshift_qsoc_upper','zscore_qsoc','flags_qsoc','pmra','pmra_error','pmdec','pmdec_error','parallax','parallax_error','ebv','A_v','pm','mag_w1_vg','mag_w2_vg','unwise_objid','rand_ints','g_rp','bp_g','bp_rp','g_w1','w1_w2','redshift_spz_raw','redshift_spz_err','redshift_spz')>

In [74]:
columns_show = ['source_id', 'ra', 'dec', 'redshift_spz', 'redshift_spz_err', 'unwise_objid']
#columns_show = ['source_id', 'ra', 'dec']
fmt_dict = {'source_id': 'd',
            'ra': '.7f',
            'dec': '.7f', 
            'redshift_spz': '.4f',
            'redshift_spz_err': '.4f',
            'unwise_objid': ''
            }

In [75]:
n_show = 5
rand_ints_show = np.arange(5)
table_contents = ''

column_labels = [f'\\texttt{{{c}}}' for c in columns_show]
table_contents += ' & '.join(column_labels) + '\\\\ \n'
for rand_int in rand_ints_show:
    idx = np.where(tab_gcatlo['rand_ints']==rand_int)[0][0]
    vals = [tab_gcatlo[c][idx] for c in columns_show]
    print(vals)
    #table_contents += ' & '.join(f'{vals:{fmt}}') + '\\\\ \n'
    vals_fmt = [f'{vals[i]:{fmt_dict[columns_show[i]]}}' for i in range(len(columns_show))]
    table_contents += ' & '.join(vals_fmt) + ' \\\\ \n'
    
print(table_contents)

[6879714868556260736, 304.2939322727488, -12.73942387481495, 0.2911104957535018, 0.08651193059921262, '3043m122o0003886']
[4783732925018049024, 73.8973451283087, -51.02265972588198, 1.356, 0.39907654649734503, '0739m515o0030215']
[6537038562083771904, 354.7213092835327, -40.83984519128812, 1.8126829862594604, 0.0467893116760254, '3540m409o0014047']
[4179611586896874240, 293.07744725133557, -17.827785909249855, 1.2990739345550537, 0.08900000000000008, '2933m182o0033194']
[3578872861936828928, 190.6480757135183, -9.5431223192319, 2.0745136737823486, 0.2722211804771423, '1899m091o0004733']
\texttt{source_id} & \texttt{ra} & \texttt{dec} & \texttt{redshift_spz} & \texttt{redshift_spz_err} & \texttt{unwise_objid}\\ 
6879714868556260736 & 304.2939323 & -12.7394239 & 0.2911 & 0.0865 & 3043m122o0003886 \\ 
4783732925018049024 & 73.8973451 & -51.0226597 & 1.3560 & 0.3991 & 0739m515o0030215 \\ 
6537038562083771904 & 354.7213093 & -40.8398452 & 1.8127 & 0.0468 & 3540m409o0014047 \\ 
4179611586896

In [76]:
fn_table = '../data/catalog_table.txt'
with open(fn_table, 'w') as f:
    if save:
        f.write(table_contents)
if save:
    print(f"Saved to {fn_table}!")

Saved to ../data/catalog_table.txt!


TODO: write checks for randints for every table

TODO: fix header for final catalog

column name, math symbol, units,human description, entry of first line of table

In [77]:
tab_gall.columns

<TableColumns names=('source_id','ra','dec','l','b','phot_g_mean_mag','phot_bp_mean_mag','phot_rp_mean_mag','phot_g_n_obs','phot_bp_n_obs','phot_rp_n_obs','redshift_qsoc','redshift_qsoc_lower','redshift_qsoc_upper','zscore_qsoc','flags_qsoc','pmra','pmra_error','pmdec','pmdec_error','parallax','parallax_error')>

In [78]:
tab_gall.meta

OrderedDict([('EXTNAME', 'votable'),
             ('TCOMM1',
              'Unique source identifier (unique within a particular Data Release)'),
             ('TUCD1', 'meta.id'),
             ('TCOMM2', 'Right ascension'),
             ('TUCD2', 'pos.eq.ra;meta.main'),
             ('TUTYP2', 'stc:AstroCoords.Position3D.Value3.C1'),
             ('TCOMM3', 'Declination'),
             ('TUCD3', 'pos.eq.dec;meta.main'),
             ('TUTYP3', 'stc:AstroCoords.Position3D.Value3.C2'),
             ('TCOMM4', 'Galactic longitude'),
             ('TUCD4', 'pos.galactic.lon'),
             ('TUTYP4', 'stc:AstroCoords.Position2D.Value2.C1'),
             ('TCOMM5', 'Galactic latitude'),
             ('TUCD5', 'pos.galactic.lat'),
             ('TUTYP5', 'stc:AstroCoords.Position2D.Value2.C2'),
             ('TCOMM6', 'G-band mean magnitude'),
             ('TUCD6', 'phot.mag;em.opt'),
             ('TCOMM7', 'Integrated BP mean magnitude'),
             ('TUCD7', 'phot.mag;em.opt.B'),
    

In [79]:
tab_gall.__dict__

{'_masked': False,
 '_column_class': astropy.table.column.Column,
 'columns': <TableColumns names=('source_id','ra','dec','l','b','phot_g_mean_mag','phot_bp_mean_mag','phot_rp_mean_mag','phot_g_n_obs','phot_bp_n_obs','phot_rp_n_obs','redshift_qsoc','redshift_qsoc_lower','redshift_qsoc_upper','zscore_qsoc','flags_qsoc','pmra','pmra_error','pmdec','pmdec_error','parallax','parallax_error')>,
 'formatter': <astropy.table.pprint.TableFormatter at 0x148284075c00>,
 '_copy_indices': True,
 '_init_indices': True,
 'primary_key': None,
 '_meta': OrderedDict([('EXTNAME', 'votable'),
              ('TCOMM1',
               'Unique source identifier (unique within a particular Data Release)'),
              ('TUCD1', 'meta.id'),
              ('TCOMM2', 'Right ascension'),
              ('TUCD2', 'pos.eq.ra;meta.main'),
              ('TUTYP2', 'stc:AstroCoords.Position3D.Value3.C1'),
              ('TCOMM3', 'Declination'),
              ('TUCD3', 'pos.eq.dec;meta.main'),
              ('TUTYP3'

In [80]:
tab_gcatlo.meta

OrderedDict([('EXTNAME', 'votable'),
             ('TCOMM1',
              'Unique source identifier (unique within a particular Data Release)'),
             ('TUCD1', 'meta.id'),
             ('TCOMM2', 'Right ascension'),
             ('TUCD2', 'pos.eq.ra;meta.main'),
             ('TUTYP2', 'stc:AstroCoords.Position3D.Value3.C1'),
             ('TCOMM3', 'Declination'),
             ('TUCD3', 'pos.eq.dec;meta.main'),
             ('TUTYP3', 'stc:AstroCoords.Position3D.Value3.C2'),
             ('TCOMM4', 'Galactic longitude'),
             ('TUCD4', 'pos.galactic.lon'),
             ('TUTYP4', 'stc:AstroCoords.Position2D.Value2.C1'),
             ('TCOMM5', 'Galactic latitude'),
             ('TUCD5', 'pos.galactic.lat'),
             ('TUTYP5', 'stc:AstroCoords.Position2D.Value2.C2'),
             ('TCOMM6', 'G-band mean magnitude'),
             ('TUCD6', 'phot.mag;em.opt'),
             ('TCOMM7', 'Integrated BP mean magnitude'),
             ('TUCD7', 'phot.mag;em.opt.B'),
    

In [81]:
with fits.open(fn_gall) as hdul:
#     print(hdul.info())
#     print()
#     print(repr(hdul[0].header))
#     print(hdul[0].data)
#     print()
    hdr = hdul[1].header
    print(repr(hdr))
    #print(hdr)
    #print(hdr['TTYPE6'])
    #print(hdul[1].header['phot_g_mean_mag'])
    cols = hdul[1].columns
    #print(cols)
    #print(hdul[1].data.shape)

XTENSION= 'BINTABLE'           / binary table extension                         
BITPIX  =                    8 / 8-bit bytes                                    
NAXIS   =                    2 / 2-dimensional table                            
NAXIS1  =                  118 / width of table in bytes                        
NAXIS2  =              6649162 / number of rows in table                        
PCOUNT  =                    0 / heap size (no gap)                             
GCOUNT  =                    1 / one data group                                 
TFIELDS =                   22 / number of columns                              
EXTNAME = 'votable '           / table name                                     
TTYPE1  = 'source_id'          / label for column 1                             
TFORM1  = 'K       '           / format for column 1                            
TCOMM1  = 'Unique source identifier (unique within a particular Data Release)'  
TUCD1   = 'meta.id '        