# Code to gather data from $z > 5.5$ QSOs.

We want to obtain data from several surveys and catalogs in order to  
manipulate them and try to obtain meaningful correlations.

First, we import the packages to be used

To get the first line working, you need  
to run the following lines:

```bash
 conda install nodejs
 pip install ipympl
 pip install --upgrade jupyterlab
 jupyter labextension install @jupyter-widgets/jupyterlab-manager
 jupyter labextension install jupyter-matplotlib
 jupyter nbextension enable --py widgetsnbextension
 ```

In [1]:
%matplotlib inline
# Static plots
#%matplotlib ipympl
# Interactive plots
import numpy as np
# import matplotlib.cm as cm
# import matplotlib.pyplot as plt
# import matplotlib.colors as mcolors
# import matplotlib.patheffects as mpe
# import matplotlib.patheffects as path_effects
# from matplotlib.ticker import FormatStrFormatter
# from matplotlib.patches import Polygon
from astropy.io import fits
from astropy.table import Table
from astropy.table import Column
from astropy.table import MaskedColumn
from astropy.table import hstack
from astropy.table import vstack
from astropy.table import join
from astropy.wcs import WCS
from astropy import units as u
# from astropy.visualization import hist
from astropy.coordinates import SkyCoord
from astroquery.simbad import Simbad
from astroquery.ned import Ned
import getpass
import progressbar
import pandas as pd
import re

One function, to derive luminosity distances

In [2]:
def luminosity_distance(z_i, H0=70., WM=0.3, WV=0.7):
    z_i   = np.array([z_i], dtype='float64').flatten()
    c     = 299792.458 # velocity of light in km/sec
    h     = H0 / 100.
    WR    = 4.165E-5 / (h * h)   # includes 3 massless neutrino species, T0 = 2.72528
    WK    = 1 - WM - WR - WV
    azs   = 1.0 / (1 + z_i)
    DTT   = 0.0
    DCMR  = 0.0
    # do integral over a=1/(1+z) from az to 1 in n steps, midpoint rule
    n     = 1000  # number of points in integrals
    DL_Mpcs = np.zeros_like(z_i)
    for count, az in enumerate(azs):
        a     = az + (1 - az) * (np.arange(0, n) + 0.5) / n
        adot  = np.sqrt(WK + (WM / a) + (WR / (a * a)) + (WV * a * a))
        for i in range(n):
            # a    = az + (1 - az) * (i + 0.5) / n
            # adot = math.sqrt(WK + (WM / a) + (WR / (a * a)) + (WV * a * a))
            DTT  = DTT + 1. / adot[i]
            DCMR = DCMR + 1. / (a[i] * adot[i])
        DTT   = (1. - az) * DTT / n
        DCMR  = (1. - az) * DCMR / n
        # tangential comoving distance
        ratio = 1.00
        x     = np.sqrt(abs(WK)) * DCMR
        if x > 0.1:
            if WK > 0:
                ratio =  0.5 * (np.exp(x) - np.exp(-x)) / x 
            else:
                ratio = np.sin(x) / x
        else:
            y = x * x
        if WK < 0: y = -y
        ratio  = 1. + y / 6. + y * y / 120.
        DCMT   = ratio * DCMR
        DA     = az * DCMT
        DL     = DA / (az * az)
        DL_Mpc = (c / H0) * DL
        DL_Mpcs[count] = DL_Mpc
    return DL_Mpcs

Define the spectral index $\alpha$ from different sources  
to be used in the luminosity calculations (K-correction)

In [3]:
alpha_first = 0.5  # From FIRST data (Bornancini+2010)
alpha_RG    = 1.0  # For radio galaxies (Verkhodanov & Khabibullina, 2010)
alpha_alex  = 0.8  # Star-forming galaxies (Alexander+2003)
alpha_smol  = 0.7  # Mean value from VLA-COSMOS 3GHz sample (Smolčić et al. 2017)
alpha_butl  = 0.75  # From Butler et al., 2018

Choose one of the spectral indexes

In [4]:
alpha_used  = alpha_butl

---

### Reading data

Next step, reading our data.  
Most of the data files have been created using Topcat

In [5]:
machine  = getpass.getuser()
# cat_path = '/home/' + machine + '/Documentos/Data/'
cat_path = ''  # relative path to the same directory

Read data from FIRST+MILLIQUAS catalogs cross-matched.  

**MILLIQUAS + FIRST (SDSS)**   

Redshift values have been retrieved from the column `pipeline_redshift` in SDSS DR12  
The procedure to obtain these values is explained in **Bolton+2012**  

We also select sources with have explicitely data in both SDSS Quasar Catalog and  
in the FIRST survey (`first_match_flag = 1`).  

In order to get only the best redshift values, we select sources with  
`pipeline_redshift_flag = 0`.

Thus, we can use 9161 objects from the catalog.

In [6]:
#hdu_list = fits.open(cat_path + 'tables_matches_milli_sdss_apr.fits');
#sdss_milli  = Table(hdu_list[1].data);
#hdu_list.close();

sdss_milli = Table.read(cat_path + 'tables_matches_milli_sdss_apr.fits')

L_14GHz_filter = np.array((sdss_milli['L_14GHz'] > 0.0) * (sdss_milli['first_offset'] < 1.0)) ;  # sdss + milliquasar

We load data from the VLA-COSMOS 3 GHz Large Project
**Smolčić et al. 2017**

We can, also, separate sources which have an AGN X-ray counterpart
with the flag `cosm_xray_flag`

In [7]:
# hdu_cosmos          = fits.open(cat_path + 'vla_xmmcosmos_smolcic.fits')
# cosmos_data         = Table(hdu_cosmos[1].data)
# hdu_cosmos.close()

cosmos_data         = Table.read(cat_path + 'vla_xmmcosmos_smolcic.fits')

x_ray_flag          = cosmos_data['cosm_xray_flag']
redshift_cosmos     = cosmos_data['cosm_z']
redshift_cosmos[~np.isfinite(redshift_cosmos)] = 0.0  # Repair NaN values
int_14GHz_cosmos    = cosmos_data['cosm_lum_14ghz']  # Using their own spectral indices
cosmos_3GHz_flux    = cosmos_data['cosm_f_3ghz']  # mJy
cosmos_3GHz_flux_e  = cosmos_data['cosm_f_3ghz_err']
cosmos_3GHz_flux_e[~np.isfinite(cosmos_3GHz_flux_e)] = 0.0  # Repair NaN values
cosmos_14GHz_flux   = cosmos_3GHz_flux_e * (3/1.4)**alpha_used
cosmos_14GHz_flux[~np.isfinite(cosmos_14GHz_flux)] = 0.0  # Repair NaN values
cosmos_14GHz_flux_e = np.abs(cosmos_14GHz_flux) * cosmos_3GHz_flux_e / cosmos_3GHz_flux
lum_dist_cosmos     = cosmos_data['D_lum']  # in m
lum_dist_cosmos[~np.isfinite(lum_dist_cosmos)] = 0.0  # Repair NaN values

redshift_cosmos_x   = cosmos_data['cosm_z'][(cosmos_data['cosm_z'] > 0) * (cosmos_data['cosm_xray_flag'] == 'T')]
int_14GHz_cosmos_x  = cosmos_data['cosm_lum_14ghz'][(cosmos_data['cosm_z'] > 0) * (cosmos_data['cosm_xray_flag'] == 'T')]

We want, also, to add $z > 6$ QSOs from the list in  
Table 3 in the review of **Inayoshi, Visbal, and Haiman, 2020**.  
Six of them have $z > 7$

Not all of them have $1.4$ GHz measurements. Others have  
measurements in different frequencies which can be translated  
into the desired frequency using, for instance, the relation  
from **Butler et al., 2018**:

$$S_{a} = S_{b} \times (\frac{\nu_{b}}{\nu_{a}})^{\alpha}$$  

We load the data from these sources. Fluxes from different frequencies than $1.4$ GHz are translated to the needed value.

In [8]:
high_z_ra        = np.char.replace(np.loadtxt(cat_path + 'high_z_qso_props.csv', usecols=[1],  dtype='str', delimiter=';'), ',', '.').astype(np.float)
high_z_dec       = np.char.replace(np.loadtxt(cat_path + 'high_z_qso_props.csv', usecols=[2],  dtype='str', delimiter=';'), ',', '.').astype(np.float)
high_z_zs        = np.char.replace(np.loadtxt(cat_path + 'high_z_qso_props.csv', usecols=[3],  dtype='str', delimiter=';'), ',', '.').astype(np.float)
high_z_zs_e      = np.char.replace(np.loadtxt(cat_path + 'high_z_qso_props.csv', usecols=[4],  dtype='str', delimiter=';'), ',', '.').astype(np.float)
high_z_14GHz     = np.char.replace(np.loadtxt(cat_path + 'high_z_qso_props.csv', usecols=[6],  dtype='str', delimiter=';'), ',', '.').astype(np.float)
high_z_14GHz_e   = np.char.replace(np.loadtxt(cat_path + 'high_z_qso_props.csv', usecols=[7],  dtype='str', delimiter=';'), ',', '.').astype(np.float)
high_z_3GHz      = np.char.replace(np.loadtxt(cat_path + 'high_z_qso_props.csv', usecols=[8],  dtype='str', delimiter=';'), ',', '.').astype(np.float)
high_z_3GHz_e    = np.char.replace(np.loadtxt(cat_path + 'high_z_qso_props.csv', usecols=[9],  dtype='str', delimiter=';'), ',', '.').astype(np.float)
high_z_15GHz     = np.char.replace(np.loadtxt(cat_path + 'high_z_qso_props.csv', usecols=[10], dtype='str', delimiter=';'), ',', '.').astype(np.float)
high_z_15GHz_e   = np.char.replace(np.loadtxt(cat_path + 'high_z_qso_props.csv', usecols=[11], dtype='str', delimiter=';'), ',', '.').astype(np.float)
high_z_250GHz    = np.char.replace(np.loadtxt(cat_path + 'high_z_qso_props.csv', usecols=[12], dtype='str', delimiter=';'), ',', '.').astype(np.float)
high_z_250GHz_e  = np.char.replace(np.loadtxt(cat_path + 'high_z_qso_props.csv', usecols=[13], dtype='str', delimiter=';'), ',', '.').astype(np.float)
high_z_mass_1450 = np.char.replace(np.loadtxt(cat_path + 'high_z_qso_props.csv', usecols=[14], dtype='str', delimiter=';'), ',', '.').astype(np.float)
high_z_names     = np.loadtxt('high_z_qso_props.csv', usecols=[0], dtype='str', delimiter=';')
high_z_lum_d     = luminosity_distance(high_z_zs) * 3.086e22  # in m
high_z_up_lim    = np.array([val == '<' for val in np.loadtxt('high_z_qso_props.csv', usecols=[5], dtype='str', delimiter=';')])

Accumulate values into one array except 250GHz data.  
Millimetre luminosities will be used separately since we cannot be completely  
sure that they represent, fully, non-thermal emission (from AGN) and not dust.

In [9]:
high_z_14   = high_z_14GHz + high_z_3GHz + high_z_15GHz
high_z_14_e = high_z_14GHz_e + high_z_3GHz_e + high_z_15GHz_e

To complement the dataset, we also load four $z > 5.5$ sources which  
come from the `radio` catalog in the `Heasarc` database.  
We queried the objects which have $1.4$ GHz observations that  
are within a $2.5$ arcsec of an object from the `SDSS QUASAR DR12`  
catalog. We discard the sources that are already included in the `FIRST`  
catalog, to avoid repetition.

To enlarge the size of the sample, we can also query  
the same sample, but extending the redshift range to all positive  
values. We can include, too, the exclusion of ***low quality*** redshift_values (or not).

In [10]:
# hdu_radio_z_55       = fits.open(cat_path + 'radio_cat_sdss_z_55.fits');  # high redshift
# hdu_radio_z_all_spec = fits.open(cat_path + 'radio_cat_sdss_z_all_spec.fits');  # all redshift, high z quality
#hdu_radio_z_all_all  = fits.open(cat_path + 'radio_cat_sdss_z_all_all.fits');  # all redshift, all z quality
# radio_sources        = Table(hdu_radio_z_55[1].data);
# radio_sources        = Table(hdu_radio_z_all_spec[1].data);
# hdu_radio_z_55.close();
# hdu_radio_z_all_spec.close();
# hdu_radio_z_all_all.close();

# radio_sources        = Table.read(cat_path + 'radio_cat_sdss_z_55_spec.fits')       # high redshift
radio_sources        = Table.read(cat_path + 'radio_cat_sdss_z_all_spec.fits')      # all redshift, high z quality
# radio_sources        = Table.read(cat_path + 'radio_cat_sdss_z_all_all_spec.fits')  # all redshift, all z quality

---

### Calculate luminosities

Calculate luminosities (in W/Hz) for different datasets  
using the expression

$$L_{1.4\mathrm{GHz}} = 4 \pi \mathrm{d}^{2}_{L} f_{1.4\mathrm{GHz}} (1 + z)^{\alpha - 1}$$

which comes from Alexander et al. 2003

We can also obtain that luminosity from the flux in $3$ GHz as

$$L_{1.4\mathrm{GHz}} = 4 \pi \mathrm{d}^{2}_{L} {(\frac{3}{1.4})}^{\alpha} f_{3\mathrm{GHz}} (1 + z)^{\alpha - 1}$$

This expression comes from Delhaize et al. 2017.

In [11]:
L_21cm           = 4 * np.pi * (sdss_milli['D_lum'][L_14GHz_filter])**2 * sdss_milli['flux_20_cm'][L_14GHz_filter] * 1e-3 * 1e-26 * (1 + sdss_milli['redshift'][L_14GHz_filter])**(alpha_used - 1)
L_21cm_e         = np.abs(L_21cm) / sdss_milli['snr_20_cm'][L_14GHz_filter]

In [12]:
L_21cm_radio     = 4 * np.pi * (radio_sources['D_lum'])**2 * radio_sources['radio_flux'] * 1e-3 * 1e-26 * (1 + radio_sources['sdss_z'])**(alpha_used - 1)
L_21cm_radio_e   = np.abs(L_21cm_radio) * radio_sources['radio_flux_err'] / radio_sources['radio_flux']

In [13]:
L_14GHz_cosmos   = 4 * np.pi * (lum_dist_cosmos)**2 * cosmos_14GHz_flux * 1e-3 * 1e-26 * (1 + redshift_cosmos)**(alpha_used - 1)
L_14GHz_cosmos_e = np.abs(L_14GHz_cosmos) * cosmos_14GHz_flux_e / cosmos_14GHz_flux;
L_14GHz_cosmos_e[~np.isfinite(L_14GHz_cosmos_e)] = 0.0  # Repair (make zero) error values

  


In [14]:
high_z_lum_14    = 4 * np.pi * high_z_lum_d**2 * high_z_14GHz  * 1e-6 * 1e-26 * (1 + high_z_zs)**(alpha_used - 1)
high_z_lum_3     = 4 * np.pi * high_z_lum_d**2 * high_z_3GHz   * 1e-6 * 1e-26 * (1 + high_z_zs)**(alpha_used - 1) * (3/1.4)**alpha_used
high_z_lum_15    = 4 * np.pi * high_z_lum_d**2 * high_z_15GHz  * 1e-6 * 1e-26 * (1 + high_z_zs)**(alpha_used - 1) * (1.5/1.4)**alpha_used
high_z_lum_250   = 4 * np.pi * high_z_lum_d**2 * high_z_250GHz * 1e-6 * 1e-26 * (1 + high_z_zs)**(alpha_used - 1) * (250/1.4)**(alpha_used)

Mix all luminosities (different bands) to obtain single value (adding zeroes).  
Millimetre luminosities will be used separately since we cannot be completely  
sure that they represent, fully, non-thermal emission (from AGN) and not dust.

In [15]:
high_z_lum_14GHz   = high_z_lum_14 + high_z_lum_3 + high_z_lum_15

We can also determine error values for these luminosities

In [16]:
high_z_lum_14GHz_e  = np.zeros_like(high_z_lum_14GHz)
high_z_lum_250GHz_e = np.zeros_like(high_z_lum_250)
for counter, element in enumerate(high_z_lum_14GHz):
    if element == 0: continue
    high_z_lum_14GHz_e[counter] = np.abs(element) * high_z_14_e[counter] / high_z_14[counter]
for counter, element in enumerate(high_z_lum_250):
    if element == 0: continue
    high_z_lum_250GHz_e[counter] = np.abs(element) * high_z_250GHz_e[counter] / high_z_250GHz[counter]

Create a filter to plot, when needed, only the sources which  
have mm data but not radio observations.

In [17]:
filter_250GHz = np.array((high_z_lum_250 > 0) * (high_z_lum_14GHz == 0))

Now, we can use the points we are interested in. Our sample from **Inayoshi et al., 2020** and the  
sources from **SDSS+FIRST** with $z>5.5$.

Another option to display the data is, instead of showing redshift in the  
horizontal axis, have the mass of the observed objects.

**Inayoshi et al., 2020** use the rest-frame UV magnitude $\mathrm{M}_{1450}$  
to calculate the mass as:

$$M = 10^{[-(\mathrm{M}_{1450} + 3.459) / 2.5]} [\mathrm{M}_{\odot}]$$

which yields, on average, the published virial mass estimates for those available.

Create formal arrays from catalogs to merge them

Arrays from `SDSS+FIRST`

In [18]:
#limit_z_sdss          = 5.5
limit_z_sdss          = 0.0
filter_sdss_z         = np.array(sdss_milli['redshift'][L_14GHz_filter] > limit_z_sdss)

In [19]:
upper_sdss_L          = L_21cm[filter_sdss_z]
upper_sdss_L_e        = L_21cm_e[filter_sdss_z]
upper_sdss_u_lim      = np.zeros_like(upper_sdss_L, dtype=np.bool)
upper_sdss            = sdss_milli[np.array(sdss_milli['redshift'] > limit_z_sdss) * L_14GHz_filter]
upper_sdss_ra         = upper_sdss['sdss_ra']
upper_sdss_dec        = upper_sdss['sdss_dec']
upper_sdss_z          = upper_sdss['redshift']
upper_sdss_z_e        = upper_sdss['redshift_err']
upper_sdss_f_20cm     = upper_sdss['flux_20_cm']  # mJy
upper_sdss_f_20cm_e   = upper_sdss['flux_20_cm'] / upper_sdss['snr_20_cm']  # mJy
upper_sdss_f_250GHz   = np.zeros_like(upper_sdss_L)
upper_sdss_f_250GHz_e = np.zeros_like(upper_sdss_L)
upper_sdss_L_250GHz   = np.zeros_like(upper_sdss_L)
upper_sdss_L_250GHz_e = np.zeros_like(upper_sdss_L)
upper_sdss_mass_1450  = np.zeros_like(upper_sdss_L)

Arrays from the `COSMOS` Field

In [20]:
# upper_cosmos_L          = int_14GHz_cosmos[np.array(cosmos_data['cosm_z'] > limit_z_sdss)]  # Calculated by them
upper_cosmos_L          = L_14GHz_cosmos[np.array(redshift_cosmos > limit_z_sdss)]  # Calculated with our spectral index
upper_cosmos_L_e        = L_14GHz_cosmos_e[np.array(redshift_cosmos > limit_z_sdss)]
upper_cosmos_u_lim      = np.zeros_like(upper_cosmos_L, dtype=np.bool)
upper_cosmos            = cosmos_data[np.array(redshift_cosmos > limit_z_sdss)]
upper_cosmos_ra         = upper_cosmos['cosm_ra']
upper_cosmos_dec        = upper_cosmos['cosm_dec']
upper_cosmos_z          = upper_cosmos['cosm_z']
upper_cosmos_z_e        = np.zeros_like(upper_cosmos_L)
upper_cosmos_f_20cm     = cosmos_14GHz_flux[np.array(redshift_cosmos > limit_z_sdss)]  # mJy
upper_cosmos_f_20cm_e   = cosmos_14GHz_flux_e[np.array(redshift_cosmos > limit_z_sdss)]  # mJy
upper_cosmos_f_250GHz   = np.zeros_like(upper_cosmos_L)
upper_cosmos_f_250GHz_e = np.zeros_like(upper_cosmos_L)
upper_cosmos_L_250GHz   = np.zeros_like(upper_cosmos_L)
upper_cosmos_L_250GHz_e = np.zeros_like(upper_cosmos_L)
upper_cosmos_mass_1450  = np.zeros_like(upper_cosmos_L)

Arrays from `radio`catalog (`Heasarc`)

In [21]:
radio_sdss_ra         = radio_sources['sdss_ra']
radio_sdss_dec        = radio_sources['sdss_dec']
radio_sdss_u_lim      = np.zeros_like(L_21cm_radio, dtype=np.bool)
radio_sdss_z          = radio_sources['sdss_z']
radio_sdss_z_e        = radio_sources['sdss_z_err']
radio_sdss_f_20cm     = radio_sources['radio_flux']
radio_sdss_f_20cm_e   = radio_sources['radio_flux_err']
radio_sdss_f_250GHz   = np.zeros_like(L_21cm_radio)
radio_sdss_f_250GHz_e = np.zeros_like(L_21cm_radio)
radio_sdss_L_250GHz   = np.zeros_like(L_21cm_radio)
radio_sdss_L_250GHz_e = np.zeros_like(L_21cm_radio)
radio_sdss_mass_1450  = np.zeros_like(L_21cm_radio)

Merging `SDSS+FIRST` and `radio`

In [22]:
radio_large_sample_ra         = np.append(upper_sdss_ra,         radio_sdss_ra)          # deg
radio_large_sample_dec        = np.append(upper_sdss_dec,        radio_sdss_dec)         # deg
radio_large_sample_L          = np.append(upper_sdss_L,          L_21cm_radio)           # W/Hz
radio_large_sample_L_e        = np.append(upper_sdss_L_e,        L_21cm_radio_e)         # W/Hz
radio_large_sample_u_lim      = np.append(upper_sdss_u_lim,      radio_sdss_u_lim)
radio_large_sample_L_250GHz   = np.append(upper_sdss_L_250GHz,   radio_sdss_L_250GHz)    # W/Hz
radio_large_sample_L_250GHz_e = np.append(upper_sdss_L_250GHz_e, radio_sdss_L_250GHz_e)  # W/Hz
radio_large_sample_f_20cm     = np.append(upper_sdss_f_20cm,     radio_sdss_f_20cm)      # mJy
radio_large_sample_f_20cm_e   = np.append(upper_sdss_f_20cm_e,   radio_sdss_f_20cm_e)    # mJy
radio_large_sample_f_250GHz   = np.append(upper_sdss_f_250GHz,   radio_sdss_f_250GHz)    # mJy
radio_large_sample_f_250GHz_e = np.append(upper_sdss_f_250GHz_e, radio_sdss_f_250GHz_e)  # mJy
radio_large_sample_z          = np.append(upper_sdss_z,          radio_sdss_z)
radio_large_sample_z_e        = np.append(upper_sdss_z_e,        radio_sdss_z_e)
radio_large_sample_mass_1450  = np.append(upper_sdss_mass_1450,  radio_sdss_mass_1450)   # M_sun

Merging `SDSS+FIRST`+`radio` and the catalog from **Inayoshi et al., 2020**

In [23]:
medium_sample_ra        = np.append(radio_large_sample_ra,         high_z_ra[np.array(high_z_lum_14GHz>0)])               # deg
medium_sample_dec       = np.append(radio_large_sample_dec,        high_z_dec[np.array(high_z_lum_14GHz>0)])              # deg
medium_sample_L         = np.append(radio_large_sample_L,          high_z_lum_14GHz[np.array(high_z_lum_14GHz>0)])        # W/Hz
medium_sample_L_e       = np.append(radio_large_sample_L_e,        high_z_lum_14GHz_e[np.array(high_z_lum_14GHz>0)])      # W/Hz
medium_sample_u_lim     = np.append(radio_large_sample_u_lim,      high_z_up_lim[np.array(high_z_lum_14GHz>0)])
medium_sample_L_250     = np.append(radio_large_sample_L_250GHz,   high_z_lum_250[np.array(high_z_lum_14GHz>0)])          # W/Hz
medium_sample_L_250_e   = np.append(radio_large_sample_L_250GHz_e, high_z_lum_250GHz_e[np.array(high_z_lum_14GHz>0)])     # W/Hz
medium_sample_f20cm     = np.append(radio_large_sample_f_20cm,     high_z_14[np.array(high_z_lum_14GHz>0)] * 1e-3)        # mJy
medium_sample_f20cm_e   = np.append(radio_large_sample_f_20cm_e,   high_z_14_e[np.array(high_z_lum_14GHz>0)] * 1e-3)      # mJy
medium_sample_f250GHz   = np.append(radio_large_sample_f_250GHz,   high_z_250GHz[np.array(high_z_lum_14GHz>0)] * 1e-3)    # mJy
medium_sample_f250GHz_e = np.append(radio_large_sample_f_250GHz_e, high_z_250GHz_e[np.array(high_z_lum_14GHz>0)] * 1e-3)  # mJy
medium_sample_z         = np.append(radio_large_sample_z,          high_z_zs[np.array(high_z_lum_14GHz>0)])
medium_sample_z_e       = np.append(radio_large_sample_z_e,        high_z_zs_e[np.array(high_z_lum_14GHz>0)])
medium_sample_mass_1450 = np.append(radio_large_sample_mass_1450,  high_z_mass_1450[np.array(high_z_lum_14GHz>0)])        # M_sun

Merging `SDSS+FIRST`+`radio`+**Inayoshi et al., 2020** and `COSMOS` data

In [24]:
large_sample_ra        = np.append(medium_sample_ra,        upper_cosmos_ra)          # deg
large_sample_dec       = np.append(medium_sample_dec,       upper_cosmos_dec)         # deg
large_sample_L         = np.append(medium_sample_L,         upper_cosmos_L)           # W/Hz
large_sample_L_e       = np.append(medium_sample_L_e,       upper_cosmos_L_e)         # W/Hz
large_sample_u_lim     = np.append(medium_sample_u_lim,     upper_cosmos_u_lim)
large_sample_L_250     = np.append(medium_sample_L_250,     upper_cosmos_L_250GHz)    # W/Hz
large_sample_L_250_e   = np.append(medium_sample_L_250_e,   upper_cosmos_L_250GHz_e)  # W/Hz
large_sample_f20cm     = np.append(medium_sample_f20cm,     upper_cosmos_f_20cm)      # mJy
large_sample_f20cm_e   = np.append(medium_sample_f20cm_e,   upper_cosmos_f_20cm_e)    # mJy
large_sample_f250GHz   = np.append(medium_sample_f250GHz,   upper_cosmos_f_250GHz)    # mJy
large_sample_f250GHz_e = np.append(medium_sample_f250GHz_e, upper_cosmos_f_250GHz_e)  # mJy
large_sample_z         = np.append(medium_sample_z,         upper_cosmos_z)
large_sample_z_e       = np.append(medium_sample_z_e,       upper_cosmos_z_e)
large_sample_mass_1450 = np.append(medium_sample_mass_1450, upper_cosmos_mass_1450)   # M_sun

At this point, we also want to obtain more properties from the selected  
sources (**Inayoshi et al., 2020** + **SDSS+FIRST**). We will use `astroquery` to  
obtain information from `simbad`.

First, we obtain the names and coordinates of our sources to query them.

In [25]:
names_radio_sdss    = np.append(radio_sources['sdss_name'], upper_sdss['name'])
medium_sample_names = np.append(names_radio_sdss, upper_cosmos['cosm_name'])
large_sample_names  = np.append(medium_sample_names, high_z_names[np.array(high_z_lum_14GHz>0)])
large_sample_coords = SkyCoord(ra=large_sample_ra, dec=large_sample_dec, unit=(u.deg, u.deg))

Now, we correct/change the names for those which can be looked up in `Simbad` and `Ned`.

In [26]:
old_to_replace = ['SDSS J125507.61+463126.5', 'SDSS J160558.86+474300.1', 'SDSS J111036.32+481752.3', 'SDSS J163033.89+401209.6']
new_to_replace = ['NVSS J125507+463128', '2MASS J16055893+4742596', 'NVSS J111036+481753', 'SDSS J163033.90+401209.6']

for old, new in zip(old_to_replace, new_to_replace):
    index = np.where(large_sample_names == old)
    large_sample_names[index] = new

Then, we can query the database to obtain the desired data.  In this point,  
we also add more columns to be queried.

In [27]:
query_simbad_flag               = False
load_simbad_flag                = False
create_simbad_inayoshi_flag     = False
read_simbad_inayoshi_flag       = False
query_ned_names_flag            = False
query_ned_photometry_flag       = False
order_ned_photometry_flag       = False
create_simbad_inayoshi_ned_flag = False
read_simbad_inayoshi_ned_flag   = True

In [28]:
if query_simbad_flag:
    customSimbad   = Simbad()
    initial_fields = customSimbad.get_votable_fields()

    if 'coordinates' in initial_fields:
        customSimbad.remove_votable_fields('coordinates')
        customSimbad.add_votable_fields('ra(d)', 'dec(d)')
    if 'z_value' not in initial_fields:
        customSimbad.add_votable_fields('z_value')
    for band in ['B','V','R','I','J','K']:
        if f'fluxdata({band})' not in initial_fields:
            customSimbad.add_votable_fields(f'flux({band})', f'flux_error({band})')

All sources but those from `COSMOS` catalog have meaningful (for `simbad`) names.  
Thus, separate queries will be executed. And, to standardize results, queries  
will only be based on coordinates (not names).

In [29]:
if query_simbad_flag:
    limit_set            = int(np.floor(np.shape(medium_sample_L)[0]/3))

In [30]:
if query_simbad_flag:
    result_table_simbad  = Table()

In [31]:
# result_table = customSimbad.query_objects(large_sample_names)
# customSimbad.TIMEOUT = 240

# result_table_job_a   = customSimbad.query_objects(large_sample_names[:limit_set])

# result_table_job_b   = customSimbad.query_objects(large_sample_names[limit_set:(limit_set*2)])

# result_table_job_c   = customSimbad.query_objects(large_sample_names[(limit_set*2):(limit_set*3)])

In [32]:
if query_simbad_flag:
    query_error = 0
    final_error = 0

In [33]:
if query_simbad_flag:
    with progressbar.ProgressBar(min_value=0, max_value=(limit_set - 1)) as bar:
        for index, coord in enumerate(large_sample_coords[:limit_set]):
            temp_table      = Table()
            try:
                temp_table      = customSimbad.query_region(coord, radius=2.0*u.arcsec)
                if len(temp_table) == 0:
                    temp_table      = Table(names=('RA_d', 'DEC_d'), dtype=(result_table_simbad['RA_d'].info.dtype, result_table_simbad['DEC_d'].info.dtype))
                    temp_table.add_row((coord.ra.deg, coord.dec.deg))
                temp_table.meta['description'] = 'Simbad_q'
            except:
                temp_table      = Table(names=('RA_d', 'DEC_d'), dtype=(result_table_simbad['RA_d'].info.dtype, result_table_simbad['DEC_d'].info.dtype))
                temp_table.add_row((coord.ra.deg, coord.dec.deg))
                temp_table.meta['description'] = 'Simbad_q'
                #print(f'Error with element {(index)} of the sample')
                query_error += 1
            try:
                if temp_table['RA_d'].unit != 'deg':
                    replacement_col     = MaskedColumn(temp_table['RA_d'].data, dtype=result_table_simbad['RA_d'].dtype, unit=result_table_simbad['RA_d'].unit, format=result_table_simbad['RA_d'].format, description=result_table_simbad['RA_d'].description)
                    temp_table['RA_d']  = replacement_col
                result_table_simbad = vstack([result_table_simbad, temp_table[0]])
            except:
                print(f'Error with element {(index)} of the sample')
                final_error += 1
            bar.update(index)

In [34]:
if query_simbad_flag:
    with progressbar.ProgressBar(min_value=limit_set, max_value=(limit_set*2 - 1)) as bar:
        for index, coord in enumerate(large_sample_coords[limit_set:(limit_set*2)]):
            temp_table      = Table()
            try:
                temp_table      = customSimbad.query_region(coord, radius=2.0*u.arcsec)
                if len(temp_table) == 0:
                    temp_table      = Table(names=('RA_d', 'DEC_d'), dtype=(result_table_simbad['RA_d'].info.dtype, result_table_simbad['DEC_d'].info.dtype))
                    temp_table.add_row((coord.ra.deg, coord.dec.deg))
                temp_table.meta['description'] = 'Simbad_q'
            except:
                temp_table      = Table(names=('RA_d', 'DEC_d'), dtype=(result_table_simbad['RA_d'].info.dtype, result_table_simbad['DEC_d'].info.dtype))
                temp_table.add_row((coord.ra.deg, coord.dec.deg))
                temp_table.meta['description'] = 'Simbad_q'
                #print(f'Error with element {(index + limit_set)} of the sample')
                query_error += 1
            try:
                if temp_table['RA_d'].unit != 'deg':
                    replacement_col     = MaskedColumn(temp_table['RA_d'].data, dtype=result_table_simbad['RA_d'].dtype, unit=result_table_simbad['RA_d'].unit, format=result_table_simbad['RA_d'].format, description=result_table_simbad['RA_d'].description)
                    temp_table['RA_d']  = replacement_col
                result_table_simbad = vstack([result_table_simbad, temp_table[0]])
            except:
                print(f'Error with element {(index + limit_set)} of the sample')
                final_error += 1
            bar.update(index + limit_set)

In [35]:
if query_simbad_flag:
    with progressbar.ProgressBar(min_value=(limit_set*2), max_value=(limit_set*3)) as bar:
        for index, coord in enumerate(large_sample_coords[(limit_set*2):(limit_set*3 + 1)]):
            temp_table      = Table()
            try:
                temp_table      = customSimbad.query_region(coord, radius=2.0*u.arcsec)
                if len(temp_table) == 0:
                    temp_table      = Table(names=('RA_d', 'DEC_d'), dtype=(result_table_simbad['RA_d'].info.dtype, result_table_simbad['DEC_d'].info.dtype))
                    temp_table.add_row((coord.ra.deg, coord.dec.deg))
                temp_table.meta['description'] = 'Simbad_q'
            except:
                temp_table      = Table(names=('RA_d', 'DEC_d'), dtype=(result_table_simbad['RA_d'].info.dtype, result_table_simbad['DEC_d'].info.dtype))
                temp_table.add_row((coord.ra.deg, coord.dec.deg))
                temp_table.meta['description'] = 'Simbad_q'
                # print(f'Error with element {(index + limit_set*2)} of the sample')
                query_error += 1
            try:
                if temp_table['RA_d'].unit != 'deg':
                    replacement_col     = MaskedColumn(temp_table['RA_d'].data, dtype=result_table_simbad['RA_d'].dtype, unit=result_table_simbad['RA_d'].unit, format=result_table_simbad['RA_d'].format, description=result_table_simbad['RA_d'].description)
                    temp_table['RA_d']  = replacement_col
                result_table_simbad = vstack([result_table_simbad, temp_table[0]])
            except:
                print(f'Error with element {(index + limit_set*2)} of the sample')
                final_error += 1
            bar.update(index + limit_set*2)

Query for `COSMOS`

In [36]:
if query_simbad_flag:
    limit_set_cosmos = int(np.floor(np.shape(upper_cosmos_L)[0]/3))

In [37]:
if query_simbad_flag:
    with progressbar.ProgressBar(min_value=(limit_set*3 + 1), max_value=(limit_set*3 + limit_set_cosmos)) as bar:
        for index, coord in enumerate(large_sample_coords[(limit_set*3 + 1):(limit_set*3 + 1 + limit_set_cosmos)]):
            temp_table      = Table()
            try:
                temp_table      = customSimbad.query_region(coord, radius=3.0*u.arcsec)
                if len(temp_table) == 0:
                    temp_table      = Table(names=('RA_d', 'DEC_d'), dtype=(result_table_simbad['RA_d'].info.dtype, result_table_simbad['DEC_d'].info.dtype))
                    temp_table.add_row((coord.ra.deg, coord.dec.deg))
                temp_table.meta['description'] = 'Simbad_q'
            except:
                # print(f'Error with element {(index + limit_set*3 + 1)} of the sample')
                temp_table      = Table(names=('RA_d', 'DEC_d'), dtype=(result_table_simbad['RA_d'].info.dtype, result_table_simbad['DEC_d'].info.dtype))
                temp_table.add_row((coord.ra.deg, coord.dec.deg))
                temp_table.meta['description'] = 'Simbad_q'
                query_error += 1
            try:
                if temp_table['RA_d'].unit != 'deg':
                    replacement_col     = MaskedColumn(temp_table['RA_d'].data, dtype=result_table_simbad['RA_d'].dtype, unit=result_table_simbad['RA_d'].unit, format=result_table_simbad['RA_d'].format, description=result_table_simbad['RA_d'].description)
                    temp_table['RA_d']  = replacement_col
                result_table_simbad = vstack([result_table_simbad, temp_table[0]])
            except:
                print(f'Error with element {(index + limit_set*3 + 1)} of the sample')
                temp_table      = Table(names=('RA_d', 'DEC_d'), dtype=(result_table_simbad['RA_d'].info.dtype, result_table_simbad['DEC_d'].info.dtype))
                temp_table.add_row((coord.ra.deg, coord.dec.deg))
                temp_table.meta['description'] = 'Simbad query for radio objects'
                result_table_simbad = vstack([result_table_simbad, temp_table[0]])
                final_error += 1
            bar.update(index + limit_set*3 + 1)

In [38]:
if query_simbad_flag:
    with progressbar.ProgressBar(min_value=(limit_set*3 + 1 + limit_set_cosmos), max_value=(limit_set*3 + limit_set_cosmos*2)) as bar:
        for index, coord in enumerate(large_sample_coords[(limit_set*3 + 1 + limit_set_cosmos):(limit_set*3 + 1 + limit_set_cosmos*2)]):
            temp_table      = Table()
            try:
                temp_table      = customSimbad.query_region(coord, radius=3.0*u.arcsec)
                if len(temp_table) == 0:
                    temp_table      = Table(names=('RA_d', 'DEC_d'), dtype=(result_table_simbad['RA_d'].info.dtype, result_table_simbad['DEC_d'].info.dtype))
                    temp_table.add_row((coord.ra.deg, coord.dec.deg))
                temp_table.meta['description'] = 'Simbad_q'
            except:
                temp_table      = Table(names=('RA_d', 'DEC_d'), dtype=(result_table_simbad['RA_d'].info.dtype, result_table_simbad['DEC_d'].info.dtype))
                temp_table.add_row((coord.ra.deg, coord.dec.deg))
                temp_table.meta['description'] = 'Simbad_q'
                # print(f'Error with element {(index + limit_set*3 + 1 + limit_set_cosmos)} of the sample')
                query_error += 1
            try:
                if temp_table['RA_d'].unit != 'deg':
                    replacement_col     = MaskedColumn(temp_table['RA_d'].data, dtype=result_table_simbad['RA_d'].dtype, unit=result_table_simbad['RA_d'].unit, format=result_table_simbad['RA_d'].format, description=result_table_simbad['RA_d'].description)
                    temp_table['RA_d']  = replacement_col
                result_table_simbad = vstack([result_table_simbad, temp_table[0]])
            except:
                print(f'Error with element {(index + limit_set*3 + 1 + limit_set_cosmos)} of the sample')
                temp_table      = Table(names=('RA_d', 'DEC_d'), dtype=(result_table_simbad['RA_d'].info.dtype, result_table_simbad['DEC_d'].info.dtype))
                temp_table.add_row((coord.ra.deg, coord.dec.deg))
                temp_table.meta['description'] = 'Simbad query for radio objects'
                result_table_simbad = vstack([result_table_simbad, temp_table[0]])
                final_error += 1
            bar.update(index + limit_set*3 + 1 + limit_set_cosmos)

In [39]:
if query_simbad_flag:
    with progressbar.ProgressBar(min_value=(limit_set*3 + 1 + limit_set_cosmos*2), max_value=np.shape(large_sample_coords)[0]) as bar:
        for index, coord in enumerate(large_sample_coords[(limit_set*3 + 1 + limit_set_cosmos*2):]):
            temp_table      = Table()
            try:
                temp_table      = customSimbad.query_region(coord, radius=3.0*u.arcsec)
                if len(temp_table) == 0:
                    temp_table      = Table(names=('RA_d', 'DEC_d'), dtype=(result_table_simbad['RA_d'].info.dtype, result_table_simbad['DEC_d'].info.dtype))
                    temp_table.add_row((coord.ra.deg, coord.dec.deg))
                temp_table.meta['description'] = 'Simbad_q'
            except:
                temp_table      = Table(names=('RA_d', 'DEC_d'), dtype=(result_table_simbad['RA_d'].info.dtype, result_table_simbad['DEC_d'].info.dtype))
                temp_table.add_row((coord.ra.deg, coord.dec.deg))
                temp_table.meta['description'] = 'Simbad_q'
                # print(f'Error with element {(index + limit_set*3 + 1 + limit_set_cosmos*2)} of the sample')
                query_error += 1
            try:
                if temp_table['RA_d'].unit != 'deg':
                    replacement_col     = MaskedColumn(temp_table['RA_d'].data, dtype=result_table_simbad['RA_d'].dtype, unit=result_table_simbad['RA_d'].unit, format=result_table_simbad['RA_d'].format, description=result_table_simbad['RA_d'].description)
                    temp_table['RA_d']  = replacement_col
                result_table_simbad = vstack([result_table_simbad, temp_table[0]])
            except:
                print(f'Error with element {(index + limit_set*3 + 1 + limit_set_cosmos*2)} of the sample')
                temp_table      = Table(names=('RA_d', 'DEC_d'), dtype=(result_table_simbad['RA_d'].info.dtype, result_table_simbad['DEC_d'].info.dtype))
                temp_table.add_row((coord.ra.deg, coord.dec.deg))
                temp_table.meta['description'] = 'Simbad query for radio objects'
                result_table_simbad = vstack([result_table_simbad, temp_table[0]])
                final_error += 1
            bar.update(index + limit_set*3 + 1 + limit_set_cosmos*2)

We save the query to a file for future executions.

In [40]:
if query_simbad_flag:
    result_table_simbad.info

In [41]:
if query_simbad_flag:
    result_table_simbad.write('large_cat_simbad_query.csv', format='ascii.csv', overwrite=True, serialize_method='data_mask')

Steps to create a copy of table to save it as `fits` file.

In [42]:
if query_simbad_flag:
    copy_simbad_to_write = result_table_simbad
    str_id = copy_simbad_to_write['MAIN_ID'].astype('str')
    copy_simbad_to_write.replace_column('MAIN_ID', str_id)

In [43]:
if query_simbad_flag:
    copy_simbad_to_write.info

In [44]:
if query_simbad_flag:
    copy_simbad_to_write.write('large_cat_simbad_query.fits', format='fits', overwrite=True, serialize_method='data_mask')

To save running time, we can load the data from a file.

In [45]:
#if load_simbad_flag:
#    result_table_simbad = Table.read(cat_path + 'large_cat_simbad_query.csv', format='ascii.csv')

In [46]:
if load_simbad_flag:
    result_table_simbad     = Table.read(cat_path + 'large_cat_simbad_query.fits');

In [47]:
# result_table_simbad.info

From this point, we merge the data from the query to `simbad` with the  
values from this notebook (**Inayoshi et al., 2020** and **SDSS+FIRST**).  
In order to do this, we convert the data into `astropy` columns, and then  
into `astropy` tables. They will be ready to be exported.

In [48]:
if create_simbad_inayoshi_flag:
    coords_simbad_inayoshi = SkyCoord(result_table_simbad['RA_d'], result_table_simbad['DEC_d'], unit=u.deg)

In [49]:
if create_simbad_inayoshi_flag:
    column_cat_index    = MaskedColumn(np.arange(np.shape(large_sample_z)[0]), name='INDEX', dtype='int', description='Index number')
    column_cat_name     = MaskedColumn(large_sample_names, name='CAT_NAME', dtype='str', description='Name used in this catalog', mask=np.array(large_sample_names == ''))
    column_cat_coords   = MaskedColumn(coords_simbad_inayoshi.to_string('decimal'), name='COORD', dtype='str', description='Merged Coordinates', mask=np.array(coords_simbad_inayoshi.to_string('decimal') == ''))
    column_z_own        = MaskedColumn(large_sample_z, name='Z_OWN', unit='', description='Redshift from Inayoshi+2020 or SDSS+FIRST', fill_value=np.nan, mask=np.array(large_sample_z == 0))
    column_z_own_err    = MaskedColumn(large_sample_z_e, name='Z_OWN_ERR', unit='', description='Redshift error from Inayoshi+2020 or SDSS+FIRST', fill_value=np.nan, mask=np.array(large_sample_z_e == 0))
    column_L_14GHz      = MaskedColumn(large_sample_L, name='L_20CM', unit='W/Hz', description='Luminosity in 1.4 GHz', fill_value=np.nan, mask=np.array(large_sample_L == 0))
    column_L_14GHz_err  = MaskedColumn(large_sample_L_e, name='L_20CM_ERR', unit='W/Hz', description='Luminosity error in 1.4 GHz', fill_value=np.nan, mask=np.array(large_sample_L_e == 0))
    column_L_14GHz_up   = MaskedColumn(large_sample_u_lim, name='L_20CM_UP_LIM', dtype='bool', description='True if L_20CM is upper limit')
    column_L_250GHz     = MaskedColumn(large_sample_L_250, name='L_250GHZ', unit='W/Hz', description='Luminosity in 250 GHz', fill_value=np.nan, mask=np.array(large_sample_L_250 == 0))
    column_L_250GHz_err = MaskedColumn(large_sample_L_250_e, name='L_250GHZ_ERR', unit='W/Hz', description='Luminosity error in 250 GHz', fill_value=np.nan, mask=np.array(large_sample_L_250_e == 0))
    column_f_20cm       = MaskedColumn(large_sample_f20cm, name='F_20CM', unit='mJy', description='Flux in 20 cm', fill_value=np.nan, mask=np.array(large_sample_f20cm == 0))
    column_f_20cm_err   = MaskedColumn(large_sample_f20cm_e, name='F_20CM_ERR', unit='mJy', description='Flux error in 20 cm', fill_value=np.nan, mask=np.array(large_sample_f20cm_e == 0))
    column_f_250GHz     = MaskedColumn(large_sample_f250GHz, name='F_250GHZ', unit='mJy', description='Flux in 250 GHz', fill_value=np.nan, mask=np.array(large_sample_f250GHz == 0))
    column_f_250GHz_err = MaskedColumn(large_sample_f250GHz_e, name='F_250GHZ_ERR', unit='mJy', description='Flux error in 250 GHz', fill_value=np.nan, mask=np.array(large_sample_f250GHz_e == 0))
    column_mass_1450    = MaskedColumn(large_sample_mass_1450, name='MASS_1450', unit='Msun', description='Mass from mag_1450 (UV)', fill_value=np.nan, mask=np.array(large_sample_mass_1450 == 0))

In [50]:
if create_simbad_inayoshi_flag:
    result_table_simbad.add_columns([column_cat_index, column_cat_name, column_cat_coords, column_z_own, column_z_own_err, column_L_14GHz, column_L_14GHz_err, column_L_14GHz_up, column_L_250GHz, column_L_250GHz_err, column_f_20cm, column_f_20cm_err, column_f_250GHz, column_f_250GHz_err, column_mass_1450])

In [52]:
if create_simbad_inayoshi_flag:
    str_id = result_table_simbad['MAIN_ID'].astype('str')
    result_table_simbad.replace_column('MAIN_ID', str_id)

In [53]:
# copy_table = result_table_simbad.filled(fill_value=np.nan)

We write the table into a file. It can be `.fits`, `.votable`, etc.

In [54]:
#copy_table.write('high_z_qsos.ecsv', format='ascii.ecsv', overwrite=True, serialize_method='data_mask')

In [55]:
if create_simbad_inayoshi_flag:
    result_table_simbad.write('large_cat_simbad_query_inayoshi.fits', format='fits', overwrite=True, serialize_method='data_mask')



In [56]:
if create_simbad_inayoshi_flag:
    result_table_simbad.write('large_cat_simbad_query_inayoshi.csv', format='ascii.csv', overwrite=True, serialize_method='data_mask')

To save running time, we can load the data from a file.

In [57]:
#if read_simbad_inayoshi_flag:
#    result_table_simbad = Table.read(cat_path + 'large_cat_simbad_query_inayoshi.csv', format='ascii.csv')

In [58]:
if read_simbad_inayoshi_flag:
    result_table_simbad = Table.read(cat_path + 'large_cat_simbad_query_inayoshi.fits');

In [59]:
result_table_simbad.info

<Table length=18023>
     name      dtype    unit   format                    description                   n_bad
------------- ------- ------- -------- ----------------------------------------------- -----
      MAIN_ID   str41                                    Main identifier for an object   606
         RA_d float64     deg {:11.8f}                                 Right ascension     0
        DEC_d float64     deg {:12.8f}                                     Declination     0
      Z_VALUE float64         {:16.7f}                                        Redshift  1644
       FLUX_B float32     mag                                              Magnitude B  8961
 FLUX_ERROR_B float32         {:12.3f}                                      flux error 11228
       FLUX_V float32     mag                                              Magnitude V  8857
 FLUX_ERROR_V float32         {:12.3f}                                      flux error 11189
       FLUX_R float32     mag                    

Query the objects of the table in other catalogs and services.

In [60]:
#from astroquery.heasarc import Heasarc
#Heasarc.query_mission_cols(mission='radio')
#tabb = Heasarc.query(large_sample_names, mission='radio', timeout=90)

In [61]:
customNed        = Ned()
fields_to_remove = ['No.', 'Photometry Measurement', 'Uncertainty', 'Units', 'Significance', 'Published frequency', 'Frequency Mode', 'Coordinates Targeted', 'Spatial Mode', 'Qualifiers', 'Comments']

Querying sources with name in `Ned`.

In [62]:
# empty_counter = 0
# res_tab       = {}
# for name in large_sample_names:
#     try:
#         res_tab[name] = customNed.get_table(name, output_table_format=1)
#         res_tab[name].remove_columns(fields_to_remove)
#     except:
#         res_tab[name] = Table()
#         empty_counter += 1

As before, we can do it with coordinates.  

First, we query the coordinates. If we found something,  
we use the name of the source to obtain it photometry.

In [63]:
coords_simbad_inayoshi = SkyCoord(result_table_simbad['RA_d'], result_table_simbad['DEC_d'], unit=u.deg)

In [64]:
empty_tab_name_counter  = 0
empty_tab_photo_counter = 0
error_tab_name_counter  = 0
error_tab_photo_counter = 0
ned_tables              = {}
ned_info                = {}
ned_names               = []
#ned_names               = np.array([''  for x in np.arange(np.shape(large_sample_names)[0])])

In [65]:
if query_ned_names_flag:
    with progressbar.ProgressBar(min_value=0, max_value=np.shape(coords_simbad_inayoshi)[0]) as bar:
        for index, coord in enumerate(coords_simbad_inayoshi):
            try:
                init_table            = customNed.query_region(coords_simbad_inayoshi[index], radius=3.0*u.arcsec)
                if len(init_table) == 0:
                    init_table        = Table(names=('Object Name', 'RA', 'DEC'), dtype=('str', 'float', 'float'), masked=True)
                    init_table.add_row(('No Name', coords_simbad_inayoshi[index].ra.deg, coords_simbad_inayoshi[index].dec.deg), mask=[True, False, False])
                    ned_info[index]   = init_table
                    ned_names.append('No Name')
                    empty_tab_name_counter += 1
                    continue
                init_table.remove_columns(['Magnitude and Filter', 'Positions', 'Diameter Points'])
                ned_info[index]   = init_table
                used_source_idx   = np.nanargmin(init_table['Separation'])  # Index of element with lowest separation from coords
                init_name         = init_table['Object Name'][used_source_idx]
                # ned_names[index]  = init_name
                ned_names.append(init_name)
            except:
                init_table        = Table(names=('Object Name', 'RA', 'DEC'), dtype=('str', 'float', 'float'), masked=True)
                init_table.add_row(('No Name', coords_simbad_inayoshi[index].ra.deg, coords_simbad_inayoshi[index].dec.deg), mask=[True, False, False])
                ned_info[index]   = init_table
                ned_names.append('No Name')
                error_tab_name_counter += 1
            bar.update(index)

100% (18023 of 18023) |##################| Elapsed Time: 0:14:13 Time:  0:14:13


In [66]:
ned_names = np.array(ned_names)

In [67]:
ned_redshifts = []
for key in ned_info:
    ned_redshifts.append(ned_info[index]['Redshift'])
ned_redshifts = np.array(ned_redshifts)

In [68]:
counter_count = 0
indices_non   = []
for index, name in enumerate(ned_names):
    if name != 'No Name':
        counter_count += 1
for index, name in enumerate(ned_names):
    if name == 'No Name':
        indices_non.append(index)

In [69]:
if query_ned_photometry_flag:
    with progressbar.ProgressBar(min_value=0, max_value=np.shape(ned_names)[0]) as bar:
        for index, name in enumerate(ned_names):
            try:
                if name == 'No Name':
                    phot_table        = Table(names=('Observed Passband',), dtype=('str',), masked=True)
                    phot_table.add_row(('No Passband',), mask=(True,))
                    ned_tables[index] = phot_table
                    empty_tab_photo_counter += 1
                    continue
                phot_table            = customNed.get_table(name, table='photometry', output_table_format=3)
                phot_table.remove_columns(fields_to_remove)
                ned_tables[index]     = phot_table
            except:
                phot_table            = Table(names=('Observed Passband',), dtype=('str',), masked=True)
                phot_table.add_row(('No Passband',), mask=(True,))
                ned_tables[index]     = phot_table
                error_tab_photo_counter += 1
            bar.update(index)

100% (18023 of 18023) |##################| Elapsed Time: 0:05:11 Time:  0:05:11


In [70]:
ned_tables[12223].colnames

['Observed Passband',
 'Frequency',
 'Flux Density',
 'Upper limit of uncertainty',
 'Lower limit of uncertainty',
 'Upper limit of Flux Density',
 'Lower limit of Flux Density',
 'NED Uncertainty',
 'NED Units',
 'Refcode']

In [71]:
# empty_counter = 0
# res_tab       = {}
# for name in large_sample_names:
#     try:
#         res_tab[name] = customNed.query_region(large_sample_coords, radius=3.0*u.arcsec)[0]
#         res_tab[name].remove_columns(fields_to_remove)
#     except:
#         res_tab[name] = Table()
#         empty_counter += 1

In [72]:
if order_ned_photometry_flag:
    limit_set_ned = int(np.floor(np.shape(ned_names)[0]/4))

In [73]:
if order_ned_photometry_flag:
    temp_table_ned_photo             = Table()
    chunk_size                       = 300  # Number of elements to calculate before dumping results to external table
    with progressbar.ProgressBar(min_value=0, max_value=np.shape(ned_names)[0]) as bar:
        for index, source_name in enumerate(ned_names):  # Some names will be. 'No Name'
            band_names_str           = []
            column_names_str         = []
            band_frequencies         = []
            measure_names            = ned_tables[index].colnames[1:]
            # init_table = Table(names=('MAIN_ID',), dtype=('str',), masked=True)
            cord_str                 = coords_simbad_inayoshi[index].to_string('decimal')
            init_table               = Table(data=np.array([index, cord_str, source_name]), names=('INDEX', 'COORD', 'MAIN_ID'), dtype=('int', 'str', 'str'), masked=True)
            if source_name == 'No Name':
                # init_table = Table(('No Name',), names=('MAIN_ID',), dtype=('str',), masked=True)
                # init_table.add_row(('No Name',), mask=(True,)) # Mask values in the last step instead
                if index == 0 or (index % chunk_size == 1 and index > 1):
                    init_table_large     = Table(init_table)
                    bar.update(index)
                    continue
                init_table_large     = vstack([init_table_large, init_table])
                if index % chunk_size == 0 and index > 0:
                    temp_table_ned_photo = vstack([temp_table_ned_photo, init_table_large])
                    # result_table_simbad_copy = join(result_table_simbad_copy, init_table_large, keys='COORD', join_type='outer')
                bar.update(index)
                continue
            if len(measure_names) == 0:
                # init_table = Table(('No Name',), names=('MAIN_ID',), dtype=('str',), masked=True)
                # init_table.add_row((source_name,), mask=(True,))
                if index == 0 or (index % chunk_size == 1 and index > 1):
                    init_table_large     = Table(init_table)
                    bar.update(index)
                    continue
                init_table_large     = vstack([init_table_large, init_table])
                if index % chunk_size == 0 and index > 0:
                    temp_table_ned_photo = vstack([temp_table_ned_photo, init_table_large])
                    # result_table_simbad_copy = join(result_table_simbad_copy, init_table_large, keys='COORD', join_type='outer')
                bar.update(index)
                continue
            # init_table = Table((str(source_name),), names=('MAIN_ID',), dtype=('str',), masked=True)
            # init_table.add_row((str(source_name),))
            for row in ned_tables[index]:
                band_name_str        = re.sub(r' \(.*', '', str(row['Observed Passband'].decode('utf-8')))  # Eliminate differences
                if str(band_name_str) not in band_names_str and row['Frequency'] not in band_frequencies:
                    band_frequencies.append(row['Frequency'])
                    band_names_str.append(str(band_name_str))
                    column_name_flux = 'Flux Density ' + band_name_str
                    column_name_err  = 'NED Uncertainty ' + band_name_str
                    if column_name_flux not in column_names_str:
                        column_names_str.append(column_name_flux)
                        column_flux  = MaskedColumn(row['Flux Density'], name=column_name_flux, unit=ned_tables[index]['Flux Density'].unit, dtype='float')
                        column_err   = MaskedColumn(row['NED Uncertainty'], name=column_name_err, dtype='str')
                        init_table.add_columns((column_flux, column_err))
            #init_table.remove_column('MAIN_ID')
            if index == 0 or (index % chunk_size == 1 and index > 1):
                init_table_large     = Table(init_table)
                bar.update(index)
                continue
            init_table_large         = vstack([init_table_large, init_table])
            if index % chunk_size == 0 and index > 0:
                temp_table_ned_photo = vstack([temp_table_ned_photo, init_table_large])
                # result_table_simbad_copy = join(result_table_simbad_copy, init_table_large, keys='COORD', join_type='outer')
            bar.update(index)
    temp_table_ned_photo = vstack([temp_table_ned_photo, init_table_large])
    # result_table_simbad_copy = join(result_table_simbad_copy, init_table_large, keys='COORD', join_type='outer')
    #result_table_simbad_copy         = Table(result_table_simbad)
    #result_table_simbad_copy.add_column(coords_simbad_inayoshi.to_string('decimal'), name='COORD')

100% (18023 of 18023) |##################| Elapsed Time: 0:34:16 Time:  0:34:16


In [82]:
len(temp_table_ned_photo.colnames)

1055

In [74]:
if create_simbad_inayoshi_ned_flag:
    column_coord = MaskedColumn(temp_table_ned_photo['COORD'], name='COORD', mask=np.array(temp_table_ned_photo['COORD'] == ''))
    column_id = MaskedColumn(temp_table_ned_photo['MAIN_ID'], name='MAIN_ID', description='Main identifier for an object', mask=np.array(temp_table_ned_photo['MAIN_ID'] == 'No Name'))

In [75]:
if create_simbad_inayoshi_ned_flag:
    temp_table_ned_photo.replace_column('COORD', column_coord)
    temp_table_ned_photo.replace_column('MAIN_ID', column_id)

In [76]:
if create_simbad_inayoshi_ned_flag:
    df_ned    = temp_table_ned_photo.to_pandas()

In [77]:
if create_simbad_inayoshi_ned_flag:
    df_simbad = result_table_simbad.to_pandas()

Unnamed: 0,RA_d,DEC_d,Z_VALUE,FLUX_B,FLUX_ERROR_B,FLUX_V,FLUX_ERROR_V,FLUX_R,FLUX_ERROR_R,FLUX_I,...,Z_OWN_ERR,L_20CM,L_20CM_ERR,L_250GHZ,L_250GHZ_ERR,F_20CM,F_20CM_ERR,F_250GHZ,F_250GHZ_ERR,MASS_1450
count,18023.0,18023.0,16379.0,9062.0,6795.0,9166.0,6834.0,5540.0,5241.0,5856.0,...,8650.0,17957.0,17938.0,13.0,13.0,17957.0,17938.0,13.0,13.0,17.0
mean,165.161309,14.413467,1.686053,23.330841,0.078736,22.782164,0.068906,23.307508,0.060399,22.746859,...,0.00688,4.736101e+26,3.147703e+24,2.160516e+28,7.353389e+27,19.074815,0.125621,1.732615,0.590308,2556118000.0
std,48.779566,18.108288,0.946778,2.664027,0.09261,2.596944,0.090806,2.23383,0.090315,1.985852,...,0.622824,2.727563e+27,1.109735e+25,3.129408e+28,3.806932e+27,158.854176,0.816971,2.599729,0.313668,2737439000.0
min,0.005324,-22.02839,-0.00337,14.91,0.0,14.8,0.002,13.8,0.0,15.04,...,-6.0,9646523000000000.0,1348291000000000.0,8.841515e+26,6.799705e+26,0.003896,0.000372,0.07,0.04,655000000.0
25%,149.76881,2.099269,0.88785,20.91,0.02,20.459999,0.02,21.9695,0.01,21.3815,...,0.0002,3.574307e+22,3.932484e+21,2.517779e+27,5.655792e+27,0.004959,0.000609,0.21,0.46,1230000000.0
50%,150.40508,2.73556,1.567,24.031,0.05,23.32,0.04,23.639999,0.03,22.809999,...,0.0004,8.950888e+23,7.585818e+22,9.812243e+27,7.359182e+27,0.212533,0.017,0.8,0.6,1720000000.0
75%,182.881579,25.482843,2.41,25.48,0.09,24.870001,0.08,24.942001,0.06,24.23,...,0.0006,1.421476e+26,4.594716e+24,2.251746e+28,1.048921e+28,5.5,0.152514,1.82,0.8,2730000000.0
max,359.905497,64.522768,7.5413,28.837999,1.247,27.99,0.97,28.0,0.81,27.959999,...,41.8536,1.894966e+29,4.082157e+26,1.1405969999999998e+29,1.369149e+28,14774.4199,71.099998,9.46,1.13,12400000000.0


In [78]:
if create_simbad_inayoshi_ned_flag:
    merged_ned_simbad = pd.merge(df_simbad, df_ned, on='INDEX')

In [95]:
# merged_ned_simbad.columns[0:40]

Index(['MAIN_ID_x', 'RA_d', 'DEC_d', 'Z_VALUE', 'FLUX_B', 'FLUX_ERROR_B',
       'FLUX_V', 'FLUX_ERROR_V', 'FLUX_R', 'FLUX_ERROR_R', 'FLUX_I',
       'FLUX_ERROR_I', 'FLUX_J', 'FLUX_ERROR_J', 'FLUX_K', 'FLUX_ERROR_K',
       'INDEX', 'CAT_NAME', 'COORD_x', 'Z_OWN', 'Z_OWN_ERR', 'L_20CM',
       'L_20CM_ERR', 'L_20CM_UP_LIM', 'L_250GHZ', 'L_250GHZ_ERR', 'F_20CM',
       'F_20CM_ERR', 'F_250GHZ', 'F_250GHZ_ERR', 'MASS_1450', 'COORD_y',
       'MAIN_ID_y', 'Flux Density u', 'NED Uncertainty u', 'Flux Density g',
       'NED Uncertainty g', 'Flux Density r', 'NED Uncertainty r',
       'Flux Density i'],
      dtype='object')

In [86]:
# merged_ned_simbad.loc[18000:18010, ['COORD_x', 'COORD_y']]

Unnamed: 0,COORD_x,COORD_y
18000,149.539 2.93855,149.539 2.93855
18001,149.745 2.93884,149.745 2.93884
18002,150.489 2.93917,150.489 2.93917
18003,150.006 2.93915,150.006 2.93915
18004,150.809 2.93986,150.809 2.93986
18005,150.398 2.94012,150.398 2.94012
18006,150.085 2.94013,150.085 2.94013
18007,149.538 2.94105,149.538 2.94105
18008,150.56 2.94199,150.56 2.94199
18009,149.698 2.94245,149.698 2.94245


In [79]:
# merged_ned_simbad.describe()

Unnamed: 0,RA_d,DEC_d,Z_VALUE,FLUX_B,FLUX_ERROR_B,FLUX_V,FLUX_ERROR_V,FLUX_R,FLUX_ERROR_R,FLUX_I,...,Flux Density 203.850 GHz,Flux Density 163.088 GHz,Flux Density 81.551 GHz,Flux Density [OII] 3727,Flux Density [NeIII] 3869 VIRUS-P,Flux Density [OIII] 5007 VIRUS-P,Flux Density 890 microns,Flux Density 1.1 mm,Flux Density 345 GHz,Flux Density 830 A
count,18023.0,18023.0,16379.0,9062.0,6795.0,9166.0,6834.0,5540.0,5241.0,5856.0,...,1.0,1.0,1.0,12.0,1.0,1.0,8.0,16.0,1.0,2.0
mean,165.161309,14.413467,1.686053,23.330841,0.078736,22.782164,0.068906,23.307508,0.060399,22.746859,...,0.0116,0.00839,0.00308,67291670.0,95000000.0,71000000.0,0.012162,0.005337,0.021,1.52e-08
std,48.779566,18.108288,0.946778,2.664027,0.09261,2.596944,0.090806,2.23383,0.090315,1.985852,...,,,,83312420.0,,,0.004622,0.002011,,4.949747e-09
min,0.005324,-22.02839,-0.00337,14.91,0.0,14.8,0.002,13.8,0.0,15.04,...,0.0116,0.00839,0.00308,10300000.0,95000000.0,71000000.0,0.0074,0.0026,0.021,1.17e-08
25%,149.76881,2.099269,0.88785,20.91,0.02,20.459999,0.02,21.9695,0.01,21.3815,...,0.0116,0.00839,0.00308,20925000.0,95000000.0,71000000.0,0.009125,0.00425,0.021,1.345e-08
50%,150.40508,2.73556,1.567,24.031,0.05,23.32,0.04,23.639999,0.03,22.809999,...,0.0116,0.00839,0.00308,36850000.0,95000000.0,71000000.0,0.011,0.0053,0.021,1.52e-08
75%,182.881579,25.482843,2.41,25.48,0.09,24.870001,0.08,24.942001,0.06,24.23,...,0.0116,0.00839,0.00308,62825000.0,95000000.0,71000000.0,0.0135,0.00635,0.021,1.695e-08
max,359.905497,64.522768,7.5413,28.837999,1.247,27.99,0.97,28.0,0.81,27.959999,...,0.0116,0.00839,0.00308,285000000.0,95000000.0,71000000.0,0.0216,0.0093,0.021,1.87e-08


In [80]:
if create_simbad_inayoshi_ned_flag:
    merged_ned_simbad.to_hdf('large_cat_simbad_inayoshi_ned.h5', 'df')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block4_values] [items->Index(['MAIN_ID_x', 'CAT_NAME', 'COORD_x', 'COORD_y', 'MAIN_ID_y',
       'NED Uncertainty u', 'NED Uncertainty g', 'NED Uncertainty r',
       'NED Uncertainty i', 'NED Uncertainty z',
       ...
       'NED Uncertainty 203.850 GHz', 'NED Uncertainty 163.088 GHz',
       'NED Uncertainty 81.551 GHz', 'NED Uncertainty [OII] 3727',
       'NED Uncertainty [NeIII] 3869 VIRUS-P',
       'NED Uncertainty [OIII] 5007 VIRUS-P', 'NED Uncertainty 890 microns',
       'NED Uncertainty 1.1 mm', 'NED Uncertainty 345 GHz',
       'NED Uncertainty 830 A'],
      dtype='object', length=531)]

  encoding=encoding,


In [156]:
if read_simbad_inayoshi_ned_flag:
    power_test = pd.read_hdf('large_cat_simbad_inayoshi_ned.h5', 'df')

In [None]:
# power_table = Table.from_pandas(power_test)

In [177]:
# len(np.unique(np.arange(18023)))

18023

In [None]:
# result_table_copy['1.4_GHz_(VLA)_Flux_Density', '1.4GHz_Flux_Density'].info(['attributes', 'stats'])

In [None]:
# for name in large_sample_names:
#     print(name)

IDs to be changed for use in NED

OLD                          NEW

SDSS J125507.61+463126.5     NVSS J125507+463128  
SDSS J160558.86+474300.1     2MASS J16055893+4742596  
SDSS J111036.32+481752.3     NVSS J111036+481753  
SDSS J163033.89+401209.6     SDSS J163033.90+401209.6

In [None]:
from sklearn.cluster import KMeans
from sklearn.cluster import SpectralClustering
from sklearn.cluster import OPTICS
np.random.seed(5)

In [None]:
estimator = KMeans(n_clusters=6)  # Best suited for straight boundaries
estimator.fit(data_z_L14[[0, 2], :].T)

In [None]:
# model_test = SpectralClustering(n_clusters=5, affinity='nearest_neighbors', assign_labels='kmeans')
model_test = OPTICS(min_samples=2)

In [None]:
model = SpectralClustering(n_clusters=2, affinity='nearest_neighbors', assign_labels='kmeans')

In [None]:
labels_km = estimator.labels_
labels_sc = model.fit_predict(data_z_L14[[0, 2], :].T)

In [None]:
np.sum(~np.isfinite(data_z_L14[2, :]))

In [None]:
labels_test = model_test.fit_predict(data_z_L14[[0, 2], :].T)

In [None]:
labels_test

In [None]:
np.shape(data_z_L14[[0, 2], :].T)

In [None]:
data_z_L14[0, :]

In [None]:
fig = plt.figure(figsize=(8,5))
ax1 = fig.add_subplot(1,1,1)

ax1.scatter(data_z_L14[0, :], data_z_L14[2, :], c=labels_km, edgecolor='k', s=50, cmap='inferno')
ax1.set_yscale('log')
ax1.set_ylim((1e23, 1e29))

In [None]:
fig = plt.figure(figsize=(8,5))
ax1 = fig.add_subplot(1,1,1)

ax1.scatter(data_z_L14[0, :], data_z_L14[2, :], c='k', edgecolor='k', s=50, cmap='inferno')
ax1.set_yscale('log')
ax1.set_ylim((1e23, 1e29))

In [None]:
fig = plt.figure(figsize=(8,5))
ax1 = fig.add_subplot(1,1,1)

ax1.scatter(large_sample_z, large_sample_L, c=labels_test, edgecolor='k', s=50, cmap='inferno')
ax1.set_yscale('log')