# Using Bokeh for displaying Radio Galaxies data

In [1]:
# %matplotlib inline
# Static plots
#%matplotlib ipympl
# Interactive plots
import numpy as np
# import matplotlib.cm as cm
# import matplotlib.pyplot as plt
# import matplotlib.colors as mcolors
# import matplotlib.patheffects as mpe
# from matplotlib.ticker import FuncFormatter
# from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from astropy import units as u
from astropy.visualization import hist
from astropy.coordinates import SkyCoord
from astropy.cosmology import FlatLambdaCDM
from astropy.table import Table, Column, MaskedColumn, vstack, hstack
import getpass
import progressbar
import pandas as pd
import re
import missingno as msno
import itertools
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, Whisker, LogTicker, ColorBar, LogColorMapper
from bokeh.palettes import Inferno256
from bokeh.transform import factor_cmap, factor_mark, log_cmap, linear_cmap

In [2]:
# Candidate radio spectral indices (alpha) taken from the literature.
# One of them is selected as `alpha_used` and enters the
# (1 + z)**(alpha - 1) factor in `lum_from_flux_radio`.
# NOTE(review): the sign convention (presumably S_nu ~ nu**-alpha) is not
# stated in this notebook -- confirm against the cited papers.
alpha_first = 0.5  # From FIRST data (Bornancini+2010)
alpha_RG    = 1.0  # For radio galaxies (Verkhodanov & Khabibullina, 2010)
alpha_alex  = 0.8  # Star-forming galaxies (Alexander+2003)
alpha_smol  = 0.7  # Mean value from VLA-COSMOS 3GHz sample (Smolčić et al. 2017)
alpha_butl  = 0.75  # From Butler et al., 2018

Choose which of the spectral indices defined above is used for the K-correction of the radio luminosities.

In [3]:
# Spectral index read (as a global) by `lum_from_flux_radio`.
alpha_used  = alpha_butl

We define the cosmological properties to calculate luminosity distances and other quantities.

In [4]:
# Flat Lambda-CDM cosmology with H0 = 70 and Omega_m = 0.3; used by the
# luminosity helpers below through `cosmo.luminosity_distance`.
# NOTE(review): a bare number for H0 is presumably read as km/s/Mpc -- confirm.
cosmo       = FlatLambdaCDM(H0=70, Om0=0.3)

In [5]:
def lum_from_flux_radio(flux, redshift, alpha=None):  # Flux in mJy
    """Convert observed radio flux densities into K-corrected luminosities.

    Parameters
    ----------
    flux : scalar or array-like
        Observed flux density, in mJy.
    redshift : scalar or array-like
        Redshift(s) of the source(s); passed to ``cosmo.luminosity_distance``.
    alpha : float, optional
        Spectral index used in the ``(1 + z)**(alpha - 1)`` K-correction term.
        When omitted, the notebook-level ``alpha_used`` is read at call time,
        which is the behaviour this function always had.

    Returns
    -------
    Luminosity in W/Hz, with the same shape as the broadcast inputs.
    """
    if alpha is None:
        alpha = alpha_used
    lum_distance = cosmo.luminosity_distance(redshift).to(u.m).value  # in m
    # 1e-3 converts mJy -> Jy and 1e-26 converts Jy -> W m^-2 Hz^-1.
    luminosity   = 4 * np.pi * lum_distance**2 * flux * 1e-3  * 1e-26 * (1 + redshift)**(alpha - 1)  # in W/Hz
    return luminosity

In [6]:
def lum_from_flux(flux, redshift):  # Flux in mJy, general version, without K-correction
    """Convert flux densities (mJy) into luminosities (W/Hz), no K-correction.

    The luminosity distance is obtained from the notebook-level ``cosmo``
    object and expressed in metres before being used.
    """
    distance_m  = cosmo.luminosity_distance(redshift).to(u.m).value
    sphere_area = 4 * np.pi * distance_m**2  # in m^2
    # 1e-3 converts mJy -> Jy and 1e-26 converts Jy -> W m^-2 Hz^-1.
    return sphere_area * flux * 1e-3 * 1e-26

---

## Reading data

In [7]:
# Login name of the current user; only needed by the absolute-path variant
# of `cat_path` that is kept (commented out) below.
machine  = getpass.getuser()
# cat_path = '/home/' + machine + '/Documentos/Data/'
# Prefix prepended to every catalogue file name when reading.
cat_path = ''  # relative path to the same directory

We import our data using the `pandas` package.  

We separate our data into luminosities and redshift values (column name `z_val`).

It is important to keep in mind that columns with luminosity values start their names with `Luminosity...`  
and they are in `W / Hz`.  

Uncertainties are not included in this file. They are available, as flux uncertainties, in the file `large_cat_simbad_inayoshi_ned_jun2020.h5`, which is read further below.

In [8]:
# HDF5 file holding the main catalogue; it is read with key 'df'.
file_name        = 'large_cat_flux_z_clean_jun2020.h5'

In [9]:
# Read the catalogue from disk once. Everything below is derived from this
# single frame instead of re-reading the HDF5 file for every column.
full_catalog_df  = pd.read_hdf(cat_path + file_name, 'df')  # All data

# Same table without the redshift, origin, coordinate and name columns.
non_measurement_columns = ['z_val', 'origin', 'RA', 'DEC', 'name_init', 'name_new']
large_catalog_df = full_catalog_df.drop(columns=non_measurement_columns)

# Independent copies, so later edits to `full_catalog_df` cannot leak into
# these Series (and vice versa), exactly as when each was read separately.
redshift_df      = full_catalog_df['z_val'].copy()
ras_df           = full_catalog_df['RA'].copy()
decs_df          = full_catalog_df['DEC'].copy()
orig_names_df    = full_catalog_df['name_init'].copy()
new_names_df     = full_catalog_df['name_new'].copy()
# 0:SDSS+FIRST, 1:COSMOS, 2:RADIO, 3:Stripe82, 4:Inayoshi
origin_df        = full_catalog_df['origin'].copy()

Since the `RADIO` catalogue (origin code `2`) is a compilation of measurements from different sources,  
we will not use its entries for our calculations.

In [10]:
# Read the ancillary catalogue once and pick the needed columns from it,
# rather than loading the same HDF5 file four times.
ancillary_df   = pd.read_hdf(cat_path + 'large_cat_simbad_inayoshi_ned_jun2020.h5', 'df')
F_20cm         = ancillary_df['F_20CM'].copy()
F_20cm_e       = ancillary_df['F_20CM_ERR'].copy()
# NOTE(review): the column is named L_20CM_UP_LIM although it is stored under
# an F_ (flux) variable name; it is later negated and used as a mask.
F_20cm_up_l    = ancillary_df['L_20CM_UP_LIM'].copy()
redshift_df_e  = ancillary_df['Z_OWN_ERR'].copy()

In [11]:
# K-corrected luminosities from the 'Flux 1.4-1.7 GHz' column.
lums_20cm   = lum_from_flux_radio(full_catalog_df['Flux 1.4-1.7 GHz'], redshift_df)
# Luminosity error: the relative flux error (F_20cm_e / F_20cm) is applied to
# |L|. The redshift uncertainty is not propagated here.
lums_20cm_e = np.abs(lums_20cm) * F_20cm_e / F_20cm

In [12]:
# Show which origin codes are present in the catalogue.
origin_df.unique()

array([0., 2., 4., 3., 1.])

In [13]:
# Attach the derived quantities to the main table, one column at a time.
# 'origin_radius' is the per-point marker size handed to `p.scatter`.
derived_columns = {
    'Luminosity 1.4-1.7 GHz':       lums_20cm,
    'Luminosity 1.4-1.7 GHz error': lums_20cm_e,
    'z_error':                      redshift_df_e,
    'origin_radius':                (origin_df + 2) * 6,
}
for column_name, column_values in derived_columns.items():
    full_catalog_df[column_name] = column_values

In [14]:
# Rows where the upper-limit flag is not set.
not_upper_limit = np.array(~F_20cm_up_l)
# The product of the three quantities is finite only when each one is finite,
# so a single isfinite() test covers redshift, luminosity and its error.
common_vals_a = np.multiply(redshift_df, lums_20cm)
common_vals_b = np.multiply(common_vals_a, lums_20cm_e)
common_filt   = np.isfinite(common_vals_b) & not_upper_limit

In [15]:
# Keep origins 0, 1, 3 and 4; origin 2 is left out.
mask_origin      = np.array(origin_df.isin([0, 1, 3, 4]))
# Build the combined selection once and reuse it for every object.
selection        = mask_origin & common_filt

# The two DataFrames are modified later on (new columns, masked values), so
# take explicit copies: this keeps them independent of the unfiltered frames
# and avoids pandas' SettingWithCopyWarning on those assignments.
full_catalog_df  = full_catalog_df.loc[selection].copy()
large_catalog_df = large_catalog_df.loc[selection].copy()
redshift_df      = redshift_df.loc[selection]
ras_df           = ras_df.loc[selection]
decs_df          = decs_df.loc[selection]
orig_names_df    = orig_names_df.loc[selection]
new_names_df     = new_names_df.loc[selection]
origin_df        = origin_df.loc[selection]

Create columns with values plus and minus errors for plotting.

In [16]:
# (value column, error column, name of upper bound, name of lower bound)
error_bar_columns = (
    ('z_val', 'z_error', 'z_err_plus', 'z_err_minus'),
    ('Luminosity 1.4-1.7 GHz', 'Luminosity 1.4-1.7 GHz error',
     'Lum 1.4-1.7 GHz err_plus', 'Lum 1.4-1.7 GHz err_minus'),
)
for value_col, error_col, upper_col, lower_col in error_bar_columns:
    full_catalog_df[upper_col] = full_catalog_df[value_col] + full_catalog_df[error_col]
    full_catalog_df[lower_col] = full_catalog_df[value_col] - full_catalog_df[error_col]

Missing values have been replaced with the number $-99.0$ which, in the context of our dataset,  
is a very unlikely quantity to be found.

If needed (not for the moment) these values can be masked and treated as proper missing values.

In [17]:
# Names of all columns of `full_catalog_df` that have a numeric dtype.
numeric_col_names = full_catalog_df.select_dtypes('number').columns

In [18]:
# Turn every non-positive entry into NaN. This also removes the -99.0
# missing-value sentinel (since -99.0 <= 0), so one pass replaces the two
# separate in-place assignments. `mask` returns a new frame, which avoids
# writing into a filtered slice.
large_catalog_df = large_catalog_df.mask(large_catalog_df <= 0.0)

In [19]:
# Copy the cleaned columns of `large_catalog_df` back into the main table,
# overwriting the columns of the same name.
full_catalog_df[large_catalog_df.columns] = large_catalog_df

In [20]:
# Range of the column that drives the colour scale of the plot.
colour_values = full_catalog_df['Flux 3-8 microns']
vmin = colour_values.min()
vmax = colour_values.max()

In [21]:
# Bokeh data source built from the full table; the glyphs, whiskers and
# tooltips below refer to its columns by name.
source = ColumnDataSource(full_catalog_df)

In [22]:
# Hover tooltips as (label, template) pairs; each '@name' refers to the
# column `name` of the plot's data source.
TOOLTIPS = [
    ('z', '@z_val'),
    ('(R.A., DEC.)', '(@RA, @DEC)'),
    ('Name', '@name_init'),
    ('Origin', '@origin')
]

In [23]:
# Reversed Inferno palette, shared by the glyph colouring and the colour bar.
reversed_inferno = Inferno256[::-1]
# Field-based log colour mapping for glyphs; NaN values are drawn in white.
log_cmap_flux    = log_cmap(
    'Flux 3-8 microns',
    reversed_inferno,
    low=vmin,
    high=vmax,
    nan_color='white',
)
# Stand-alone mapper with the same palette and limits, for the ColorBar.
log_color_mapper = LogColorMapper(palette=reversed_inferno, low=vmin, high=vmax)

In [24]:
# Redshift on a linear x axis, luminosity on a logarithmic y axis.
# NOTE(review): `plot_width` / `plot_height` were replaced by `width` /
# `height` in newer Bokeh releases -- confirm against the installed version.
p = figure(
    x_range=(-0.001, 10),
    y_range=(1e23, 1e29),
    y_axis_type='log',
    plot_width=900,
    plot_height=600,
    tooltips=TOOLTIPS,
)

In [25]:
# One marker per source: colour from the log colour map, size from the
# 'origin_radius' column.
p.scatter(
    x='z_val',
    y='Luminosity 1.4-1.7 GHz',
    source=source,
    size='origin_radius',
    fill_color=log_cmap_flux,
    fill_alpha=0.75,
    line_color='black',
    line_width=0.75,
)

p.xaxis.axis_label = 'Redshift'
p.yaxis.axis_label = 'Luminosity 1.4-1.7 GHz'

# Same text styling for both axes.
for axis in (p.xaxis, p.yaxis):
    axis.axis_label_text_font_style = 'normal'
    axis.axis_label_text_font_size  = '14px'
    axis.major_label_text_font_size = '14px'

In [26]:
# Error bars: vertical whiskers for the luminosity, horizontal for redshift.
whisker_specs = (
    dict(base='z_val', dimension='height',
         upper='Lum 1.4-1.7 GHz err_plus', lower='Lum 1.4-1.7 GHz err_minus'),
    dict(base='Luminosity 1.4-1.7 GHz', dimension='width',
         upper='z_err_plus', lower='z_err_minus'),
)
for whisker_spec in whisker_specs:
    p.add_layout(Whisker(source=source, line_color=log_cmap_flux, **whisker_spec))

In [27]:
# Colour bar for the log colour scale, placed to the right of the plot.
color_bar = ColorBar(
    color_mapper=log_color_mapper,
    ticker=LogTicker(),
    label_standoff=12,
    border_line_color=None,
    location=(0, 0),
)
p.add_layout(color_bar, 'right')

In [28]:
# Send Bokeh output to the notebook, then display the figure.
output_notebook()
show(p)