In [None]:
import numpy as np

import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams.update({'font.size': 12})

import pandas as pd

import seaborn as sns

from my_functions import *

import glob

from scipy.integrate import simpson

from astropy.cosmology import Planck18 as cosmo
import astropy.units as u

In [None]:
# Constants shared by the whole notebook.
w_lya = 1215.67  # Lya wavelength used to convert filter wavelengths into redshifts
# Per-filter quantities supplied by my_functions, one entry per filter
# (60 filters are requested for the FWHM).
nb_fwhm_Arr = nb_fwhm(range(60))
w_central = central_wavelength()

In [None]:
## Load QSO catalog
filename = ('/home/alberto/cosmos/JPAS_mocks_sep2021/'
    'JPAS_mocks_classification_01sep_model11/Fluxes/Qso_jpas_mock_flam_train.cat')

# Row order applied to the 60 filter columns of the catalogue (after the
# transpose below).
# NOTE(review): as written this is NOT a permutation of 0..59 -- the values
# 12, 28 and 43 end up twice (once among the first 56 slots and once among the
# last four), while 13, 30 and 46 never appear. Confirm against the catalogue
# column layout before trusting the last four rows.
my_filter_order = np.arange(60)
my_filter_order[[-4, -3, -2, -1]] = np.array([1, 12, 28, 43])
my_filter_order[1:-4] += 1
my_filter_order[12:-4] += 1
my_filter_order[28:-4] += 1
my_filter_order[43:-4] += 1

# Parse the catalogue once and slice it, instead of re-reading the same file
# three times. Integer `usecols` are returned in file order, so positions
# 0-59 hold file columns 2-61 (fluxes), 60-119 hold file columns 62-121
# (errors) and position 120 is file column 127 (redshift).
qso_cat = pd.read_csv(
    filename, sep=' ', usecols=list(range(2, 2 + 60 + 60)) + [127]
)
qso_flx = qso_cat.iloc[:, :60].to_numpy().T[my_filter_order]
qso_err = qso_cat.iloc[:, 60:120].to_numpy().T[my_filter_order]
qso_zspec = qso_cat.iloc[:, 120].to_numpy().reshape(-1, )
del qso_cat

# Randomly sample sources corresponding to 200 deg2
# idx = np.random.randint(0, 100000, 510 * 200)
idx = np.arange(100_000)
qso_flx = qso_flx[:, idx]
qso_err = qso_err[:, idx]
qso_zspec = qso_zspec[idx]

# Lya line measurements for the same sources; the EW is divided by (1 + z).
Lya_fts = pd.read_csv('csv/Lya_fts.csv')
EW_qso = np.abs(Lya_fts.LyaEW)[idx] / (qso_zspec + 1)

# Apply errors: perturb every flux by a Gaussian draw scaled by its error.
# The seed is fixed so the noise realisation is reproducible.
np.random.seed(22)
qso_flx += qso_err * np.random.normal(size=qso_err.shape)

In [None]:
## Load SF catalog

# Directory holding the SF mock; every file matching 'data*' is one chunk.
filename = '/home/alberto/almacen/Source_cats/LAE_10deg_z2-5/'
files = sorted(glob.glob(filename + 'data*'))
fi = [pd.read_csv(name) for name in files]

data = pd.concat(fi, axis=0, ignore_index=True)

# Convert the frame to an array once and slice it: columns 1-60 are used as
# fluxes and columns 61-120 as their errors (filters along the first axis
# after the transpose).
data_arr = data.to_numpy()
sf_flx = data_arr[:, 1 : 60 + 1].T
sf_err = data_arr[:, 60 + 1 : 120 + 1].T

# Perturb the fluxes with Gaussian noise scaled by the errors. This draws from
# the global NumPy RNG, so the realisation depends on the seed set earlier.
sf_flx += np.random.normal(size=(sf_err.shape)) * sf_err

EW_sf = data['EW0'].to_numpy()
sf_zspec = data['z'].to_numpy()

In [None]:
# Stack both populations: QSOs first, SF sources after them.
pm_flx = np.hstack((qso_flx, sf_flx))
pm_err = np.hstack((qso_err, sf_err))
zspec = np.concatenate((qso_zspec, sf_zspec))
EW_lya = np.concatenate((EW_qso, EW_sf))

N_sf = sf_flx.shape[1]
N_qso = qso_flx.shape[1]

# Luminosity distances in cm
qso_dL = cosmo.luminosity_distance(qso_zspec).to(u.cm).value
sf_dL = cosmo.luminosity_distance(sf_zspec).to(u.cm).value

# SF: the catalogue stores log10 of the line luminosity; derive the line flux.
sf_L = data['L_lya'].to_numpy()
sf_flambda = 10 ** sf_L / (4*np.pi * sf_dL **2)

# QSO: the table stores the line flux (scaled by 1e-17 here); derive log10 L.
# The rows are selected with `idx`, like EW_qso, so the fluxes stay aligned
# with qso_zspec / qso_dL even when `idx` is a subsample of the table.
qso_flambda = Lya_fts.LyaF.to_numpy()[idx] * 1e-17
qso_L = np.log10(qso_flambda * 4*np.pi * qso_dL ** 2)

L_lya = np.concatenate((qso_L, sf_L))
fline = np.concatenate((qso_flambda, sf_flambda))

# True for the first N_qso entries (the QSOs), False for the SF sources.
is_qso = np.concatenate((np.ones(N_qso), np.zeros(N_sf))).astype(bool)

In [None]:
# Release the per-population arrays: the combined QSO+SF arrays built in the
# previous cell (pm_flx, pm_err, zspec, EW_lya, L_lya, fline) replace them.
# NOTE(review): sf_dL is not in this list -- confirm whether it should be
# freed as well.
%xdel sf_flx
%xdel sf_err
%xdel qso_flx
%xdel qso_err
%xdel sf_zspec
%xdel qso_zspec
%xdel EW_sf
%xdel EW_qso
%xdel qso_dL
%xdel sf_L
%xdel qso_L
%xdel sf_flambda
%xdel qso_flambda

In [None]:
# Same value as assigned in the constants cell near the top of the notebook.
w_lya = 1215.67 # A
# One column of pm_flx per source.
N_sources = np.shape(pm_flx)[1]
N_sources

In [None]:
# Magnitude computed from the second-to-last filter row of the flux array.
mag = flux_to_mag(pm_flx[-2], w_central[-2])
# Undefined magnitudes (NaN) are replaced by the sentinel value 99.
undefined_mag = np.isnan(mag)
mag[undefined_mag] = 99.

In [None]:
# Lya search
cont_est_lya, cont_err_lya = estimate_continuum(pm_flx, pm_err, IGM_T_correct=True)
line = is_there_line(pm_flx, pm_err, cont_est_lya, cont_err_lya, 20)
lya_lines, lya_cont_lines = identify_lines(line, pm_flx, pm_err, first=True)

# Other lines
cont_est_other, cont_err_other = estimate_continuum(pm_flx, pm_err, IGM_T_correct=False)
line_other = is_there_line(pm_flx, pm_err, cont_est_other, cont_err_other,
    400, obs=True)
other_lines = identify_lines(line_other, pm_flx, pm_err)

# Compute z
z_Arr = np.zeros(N_sources)
z_Arr[np.where(np.array(lya_lines) != -1)] =\
    z_NB(np.array(lya_cont_lines)[np.where(np.array(lya_lines) != -1)])

nice_z = np.abs(z_Arr - zspec) < 0.12

%xdel cont_est_other
%xdel cont_err_other

In [None]:
# Selection limits
mag_min, mag_max = 17, 24  # open magnitude interval applied to `mag`
nb_min, nb_max = 5, 20     # first and last filter index considered

nbs_to_consider = np.arange(nb_min, nb_max + 1)

_lya_nb = np.array(lya_lines)
nb_cut = (_lya_nb >= nb_min) & (_lya_nb <= nb_max)

# Redshift window: Lya between the lower edge of filter nb_min and the upper
# edge of filter nb_max (central wavelength -/+ half the FWHM).
half_fwhm_lo = nb_fwhm_Arr[nb_min] * 0.5
half_fwhm_hi = nb_fwhm_Arr[nb_max] * 0.5
z_min = (w_central[nb_min] - half_fwhm_lo) / w_lya - 1
z_max = (w_central[nb_max] + half_fwhm_hi) / w_lya - 1

# Boolean cuts, one entry per source
z_cut = (z_Arr > z_min) & (z_Arr < z_max)
zspec_cut = (zspec > z_min) & (zspec < z_max)
ew_cut = EW_lya > 20
mag_cut = (mag > mag_min) & (mag < mag_max)

# Final selection: the nice_lya_select output restricted to the redshift and
# magnitude windows. `bl` is the extra output requested by give_bad_lines=True.
nice_lya, bl = nice_lya_select(
    lya_lines, other_lines, pm_flx, pm_err, cont_est_lya, z_Arr,
    give_bad_lines=True
)
nice_lya = nice_lya & z_cut & mag_cut

In [None]:
## Fractions QSO / SF

sel_good_z = nice_lya & nice_z
sel_bad_z = nice_lya & ~nice_z

# QSOs occupy the first N_qso positions, so a selected index below N_qso is a QSO.
good_qso = np.count_nonzero(np.flatnonzero(sel_good_z) < N_qso)
bad_qso = np.count_nonzero(np.flatnonzero(sel_bad_z) < N_qso)
N_sel = count_true(nice_lya)
N_sel_good = count_true(sel_good_z)

# QSO share among the good-z and the bad-z selected sources.
good_frac = good_qso / N_sel_good
bad_frac = bad_qso / (N_sel - N_sel_good)

# Fraction of selected sources with a good redshift.
purity = N_sel_good / N_sel

print(f'Good QSOs: {good_qso} | Bad QSOs: {bad_qso}')
print(f'Good SFs: {N_sel_good - good_qso} | Bad SFs: {N_sel - N_sel_good - bad_qso}')
print()
print('Good frac: {0:0.2f}'.format(good_frac))
print('Bad frac: {0:0.2f}'.format(bad_frac))
print()
print('Purity = {0:0.2f}'.format(purity))

In [None]:
def EW_err(fnb, fnb_err, fcont, fcont_err, z, z_err, fwhm):
    """Propagated uncertainty of EW = fwhm * (fnb - fcont) / fcont / (1 + z).

    The three inputs with errors (fnb, fcont, z) are treated as independent
    and their contributions are added in quadrature.

    Parameters
    ----------
    fnb, fnb_err : narrow-band flux and its error.
    fcont, fcont_err : continuum flux and its error.
    z, z_err : redshift and its error.
    fwhm : filter width.

    Returns
    -------
    The 1-sigma error on the EW; scalars or broadcastable arrays are accepted.
    """
    # dEW/dfnb = fwhm / fcont / (1 + z)
    e1 = fnb_err * fwhm / fcont / (1 + z)
    # dEW/dfcont = -fwhm * fnb / fcont**2 / (1 + z)
    # (the sign is irrelevant because the term is squared below)
    e2 = fcont_err * fwhm * fnb / (fcont ** 2 * (1 + z))
    # dEW/dz = -fwhm * (fnb - fcont) / fcont / (1 + z)**2
    e3 = z_err * fwhm * (fnb - fcont) / fcont / ((1 + z) ** 2)

    return (e1**2 + e2**2 + e3**2) ** 0.5

In [None]:
# First pass: F_bias is not passed here. The outputs of this call are the
# inputs to compute_corrections, and EW_L_NB is called again afterwards with
# F_bias=F_cor.
EW_nb_Arr, EW_nb_e, L_Arr, L_e_Arr, flambda, flambda_e = EW_L_NB(
    pm_flx, pm_err, cont_est_lya, cont_err_lya, z_Arr, lya_lines, nice_lya=nice_lya
)

def compute_corrections(flambda, fline, L_Arr):
    """Per-filter flux bias and luminosity scatter of the retrieved Lya line.

    For every filter index in the notebook-level `nbs_to_consider`, the
    sources with a good redshift (`nice_z`), selected in that filter
    (`lya_lines`) and inside the magnitude window (`mag_cut`) are used to
    compute:

    * F_cor[nb]    -- median of flambda / fline (infinite ratios ignored);
    * L_nb_err[nb] -- 84th minus 50th percentile of L_Arr - L_lya.

    Filters outside `nbs_to_consider` keep the default value 1 in both
    arrays. A filter with no usable source gets NaN.

    Parameters
    ----------
    flambda : retrieved line flux, one entry per source.
    fline : reference line flux, one entry per source.
    L_Arr : retrieved log-luminosity, one entry per source.

    Returns
    -------
    (F_cor, L_nb_err) : two arrays of length 60.
    """
    F_cor = np.ones(60)
    L_nb_err = np.ones(60)

    # The rest of the notebook wraps lya_lines in np.array before comparing.
    # Do the same here: comparing a plain list with an integer yields a single
    # False and would silently empty every mask.
    selected_nb = np.asarray(lya_lines)
    flux_ratio = flambda / fline

    for nb in nbs_to_consider:
        to_cor = nice_z & (selected_nb == nb) & mag_cut

        # Boolean indexing returns a copy, so the in-place edit is local.
        F_to_cor = flux_ratio[to_cor]
        F_to_cor[np.isinf(F_to_cor)] = np.nan
        F_cor[nb] = np.nanmedian(F_to_cor)

        L_to_cor = L_Arr[to_cor] - L_lya[to_cor]

        # Only the upper half of the distribution (84th - 50th) is used.
        L_percentiles = np.nanpercentile(L_to_cor, [16, 50, 84])
        L_nb_err[nb] = (L_percentiles[2] - L_percentiles[1])

    return F_cor, L_nb_err

# Derive the per-filter corrections from the first (uncorrected) pass.
F_cor, L_nb_err = compute_corrections(flambda, fline, L_Arr)

# Persist the per-filter luminosity scatter.
np.save('npy/L_nb_err.npy', L_nb_err)

# Second pass: identical call, now with the per-filter flux bias supplied.
# This overwrites all six outputs of the first pass.
EW_nb_Arr, EW_nb_e, L_Arr, L_e_Arr, flambda, flambda_e = EW_L_NB(
    pm_flx, pm_err, cont_est_lya, cont_err_lya, z_Arr, lya_lines, nice_lya=nice_lya,
    F_bias=F_cor
)

In [None]:
# Drop the photometry and Lya continuum arrays. No active code further down in
# the visible notebook reads them; the commented-out inspection cell that
# follows does reference pm_flx / pm_err and would need them restored.
%xdel pm_flx
%xdel pm_err
%xdel cont_est_lya
%xdel cont_err_lya

In [None]:
# for k, src in enumerate(np.where(~nice_lya & (L_lya > 45) & zspec_cut & mag_cut & z_cut)[0]):
#     if k == 15: break
#     fig = plt.figure(figsize=(8, 6))
#     ax = plot_JPAS_source(pm_flx[:, src], pm_err[:, src])
#     print(f'z_NB = {z_Arr[src]}')
#     print(f'zspec = {zspec[src]}')
#     print(bl[src])
    
#     ax.axvline(w_central[lya_lines[src]], label='Selected NB')
#     ax.axvline(w_lya * (1 + zspec[src]), ls='--', c='r')
#     ax.legend(fontsize=13)
#     plt.show()

In [None]:
# Selections shared by the three diagnostic histograms of this cell.
good_sel = nice_lya & z_cut & nice_z
bad_sel = nice_lya & z_cut & ~nice_z

#### 1) Magnitude distribution of the selected sources, good vs bad redshift

fig, ax = plt.subplots(figsize=(7, 6))

bins = np.linspace(18, 26, 20)

ax.hist(mag[good_sel], label='Good z', histtype='step', bins=bins)
ax.hist(mag[bad_sel], label='Bad z', histtype='step', bins=bins)

ax.legend(fontsize=15, loc=2)

ax.set_xlabel('r', fontsize=15)
ax.set_ylabel('N', fontsize=15)

plt.show()

#### 2) Retrieved luminosity of the selected sources, good vs bad redshift

fig, ax = plt.subplots(figsize=(7, 6))

L_bins = np.linspace(42.5, 46, 30)

goodh = L_Arr[good_sel]
badh = L_Arr[bad_sel]

ax.hist(goodh, label='Good z', histtype='step', bins=L_bins)
ax.hist(badh, label='Bad z', histtype='step', bins=L_bins)

ax.legend(fontsize=15)

# Raw strings: '\m' is an invalid escape sequence in a normal string literal.
ax.set_xlabel(r'log L$_\mathrm{line}$ retrieved', fontsize=15)
ax.set_ylabel('N', fontsize=15)

plt.show()

#### 3) Retrieved vs catalogue luminosity for the good-redshift selection

fig, ax = plt.subplots(figsize=(7, 6))

goodh_real = L_lya[good_sel]
allh_real = L_lya[zspec_cut & ew_cut & mag_cut]

ax.hist(goodh, label='Calc', histtype='step', bins=L_bins)
ax.hist(goodh_real, label='Real', histtype='step', bins=L_bins)
# ax.hist(allh_real, label='All', histtype='step', bins=L_bins)

ax.legend(fontsize=15)

ax.set_xlabel(r'log L$_\mathrm{line}$ retrieved', fontsize=15)
ax.set_ylabel('N', fontsize=15)

plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(7, 6))

# Real vs retrieved log L as contours of a 2D histogram, one contour set per
# population: QSOs in C0, SF sources in C1. Only selected sources with a good
# redshift are used.
L_edges = np.linspace(42, 47, 30)
for pop_mask, color in ((is_qso, 'C0'), (~is_qso, 'C1')):
    sel = pop_mask & nice_lya & nice_z
    Z, x, y = np.histogram2d(L_lya[sel], L_Arr[sel], bins=(L_edges, L_edges))
    # histogram2d puts its first argument along axis 0; contour expects it
    # along the columns, hence the transpose.
    CS = ax.contour(
        Z.T, extent=[x.min(), x.max(), y.min(), y.max()],
        levels=np.linspace(10, Z.max(), 5), colors=color
    )

# Identity line for reference
x = np.linspace(40, 48, 100)
ax.plot(x, x, linestyle='--', color='red', label='1:1')

# Raw strings: '\l' is an invalid escape sequence in a normal string literal.
ax.set_ylabel(r'Retrieved $\log L$', fontsize=15)
ax.set_xlabel(r'Real $\log L$', fontsize=15)

ax.set_ylim((42, 47))
ax.set_xlim((42, 47))

ax.legend(fontsize=15)

plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(7, 6))

# Luminosity bins: N_bins_1 bins over 43.2-44.25 followed by N_bins_2 wider
# bins over 44.25-45.5 (the shared edge at 44.25 is kept only once).
N_bins_1 = 6
N_bins_2 = 3
bins = np.concatenate((
    np.linspace(43.2, 44.25, N_bins_1 + 1),
    np.linspace(44.25, 45.5, N_bins_2 + 1)[1:]
))
bin_centers = [(bins[k] + bins[k + 1]) / 2 for k in range(len(bins) - 1)]

# Retrieved luminosities of the selected sources, split by redshift quality.
goodh = L_Arr[nice_lya & nice_z]
badh = L_Arr[nice_lya & ~nice_z]

hg, bg = np.histogram(goodh, bins=bins)
hb, _ = np.histogram(badh, bins=bins)

# Parameters passed to double_schechter in the luminosity-function cell below.
phistar1 = 3.33e-6
Lstar1 = 10 ** 44.65
alpha1 = -1.35
phistar2 = 10 ** -3.45
Lstar2 = 10 ** 42.93
alpha2 = -1.93

volume = z_volume(z_min, z_max, 200)

# Parent sample (true redshift, EW and magnitude cuts) on a fine, regular
# grid of catalogue luminosities.
bins2 = np.linspace(42, 46, 30)
b_c2 = [(bins2[i] + bins2[i + 1]) * 0.5 for i in range(len(bins2) - 1)]
bw2 = bins2[1] - bins2[0]
h_qso, _ = np.histogram(L_lya[is_qso & zspec_cut & ew_cut & mag_cut], bins2)
h_sf, _ = np.histogram(L_lya[~is_qso & zspec_cut & ew_cut & mag_cut], bins2)

b_c = [0.5 * (bins[i] + bins[i + 1]) for i in range(len(bins) - 1)]
bw = [bins[i + 1] - bins[i] for i in range(len(bins) - 1)]

# Parent counts expected in each coarse bin: integrate the fine-grid density
# (counts per unit log L, linearly interpolated) across the bin.
parent_density = (h_qso + h_sf) / bw2
totals = []
for lo_edge, hi_edge in zip(bins[:-1], bins[1:]):
    Lx = np.linspace(lo_edge, hi_edge, 100)
    totals.append(simpson(np.interp(Lx, b_c2, parent_density), x=Lx))

totals = np.array(totals)

ax.plot(b_c, hg / totals, marker='s', label='Completeness')
ax.plot(b_c, hg / (hg + hb), marker='s', label='Purity')
# ax.step(bin_centers, hg / (hg + hb) / (hg / totals))

# Raw string: '\l' is an invalid escape sequence in a normal string literal.
ax.set_xlabel(r'$\log L$', fontsize=15)

ax.set_xlim((43.2, 45.5))
ax.set_ylim((0, 1))
ax.legend(fontsize=15)

plt.show()

# Purity / completeness per bin. Empty bins give NaN or inf here; they are
# zeroed in a later cell.
correct = hg / (hg + hb) / (hg / totals) 

In [None]:
def make_2d_puricomp(L_Arr, L_lya, mag):
    """Plot and save purity / completeness maps in the (log L, magnitude) plane.

    Three samples are binned, using the notebook-level masks `nice_lya`,
    `nice_z`, `zspec_cut`, `mag_cut` and `ew_cut`:

    * good     -- selected sources with a good redshift, binned by L_Arr;
    * selected -- all selected sources, binned by L_Arr;
    * parent   -- sources passing the zspec, magnitude and EW cuts, binned
                  by L_lya.

    Purity is good / selected and completeness is good / parent, both as 2D
    maps (top row, sharing one colour bar) and as 1D curves against log L
    (bottom row). The 2D maps and both sets of bin edges are written to
    'npy/'.

    Parameters
    ----------
    L_Arr : retrieved log-luminosity per source.
    L_lya : catalogue log-luminosity per source.
    mag : magnitude per source.

    Returns
    -------
    (puri2d, comp2d) : 2D arrays with luminosity along the first axis and
    magnitude along the second; empty cells contain NaN.
    """
    # Panel geometry in figure-fraction units
    panel_w, map_h, curve_h = 1., 1., 0.7
    gap, cbar_w = 0.1, 0.06
    top_y = curve_h + gap
    right_x = panel_w + gap

    fig = plt.figure(figsize=(7, 6))
    ax0 = fig.add_axes([0, top_y, panel_w, map_h])
    ax1 = fig.add_axes([right_x, top_y, panel_w, map_h])
    axc = fig.add_axes([2 * panel_w + 1.5 * gap, top_y, cbar_w, map_h])
    ax2 = fig.add_axes([0, 0, panel_w, curve_h])
    ax3 = fig.add_axes([right_x, 0, panel_w, curve_h])

    # Binning
    L_bins = np.linspace(42.5, 46, 20)
    r_bins = np.linspace(mag_min, mag_max, 20)
    L_bins_c = 0.5 * (L_bins[:-1] + L_bins[1:])
    edges_2d = [L_bins, r_bins]

    # Samples
    good = nice_lya & nice_z
    parent = zspec_cut & mag_cut & ew_cut

    h2d_nice = np.histogram2d(L_Arr[good], mag[good], bins=edges_2d)[0]
    h2d_parent = np.histogram2d(L_lya[parent], mag[parent], bins=edges_2d)[0]
    h2d_sel = np.histogram2d(L_Arr[nice_lya], mag[nice_lya], bins=edges_2d)[0]

    h1d_nice = np.histogram(L_Arr[good], bins=L_bins)[0]
    h1d_parent = np.histogram(L_lya[parent], bins=L_bins)[0]
    h1d_sel = np.histogram(L_Arr[nice_lya], bins=L_bins)[0]

    # Ratios; only the 1D curves have their NaNs replaced by zero.
    puri2d = h2d_nice / h2d_sel
    comp2d = h2d_nice / h2d_parent
    puri1d = h1d_nice / h1d_sel
    comp1d = h1d_nice / h1d_parent
    for curve in (puri1d, comp1d):
        curve[np.isnan(curve)] = 0.

    # Maps (transposed so that luminosity runs along x) and curves
    cmap = 'Spectral'
    sns.heatmap(puri2d.T, ax=ax0, vmin=0, vmax=1, cbar_ax=axc, cmap=cmap)
    sns.heatmap(comp2d.T, ax=ax1, vmin=0, vmax=1, cbar=False, cmap=cmap)

    ax2.plot(L_bins_c, puri1d, marker='s')
    ax3.plot(L_bins_c, comp1d, marker='s')

    ### TICKS
    # One tick per bin edge on the heatmaps, labelled with the edge value.
    xticks = range(len(L_bins))
    yticks = range(len(r_bins))
    xtick_labels = ['{0:0.1f}'.format(edge) for edge in L_bins]
    ytick_labels = ['{0:0.1f}'.format(edge) for edge in r_bins]

    for map_ax in (ax0, ax1):
        map_ax.set_yticks(yticks)
        map_ax.set_yticklabels(ytick_labels, rotation='horizontal')
        map_ax.set_xticks(xticks)
        map_ax.set_xticklabels(xtick_labels, rotation='vertical')
        map_ax.yaxis.set_ticks_position('both')
        map_ax.xaxis.set_ticks_position('both')
        map_ax.tick_params(axis='y', direction='in', labelsize=14)
        map_ax.tick_params(axis='x', direction='in', labelsize=14)

    for other_ax in (axc, ax2, ax3):
        other_ax.tick_params(labelsize=14)

    ## Spines
    for map_ax in (ax0, ax1):
        map_ax.spines[:].set_visible(True)

    ## Axis lims
    for curve_ax in (ax2, ax3):
        curve_ax.set_ylim((0, 1))

    ## Titles
    ax0.set_title('Purity', fontsize=25)
    ax1.set_title('Completeness', fontsize=25)

    plt.show()

    # Persist the maps together with the bin edges needed to interpret them.
    np.save('npy/puri2d.npy', puri2d)
    np.save('npy/comp2d.npy', comp2d)
    np.save('npy/puricomp2d_L_bins.npy', L_bins)
    np.save('npy/puricomp2d_r_bins.npy', r_bins)

    return puri2d, comp2d
# Build the purity / completeness maps; the call also shows the figure and
# writes four .npy files under 'npy/'.
puri2d, comp2d = make_2d_puricomp(L_Arr, L_lya, mag)

In [None]:
# Bins where the correction is undefined (NaN or +/-inf) get a factor of zero.
correct[~np.isfinite(correct)] = 0.
correct

In [None]:
# Accumulate the luminosity histogram of the selected sources filter by filter.
# NOTE: this cell needs `bins`, `b_c`, `bw` and `correct` from the
# purity/completeness cell. The mock-LF cell further down rebinds b_c and bw,
# so re-run that earlier cell before re-running this one.
total_hist = np.zeros(N_bins_1 + N_bins_2)

lya_lines_arr = np.array(lya_lines)
for nb_c in range(56):
    LAE_src = (lya_lines_arr == nb_c)
    LF_hist, _ = np.histogram(L_Arr[nice_lya & LAE_src], bins=bins)

    total_hist += LF_hist
volume = z_volume(z_min, z_max, 200)

fig, ax = plt.subplots(figsize=(9, 7))

# Number density per unit log L, with sqrt(N) error bars.
phi_uncorrected = total_hist / volume / bw
ax.errorbar(b_c, phi_uncorrected,
    yerr=total_hist ** 0.5 / volume / bw,
    marker='.', linestyle='', markersize=15, label='Uncorrected')
# NOTE(review): the corrected error bar is sqrt(N * correct); if `correct` is
# meant as a plain scaling factor, the propagated error would be
# sqrt(N) * correct -- confirm which is intended.
ax.errorbar(b_c, phi_uncorrected * correct,
    yerr=(total_hist * correct) ** 0.5 / volume / bw,
    marker='.', linestyle='', markersize=15, label='Corrected')

# Reference curve: double_schechter multiplied by L * ln(10) and drawn against
# log L. The grid is uniform in log L so the curve is evenly sampled on this
# axis; a grid linear in L leaves only a handful of points below log L = 43.
Lx = 10 ** np.linspace(42, 45.5, 1000)
Phi = double_schechter(
                Lx, phistar2, Lstar2, alpha2, phistar1, Lstar1, alpha1
            ) * Lx * np.log(10)

ax.plot(np.log10(Lx), Phi)

ax.set_ylim(1e-8, 1e-4)
ax.set_xlim(42.75, 45.5)
ax.set_yscale('log')

ax.legend(fontsize=15)

# Raw string: '\P' and '\D' are invalid escape sequences in a normal literal.
ax.set_xlabel('log L', fontsize=15)
ax.set_ylabel(r'$\Phi$ [Mpc$^{-3}$ $\Delta$ log L $^{-1}$]', fontsize=15)

plt.show()

In [None]:
# Among the sources NOT selected by nice_lya: number with a good redshift
# divided by number with a bad redshift.
count_true(~nice_lya & nice_z) / count_true(~nice_lya & ~nice_z)

In [None]:
fig, ax = plt.subplots(figsize=(9, 7))

# Catalogue luminosity function of both mock populations (true redshift and
# EW cuts only), compared with the reference curve (Lx, Phi) and using the
# `volume` computed in the previous luminosity-function cell.
# NOTE: b_c and bw are rebound here to the regular 29-bin grid. Cells that
# expect the coarse purity/completeness bins must be re-run after their
# defining cell, not after this one.
bins2 = np.linspace(42, 46, 30)
b_c = [(bins2[i] + bins2[i + 1]) * 0.5 for i in range(len(bins2) - 1)]
bw = bins2[1] - bins2[0]
parent = zspec_cut & ew_cut
h_qso, _ = np.histogram(L_lya[is_qso & parent], bins2)
h_sf, _ = np.histogram(L_lya[~is_qso & parent], bins2)

ax.plot(b_c, h_qso / bw / volume, ls='', marker='s', label='Mock QSO')
ax.plot(b_c, h_sf / bw / volume, ls='', marker='s', label='Mock SF')
# ax.plot(b_c, (h_sf + h_qso) / bw / volume, ls='', marker='s', label='Combined',
#     color='dimgray', alpha=0.5)
ax.plot(np.log10(Lx), Phi, label='Theoretical')

# Raw string: '\P' and '\D' are invalid escape sequences in a normal literal.
# The unit now reads "Delta log L", matching the previous figure.
ax.set_xlabel('log L', fontsize=15)
ax.set_ylabel(r'$\Phi$ [Mpc$^{-3}$ $\Delta$ log L $^{-1}$]', fontsize=15)

ax.legend(fontsize=15)

ax.set_ylim(1e-8, 1e-4)
ax.set_xlim(42.5, 45.5)
ax.set_yscale('log')

plt.show()