In [None]:
import numpy as np

import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams.update({'font.size': 12})

import pandas as pd

from my_functions import *

import glob
import gc

from scipy.integrate import simpson
from scipy.special import erf

from astropy.cosmology import Planck18 as cosmo
import astropy.units as u

In [None]:
# Rest-frame Lya wavelength (Angstrom).
w_lya = 1215.67

# Per-filter quantities returned by the my_functions helpers:
# central wavelengths, and FWHM for filter indices 0..59.
nb_fwhm_Arr = nb_fwhm(range(60))
w_central = central_wavelength()

In [None]:
## Load QSO catalog
filename = ('/home/alberto/cosmos/JPAS_mocks_sep2021/'
    'JPAS_mocks_classification_01sep_model11/Fluxes/Qso_jpas_mock_flam_train.cat')

# Row permutation applied to the 60 flux/error columns of the catalog file.
# The last four entries are set to file columns 1, 12, 28, 43; the first 56
# entries are the remaining indices shifted by the in-place increments below.
# NOTE(review): the array built here is
#   [0, 2..12, 14..29, 31..45, 47..59, 1, 12, 28, 43]
# i.e. file columns 12, 28 and 43 are selected twice and columns 13, 30 and 46
# are never selected. Either the appended list or the slice starts look
# inconsistent -- confirm against the catalog's column layout before trusting
# the filter ordering. Behaviour is intentionally left unchanged here.
my_filter_order = np.arange(60)
my_filter_order[[-4, -3, -2, -1]] = np.array([1, 12, 28, 43])
my_filter_order[1:-4] += 1
my_filter_order[12:-4] += 1
my_filter_order[28:-4] += 1
my_filter_order[43:-4] += 1

# Parse the file once instead of three times. With integer `usecols` pandas
# returns the columns in file order, so the resulting frame holds
# 60 flux columns, then 60 error columns, then the redshift column (file
# column 127).
qso_cat = pd.read_csv(
    filename, sep=' ', usecols=list(range(2, 2 + 60 + 60)) + [127]
)
qso_flx = qso_cat.iloc[:, :60].to_numpy().T[my_filter_order]
qso_err = qso_cat.iloc[:, 60:60 + 60].to_numpy().T[my_filter_order]
qso_zspec = qso_cat.iloc[:, 60 + 60].to_numpy().reshape(-1, )
del qso_cat

# Source subset. Currently the first 100 000 sources in catalog order; the
# commented line is the random draw corresponding to 200 deg2.
# idx = np.random.randint(0, 100000, 510 * 200)
idx = np.arange(100_000)
qso_flx = qso_flx[:, idx]
qso_err = qso_err[:, idx]
qso_zspec = qso_zspec[idx]

# Lya line features of the QSOs. The catalog EW is divided by (1 + z);
# presumably this converts an observed-frame EW to rest frame -- TODO confirm.
# Positional (numpy) indexing keeps this aligned with `idx` regardless of the
# DataFrame index.
Lya_fts = pd.read_csv('csv/Lya_fts.csv')
EW_qso = np.abs(Lya_fts.LyaEW.to_numpy())[idx] / (qso_zspec + 1)

# Apply errors: perturb every flux with Gaussian noise scaled by its error.
# The seed is fixed so that the realisation is reproducible.
np.random.seed(22)
qso_flx += qso_err * np.random.normal(size=qso_err.shape)

In [None]:
## Load SF catalog

# `filename` is the directory of the SF mock; each file matching 'data*'
# is one chunk of the catalog and all chunks are stacked row-wise.
filename = '/home/alberto/almacen/Source_cats/LAE_10deg_z2-5/'
files = sorted(glob.glob(filename + 'data*'))
fi = [pd.read_csv(name) for name in files]

data = pd.concat(fi, axis=0, ignore_index=True)

# Columns 1..60 hold the fluxes and columns 61..120 the errors; both are
# transposed to (filter, source).
sf_table = data.to_numpy()
sf_flx = sf_table[:, 1 : 60 + 1].T
sf_err = sf_table[:, 60 + 1 : 120 + 1].T

# Perturb the fluxes with Gaussian noise scaled by the per-filter errors.
sf_flx += sf_err * np.random.normal(size=sf_err.shape)

# Catalog columns used later: 'EW0' and the redshift 'z'.
EW_sf = data['EW0'].to_numpy()
sf_zspec = data['z'].to_numpy()

In [None]:
# Stack both populations along the source axis: QSOs first, then SF sources.
pm_flx = np.hstack((qso_flx, sf_flx))
pm_err = np.hstack((qso_err, sf_err))
zspec = np.concatenate((qso_zspec, sf_zspec))
EW_lya = np.concatenate((EW_qso, EW_sf))

N_sf = sf_flx.shape[1]
N_qso = qso_flx.shape[1]

# Luminosity distances in cm from the Planck18 cosmology.
qso_dL = cosmo.luminosity_distance(qso_zspec).to(u.cm).value
sf_dL = cosmo.luminosity_distance(sf_zspec).to(u.cm).value

# The SF catalog stores log10 of the Lya luminosity; convert it to a line flux.
sf_L = data['L_lya'].to_numpy()
sf_flambda = 10 ** sf_L / (4*np.pi * sf_dL **2)

# The QSO catalog stores the line flux (scaled by 1e-17 here; presumably the
# catalog unit is 1e-17 erg/s/cm2 -- TODO confirm). It must be restricted to
# the same `idx` subset that was applied to the QSO fluxes, EW and redshifts,
# otherwise the arrays are misaligned whenever `idx` is not the identity.
qso_flambda = Lya_fts.LyaF.to_numpy()[idx] * 1e-17
qso_L = np.log10(qso_flambda * 4*np.pi * qso_dL ** 2)

L_lya = np.concatenate((qso_L, sf_L))
fline = np.concatenate((qso_flambda, sf_flambda))

# Boolean flag per source: True for the first N_qso entries (the QSOs).
is_qso = np.arange(N_qso + N_sf) < N_qso

In [None]:
# Free the per-population arrays now that the combined arrays (pm_flx, pm_err,
# zspec, EW_lya, L_lya, fline) have been built. %xdel deletes the variable and
# tries to clear IPython's own references to it.
%xdel sf_flx
%xdel sf_err
%xdel qso_flx
%xdel qso_err
%xdel sf_zspec
%xdel qso_zspec
%xdel EW_sf
%xdel EW_qso
%xdel qso_dL
%xdel sf_L
%xdel qso_L
%xdel sf_flambda
%xdel qso_flambda

In [None]:
# Lya rest-frame wavelength [A]
w_lya = 1215.67

# Number of sources = length of the second axis of the combined flux array.
N_sources = np.shape(pm_flx)[1]
N_sources

In [None]:
# Magnitude computed from the second-to-last filter row of pm_flx.
mag = flux_to_mag(pm_flx[-2], w_central[-2])

# Undefined magnitudes (NaN) are replaced by the sentinel value 99.
mag_is_nan = np.isnan(mag)
mag[mag_is_nan] = 99.

In [None]:
# Lya search
cont_est_lya, cont_err_lya = estimate_continuum(pm_flx, pm_err, IGM_T_correct=True)
line = is_there_line(pm_flx, pm_err, cont_est_lya, cont_err_lya, 20)
lya_lines, lya_cont_lines = identify_lines(line, pm_flx, pm_err, first=True)

# Other lines
cont_est_other, cont_err_other = estimate_continuum(pm_flx, pm_err, IGM_T_correct=False)
line_other = is_there_line(pm_flx, pm_err, cont_est_other, cont_err_other,
    400, obs=True)
other_lines = identify_lines(line_other, pm_flx, pm_err)

# Compute z
z_Arr = np.zeros(N_sources)
z_Arr[np.where(np.array(lya_lines) != -1)] =\
    z_NB(np.array(lya_cont_lines)[np.where(np.array(lya_lines) != -1)])

nice_z = np.abs(z_Arr - zspec) < 0.12

%xdel cont_est_other
%xdel cont_err_other

In [None]:
# Magnitude window of the selection.
mag_min, mag_max = 17, 30

# Range of filter indices in which a Lya detection is accepted (inclusive).
nb_min, nb_max = 5, 20
nbs_to_consider = np.arange(nb_min, nb_max + 1)

nb_cut = (nb_min <= np.array(lya_lines)) & (nb_max >= np.array(lya_lines))

# Redshift interval spanned by those filters: Lya observed at the blue edge
# of filter nb_min up to the red edge of filter nb_max (centre -/+ FWHM / 2).
z_min = (w_central[nb_min] - 0.5 * nb_fwhm_Arr[nb_min]) / w_lya - 1
z_max = (w_central[nb_max] + 0.5 * nb_fwhm_Arr[nb_max]) / w_lya - 1

z_cut = (z_Arr > z_min) & (z_Arr < z_max)
zspec_cut = (zspec > z_min) & (zspec < z_max)

# Candidate selection from my_functions, then restricted to the magnitude
# window and to the retrieved-redshift interval.
nice_lya, bl = nice_lya_select(
    lya_lines, other_lines, pm_flx, pm_err, cont_est_lya, z_Arr,
    give_bad_lines=True
)
nice_lya = nice_lya & z_cut & (mag > mag_min) & (mag < mag_max)

In [None]:
## Fractions QSO / SF

# Selected sources split by whether the retrieved redshift is right.
sel_good = nice_lya & nice_z
sel_bad = nice_lya & ~nice_z

# The first N_qso entries of every per-source array are QSOs, so counting
# inside that leading slice counts the selected QSOs.
good_qso = int(np.count_nonzero(sel_good[:N_qso]))
bad_qso = int(np.count_nonzero(sel_bad[:N_qso]))
N_sel = count_true(nice_lya)
N_sel_good = count_true(nice_lya & nice_z)
N_sel_bad = N_sel - N_sel_good

# Guard the ratios: an empty sub-sample (e.g. no bad-z selections at all)
# yields NaN instead of a ZeroDivisionError / 0-by-0 warning.
good_frac = good_qso / N_sel_good if N_sel_good > 0 else np.nan
bad_frac = bad_qso / N_sel_bad if N_sel_bad > 0 else np.nan

purity = N_sel_good / N_sel if N_sel > 0 else np.nan

print(f'Good QSOs: {good_qso} | Bad QSOs: {bad_qso}')
print(f'Good SFs: {N_sel_good - good_qso} | Bad SFs: {N_sel - N_sel_good - bad_qso}')
print()
print('Good frac: {0:0.2f}'.format(good_frac))
print('Bad frac: {0:0.2f}'.format(bad_frac))
print()
print('Purity = {0:0.2f}'.format(purity))

In [None]:
def EW_err(fnb, fnb_err, fcont, fcont_err, z, z_err, fwhm):
    '''
    First-order error propagation for the rest-frame equivalent width

        EW0 = fwhm * (fnb - fcont) / fcont / (1 + z)

    Each term is |dEW0/dx| * sigma_x and the three terms are added in
    quadrature (the inputs are treated as uncorrelated).

    Parameters
    ----------
    fnb, fnb_err : flux in the line filter and its uncertainty.
    fcont, fcont_err : continuum flux and its uncertainty.
    z, z_err : redshift and its uncertainty.
    fwhm : width of the line filter.

    Scalars and numpy arrays are both accepted (plain arithmetic only).

    Returns
    -------
    Uncertainty of EW0, in the same units as fwhm.
    '''
    # dEW0/dfnb = fwhm / fcont / (1 + z)
    e1 = fnb_err * fwhm / fcont / (1 + z)
    # dEW0/dfcont = -fwhm * fnb / fcont**2 / (1 + z)
    e2 = fcont_err * fwhm * fnb / fcont ** 2 / (1 + z)
    # dEW0/dz = -fwhm * (fnb - fcont) / fcont / (1 + z)**2
    e3 = z_err * fwhm * (fnb - fcont) / fcont / (1 + z) ** 2

    # Signs are irrelevant because every term is squared.
    return (e1**2 + e2**2 + e3**2) ** 0.5

In [None]:
def EW_L_NB(pm_flx, pm_err, cont_flx, cont_err, z_Arr, lya_lines, F_bias=None,
    nice_lya=None):
    '''
    Returns the EW0 and the luminosity from a NB selection given by lya_lines

    Parameters
    ----------
    pm_flx, pm_err : arrays indexed as [filter, source] with the fluxes and
        their errors.
    cont_flx, cont_err : arrays indexed as [filter, source] with the continuum
        estimate and its error.
    z_Arr : redshift assigned to every source.
    lya_lines : per-source index of the filter holding the Lya line.
    F_bias : optional per-filter factor; the flux of the line filter is
        divided by it. Defaults to ones (no correction).
    nice_lya : optional boolean mask of the sources to evaluate. Sources
        outside the mask keep flux = continuum = 0, so their outputs are
        NaN / -inf. Defaults to all sources.

    Returns
    -------
    EW_nb_Arr : rest-frame equivalent width.
    EW_nb_e : its uncertainty (see EW_err; a redshift error of 0.06 is assumed).
    L_Arr : log10 of the line luminosity.
    L_e_Arr : uncertainty of the (linear) line luminosity.
    flambda * fwhm, flambda_e * fwhm : integrated line flux and its error.
    '''
    N_sources = pm_flx.shape[1]
    nb_fwhm_Arr = nb_fwhm(range(56))
    # Accept plain lists as well as arrays.
    lya_lines = np.asarray(lya_lines)

    if nice_lya is None:
        nice_lya = np.ones(N_sources).astype(bool)
    if F_bias is None:
        F_bias = np.ones(60)
    F_bias = np.asarray(F_bias)

    cont = np.zeros(N_sources)
    cont_e = np.zeros(N_sources)
    flx = np.zeros(N_sources)
    flx_e = np.zeros(N_sources)

    fwhm = nb_fwhm_Arr[lya_lines]

    # Pick, for every selected source, the values at its own line filter.
    src = np.where(nice_lya)[0]
    nb = lya_lines[src]
    cont[src] = cont_flx[nb, src]
    cont_e[src] = cont_err[nb, src]
    flx[src] = pm_flx[nb, src] / F_bias[nb]
    flx_e[src] = pm_err[nb, src]

    flambda = flx - cont
    flambda_e = (flx_e ** 2 + cont_e ** 2) ** 0.5

    # NOTE(review): z_NB is called elsewhere in this notebook with filter
    # positions (lya_cont_lines), while here it receives redshift +/- 0.5.
    # This looks like it was meant to be the filter position +/- 0.5 --
    # confirm. Left unchanged; it only enters the distance error dL_e.
    z_1 = z_NB(z_Arr - 0.5)
    z_2 = z_NB(z_Arr + 0.5)

    dL = cosmo.luminosity_distance(z_Arr).to(u.cm).value
    dL_e = (
        cosmo.luminosity_distance(z_2).to(u.cm).value
        - cosmo.luminosity_distance(z_1).to(u.cm).value
    ) * 0.5

    # Unselected sources have cont = flambda = 0; silence the expected
    # divide-by-zero / log(0) warnings (the resulting values are unchanged).
    with np.errstate(divide='ignore', invalid='ignore'):
        # Rest-frame EW: the observed-frame width is divided by (1 + z),
        # consistently with the expression differentiated in EW_err.
        EW_nb_Arr = fwhm * flambda / cont / (1 + z_Arr)
        EW_nb_e = EW_err(flx, flx_e, cont, cont_e, z_Arr, 0.06, fwhm)

        L_lin = fwhm * flambda * 4*np.pi * dL ** 2
        L_Arr = np.log10(L_lin)
        # Error of the linear luminosity: both partial derivatives must use
        # L itself (dL/dflambda = L / flambda, dL/d(dL) = 2 L / dL), not log10 L.
        L_e_Arr = (
            (L_lin / flambda) ** 2 * (flx_e ** 2 + cont_e ** 2)
            + (2 * L_lin / dL) ** 2 * dL_e ** 2
        ) ** 0.5

    return EW_nb_Arr, EW_nb_e, L_Arr, L_e_Arr, flambda * fwhm, flambda_e * fwhm

In [None]:
# First pass: line quantities without any flux-bias correction.
EW_nb_Arr, EW_nb_e, L_Arr, L_e_Arr, flambda, flambda_e = EW_L_NB(
    pm_flx, pm_err, cont_est_lya, cont_err_lya, z_Arr, lya_lines, nice_lya=nice_lya
)

# Per-filter bias: median ratio of retrieved to true line flux, measured on
# the selected sources with a good redshift.
# `lya_lines` is converted to an array so that `== nb` is an element-wise
# comparison even when it is a plain list (a list compared to an int is just
# False, which would silently empty every selection).
lya_lines_Arr = np.asarray(lya_lines)
with np.errstate(divide='ignore', invalid='ignore'):
    flux_ratio = flambda / fline

F_cor = np.ones(60)
for nb in nbs_to_consider:
    to_cor = flux_ratio[nice_lya & nice_z & (lya_lines_Arr == nb)]
    # Ignore NaN and infinite ratios (same effect as the inf -> NaN + nanmedian
    # combination).
    to_cor = to_cor[np.isfinite(to_cor)]
    # Leave the factor at 1 when the filter has no usable source; a NaN
    # factor would turn every flux of that filter into NaN in the second pass.
    if to_cor.size > 0:
        F_cor[nb] = np.median(to_cor)

# Second pass: same computation with the line-filter flux divided by F_cor.
EW_nb_Arr, EW_nb_e, L_Arr, L_e_Arr, flambda, flambda_e = EW_L_NB(
    pm_flx, pm_err, cont_est_lya, cont_err_lya, z_Arr, lya_lines, nice_lya=nice_lya,
    F_bias=F_cor
)

In [None]:
# Display the per-filter flux-bias factors (1 for filters outside nbs_to_consider).
F_cor

In [None]:
# %xdel pm_flx
# %xdel pm_err
# %xdel cont_est_lya
# %xdel cont_err_lya

In [None]:
# Masks shared by the three figures: selected sources with a right / wrong
# retrieved redshift.
good_sel = nice_lya & z_cut & nice_z
bad_sel = nice_lya & z_cut & ~nice_z

# Magnitude distribution of the selected sources.
fig, ax = plt.subplots(figsize=(7, 6))

bins = np.linspace(18, 26, 20)

ax.hist(mag[good_sel], label='Good z', histtype='step', bins=bins)
ax.hist(mag[bad_sel], label='Bad z', histtype='step', bins=bins)

ax.legend(fontsize=15, loc=2)

ax.set_xlabel('r', fontsize=15)
ax.set_ylabel('N', fontsize=15)

plt.show()

####

# Retrieved luminosity, good vs. bad redshift.
fig, ax = plt.subplots(figsize=(7, 6))

L_bins = np.linspace(42.5, 46, 30)

goodh = L_Arr[good_sel]
badh = L_Arr[bad_sel]

ax.hist(goodh, label='Good z', histtype='step', bins=L_bins)
ax.hist(badh, label='Bad z', histtype='step', bins=L_bins)

ax.legend(fontsize=15, loc=2)

# Raw strings: '\m' is not a valid Python escape and triggers a warning in
# normal string literals (the resulting text is identical).
ax.set_xlabel(r'log L$_\mathrm{line}$ retrieved', fontsize=15)
ax.set_ylabel('N', fontsize=15)

plt.show()

####

# Retrieved vs. true luminosity for the good-redshift selection.
fig, ax = plt.subplots(figsize=(7, 6))

goodh_real = L_lya[good_sel]

ax.hist(goodh, label='Calc', histtype='step', bins=L_bins)
ax.hist(goodh_real, label='Real', histtype='step', bins=L_bins)

ax.legend(fontsize=15, loc=2)

ax.set_xlabel(r'log L$_\mathrm{line}$ retrieved', fontsize=15)
ax.set_ylabel('N', fontsize=15)

plt.show()

In [None]:
# Retrieved vs. true log-luminosity: scatter of every selected source plus
# density contours of the SF sources with a good redshift.
fig, ax = plt.subplots(figsize=(7, 6))

L_edges = np.linspace(42, 47, 20)
sf_good = nice_lya & nice_z & ~is_qso
Z, x_edges, y_edges = np.histogram2d(
    L_lya[sf_good], L_Arr[sf_good], bins=(L_edges, L_edges)
)

# histogram2d puts the first argument (x) along axis 0, whereas contour
# expects rows to run along y: the histogram has to be transposed or the
# contours are mirrored about the 1:1 line.
# Levels must be strictly increasing, so contours are only drawn when the
# densest cell holds more than the lowest level (5 sources).
CS = None
if Z.max() > 5:
    CS = ax.contour(
        Z.T,
        extent=[x_edges.min(), x_edges.max(), y_edges.min(), y_edges.max()],
        levels=np.linspace(5, Z.max(), 7)
    )

ax.scatter(L_lya[nice_lya & is_qso], L_Arr[nice_lya & is_qso],
    label='QSO', alpha=0.2)
ax.scatter(L_lya[nice_lya & ~is_qso], L_Arr[nice_lya & ~is_qso],
    label='SF', alpha=0.2)

# Identity line for reference.
x = np.linspace(40, 48, 100)
ax.plot(x, x, linestyle='--', color='red', label='1:1')

# Raw strings: '\l' is not a valid Python escape (the text is unchanged).
ax.set_ylabel(r'Retrieved $\log L$', fontsize=15)
ax.set_xlabel(r'Real $\log L$', fontsize=15)

ax.set_ylim((42, 47))
ax.set_xlim((42, 47))

ax.legend(fontsize=15)

plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(7, 6))

# Luminosity bins: N_bins_1 bins in [43.2, 44.25] followed by N_bins_2
# wider bins up to 45.
N_bins_1 = 7
N_bins_2 = 2
bins = np.concatenate((
    np.linspace(43.2, 44.25, N_bins_1 + 1),
    np.linspace(44.25, 45, N_bins_2 + 1)[1:]
))
bin_centers = [(bins[k] + bins[k + 1]) / 2 for k in range(len(bins) - 1)]

# Retrieved luminosities of the selection, split by redshift quality.
goodh = L_Arr[nice_lya & nice_z]
badh = L_Arr[nice_lya & ~nice_z]

hg, bg = np.histogram(goodh, bins=bins)
hb, _ = np.histogram(badh, bins=bins)

# Parameters of the two Schechter components passed to double_schechter
# when the luminosity function is plotted.
phistar1 = 3.33e-6
Lstar1 = 10 ** 44.65
alpha1 = -1.35
phistar2 = 10 ** -3.45
Lstar2 = 10 ** 42.93
alpha2 = -1.93

volume = z_volume(z_min, z_max, 200)

# True luminosities of every mock source inside the redshift interval.
h_qso, b = np.histogram(L_lya[is_qso & zspec_cut], bins)
h_sf, b = np.histogram(L_lya[~is_qso & zspec_cut], bins)
b_c = [0.5 * (lo + hi) for lo, hi in zip(b[:-1], b[1:])]
bw = [hi - lo for lo, hi in zip(b[:-1], b[1:])]

# Expected number of sources per bin: the combined histogram, divided by the
# width of the current bin, is interpolated between bin centres and integrated
# over the bin with Simpson's rule.
totals = []
for b_i in range(len(b_c)):
    Lx = np.linspace(b[b_i], b[b_i + 1], 100)
    density = np.interp(Lx, b_c, (h_qso + h_sf) / bw[b_i])
    totals.append(simpson(density, Lx))

totals = np.array(totals)

# Empty bins give 0/0 (or x/0); those entries are cleaned up in a later cell.
with np.errstate(divide='ignore', invalid='ignore'):
    completeness = hg / totals
    purity_per_bin = hg / (hg + hb)
    correct = purity_per_bin / completeness

# where='mid' centres every step on its bin centre; the default ('pre')
# would draw each value half a bin to the left.
ax.step(b_c, completeness, where='mid', label='Completeness')
ax.step(b_c, purity_per_bin, where='mid', label='Purity')

# Raw string: '\l' is not a valid Python escape (the text is unchanged).
ax.set_xlabel(r'$\log L$', fontsize=15)

ax.set_xlim((43.2, 45))
ax.set_ylim((0, 1))
ax.legend(fontsize=15)

plt.show()

In [None]:
# Display the luminosity bin edges used for the completeness / purity curves.
bins

In [None]:
# Bins where purity / completeness is undefined (NaN or +-inf) get a zero
# correction factor.
correct[~np.isfinite(correct)] = 0.
correct

In [None]:
# Number of selected sources per luminosity bin, counting the sources whose
# Lya line was assigned to one of the filters 0..55 (one histogram call
# replaces the per-filter accumulation loop; the counts are the same).
lya_lines_Arr = np.asarray(lya_lines)
in_nb_range = (lya_lines_Arr >= 0) & (lya_lines_Arr < 56)
total_hist = np.histogram(L_Arr[nice_lya & in_nb_range], bins=b)[0].astype(float)

volume = z_volume(z_min, z_max, 200)

fig, ax = plt.subplots(figsize=(9, 7))

ax.errorbar(b_c, total_hist / volume / bw,
    yerr=total_hist ** 0.5 / volume / bw,
    marker='.', linestyle='', markersize=15, label='Uncorrected')
# The correction is a multiplicative factor, so the Poisson error sqrt(N)
# scales with it: sigma = correct * sqrt(N), not sqrt(correct * N).
ax.errorbar(b_c, total_hist / volume / bw * correct,
    yerr=total_hist ** 0.5 * correct / volume / bw,
    marker='.', linestyle='', markersize=15, label='Corrected')

# Model curve. The grid is evenly spaced in log L because the curve is drawn
# against log10(L); a linear grid over 3.5 decades leaves only a handful of
# points at the faint end.
Lx = np.logspace(42, 45.5, 1000)
# Factor L * ln(10) converts a density per unit L into one per unit log10 L.
Phi = double_schechter(
                Lx, phistar2, Lstar2, alpha2, phistar1, Lstar1, alpha1
            ) * Lx * np.log(10)

ax.plot(np.log10(Lx), Phi, label='Theoretical')

ax.set_ylim(1e-8, 1e-4)
ax.set_xlim(42.5, 45.5)
ax.set_yscale('log')

ax.legend(fontsize=15)

ax.set_xlabel('log L', fontsize=15)
# Raw string: '\P' and '\D' are not valid Python escapes (text unchanged).
ax.set_ylabel(r'$\Phi$ [Mpc$^{-3}$ $\Delta$ log L $^{-1}$]', fontsize=15)

plt.show()

In [None]:
# Among the sources that were NOT selected: ratio of those with a good
# retrieved redshift to those with a bad one.
count_true(~nice_lya & nice_z) / count_true(~nice_lya & ~nice_z)

In [None]:
# Luminosity function of the input mocks (true luminosities, sources inside
# the redshift interval) compared with the model curve.
fig, ax = plt.subplots(figsize=(9, 7))

ax.plot(b_c, h_qso / bw / volume, ls='', marker='s', label='Mock QSO')
ax.plot(b_c, h_sf / bw / volume, ls='', marker='s', label='Mock SF')
ax.plot(b_c, (h_sf + h_qso) / bw / volume, ls='', marker='s', label='Combined',
    color='dimgray', alpha=0.5)
# Drawn through the axes object like every other artist of this figure.
ax.plot(np.log10(Lx), Phi, label='Theoretical')

ax.set_xlabel('log L', fontsize=15)
# The density is per unit (Delta log L), as in the selection LF figure; the
# label previously read "log Delta L". Raw string because '\P' and '\D' are
# not valid Python escapes.
ax.set_ylabel(r'$\Phi$ [Mpc$^{-3}$ $\Delta$ log L $^{-1}$]', fontsize=15)

ax.legend(fontsize=15)

ax.set_ylim(1e-8, 1e-4)
ax.set_xlim(42.5, 45.5)
ax.set_yscale('log')

plt.show()