In [None]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import emcee
import corner
from minijpas_LF_and_puricomp import effective_volume
from scipy.optimize import minimize
from scipy.stats import multivariate_normal
from my_functions import schechter
from multiprocessing import Pool
from LAEs.my_functions import *
from LAEs.plot_puricomp2d import *

In [None]:
%matplotlib inline

In [None]:
def mask_puricomp(puri, comp, min_puri=0.2, min_comp=0.2):
    """Select entries whose purity and completeness both exceed a threshold.

    Parameters
    ----------
    puri, comp : scalar or array-like supporting ``>``
        Purity and completeness values to test.
    min_puri, min_comp : float, optional
        Strict lower limits for purity and completeness (default 0.2 each).

    Returns
    -------
    Boolean (or boolean array) that is True where ``puri > min_puri`` and
    ``comp > min_comp`` hold simultaneously.
    """
    puri_ok = puri > min_puri
    comp_ok = comp > min_comp
    return puri_ok & comp_ok


# Threshold compared against the raw LF values when masking bins.
min_N_bin = 2

In [None]:
# My LF
# Builds a single combined luminosity function (LF) out of all narrow-band (NB)
# intervals in `comb_nbs_list`: per-interval histograms are normalised, masked by
# purity/completeness, summed, and finally rescaled by the volume that actually
# contributed to every luminosity bin.

# Each pair [nb1, nb2] identifies one NB interval; it is used both in the
# on-disk directory name and as argument to `effective_volume`.
nbs_list = [[1, 5], [4, 8], [7, 11], [10, 14], [13, 17], [16, 20]]
qso_factor = 1.0
comb_nbs_list = nbs_list
survey_list = [f'minijpasAEGIS00{i}' for i in range(1, 4 + 1)] + ['jnep']
w_central = central_wavelength()
# presumably the Lya rest-frame wavelength in Angstrom -- TODO confirm units
w_lya = 1215.67
nb_fwhm_Arr = nb_fwhm(np.arange(60))
L_binning = np.load('npy/L_nb_err_binning.npy')
# Work in log10 of the stored bin edges.
b = np.log10(L_binning)
# Midpoint and width of every pair of consecutive log10 bin edges.
LF_bins = np.array([(b[i] + b[i + 1]) / 2 for i in range(len(b) - 1)])
bin_width = np.array([b[i + 1] - b[i] for i in range(len(b) - 1)])

# Sum of the effective volumes of all NB intervals.
# NOTE(review): the meaning of the 'both' argument is defined in
# `effective_volume` and is not visible here.
total_volume = 0.
for [this_nb_min, this_nb_max] in comb_nbs_list:
    total_volume += effective_volume(this_nb_min, this_nb_max, 'both')
# Accumulators; they stay None until the first NB interval has been processed.
masked_volume = None
hist_mat = None
LF_raw = None
# Indexed by NB-interval position (see `total_puri_list[i]` below).
total_puri_list = np.load('npy/total_puri_list.npy')
total_comp_list = np.load('npy/total_comp_list.npy')

for i, [nb1, nb2] in enumerate(comb_nbs_list):
    pathname = f'Luminosity_functions/LF_r17-24_nb{nb1}-{nb2}_ew30_ewoth100_nb_{qso_factor:0.1f}'
    # Only `puricomp_bins` from this call is used below.
    comp_list, _, _, puri_list, comp_den_list, _, _, puri_den_list, puricomp_bins = \
        load_puricomp1d(pathname)

    # Bin centers
    # (the comprehension's `i` is local to the comprehension in Python 3 and
    # does not overwrite the loop index `i` used right below)
    bc = [puricomp_bins[i: i + 2].sum() * 0.5 for i in range(len(puricomp_bins) - 1)]

    # Purity / completeness of this NB interval interpolated onto the LF bins.
    this_puri = np.interp(LF_bins, bc, total_puri_list[i])
    this_comp = np.interp(LF_bins, bc, total_comp_list[i])
    # Sum the per-survey histogram matrices; `+=` adds in place into the
    # array loaded for the first survey.
    this_hist = None
    for survey_name in survey_list:
        filename_hist = f'{pathname}/hist_i_mat_{survey_name}.npy'
        hist_i_mat = np.load(filename_hist)

        if this_hist is None:
            this_hist = hist_i_mat
        else:
            this_hist += hist_i_mat
    # Normalise by the total volume and by the bin width.
    this_hist = this_hist / total_volume / bin_width

    # Effective volume of this NB interval, replicated for every LF bin.
    this_volume = np.ones_like(LF_bins) * effective_volume(nb1, nb2, 'both')

    filename_dict = f'{pathname}/LFs.pkl'
    with open(filename_dict, 'rb') as file:
        # presumably converts a number density into counts, since the result
        # is compared against `min_N_bin` below -- TODO confirm
        this_LF_raw = pickle.load(file)['LF_total_raw'] * this_volume
        # On the first iteration `LF_raw` is the same object as `this_LF_raw`;
        # later iterations add in place into that first array.
        if LF_raw is None:
            LF_raw = this_LF_raw
        else:
            LF_raw += this_LF_raw
        
    # Set masked bins by puricomp_mask to 0
    # A bin is kept only if it passes the purity/completeness thresholds and
    # its raw value reaches `min_N_bin`. Rejected bins are zeroed in every row
    # of the histogram matrix and contribute no volume.
    puricomp_mask = mask_puricomp(this_puri, this_comp) & (this_LF_raw >= min_N_bin)
    this_hist[:, ~puricomp_mask] = 0.
    this_volume[~puricomp_mask] = 0.

    # Per-bin volume that survived the masking, summed over NB intervals.
    if masked_volume is None:
        masked_volume = this_volume
    else:
        masked_volume += this_volume

    if hist_mat is None:
        hist_mat = this_hist
    else:
        hist_mat = hist_mat + this_hist

# `hist_mat` was normalised by `total_volume`; rescale every bin to the volume
# that actually contributed to it. Bins where `masked_volume` is 0 involve a
# division by zero (NaN/inf); the NaN-aware percentile below ignores NaNs.
hist_mat = hist_mat * total_volume / masked_volume

# 16th / 50th / 84th percentiles along axis 0 of `hist_mat`.
L_LF_err_percentiles = np.nanpercentile(hist_mat, [16, 50, 84], axis=0)
LF_err_plus = L_LF_err_percentiles[2] - L_LF_err_percentiles[1]
LF_err_minus = L_LF_err_percentiles[1] - L_LF_err_percentiles[0]
hist_median = L_LF_err_percentiles[1]

# Zero (in place) the bins whose summed raw value is below `min_N_bin`.
mask_low_N = (LF_raw < min_N_bin)
for h in [LF_err_plus, LF_err_minus, hist_median]:
    h[mask_low_N] = 0.

# Total error: algebraically sqrt(hist_median / volwid + LF_err ** 2).
# NOTE(review): the first term looks like a Poisson variance -- confirm.
volwid = total_volume * bin_width
yerr_plus = (hist_median + volwid * (LF_err_plus) ** 2) ** 0.5 * volwid ** -0.5
yerr_minus = (hist_median + volwid * (LF_err_minus) ** 2) ** 0.5 * volwid ** -0.5

LF_dict = {
    'LF_bins': LF_bins,
    'LF_total': hist_median,
    'LF_total_uncorr': LF_raw / total_volume,
    'LF_total_err': [yerr_minus, yerr_plus]
}

# Aliases used by the fitting and plotting cells.
LF_yerr_minus = yerr_minus
LF_yerr_plus = yerr_plus
LF_phi = LF_dict['LF_total']
LF_bin = LF_dict['LF_bins']

In [None]:
# The fitting curve
def power_fit(Lx, A, B):
    """Evaluate a power law that is a straight line in log-log space.

    Parameters
    ----------
    Lx : scalar or array-like
        Values at which to evaluate the curve (their log10 is taken).
    A : float
        Slope in log10-log10 space.
    B : float
        Intercept in log10-log10 space.

    Returns
    -------
    ``10 ** (A * log10(Lx) + B)``, with the same shape as `Lx`.
    """
    log_value = A * np.log10(Lx) + B
    return 10 ** log_value

In [None]:
def prior_f(theta):
    """Flat (top-hat) log-prior on the two power-law parameters.

    Parameters
    ----------
    theta : sequence
        ``theta[0]`` is the slope A and ``theta[1]`` the intercept B.

    Returns
    -------
    float
        ``0.`` when ``-1.6 < A < 0`` and ``0 < B < 50`` (bounds exclusive),
        ``-np.inf`` otherwise.
    """
    A0, B0 = theta[0], theta[1]
    inside_box = (-1.6 < A0 < 0) and (0 < B0 < 50)
    return 0. if inside_box else -np.inf

def log_likelihood(theta, Lx, Phi, yerr):
    """Gaussian log-likelihood of the power-law model (up to a constant).

    Parameters
    ----------
    theta : sequence
        ``theta[0]`` is the slope A and ``theta[1]`` the intercept B passed
        to `power_fit`.
    Lx : array-like
        Values at which the model is evaluated.
    Phi : array-like
        Observed values to compare the model with.
    yerr : array-like
        Uncertainty of `Phi`; it is squared to obtain the variance.

    Returns
    -------
    float
        ``-0.5 * sum(residual**2 / variance + log(variance))``.
    """
    A0, B0 = theta[0], theta[1]
    variance = yerr ** 2
    residual = power_fit(Lx, A0, B0) - Phi
    return -0.5 * np.sum(residual ** 2 / variance + np.log(variance))

def log_p(theta, Lx, Phi, yerr):
    """Log-posterior: sum of `log_likelihood` and the log-prior `prior_f`.

    Takes the same arguments as `log_likelihood`; this is the function
    handed to the emcee sampler.
    """
    log_like = log_likelihood(theta, Lx, Phi, yerr)
    log_prior = prior_f(theta)
    return log_like + log_prior

In [None]:
## MCMC parameters ##
N_walkers = 1000
N_steps = 200

# Error to use: mean of the upper and lower errors. Bins with a null LF get
# an infinite error so the finiteness test below removes them from the fit.
yerr = np.where(LF_phi == 0, np.inf, (LF_yerr_plus + LF_yerr_minus) * 0.5)

# In which LF bins fit: finite error and 43.45 < LF_bins < 44.7
in_fit_range = (LF_bins > 43.45) & (LF_bins < 44.7)
where_fit = np.isfinite(yerr) & in_fit_range

# Walkers start in a tight Gaussian ball (sigma = 1e-4) around the guess
# (A, B) = (-0.58, 20.26). A is drawn before B.
theta0 = np.column_stack([
    np.random.normal(-0.58, 1e-4, N_walkers),
    np.random.normal(20.26, 1e-4, N_walkers),
])

# Extra arguments forwarded to log_p: (Lx, Phi, yerr) restricted to the fit bins
args = (10**LF_bins[where_fit], LF_phi[where_fit], yerr[where_fit])

with Pool() as pool:
    # Set up the ensemble sampler (2 free parameters) on the process pool
    sampler = emcee.EnsembleSampler(N_walkers, 2, log_p, args=args, pool=pool)
    # Run the MCMC
    sampler.run_mcmc(theta0, N_steps, progress=True)

# print(f'Autocorrelation time: {sampler.get_autocorr_time()}')

In [None]:
# Discard the first N_steps // 5 * 4 steps, keep one step in 15 of the rest,
# and flatten all walkers into a single (n_samples, 2) array.
flat_samples = sampler.get_chain(discard=N_steps // 5 * 4, thin=15, flat=True)
# One label per sampled parameter: the sampler has exactly 2 dimensions (A, B).
labels = ['A', 'B']

# Posterior median and half-width of the 16-84 percentile interval.
[A_fit, B_fit] = np.median(flat_samples, axis=0)
[A_perc16, B_perc16], [A_perc84, B_perc84] = np.percentile(
    flat_samples, [16, 84], axis=0
)
A_fit_err = (A_perc84 - A_perc16) * 0.5
B_fit_err = (B_perc84 - B_perc16) * 0.5

print(f'Best fit: A = {A_fit:0.2f} ± {A_fit_err:0.2f}, B = {B_fit:0.2f} ± {B_fit_err:0.2f}')

fig = corner.corner(flat_samples, labels=labels,
                    truths=[A_fit, B_fit])
plt.show()

In [None]:
from my_functions import double_schechter

# Plot the combined LF, the fitted power law with its 16-84 percentile band,
# and reference curves from the literature.
fig, ax = plt.subplots(figsize=(5, 4))

# Luminosity grid shared by every curve in this figure.
Lx = np.logspace(42, 46, 1000)
log_Lx = np.log10(Lx)

# Compute fit for (at most) 10 000 chain samples, taken in reversed order
Phi_fit_i = [
    power_fit(Lx, step[0], step[1]) for step in flat_samples[::-1][:10_000]
]
Phi_fit_84 = np.percentile(Phi_fit_i, 84, axis=0)
Phi_fit_16 = np.percentile(Phi_fit_i, 16, axis=0)

# My fit
Phi_fit = power_fit(Lx, A_fit, B_fit)
ax.plot(log_Lx, Phi_fit, label='My fit')
ax.fill_between(log_Lx, Phi_fit_16, Phi_fit_84, alpha=0.3,
                color='C0')

# Reference power laws: (legend label, colour, slope A, intercept B)
reference_power_laws = [
    ('Sobral 2018, (power-law)', 'cornflowerblue', -0.75, 27.1),
    ('Sobral 2017, (power-law)', 'darkolivegreen', -1.48, 59.4),
    ('Matthee 2017 (power-law)', 'red', -0.74, 27.5),
]
for ref_label, ref_color, ref_A, ref_B in reference_power_laws:
    ax.plot(log_Lx, power_fit(Lx, ref_A, ref_B),
            ls='--', c=ref_color, zorder=96,
            label=ref_label)

# Bins used in the fit are drawn filled; all bins are drawn again as open
# markers.
ax.errorbar(LF_bin[where_fit], LF_phi[where_fit],
            yerr=[LF_yerr_minus[where_fit], LF_yerr_plus[where_fit]],
            fmt='s', color='r', capsize=4)
ax.errorbar(LF_bin, LF_phi,
            yerr=[LF_yerr_minus, LF_yerr_plus],
            fmt='s', color='r', capsize=4,
            markerfacecolor='none')

# Reference double-Schechter curves:
# (legend label, colour, zorder, arguments passed to double_schechter after Lx)
reference_schechters = [
    ('Spinoso 2020 (Schechter)', 'C6', 1,
     (3.33e-6, 10 ** 44.65, -1.35, 10 ** -3.45, 10 ** 42.93, -1.93)),
    ('Zhang 2021 (Schechter)', 'C7', 0,
     (10 ** -3.41, 10 ** 42.87, -1.7, 10 ** -5.85, 10 ** 44.6, -1.2)),
]
for ref_label, ref_color, ref_zorder, ref_params in reference_schechters:
    # Multiply by Lx * ln(10), as the original cell did for both curves.
    Phi_center = double_schechter(Lx, *ref_params) * Lx * np.log(10)
    ax.plot(
        log_Lx, Phi_center, ls='-.', alpha=0.7,
        zorder=ref_zorder, color=ref_color, label=ref_label
    )

# Axis scale and limits, set once.
ax.set_yscale('log')
ax.set_ylim(1e-8, 1e-2)
ax.set_xlim(42.5, 45.5)

ax.set_xlabel(r'$\log L_{\mathrm{Ly}\alpha}$ [erg$\,$s$^{-1}$]', fontsize=15)
ax.set_ylabel(r'$\Phi$ [Mpc$^{-3}\,\Delta\log L^{-1}$]', fontsize=15)

ax.tick_params(labelsize=14, direction='in', which='both')
ax.yaxis.set_ticks_position('both')
ax.xaxis.set_ticks_position('both')
ax.legend(fontsize=9)

plt.show()

In [None]:
# Fit using separate LFs
# Build one LF per narrow-band (NB) interval and concatenate them so that all
# of them can be fitted simultaneously.
bins_per_nb = []
LF_per_nb = []
err_plus_per_nb = []
err_minus_per_nb = []

for nb1, nb2 in nbs_list:
    # Redshift limits of this NB interval (not used further in this cell).
    z_min = (w_central[nb1] - nb_fwhm_Arr[nb1] * 0.5) / w_lya - 1
    z_max = (w_central[nb2] + nb_fwhm_Arr[nb2] * 0.5) / w_lya - 1

    this_volume = effective_volume(nb1, nb2, 'both')
    pathname = f'Luminosity_functions/LF_r17-24_nb{nb1}-{nb2}_ew30_ewoth100_nb_{qso_factor:0.1f}'

    # The raw LF depends only on the NB interval, so it is read once here
    # rather than once per survey.
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    filename_dict = f'{pathname}/LFs.pkl'
    with open(filename_dict, 'rb') as file:
        LF_raw = pickle.load(file)['LF_total_raw']

    # Sum the per-survey histogram matrices (in place into the first one).
    this_hist = None
    for survey_name in survey_list:
        filename_hist = f'{pathname}/hist_i_mat_{survey_name}.npy'
        hist_i_mat = np.load(filename_hist)

        if this_hist is None:
            this_hist = hist_i_mat
        else:
            this_hist += hist_i_mat

    # 16th / 50th / 84th percentiles along axis 0 of the summed histogram.
    L_LF_err_percentiles = np.percentile(this_hist, [16, 50, 84], axis=0)
    LF_err_plus = L_LF_err_percentiles[2] - L_LF_err_percentiles[1]
    LF_err_minus = L_LF_err_percentiles[1] - L_LF_err_percentiles[0]
    hist_median = L_LF_err_percentiles[1]

    # sqrt(hist_median + err ** 2), then normalised by bin width and volume.
    yerr_plus = (hist_median + LF_err_plus ** 2) ** 0.5 / bin_width / this_volume
    yerr_minus = (hist_median + LF_err_minus ** 2) ** 0.5 / bin_width / this_volume

    this_LF_dict = {
        'LF_bins': LF_bins,
        'LF_total': hist_median / bin_width / this_volume,
        'LF_total_err': [yerr_minus, yerr_plus],
        'LF_total_uncorr': LF_raw,
    }

    bins_per_nb.append(LF_bins)
    LF_per_nb.append(this_LF_dict['LF_total'])
    err_plus_per_nb.append(yerr_plus)
    err_minus_per_nb.append(yerr_minus)

# Concatenate once at the end. The leading empty array keeps the result a
# float array (and a valid empty array if `nbs_list` is empty).
concat_bins = np.concatenate([np.array([]), *bins_per_nb])
concat_LF = np.concatenate([np.array([]), *LF_per_nb])
concat_LF_err_plus = np.concatenate([np.array([]), *err_plus_per_nb])
concat_LF_err_minus = np.concatenate([np.array([]), *err_minus_per_nb])

In [None]:
## MCMC parameters ##
N_walkers = 10000
N_steps = 100

# Error to use: mean of the upper and lower errors. Entries with a null LF
# get an infinite error so the finiteness test below removes them from the fit.
concat_yerr = np.where(
    concat_LF == 0, np.inf, (concat_LF_err_plus + concat_LF_err_minus) * 0.5
)

# In which LF bins fit: finite error and 43 < concat_bins < 45
in_fit_range = (concat_bins > 43.) & (concat_bins < 45)
where_fit = np.isfinite(concat_yerr) & in_fit_range

# Walkers start in a tight Gaussian ball (sigma = 1e-4) around the guess
# (A, B) = (-0.56, 20.24). A is drawn before B.
theta0 = np.column_stack([
    np.random.normal(-0.56, 1e-4, N_walkers),
    np.random.normal(20.24, 1e-4, N_walkers),
])

# Extra arguments forwarded to log_p: (Lx, Phi, yerr) restricted to the fit bins
args = (10**concat_bins[where_fit], concat_LF[where_fit], concat_yerr[where_fit])

with Pool() as pool:
    # Set up the ensemble sampler (2 free parameters) on the process pool
    sampler = emcee.EnsembleSampler(N_walkers, 2, log_p, args=args, pool=pool)
    # Run the MCMC
    sampler.run_mcmc(theta0, N_steps, progress=True)

# print(f'Autocorrelation time: {sampler.get_autocorr_time()}')

In [None]:
# Discard the first N_steps // 5 * 4 steps, keep one step in 15 of the rest,
# and flatten all walkers into a single (n_samples, 2) array.
flat_samples = sampler.get_chain(discard=N_steps // 5 * 4, thin=15, flat=True)
# One label per sampled parameter: the sampler has exactly 2 dimensions (A, B).
labels = ['A', 'B']

# Posterior median and half-width of the 16-84 percentile interval.
[A_fit, B_fit] = np.median(flat_samples, axis=0)
[A_perc16, B_perc16], [A_perc84, B_perc84] = np.percentile(
    flat_samples, [16, 84], axis=0
)
A_fit_err = (A_perc84 - A_perc16) * 0.5
B_fit_err = (B_perc84 - B_perc16) * 0.5

print(f'Best fit: A = {A_fit:0.2f} ± {A_fit_err:0.2f}, B = {B_fit:0.2f} ± {B_fit_err:0.2f}')

fig = corner.corner(flat_samples, labels=labels,
                    truths=[A_fit, B_fit])
plt.show()

In [None]:
# Plot the concatenated per-interval LFs, the fitted power law with its
# 16-84 percentile band, and the reference double-Schechter curves.
# `double_schechter` must already be imported by an earlier cell.
fig, ax = plt.subplots(figsize=(5, 4))

# Luminosity grid shared by every curve in this figure.
Lx = np.logspace(42, 46, 1000)
log_Lx = np.log10(Lx)

# Compute fit for (at most) 10 000 chain samples, taken in reversed order
Phi_fit_i = [
    power_fit(Lx, step[0], step[1]) for step in flat_samples[::-1][:10_000]
]
Phi_fit_84 = np.percentile(Phi_fit_i, 84, axis=0)
Phi_fit_16 = np.percentile(Phi_fit_i, 16, axis=0)

# My fit
Phi_fit = power_fit(Lx, A_fit, B_fit)
ax.plot(log_Lx, Phi_fit, label='My fit')
ax.fill_between(log_Lx, Phi_fit_16, Phi_fit_84, alpha=0.3, color='C0')

ax.errorbar(concat_bins, concat_LF,
            yerr=concat_yerr,
            fmt='s', mec='r', mfc='none', capsize=4, label='My points')

# Reference double-Schechter curves:
# (legend label, colour, zorder, arguments passed to double_schechter after Lx)
reference_schechters = [
    ('Spinoso 2020 (Schechter)', 'C6', 1,
     (3.33e-6, 10 ** 44.65, -1.35, 10 ** -3.45, 10 ** 42.93, -1.93)),
    ('Zhang 2021 (Schechter)', 'C7', 0,
     (10 ** -3.41, 10 ** 42.87, -1.7, 10 ** -5.85, 10 ** 44.6, -1.2)),
]
for ref_label, ref_color, ref_zorder, ref_params in reference_schechters:
    # Multiply by Lx * ln(10), as the original cell did for both curves.
    Phi_center = double_schechter(Lx, *ref_params) * Lx * np.log(10)
    ax.plot(
        log_Lx, Phi_center, ls='-.', alpha=0.7,
        zorder=ref_zorder, color=ref_color, label=ref_label
    )

ax.set_yscale('log')
ax.set_ylim(1e-8, 5e-3)
ax.set_xlim(42.5, 45.5)

# Axis labels, tick styling and legend, matching the combined-LF figure so
# this plot can be read on its own.
ax.set_xlabel(r'$\log L_{\mathrm{Ly}\alpha}$ [erg$\,$s$^{-1}$]', fontsize=15)
ax.set_ylabel(r'$\Phi$ [Mpc$^{-3}\,\Delta\log L^{-1}$]', fontsize=15)

ax.tick_params(labelsize=14, direction='in', which='both')
ax.yaxis.set_ticks_position('both')
ax.xaxis.set_ticks_position('both')
ax.legend(fontsize=9)

plt.show()