In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from my_functions import *
from three_filter import three_filter_method, NB_3fm
from scipy.stats import median_abs_deviation as mad

In [None]:
## Load the mock photometry (with / without errors) and the source catalogue
pm_data_noerr = np.load('npy/pm_LAE_300deg_uniform2.npy')
pm_data = np.load('npy/pm_eLAE_300deg_uniform2.npy')
f_name = 'Source_cats/Source_cat_LAE_300deg_uniform2.npy'
# NOTE(review): allow_pickle=True unpickles arbitrary objects; only load
# catalogues from a trusted source.
mock = np.load(f_name, allow_pickle=True).item()

## Some unexpectedly bright mocks end up in the catalogue. Keep only the
## sources whose error-free value in photometry row -3 is <= 1e-17.
mask_bright = np.flatnonzero(pm_data_noerr[-3] <= 1e-17)
pm_data = pm_data[:, mask_bright]
pm_data_noerr = pm_data_noerr[:, mask_bright]
for key in mock:
    # 'w_Arr' is not indexed per source (it is passed later as the wavelength
    # grid of the SEDs), so it is left untouched.
    if key != 'w_Arr':
        mock[key] = mock[key][mask_bright]

## Photometric errors from the fitted error model: one (m, b) pair per row of
## the photometry array, relative error = 10 ** (b + m * log10|flux|).
err_fit_params = np.load('npy/err_fit_params_minijpas.npy')
m, b = (err_fit_params[:, col].reshape(-1, 1) for col in (0, 1))
pm_err = pm_data_noerr * 10 ** (b + m * np.log10(np.abs(pm_data_noerr)))

## Filter set: tags, transmission curves and central wavelengths
filters_tags = load_filter_tags()
tcurves = load_tcurves(filters_tags)
w_central = np.array(central_wavelength(tcurves))

## FWHM of the last four filters (presumably the broad bands -- TODO confirm)
bb_fwhm = [nb_fwhm(tcurves, bb_i, True) for bb_i in range(-4, 0)]

In [None]:
N_nb = 5
ew0min = 30

# True for every mock selected as a line emitter in at least one scanned NB
z_idx = np.zeros(mock['redshift_Lya_Arr'].shape, dtype=bool)

for nb_c in range(6, 20):
    # Redshift at which Lya (1215.67) falls on the central wavelength of nb_c
    z_nb = w_central[nb_c] / 1215.67 - 1

    cont_est, cont_err = stack_estimation(
        pm_data, pm_err, nb_c, N_nb, w_central
    )

    nb_excess = pm_data[nb_c] - cont_est
    diff_err = (pm_err[nb_c]**2 + cont_err**2)**0.5
    fwhm = nb_fwhm(tcurves, nb_c, True)

    # Candidate: 3-sigma excess over the estimated continuum AND an excess
    # above the minimum rest-frame EW threshold.
    line = (
        (nb_excess > 3*diff_err)
        & (nb_excess > ew0min * (1 + z_nb) * cont_est / fwhm)
    )

    z_idx[line] = True

## Fraction of selected mocks per redshift bin
fig, ax = plt.subplots(figsize=(12, 9))

z_bins = np.linspace(2.5, 3.2, 60)
hist_total, bins = np.histogram(mock['redshift_Lya_Arr'], bins=z_bins)
hist = np.histogram(mock['redshift_Lya_Arr'][z_idx], bins=z_bins)[0]

# Guard against empty redshift bins: 0/0 would give NaN and a RuntimeWarning,
# so the fraction is defined as 0 where there are no mocks at all.
selected_frac = np.divide(
    hist, hist_total,
    out=np.zeros(hist.shape, dtype=float), where=hist_total > 0
)
bin_centers = 0.5 * (bins[:-1] + bins[1:])
ax.bar(bin_centers, selected_frac, width=bins[1] - bins[0])

# NOTE(review): only NBs 6-14 are marked here although the selection above
# scans NBs 6-19 -- confirm this is intended.
for nb_c in range(6, 15):
    ax.axvline(x=w_central[nb_c] / 1215.67 - 1, color='red')
ax.set_xlabel('z', fontsize=15)
ax.set_ylabel('Selected fraction', fontsize=15)
ax.set_ylim((0, 1))
plt.show()

## Median relative photometric error of NBs 6-14
fig, ax = plt.subplots(figsize=(10,8))
ax.plot(
    w_central[6:15] / 1215.67 - 1, np.median(pm_err[6:15] / pm_data[6:15], axis=1),
    marker='.', linestyle='', markersize=15
)
ax.set_ylabel('Median rel err', fontsize=15)
ax.set_xlabel('z', fontsize=15)
plt.show()

n_selected = np.count_nonzero(z_idx)
print('TOTAL SELECTED: {0}/{1} ({2:0.1f}%)'.format(
    n_selected, len(z_idx), n_selected/len(z_idx) * 100
))

In [None]:
## Confusion matrix of the stacking selection, accumulated over NBs 7-13
cm = np.zeros((2, 2))
N_nb = 5
ew0min = 0
for nb_c in range(7, 14):
    # Redshift at which Lya (1215.67) falls on the central wavelength of nb_c
    z_nb = w_central[nb_c] / 1215.67 - 1

    cont_est, cont_err = stack_estimation(
        pm_data, pm_err, nb_c, N_nb, w_central
        )

    diff_err = (pm_err[nb_c]**2 + cont_err**2)**0.5
    fwhm = nb_fwhm(tcurves, nb_c, True)

    # IGM_T = IGM_TRANSMISSION(w_central[nb_c])
    # cont_est *= (IGM_T + 1) * 0.5

    # Candidate: 3-sigma excess over the continuum AND above the EW0 threshold
    line = (
        (pm_data[nb_c] - cont_est > 3*diff_err)
        & (pm_data[nb_c] - cont_est > ew0min * (1 + z_nb) * cont_est / fwhm)
    )

    cm += conf_matrix(line, mock['redshift_Lya_Arr'], nb_c)

fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, ax=ax, cbar=False)
ax.set_xlabel('REAL', fontsize=15)
ax.set_ylabel('SELECTED', fontsize=15)
ax.set_title('Stack', fontsize=20)
ax.set_xticklabels(['True', 'False'])
ax.set_yticklabels(['True', 'False'])

plt.show()

# Rows are SELECTED (True, False), columns are REAL (True, False), as labelled
# on the heatmap above.
(tp, fp), (fn, tn) = cm

print('True Positive Rate: {0:0.2f}%'.format(100 * tp / (tp + fn)))
print('True Negative Rate: {0:0.2f}%'.format(100 * tn / (tn + fp)))
# Contamination is the fraction of the SELECTED sample that is not real, so
# the denominator is the selected row (tp + fp), not the real positives.
print('Contamination: {0:0.2f}%'.format(100 * fp / (tp + fp)))
print('Accuracy: {0:0.2f}%'.format(100 * (tp + tn) / (np.sum(cm))))

In [None]:
## Confusion matrix of the 3-filter selection, accumulated over NBs 6-19
cm = np.zeros((2, 2))
ew0min = 0

# The two broad-band inputs (photometry rows -3 and -2) do not depend on the
# NB being scanned, so they are prepared once outside the loop.
BB_LC = pm_data[-3]
BB_LU = pm_data[-2]
BB_LC_err = pm_err[-3]
BB_LU_err = pm_err[-2]
t_BB_LC = np.array(tcurves['t'][-3])
t_BB_LU = np.array(tcurves['t'][-2])
w_BB_LC = np.array(tcurves['w'][-3])
w_BB_LU = np.array(tcurves['w'][-2])

for nb_c in range(6,20):
    NB = pm_data[nb_c]
    NB_err = pm_err[nb_c]
    t_NB = np.array(tcurves['t'][nb_c])
    w_NB = np.array(tcurves['w'][nb_c])
    w_EL = w_central[nb_c]

    F_EL, A, B, A_err, B_err = three_filter_method(
        NB, BB_LC, BB_LU, NB_err, BB_LC_err, BB_LU_err,
        t_NB, w_NB, t_BB_LC, t_BB_LU, w_BB_LC, w_BB_LU, w_EL
    )

    # Linear continuum A * w + B evaluated at the line wavelength, with the
    # A and B errors combined in quadrature.
    f_cont = A * w_EL + B
    f_cont_var = w_EL**2 * A_err**2 + B_err**2
    EW0 = F_EL / f_cont * 1215.67/w_EL

    # Candidate: 3-sigma excess over the continuum AND above the EW0 threshold
    line = (
        (pm_data[nb_c] - f_cont > 3*(pm_err[nb_c]**2 + f_cont_var) ** 0.5)
        & (EW0 > ew0min)
    )

    cm += conf_matrix(line, mock['redshift_Lya_Arr'], nb_c)

fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, ax=ax, cbar=False)
ax.set_xlabel('REAL', fontsize=15)
ax.set_ylabel('SELECTED', fontsize=15)
ax.set_title('3-filter', fontsize=20)
ax.set_xticklabels(['True', 'False'])
ax.set_yticklabels(['True', 'False'])
plt.show()

# Rows are SELECTED (True, False), columns are REAL (True, False), as labelled
# on the heatmap above.
(tp, fp), (fn, tn) = cm

print('True Positive Rate: {0:0.2f}%'.format(100 * tp / (tp + fn)))
print('True Negative Rate: {0:0.2f}%'.format(100 * tn / (tn + fp)))
# Contamination is the fraction of the SELECTED sample that is not real, so
# the denominator is the selected row (tp + fp), not the real positives.
print('Contamination: {0:0.2f}%'.format(100 * fp / (tp + fp)))
print('Accuracy: {0:0.2f}%'.format(100 * (tp + tn) / (np.sum(cm))))

In [None]:
## True continuum and line+continuum flux of every mock, measured with
## NB_synthetic_photometry centred on the observed Lya wavelength. The last
## argument (11) is presumably the synthetic band width -- TODO confirm.
n_mock_src = mock['SEDs_no_line'].shape[0]
f_cont = np.zeros(n_mock_src)
f_line_cont = np.zeros_like(f_cont)
for src in range(n_mock_src):
    w_c = 1215.67 * (mock['redshift_Lya_Arr'][src] + 1)
    f_line_cont[src] = NB_synthetic_photometry(
        mock['SEDs'][src], mock['w_Arr'], w_c, 11
    )
    f_cont[src] = NB_synthetic_photometry(
        mock['SEDs_no_line'][src], mock['w_Arr'], w_c, 11
    )

# Reference rest-frame EW: line excess relative to the continuum, scaled by
# the same factor 11 and divided by (1 + z).
real_out_ew0 = 11 * (f_line_cont - f_cont) / f_cont / (1 + mock['redshift_Lya_Arr'])

In [None]:
# Filter index and minimum EW0 threshold shared by the retrieval cells below
nb_c, ew0min = 11, 0

In [None]:
## Stacking method: EW0 retrieval for the sources selected in NB nb_c
N_nb = 6
w_lya = 1215.67
fwhm = nb_fwhm(tcurves, nb_c, True)
# (1 + z) of a Lya line sitting on the central wavelength of the NB
one_plus_z_nb = w_central[nb_c] / w_lya

cont_est, cont_err = stack_estimation(
    pm_data, pm_err, nb_c, N_nb, w_central
)

# The selection uses the continuum estimate as returned by stack_estimation
EW_ret = (pm_data[nb_c] - cont_est) * fwhm / cont_est / one_plus_z_nb
stack_sigma = (pm_err[nb_c]**2 + cont_err**2)**0.5
line = (pm_data[nb_c] - cont_est > 3 * stack_sigma) & (EW_ret > ew0min)

# After selecting, rescale the continuum IN PLACE by the mean of 1 and the
# IGM transmission at the NB, then recompute the EW with it. Later cells see
# the rescaled cont_est.
IGM_T = IGM_TRANSMISSION(w_central[nb_c])
cont_est *= 0.5 * (1 + IGM_T)
EW_ret = (
    (pm_data[nb_c] - cont_est) * fwhm / cont_est
    / one_plus_z_nb
)

print(np.count_nonzero(line))

# -999 marks the sources that were not selected
EW_ret_Arr = -999. * np.ones(mock['EW_Arr'].shape)
EW_ret_Arr[line] = EW_ret[line]

mask999 = np.where(EW_ret_Arr != -999.)

EW0_stack_diff = EW_ret_Arr[mask999] - real_out_ew0[mask999]
line_st = line

In [None]:
## 3 Filter method: EW0 retrieval in NB nb_c, using photometry rows -3 and -2
## as the two broad bands

# Fluxes and errors of the three filters
NB, BB_LC, BB_LU = pm_data[nb_c], pm_data[-3], pm_data[-2]
NB_err, BB_LC_err, BB_LU_err = pm_err[nb_c], pm_err[-3], pm_err[-2]

# Transmission curves ('t') and their wavelength grids ('w')
t_NB, t_BB_LC, t_BB_LU = (np.array(tcurves['t'][k]) for k in (nb_c, -3, -2))
w_NB, w_BB_LC, w_BB_LU = (np.array(tcurves['w'][k]) for k in (nb_c, -3, -2))

# The emission line is assumed to sit at the NB central wavelength
w_EL = w_central[nb_c]

F_EL, A, B, A_err, B_err = three_filter_method(
    NB, BB_LC, BB_LU, NB_err, BB_LC_err, BB_LU_err,
    t_NB, w_NB, t_BB_LC, t_BB_LU, w_BB_LC, w_BB_LU, w_EL
)

# Linear continuum A * w + B at the line wavelength, errors in quadrature
f_cont = B + A * w_EL
f_cont_var = A_err**2 * w_EL**2 + B_err**2
EW0 = F_EL / f_cont * 1215.67 / w_EL

# Candidate: 3-sigma NB excess over the continuum AND above the EW0 threshold
nb_excess_3fm = NB - f_cont
line = (EW0 > ew0min) & (nb_excess_3fm > 3*(NB_err**2 + f_cont_var) ** 0.5)

line_3f = line
EW0_3fm_diff = EW0[line_3f] - real_out_ew0[line_3f]

In [None]:
## NB 3FM: EW0 retrieval in NB nb_c with the NB-only variant of the method
# Every result is stored under its own *_nb3 name. Reusing `EW0`, `line`,
# `f_cont`, ... here would overwrite the outputs of the 3-filter cell, and the
# comparison plot labelled '3-filter' would then show NB-3FM values.
# The line wavelength is read from w_central directly instead of relying on a
# `w_EL` left over from another cell.
w_EL_nb3 = w_central[nb_c]

F_EL_nb3, A_nb3, B_nb3, A_err_nb3, B_err_nb3 = NB_3fm(
    pm_data, pm_err, nb_c, tcurves, w_central
)

# Linear continuum A * w + B at the line wavelength, errors in quadrature
f_cont_nb3 = A_nb3 * w_EL_nb3 + B_nb3
f_cont_var_nb3 = w_EL_nb3**2 * A_err_nb3**2 + B_err_nb3**2
EW0_nb3 = F_EL_nb3 / f_cont_nb3 * 1215.67/w_EL_nb3

# Candidate: 3-sigma NB excess over the continuum AND above the EW0 threshold
line_nb = (
    (pm_data[nb_c] - f_cont_nb3 > 3*(pm_err[nb_c]**2 + f_cont_var_nb3) ** 0.5)
    & (EW0_nb3 > ew0min)
)

EW0_nb3_diff = EW0_nb3[line_nb] - real_out_ew0[line_nb]

In [None]:
## Plot: measured vs. real EW0 and relative-error distributions per method
fig, ax = plt.subplots(2, figsize=(10, 16))
plt.subplots_adjust(hspace=.1)

# Rebuild the 3-filter measurement from its stored residual instead of reading
# the shared `EW0` name: several cells assign `EW0`, so at this point it does
# not necessarily hold the 3-filter result.
EW0_3fm_sel = EW0_3fm_diff + real_out_ew0[line_3f]

ax[0].plot(real_out_ew0[line_st], EW_ret_Arr[line_st], marker='.', linestyle='',
    label='Stacking', markersize=12)
ax[0].plot(real_out_ew0[line_3f], EW0_3fm_sel, marker='.', linestyle='',
    label='3-filter', markersize=12)
# ax[0].plot(real_out_ew0[line_nb], EW0_nb3_diff + real_out_ew0[line_nb],
#     marker='.', linestyle='', label='NB-3FM', markersize=12)
# 1:1 reference line
ax[0].plot(np.linspace(0, 150, 100), np.linspace(0, 150, 100), linestyle='--',
    linewidth=4, color='red')
ax[0].set_ylim((0, 200))
ax[0].set_xlim((0, 200))
ax[0].legend(fontsize=15)
# Raw strings: '\A', '\e' and '\m' are invalid escape sequences in a normal
# string literal (SyntaxWarning on recent Python); the text is unchanged.
ax[0].set_xlabel(r'Real EW$_0$ ($\AA$)', fontsize=15)
ax[0].set_ylabel(r'Measured EW$_0$ ($\AA$)', fontsize=15)

hist_bins = np.linspace(-1.5, 1.5, 30)
ax[1].hist(EW0_stack_diff / real_out_ew0[line_st],
    bins=hist_bins, alpha=0.6, label='Stacking')
ax[1].hist(EW0_3fm_diff / real_out_ew0[line_3f],
     bins=hist_bins, alpha=0.6, label='3-filter')
# ax[1].hist(EW0_nb3_diff / real_out_ew0[line_nb],
#      bins=hist_bins, alpha=0.6, label='NB-3FM')
ax[1].legend(fontsize=15)
ax[1].set_xlabel(r'$\epsilon_{\mathrm{rel}}$(EW$_0$)', fontsize=15)
ax[1].set_ylabel('N', fontsize=15)

plt.show()

# Summary statistics: median residual (bias), MAD (sigma), median |rel. error|
print('STACKING :    Bias = {0:0.2f}, sigma = {1:0.2f}, median relerr = {2:0.2f}'
    .format(np.median(EW0_stack_diff), mad(EW0_stack_diff),
    np.median(np.abs(EW0_stack_diff / real_out_ew0[line_st]))))
print('3FM      :    Bias = {0:0.2f}, sigma = {1:0.2f}, median relerr = {2:0.2f}'
    .format(np.median(EW0_3fm_diff), mad(EW0_3fm_diff),
    np.median(np.abs(EW0_3fm_diff / real_out_ew0[line_3f]))))
# print('3FM NB   :    Bias = {0:0.2f}, sigma = {1:0.2f}, median relerr = {2:0.2f}'
#     .format(np.median(EW0_nb3_diff), mad(EW0_nb3_diff),
#     np.median(np.abs(EW0_nb3_diff / real_out_ew0[line_nb]))))

In [None]:
## Catalogue EW distribution of the selected sources vs. all sources in the NB
fig, ax = plt.subplots(figsize=(12, 9))

histbins = np.linspace(10, 150, 10)

# Redshift at which Lya falls on the central wavelength of NB nb_c
z_nb = w_central[nb_c] / w_lya - 1

# Mocks whose observed Lya wavelength lies within half the NB FWHM (plus a
# margin of 6) of the NB central wavelength
line_in_nb_c = np.where(
    np.abs((mock['redshift_Lya_Arr'] + 1)*w_lya - (z_nb + 1)*w_lya)
    < (nb_fwhm(tcurves, nb_c, True)/2 + 6)
)

ax.hist(mock['EW_Arr'][line_st], bins=histbins, label='Stack selection', alpha=0.6)
ax.hist(mock['EW_Arr'][line_3f], bins=histbins, label='3-filter selection', alpha=0.6)
# ax.hist(mock['EW_Arr'][line_nb], bins=histbins, label='NB-3FM selection', alpha=0.6)
ax.hist(mock['EW_Arr'][line_in_nb_c], bins=histbins, histtype='step', label='All')

# Raw string: '\A' is an invalid escape sequence in a normal string literal
# (SyntaxWarning on recent Python); the rendered label is unchanged.
ax.set_xlabel(r'Real EW$_0$ ($\AA$)', fontsize=15)
ax.set_ylabel('N', fontsize=15)

ax.legend(fontsize=14)
plt.show()

In [None]:
## Example SEDs of stacking-selected sources whose Lya line is NOT in NB nb_c
np.random.seed(22)

# Selected by the stacking method, but with the observed Lya wavelength farther
# from the NB central wavelength than half the NB FWHM plus a margin of 10.
outside_nb_c = ~(
    np.abs((mock['redshift_Lya_Arr'] + 1)*w_lya - (z_nb + 1)*w_lya)
    < (nb_fwhm(tcurves, nb_c, True)/2 + 10)
)
candidates = np.where(line_st & outside_nb_c)[0]
# np.random.choice(..., replace=False) raises ValueError when `size` exceeds
# the population, so never ask for more examples than there are candidates.
selection = np.random.choice(
    candidates, size=min(6, len(candidates)), replace=False
)

# Colours of the last four filters (rows -4 .. -1), in that order
bb_colors = ('purple', 'green', 'red', 'dimgray')

for mock_n in selection:
    pm = pm_data[:, mock_n]
    lya_w_obs = (mock['redshift_Lya_Arr'][mock_n] + 1) * 1215.67

    fig, ax = plt.subplots(figsize = (12, 8))
    ax.plot(mock['w_Arr'], mock['SEDs_no_line'][mock_n], c='orange', lw=2, zorder=-1)
    ax.plot(mock['w_Arr'], mock['SEDs_no_IGM'][mock_n], c='tan', lw=2, zorder=-1)
    ax.errorbar(w_central[:-3], pm[:-3], yerr=pm_err[:-3, mock_n], c='gray', fmt='.')

    # Last four filters: square markers, then error bars whose horizontal
    # extent is half the filter FWHM
    for bb_i, bb_color in zip(range(-4, 0), bb_colors):
        ax.scatter(w_central[bb_i], pm[bb_i], c=bb_color, marker='s')
    for bb_i, bb_color in zip(range(-4, 0), bb_colors):
        ax.errorbar(w_central[bb_i], pm[bb_i],
                    xerr=bb_fwhm[bb_i]/2, yerr=pm_err[bb_i, mock_n],
                    fmt='none', color=bb_color, elinewidth=4)

    # Continuum of the line-free SED at the observed Lya wavelength. Stored
    # under its own name so the per-source `f_line_cont` array computed
    # earlier in the notebook is not overwritten by a scalar.
    cont_at_lya = NB_synthetic_photometry(
        mock['SEDs_no_line'][mock_n], mock['w_Arr'],
        lya_w_obs,
        11
    )

    ax.plot(
        lya_w_obs, cont_at_lya,
        marker='.', markersize=15, color='black'
    )
    # cont_est was rescaled in place by (1 + IGM_T) * 0.5 in the stacking
    # cell; divide that factor out to show the stacked estimate itself.
    ax.errorbar(
        w_central[nb_c], cont_est[mock_n]/ ((1 + IGM_T) * 0.5), marker='^', markersize=11,
        yerr = cont_err[mock_n], elinewidth=3, capsize=3, capthick=3
    )

    # Raw strings: '\l', '\ ', '\(' and '\A' are invalid escape sequences in a
    # normal string literal (SyntaxWarning on recent Python); text unchanged.
    ax.set_xlabel(r'$\lambda\ (\AA)$', size=15)
    ax.set_ylabel(r'$f_\lambda$ (erg cm$^{-2}$ s$^{-1}$ $\AA^{-1}$)', size=15)
    sed_max = np.amax(mock['SEDs_no_line'][mock_n])
    ax.set_ylim((-sed_max, 4 * sed_max))

    plt.show()
    print(mock['EW_Arr'][mock_n])

In [None]:
# Number of stacking-selected sources whose observed Lya wavelength lies
# outside NB nb_c (half its FWHM plus a margin of 10); shown as a shape tuple.
lya_offset = np.abs((mock['redshift_Lya_Arr'] + 1)*w_lya - (z_nb + 1)*w_lya)
half_window = nb_fwhm(tcurves, nb_c, True)/2 + 10
np.where(line_st & ~(lya_offset < half_window))[0].shape