In [None]:
import numpy as np
import pandas as pd
from my_functions import *
from load_mocks import load_QSO_mock, load_SF_mock

In [None]:
def set_prep(pm_flx, pm_err, L_lya, L_Arr, zspec, N_samples=3_000):
    """Select the first ``N_samples`` sources of a catalogue.

    Parameters
    ----------
    pm_flx, pm_err : 2-D arrays
        Fluxes and their errors. Sources are selected along the second
        axis (columns); the first axis is kept whole.
    L_lya : 1-D array
        Per-source values returned as the labels of the sample.
    L_Arr : 1-D array
        Per-source values returned alongside the sample.
    zspec : 1-D array
        Per-source values returned alongside the sample.
    N_samples : int, optional
        Number of leading sources to keep. It is capped at ``len(L_lya)``
        so that the default does not raise ``IndexError`` on catalogues
        smaller than 3000 sources.

    Returns
    -------
    tuple
        ``(sampled_pm_flx, sampled_pm_err, sampled_L_Arr, sampled_zspec,
        sampled_labels)``, each restricted to the selected sources.
    """
    # Never ask for more sources than the catalogue actually holds.
    n_keep = min(N_samples, len(L_lya))

    # An explicit index array (rather than a slice) keeps the outputs as
    # copies, independent of the input arrays.
    keep = np.arange(n_keep)

    sampled_pm_flx = pm_flx[:, keep]
    sampled_pm_err = pm_err[:, keep]
    sampled_L_Arr = L_Arr[keep]
    sampled_zspec = zspec[keep]
    sampled_labels = L_lya[keep]

    return sampled_pm_flx, sampled_pm_err, sampled_L_Arr, sampled_zspec, sampled_labels

In [None]:
# Tag interpolated into the mock-catalogue names that are loaded and into the
# names of the CSV files written at the end of the notebook.
# NOTE(review): presumably the alternative value is 'train' -- confirm.
t_or_t = 'test'

In [None]:
# Names of the two mock catalogues; `t_or_t` is interpolated into both.
qso_name = f'QSO_double_{t_or_t}_minijpas_0'
sf_name = f'LAE_12.5deg_z2-4.25_{t_or_t}_minijpas_0'

# Load both mocks first, QSO then SF.
qso_flx, qso_err, EW_qso, qso_zspec, qso_L_lya = load_QSO_mock(
    qso_name, add_errs=True, how_many=10
)
sf_flx, sf_err, EW_sf, sf_zspec, sf_L_lya = load_SF_mock(
    sf_name, add_errs=True, how_many=10
)

# ---------------------------------------------------------------- QSO mock
qso_cont_est_lya, qso_cont_err_lya = estimate_continuum(
    qso_flx, qso_err, IGM_T_correct=True
)
# NOTE(review): the literal 30 is presumably a line-selection threshold --
# confirm against is_there_line's signature.
qso_line = is_there_line(
    qso_flx, qso_err, qso_cont_est_lya, qso_cont_err_lya, 30
)
qso_lya_lines, qso_lya_cont_lines, _ = identify_lines(
    qso_line, qso_flx, qso_err, first=True, return_line_width=True
)

# Entries equal to -1 in qso_lya_lines are skipped; those sources keep z = 0.
qso_selected = np.where(np.array(qso_lya_lines) != -1)
qso_z_Arr = np.zeros(len(qso_zspec))
qso_z_Arr[qso_selected] = z_NB(np.array(qso_lya_cont_lines)[qso_selected])

# Only the third value returned by EW_L_NB is kept.
_, _, L_qso_Arr, _, _, _ = EW_L_NB(
    qso_flx,
    qso_err,
    qso_cont_est_lya,
    qso_cont_err_lya,
    qso_z_Arr,
    qso_lya_lines,
    N_nb=0,
)

# ----------------------------------------------------------------- SF mock
sf_cont_est_lya, sf_cont_err_lya = estimate_continuum(
    sf_flx, sf_err, IGM_T_correct=True
)
sf_line = is_there_line(
    sf_flx, sf_err, sf_cont_est_lya, sf_cont_err_lya, 30
)
sf_lya_lines, sf_lya_cont_lines, _ = identify_lines(
    sf_line, sf_flx, sf_err, first=True, return_line_width=True
)

# Same treatment as for the QSO mock: -1 entries keep z = 0.
sf_selected = np.where(np.array(sf_lya_lines) != -1)
sf_z_Arr = np.zeros(len(sf_zspec))
sf_z_Arr[sf_selected] = z_NB(np.array(sf_lya_cont_lines)[sf_selected])

_, _, L_sf_Arr, _, _, _ = EW_L_NB(
    sf_flx,
    sf_err,
    sf_cont_est_lya,
    sf_cont_err_lya,
    sf_z_Arr,
    sf_lya_lines,
    N_nb=0,
)

In [None]:
def _build_features(flx, err, L_Arr, zspec):
    """Stack the per-source features column-wise, one row per source.

    Columns, in order: fluxes of rows 2..54 and of the last 3 rows of `flx`,
    the absolute relative errors |err / flx| of those same rows, then
    `L_Arr` and `zspec` as the last two columns.
    """
    return np.hstack(
        (
            flx[2:55].T,
            flx[-3:].T,
            np.abs(err[2:55].T / flx[2:55].T),
            np.abs(err[-3:].T / flx[-3:].T),
            L_Arr.reshape(-1, 1),
            zspec.reshape(-1, 1),
        )
    )


# Keep every source of each mock (N_samples is set to the catalogue size).
sampled_qso_flx, sampled_qso_err, sampled_qso_L, sampled_qso_zspec, qso_labels =\
    set_prep(qso_flx, qso_err, qso_L_lya, L_qso_Arr, qso_zspec, len(qso_L_lya))
sampled_sf_flx, sampled_sf_err, sampled_sf_L, sampled_sf_zspec, sf_labels =\
    set_prep(sf_flx, sf_err, sf_L_lya, L_sf_Arr, sf_zspec, len(sf_L_lya))

dataset_qso = _build_features(
    sampled_qso_flx, sampled_qso_err, sampled_qso_L, sampled_qso_zspec
)
dataset_sf = _build_features(
    sampled_sf_flx, sampled_sf_err, sampled_sf_L, sampled_sf_zspec
)

dataset = np.vstack([dataset_qso, dataset_sf])
labels = np.concatenate([qso_labels, sf_labels])

# Shuffle features and labels with the SAME permutation *before* building the
# selection mask. Previously the mask combined the shuffled `dataset` with the
# unshuffled `labels`, so the `labels > 43` cut was applied to the wrong rows.
perm = np.random.permutation(dataset.shape[0])
dataset = dataset[perm]
labels = labels[perm]

# Column -2 is the L_Arr feature; keep rows where it is finite and whose
# label exceeds 43.
where = np.isfinite(dataset[:, -2]) & (labels > 43)

dataset = dataset[where]
labels = labels[where]
print(len(labels))

In [None]:
# Write the feature matrix and its labels to CSV, one file each, using the
# default pandas index and header.
for _array, _csv_path in (
    (dataset, f'MLmodels/datasets/dataset_magAll_{t_or_t}.csv'),
    (labels, f'MLmodels/datasets/tags_magAll_{t_or_t}.csv'),
):
    pd.DataFrame(_array).to_csv(_csv_path)