In [None]:
import sys
sys.path.insert(0, '..')

from paus_utils import w_central

from jpasLAEs.utils import flux_to_mag, bin_centers

import pickle

import numpy as np

from load_paus_mocks import load_mock_dict

from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn import model_selection

import matplotlib.pyplot as plt
import matplotlib
# Global default font size for every figure drawn in this notebook
matplotlib.rcParams['font.size'] = 12

In [None]:
# Location and field of the pickled mock dictionary
savedir = '/home/alberto/almacen/PAUS_data/LF_corrections'
field_name = 'W3'

# In this cell nb_min / nb_max are only used to build the file name
nb_min = 0
nb_max = 16

mock_path = f'{savedir}/mock_dict_{field_name}_nb{nb_min}-{nb_max}.pkl'

# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only point this at files you produced yourself.
with open(mock_path, 'rb') as mock_file:
    mock_dict = pickle.load(mock_file)

# Display the entries available inside the 'SFG' mock
mock_dict['SFG'].keys()

In [None]:
# Every class must contribute the same number of objects, so the per-class
# sample size is the smallest count of 'nice_lya' entries over all mocks.
# np.sum is used instead of the built-in sum, which would iterate the array
# element by element in Python.
N_candidates_list = [np.sum(mock['nice_lya']) for mock in mock_dict.values()]

set_len = np.min(N_candidates_list)
print(f'{set_len=}')

In [None]:
# Build the train/test sample: draw `set_len` objects flagged by 'nice_lya'
# from every mock, so that each class contributes the same number of rows.
# Per-mock pieces are collected in lists and joined once after the loop.
tt_blocks = []
rmag_blocks = []
zspec_blocks = []
L_blocks = []

for mock_name, mock in mock_dict.items():
    mock_len = len(mock['zspec'])
    nice_lya = mock['nice_lya']

    # The global NumPy RNG is re-seeded on every pass, so the draw for a given
    # mock does not depend on how many mocks were processed before it.
    np.random.seed(299792458)
    selection = np.random.choice(np.arange(mock_len)[nice_lya], set_len,
                                 replace=False)

    # Feature columns for the selected objects:
    #   * first 40 rows of 'flx', rescaled by 1e17
    #   * matching rows of 'err' divided by the flux (relative error)
    #   * the 'lya_NB' value as a single extra column
    # NOTE(review): a zero flux would yield inf/NaN in the relative error —
    # confirm the mocks cannot contain exact zeros.
    flx_sel = mock['flx'][:40, selection].T
    err_sel = mock['err'][:40, selection].T
    this_set = np.hstack([
        flx_sel * 1e17,
        err_sel / flx_sel,
        mock['lya_NB'][selection].reshape(-1, 1),
    ])

    # Magnitude from the 4th-from-last row of 'flx' and the matching central
    # wavelength (presumably the r band, given the variable name — confirm).
    this_rmag = flux_to_mag(mock['flx'][-4, selection], w_central[-4])

    tt_blocks.append(this_set)
    rmag_blocks.append(this_rmag)
    zspec_blocks.append(mock['zspec'][selection])
    L_blocks.append(mock['L_lya'][selection])

# Row i of tt_set corresponds to element i of rmag, zspec and L_Arr
tt_set = np.vstack(tt_blocks)
rmag = np.concatenate(rmag_blocks)
zspec = np.concatenate(zspec_blocks)
L_Arr = np.concatenate(L_blocks)

# Integer class labels follow the iteration order of mock_dict: the first
# `set_len` rows get label 0, the next `set_len` rows label 1, and so on.
label_names = list(mock_dict.keys())
for i, mock_name in enumerate(label_names):
    print(f'{i} for {mock_name}')
labels = np.repeat(np.arange(len(label_names)), set_len)

In [None]:
# Hold out 20% of the sample for testing. The seed lives in its own variable
# so the exact same partition can be reproduced for other arrays.
split_seed = 299792458
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    tt_set, labels, test_size=0.2, random_state=split_seed,
)

## Pre-processing ##

# Optional PCA step (currently disabled)
# pca = PCA(n_components=0.99, svd_solver='full')

# pca.fit(x_train)
# x_train = pca.transform(x_train)
# x_test = pca.transform(x_test)
# print(x_train.shape)

# Standardise the features; the scaler is fitted on the training subset only
# and then applied unchanged to both subsets.
scaler = StandardScaler().fit(x_train)
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)

In [None]:

def do_grid_search(n_iter=10, random_state=299792458):
    """Search MLP hyper-parameters with a randomized, cross-validated search.

    Despite the name, this does not evaluate the full grid: it samples
    ``n_iter`` combinations from the candidate values below with
    ``RandomizedSearchCV`` and scores each one with 5-fold cross-validation
    on the notebook-level ``x_train`` / ``y_train``.

    Parameters
    ----------
    n_iter : int, default 10
        Number of parameter combinations to sample. 10 is scikit-learn's own
        default, so calling without arguments samples as many as before.
    random_state : int or None, default 299792458
        Seed used for both the parameter sampling and the network
        initialisation, so repeated runs give the same result. Pass ``None``
        to fall back to unseeded behaviour.

    Returns
    -------
    dict
        The ``best_params_`` of the fitted search, ready to be unpacked into
        ``MLPClassifier(**best_params)``.
    """
    # Candidate values for each hyper-parameter (single-element lists are
    # effectively fixed settings)
    param_distributions = {
        'hidden_layer_sizes': [(60, 60), (60, 40), (30, 30), (20, 20), (40, 20)],
        'solver': ['adam'],
        'alpha': [1e-4, 1e-5, 1e-6],
        'batch_size': [300, 500, 750, 'auto'],
        'learning_rate': ['adaptive', 'constant'],
        'max_iter': [10000],
        'n_iter_no_change': [10],
        'shuffle': [False, True]
    }
    # Base model; the seed fixes weight initialisation and batch shuffling,
    # which would otherwise differ between the parallel worker processes
    nn = MLPClassifier(random_state=random_state)
    # Randomized search over the candidates, using all available cores
    search = RandomizedSearchCV(
        estimator=nn, param_distributions=param_distributions,
        n_iter=n_iter, cv=5, n_jobs=-1, pre_dispatch='2*n_jobs',
        verbose=3, random_state=random_state,
    )

    search.fit(x_train, y_train)

    return search.best_params_

best_params = do_grid_search()

print(best_params)


In [None]:
# Train a fresh network with the hyper-parameters selected by the search
cl_best = MLPClassifier(**best_params).fit(x_train, y_train)

# Mean accuracy on the training and held-out partitions
train_score = cl_best.score(x_train, y_train)
test_score = cl_best.score(x_test, y_test)
print(f'Score\n\nTrain: {train_score:0.3f}\nTest: {test_score:0.3f}')

# Class predictions for the held-out set
pred_test = cl_best.predict(x_test)

In [None]:
# Partition the per-object metadata exactly like the feature matrix.
# train_test_split derives its indices only from the number of samples and
# the seed, so reusing `split_seed` with the same test fraction reproduces the
# feature split. Passing the three arrays in one call applies one set of
# indices to all of them and makes scikit-learn verify they have equal length.
(rmag_train, rmag_test,
 zspec_train, zspec_test,
 L_Arr_train, L_Arr_test) = model_selection.train_test_split(
    rmag, zspec, L_Arr, test_size=0.2, random_state=split_seed)

# Fail early if the metadata ever drifts out of step with the feature split
assert len(rmag_test) == len(y_test), \
    'metadata split does not match the feature/label split'

In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Confusion matrix for the test objects with r < 22.5.
# `labels` pins the matrix to one row/column per class even if a class is
# missing from the masked subset, so it always lines up with `label_names`.
# `normalize='true'` divides each row by its total (fraction of every true
# class assigned to each predicted class) and yields 0 for empty rows.
r_mask = (rmag_test < 22.5)
class_ids = np.arange(len(label_names))
cm = confusion_matrix(y_test[r_mask], pred_test[r_mask],
                      labels=class_ids, normalize='true')

# Plot confusion matrix
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, cmap="Blues", fmt='.2f',
            xticklabels=label_names, yticklabels=label_names,
            cbar=False, ax=ax)
ax.set_xlabel("Predicted Labels")
ax.set_ylabel("True Labels")
ax.set_title('r < 22.5')
plt.show()

In [None]:
# Confusion matrix for the test objects at or above the magnitude threshold.
# NOTE(review): the original title read "r >= 0" while the mask used 2; the
# title is now generated from the threshold actually applied. If this panel
# is meant to complement the "r < 22.5" one, set r_min = 22.5 — confirm.
r_min = 2
r_mask = (rmag_test >= r_min)

# `labels` keeps one row/column per class even when a class is absent from
# the subset; `normalize='true'` row-normalises and yields 0 for empty rows.
class_ids = np.arange(len(label_names))
cm = confusion_matrix(y_test[r_mask], pred_test[r_mask],
                      labels=class_ids, normalize='true')

# Plot confusion matrix
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, cmap="Blues", fmt='.2f',
            xticklabels=label_names, yticklabels=label_names,
            cbar=False, ax=ax)
ax.set_xlabel("Predicted Labels")
ax.set_ylabel("True Labels")
# Raw string: '\g' is not a valid escape sequence in a normal string literal
ax.set_title(rf'r $\geq$ {r_min}')
plt.show()