In [None]:
from my_functions import *

import numpy as np

from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn import model_selection

import matplotlib.pyplot as plt
import matplotlib
# Default font size for every figure drawn in this notebook
matplotlib.rcParams['font.size'] = 12

In [None]:
# Wavelength constant for the Lyman-alpha line (presumably rest-frame, in Angstrom)
w_lya = 1215.67

# Per-filter arrays returned by helpers that are not defined in this notebook
# (presumably provided by the star import of my_functions).
# NOTE(review): assumed to be filter central wavelengths and FWHMs for filter
# indices 0..59 -- confirm against my_functions.
w_central = central_wavelength()
nb_fwhm_Arr = nb_fwhm(range(60))

In [None]:
# Read the train-test arrays from disk
dirname = '/home/alberto/almacen/ML_train_sets'

# The five '<name>_tt.npy' files are loaded in this order and unpacked one-to-one
pm_flx, pm_err, lya_lines, zspec, L_Arr = (
    np.load(f'{dirname}/{stem}_tt.npy')
    for stem in ('pm_flx', 'pm_err', 'lya_lines', 'zspec', 'L_Arr')
)

# Class labels: 1 = LAE, 0 = no LAE
# NOTE(review): unlike the files above, this one has no '_tt' suffix -- confirm
# it belongs to the same train-test set.
labels = np.load(f'{dirname}/labels.npy')

In [None]:
# Assemble the feature matrix, one row per object. Column order:
#   pm_flx rows 1:-4, pm_flx rows -3:, pm_err rows 1:-4, pm_err rows -3:,
#   then lya_lines as a single trailing column.
tt_mat = np.hstack(
    [
        band_data[rows].T
        for band_data in (pm_flx, pm_err)
        for rows in (slice(1, -4), slice(-3, None))
    ]
    + [lya_lines.reshape(-1, 1)]
)
print(tt_mat.shape)

# Hold out 20% of the objects for testing; the seed is kept in a variable so
# that other arrays can be split with the same random_state.
split_seed = 23894567
(x_train, x_test,
 y_train, y_test) = model_selection.train_test_split(
    tt_mat, labels, test_size=0.2, random_state=split_seed
)

### Pre-processing ###

# Column layout used below: [:58] are fluxes, [58:-1] are the matching flux
# errors, and the last column is left untouched.
#
# Both steps are applied to BOTH splits. Previously the 1e17 rescaling was only
# applied to x_test, so the PCA and the scaler were fitted on features with a
# different scale from the ones they were later asked to transform.
for x_split in (x_train, x_test):
    # Replace every error column by flux / error, computed from the raw
    # (not yet rescaled) fluxes.
    # NOTE(review): flux / err is a signal-to-noise ratio, i.e. the inverse of
    # a relative error (err / flux) -- confirm which one is intended.
    x_split[:, 58:-1] = x_split[:, :58] / x_split[:, 58:-1]
    # Multiply fluxes by 1e17 (no additive constant is applied)
    x_split[:, :58] = x_split[:, :58] * 1e17

# PCA: a float n_components with svd_solver='full' keeps as many components as
# are needed to explain that fraction (99%) of the variance. The decomposition
# is fitted on the training split only and then applied to both splits.
pca = PCA(n_components=0.99, svd_solver='full')

pca.fit(x_train)
x_train = pca.transform(x_train)
x_test = pca.transform(x_test)
# Show how many components were kept (a bare `x_train.shape` in the middle of
# a cell is never displayed, so print it explicitly).
print(x_train.shape)

# Standardise every principal component, again using statistics from the
# training split only.
# scaler = MinMaxScaler()
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)


In [None]:
# Split the auxiliary per-object arrays (magnitude from pm_flx[-2], zspec and
# L_Arr) with the same test_size and random_state as the feature matrix, so
# that their test parts line up row-by-row with x_test / y_test.
#
# A single call splits all three arrays with one set of indices and makes
# train_test_split raise if their lengths disagree, instead of silently
# producing misaligned arrays from three independent calls.
(rmag_train, rmag_test,
 zspec_train, zspec_test,
 L_Arr_train, L_Arr_test) = model_selection.train_test_split(
    flux_to_mag(pm_flx[-2], w_central[-2]), zspec, L_Arr,
    test_size=0.2, random_state=split_seed
)

# The split only reproduces the feature-matrix split when the number of
# objects is the same; fail early if that assumption is broken.
assert len(rmag_test) == len(y_test), (
    'auxiliary arrays and the feature matrix have different numbers of objects'
)

In [None]:
def do_grid_search():
    """Cross-validate every MLP hyper-parameter combination and return the best.

    The grid below holds 4 * 2 * 2 = 16 combinations. The previous
    implementation used RandomizedSearchCV with n_iter=20, which is larger
    than the grid: scikit-learn then warns and evaluates all 16 combinations
    anyway. GridSearchCV performs that exhaustive search explicitly.

    Reads the module-level ``x_train`` and ``y_train`` arrays.

    Returns
    -------
    dict
        ``best_params_`` of the fitted search, i.e. the parameter setting with
        the best mean 5-fold cross-validation score.
    """
    # Create the parameter grid based on the results of random search
    param_grid = {
        'hidden_layer_sizes': [(60, 60), (60, 40), (70, 50), (50, 30)],
        'solver': ['adam'],
        'alpha': [1e-4],
        'learning_rate': ['adaptive', 'constant'],
        'max_iter': [1000],
        'n_iter_no_change': [10],
        'shuffle': [False, True]
    }
    # Base model; every searched parameter is overridden by the grid
    nn = MLPClassifier()
    # Exhaustive search over the grid, 5-fold CV, using all available cores
    grid_search = GridSearchCV(
        estimator=nn, param_grid=param_grid,
        cv=5, n_jobs=-1, pre_dispatch='2*n_jobs',
        verbose=4
    )

    grid_search.fit(x_train, y_train)

    return grid_search.best_params_

# Run the hyper-parameter search and keep the winning parameter setting
best_params = do_grid_search()

# Hard-coded parameter set that can be used instead of running the search.
# NOTE(review): alpha=0.001 and the 'activation' key are not part of the grid
# in do_grid_search() (alpha is fixed to 1e-4 there and activation is not
# searched), so this presumably comes from an earlier run with a different
# grid -- confirm before reusing it.
# best_params = {
#     'solver': 'adam',
#     'shuffle': False,
#     'n_iter_no_change': 10,
#     'max_iter': 1000,
#     'learning_rate': 'adaptive',
#     'hidden_layer_sizes': (60, 60),
#     'alpha': 0.001,
#     'activation': 'relu'
# }
print(best_params)

In [None]:
# Train one network on the whole training split with the selected parameters
# (fit returns the estimator itself, so the two steps can be chained).
cl_best = MLPClassifier(**best_params).fit(x_train, y_train)

# Mean accuracy on each split
train_score = cl_best.score(x_train, y_train)
test_score = cl_best.score(x_test, y_test)
print(f'Score\n\nTrain: {train_score:0.3f}\nTest: {test_score:0.3f}')

# Predicted class for every object in the test split
pred_test = cl_best.predict(x_test)

In [None]:
# Predicted class of the test objects against magnitude, once versus zspec and
# once versus L_Arr. Green = predicted 1 (LAE), red = predicted 0 (no LAE).
colors = ['g' if is_lae else 'r' for is_lae in pred_test]

for y_values, y_label in ((zspec_test, 'zspec'), (L_Arr_test, 'L_lya')):
    fig, ax = plt.subplots(figsize=(6, 6))

    ax.scatter(rmag_test, y_values, marker='o', s=10, color=colors)

    ax.set_ylabel(y_label)
    ax.set_xlabel('r')

    plt.show()

# zspec distribution of the test objects, split by predicted class
# (green: predicted 1, red: predicted 0), on 60 edges between 0 and 4.
fig, ax = plt.subplots(figsize=(6, 4))

bins = np.linspace(0, 4, 60)
for pred_class, line_color in ((1, 'g'), (0, 'r')):
    ax.hist(zspec_test[pred_test == pred_class], color=line_color,
            histtype='step', bins=bins, lw=2)

plt.show()

def _plot_accuracy_by_bin(values, bins, xlabel):
    """Histogram right/wrong test predictions and plot the accuracy per bin.

    Draws two figures: the histograms of ``values`` for correctly (green) and
    incorrectly (red) classified test objects, and the fraction of correctly
    classified objects in each bin.

    Reads the module-level ``pred_test`` and ``y_test`` arrays.

    Parameters
    ----------
    values : array
        One value per test object, aligned with ``pred_test`` / ``y_test``.
    bins : array
        Bin edges shared by both histograms.
    xlabel : str
        Label of the x axis of both figures.

    Returns
    -------
    fig, ax, h_good, h_bad
        Figure and axes of the accuracy plot, and the return values of the two
        ``ax.hist`` calls (correct and incorrect predictions).
    """
    fig, ax = plt.subplots(figsize=(6, 4))

    h_good = ax.hist(values[pred_test == y_test], color='g', histtype='step',
                     bins=bins, lw=2)
    h_bad = ax.hist(values[pred_test != y_test], color='r', histtype='step',
                    bins=bins, lw=2)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('N')

    plt.show()

    # good / (good + bad) equals the former 1 / (1 + bad / good), but does not
    # divide by zero when a bin holds no correctly classified objects. Bins
    # with no objects at all stay NaN, as before, without a RuntimeWarning.
    n_good, n_bad = h_good[0], h_bad[0]
    n_total = n_good + n_bad
    accuracy = np.divide(n_good, n_total,
                         out=np.full(n_total.shape, np.nan),
                         where=n_total > 0)

    fig, ax = plt.subplots(figsize=(6, 3))

    ax.plot(bin_centers(bins), accuracy)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Fraction correctly classified')
    # ax.set_ylim(0, 1.01)

    plt.show()

    return fig, ax, h_good, h_bad


# Accuracy as a function of magnitude
bins = np.linspace(16, 24, 30)
fig, ax, h_good, h_bad = _plot_accuracy_by_bin(rmag_test, bins, 'r')

# Accuracy as a function of L_Arr
bins = np.linspace(41, 46, 30)
fig, ax, h_good, h_bad = _plot_accuracy_by_bin(L_Arr_test, bins, 'L_lya')