In [None]:
import os
import sys
# Mark `numpy._core` as unimportable before NumPy is loaded, then register
# `numpy._core.multiarray` / `numpy._core.numeric` as aliases of the
# corresponding `np.core` submodules.
# NOTE(review): presumably a compatibility shim so that pickles referencing
# `numpy._core.*` can be loaded by the pd.read_pickle calls further down —
# confirm. It relies on `np.core` being available in the installed NumPy.
sys.modules['numpy._core'] = None
import numpy as np
sys.modules['numpy._core.multiarray'] = np.core.multiarray
sys.modules['numpy._core.numeric'] = np.core.numeric

import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf

# NOTE(review): this variable is assigned after numpy and tensorflow have
# already been imported; if the OpenMP runtime they use reads it at load time
# the value may be ignored — confirm, and consider setting it before the imports.
os.environ["OMP_NUM_THREADS"] = "10"
# Restrict TensorFlow to a single intra-op and a single inter-op thread.
tf.config.threading.set_intra_op_parallelism_threads(1)
tf.config.threading.set_inter_op_parallelism_threads(1)

from music_nextsim_tuning import train_params, plot_scatter_histo
# Number of seconds in one day.
DAY_SECONDS = 24 * 60 * 60

In [None]:
def build_and_compile_model(input_size):
    """Create and compile the fully connected regression network.

    The network is a ``tf.keras.Sequential`` stack of three ``Dense`` layers:
    16 ReLU units, 8 ReLU units and a single output unit without activation.
    It is compiled with the mean-absolute-error loss and an Adam optimizer
    constructed with 0.0005 as its first argument.

    Parameters
    ----------
    input_size : int
        Number of input features per sample; used as the input shape of the
        first layer.

    Returns
    -------
    tf.keras.Sequential
        The compiled, untrained model.
    """
    dense = tf.keras.layers.Dense
    first_hidden = dense(16, activation='relu', input_shape=(input_size,))
    second_hidden = dense(8, activation='relu')
    output_layer = dense(1)
    model = tf.keras.Sequential([first_hidden, second_hidden, output_layer])

    optimizer = tf.keras.optimizers.Adam(0.0005)
    model.compile(loss='mean_absolute_error', optimizer=optimizer)
    return model

def train_func(param_name, i, train_features, train_labels, test_features, test_labels):
    """Train one freshly built network and return it together with its history.

    Besides its arguments, this function reads the notebook-level names
    ``idir`` (directory for the checkpoint file), ``epochs`` (maximum number
    of epochs) and ``patience`` (early-stopping patience); they must be
    defined before it is called.

    Parameters
    ----------
    param_name : str
        Name inserted into the checkpoint file name.
    i : int
        Repeat counter; printed and inserted (zero-padded to two digits) into
        the checkpoint file name.
    train_features, train_labels
        Data the model is fitted on. ``train_features.shape[1]`` sets the
        network input size.
    test_features, test_labels
        Data passed to ``fit`` as ``validation_data``; both callbacks monitor
        the resulting ``val_loss``.

    Returns
    -------
    tuple
        ``(model, history)`` where ``history`` is the object returned by
        ``model.fit``.
    """
    print(i)
    n_inputs = train_features.shape[1]
    model = build_and_compile_model(n_inputs)

    checkpoint_path = f'{idir}/model_{param_name}_{i:02}.hdf5'
    callbacks = [
        # Stop once val_loss has not improved for `patience` epochs and roll
        # the model back to its best weights.
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=patience,
            restore_best_weights=True,
            verbose=1,
        ),
        # Write the model to disk whenever val_loss reaches a new minimum.
        tf.keras.callbacks.ModelCheckpoint(
            checkpoint_path,
            save_best_only=True,
            monitor='val_loss',
            mode='min',
        ),
    ]

    validation = (test_features, test_labels)
    history = model.fit(
        train_features,
        train_labels,
        validation_data=validation,
        verbose=0,
        epochs=epochs,
        callbacks=callbacks,
    )
    return model, history

In [None]:
# COMMON: settings shared by every configuration processed below.

# Directory from which rgps.pickle is read for all configurations.
rdir = './music_matrix/cfg01_m20'

# Training controls: forwarded to train_params; epochs and patience are also
# read directly by train_func.
n_repeats, epochs, patience = 10, 300, 10

# Plotting controls: forwarded to plot_scatter_histo.
bins, density = 14, True
xlims = dict(
    compression_factor=[0, 20000],
    C_lab=[0, 2e6],
)

In [None]:
# One entry per configuration processed by the training loop:
#   idir          - directory with ftrs.pickle / lbls.pickle; outputs are written there too
#   max_date      - rows with a date later than this are dropped
#   good_features - feature columns kept as network inputs (order preserved)
configs = [
    {
        'idir': './music_matrix/cfg01_m20',
        'max_date': '2007-05-01',
        'good_features': [
            'hom_01', 'cor_02', 'cor_04', 'ASM_04', 'hom_02', 'div_90', 'ASM_02', 'ene_04',
            'ASM_01', 'ene_02', 'con_02', 'ene_01', 'con_01', 'con_04', 'a50_05', 'dis_02',
            'dis_04', 'dis_01', 'a90_10', 'mom_3o', 'cnv_50', 'she_50', 'mom_3s', 'mom_2o',
            'hom_04', 'a50_10', 'she_90', 'mom_2s', 'a50_15', 'cnv_90', 'mom_1o', 'mom_1s',
        ],
    },
    {
        'idir': './music_matrix/cfg02_m20',
        'max_date': '2007-06-01',
        'good_features': [
            'cor_01', 'hom_01', 'ASM_04', 'hom_02', 'ene_04', 'ene_02', 'ASM_02', 'cor_02',
            'ene_01', 'ASM_01', 'div_90', 'con_02', 'con_01', 'a50_05', 'con_04', 'cnv_50',
            'she_50', 'hom_04', 'dis_02', 'she_90', 'dis_01', 'dis_04', 'cnv_90', 'a50_10',
            'mom_3o', 'mom_1s', 'mom_3s', 'a50_15', 'mom_2o', 'mom_1o', 'mom_2s',
        ],
    },
]

In [None]:
# For every configuration: load the pickled tables, drop rows after max_date,
# keep the selected features, standardise, train, plot and save the results.
for config in configs:
    # `idir` must stay a notebook-level name: train_func reads it to build
    # its checkpoint path.
    idir, max_date, good_features = (
        config[key] for key in ('idir', 'max_date', 'good_features')
    )

    # Features and labels come from the configuration's own directory;
    # the rgps table is always read from the shared `rdir`.
    inp_ftrs = pd.read_pickle(f'{idir}/ftrs.pickle')
    inp_lbls = pd.read_pickle(f'{idir}/lbls.pickle')
    inp_rgps = pd.read_pickle(f'{rdir}/rgps.pickle')
    print(inp_ftrs.shape, inp_lbls.shape, inp_rgps.shape)

    # Index labels of the rows dated after the cutoff. Labels are dropped
    # using the feature table's index, since the date is looked up on the
    # feature table only.
    late_ftrs_index = inp_ftrs[inp_ftrs.date > max_date].index
    late_rgps_index = inp_rgps[inp_rgps.date > max_date].index
    inp_lbls = inp_lbls.drop(late_ftrs_index)
    inp_ftrs = inp_ftrs.drop(late_ftrs_index)
    inp_rgps = inp_rgps.drop(late_rgps_index)
    print(inp_ftrs.shape, inp_lbls.shape, inp_rgps.shape)

    # Keep only the selected feature columns, as floats.
    inp_ftrs = inp_ftrs[good_features].astype(float)
    inp_rgps = inp_rgps[good_features].astype(float)
    print(inp_ftrs.shape, inp_lbls.shape, inp_rgps.shape)

    # Standardisation statistics are taken from the feature / label tables;
    # the rgps table is scaled with the feature statistics.
    ftrs_avg, ftrs_std = inp_ftrs.mean(), inp_ftrs.std()
    lbls_avg, lbls_std = inp_lbls.mean(), inp_lbls.std()

    inp_ftrs = (inp_ftrs - ftrs_avg) / ftrs_std
    inp_rgps = (inp_rgps - ftrs_avg) / ftrs_std
    inp_lbls = (inp_lbls - lbls_avg) / lbls_std

    param_names = list(inp_lbls.columns)
    print(param_names)

    (
        rgps_pred_params,
        test_pred_params,
        test_labe_params,
        test_prms_params,
        train_prms_params,
    ) = train_params(
        param_names, inp_ftrs, inp_lbls, inp_rgps, train_func,
        lbls_std, lbls_avg, n_repeats, epochs, patience, ax=None,
    )
    plot_scatter_histo(
        param_names, test_labe_params, test_pred_params, rgps_pred_params,
        bins, density, xlims,
    )
    # train_prms_params is returned by train_params but not written to the archive.
    np.savez(
        f'{idir}/nn_training.npz',
        rgps_pred_params=rgps_pred_params,
        test_pred_params=test_pred_params,
        test_labe_params=test_labe_params,
        test_prms_params=test_prms_params,
    )