In [None]:
import glob
import os
import sys

# Thread-count environment variables have to be in place before the numerical
# libraries are imported: an OpenMP/BLAS runtime that initialises at import
# time would otherwise never see the value.
os.environ["OMP_NUM_THREADS"] = "10"

# numpy import shim. A None entry in sys.modules makes any later
# `import numpy._core` fail, and the two aliases below expose
# np.core.multiarray / np.core.numeric under the `numpy._core.*` names.
# NOTE(review): presumably this lets pickles that reference `numpy._core.*`
# be loaded with the installed numpy -- confirm against the numpy version
# that wrote the pickles.
sys.modules['numpy._core'] = None
import numpy as np
sys.modules['numpy._core.multiarray'] = np.core.multiarray
sys.modules['numpy._core.numeric'] = np.core.numeric

import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf

# Restrict TensorFlow's own thread pools to a single thread each
# (within one op and across independent ops).
tf.config.threading.set_intra_op_parallelism_threads(1)
tf.config.threading.set_inter_op_parallelism_threads(1)

In [None]:
def build_and_compile_model(input_size):
    """Create and compile the small dense regression network.

    The network is a ``tf.keras.Sequential`` stack of three ``Dense`` layers:
    16 units (ReLU), 8 units (ReLU) and a single linear output unit. It is
    compiled with the mean-absolute-error loss and an Adam optimizer
    constructed with 0.0005 as its first argument.

    Args:
        input_size: Length of one input vector; used as the ``input_shape``
            of the first layer.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    dense = tf.keras.layers.Dense
    layer_stack = [
        dense(16, activation='relu', input_shape=(input_size,)),
        dense(8, activation='relu'),
        dense(1),
    ]
    network = tf.keras.Sequential(layer_stack)

    adam = tf.keras.optimizers.Adam(0.0005)
    network.compile(loss='mean_absolute_error', optimizer=adam)
    return network


In [None]:
# When False, the inference loop skips any output file that already exists.
force = False

# One entry per trained configuration:
#   idir          -- directory that is read for ftrs.pickle, lbls.pickle and
#                    the model_*.hdf5 weight files
#   good_features -- column names selected from the feature tables
configs = [
    {
        'idir': './music_matrix/cfg01_m20',
        'good_features': [
            'hom_01', 'cor_02', 'cor_04', 'ASM_04', 'hom_02', 'div_90', 'ASM_02', 'ene_04',
            'ASM_01', 'ene_02', 'con_02', 'ene_01', 'con_01', 'con_04', 'a50_05', 'dis_02',
            'dis_04', 'dis_01', 'a90_10', 'mom_3o', 'cnv_50', 'she_50', 'mom_3s', 'mom_2o',
            'hom_04', 'a50_10', 'she_90', 'mom_2s', 'a50_15', 'cnv_90', 'mom_1o', 'mom_1s',
        ],
    },
    {
        'idir': './music_matrix/cfg02_m20',
        'good_features': [
            'cor_01', 'hom_01', 'ASM_04', 'hom_02', 'ene_04', 'ene_02', 'ASM_02', 'cor_02',
            'ene_01', 'ASM_01', 'div_90', 'con_02', 'con_01', 'a50_05', 'con_04', 'cnv_50',
            'she_50', 'hom_04', 'dis_02', 'she_90', 'dis_01', 'dis_04', 'cnv_90', 'a50_10',
            'mom_3o', 'mom_1s', 'mom_3s', 'a50_15', 'mom_2o', 'mom_1o', 'mom_2s',
        ],
    },
]

# Feature pickles to run inference on, in sorted path order.
rgps_ifiles = glob.glob('./music_matrix/rgps/*ftrs.pickle')
rgps_ifiles.sort()

In [None]:
# Run every stored model of every configuration on every RGPS feature file and
# save the de-normalised predictions next to the input as
# `...inference_<conf_name>.npz`.

# Per-configuration state (normalisation statistics, label names, model).
# It depends only on the configuration, so it is loaded at most once per cell
# run instead of once per RGPS file. Reset here so that re-running the cell
# picks up changed training files.
_config_cache = {}


def _prepare_config(config):
    """Load the training tables of one configuration and derive inference state.

    Reads `<idir>/ftrs.pickle` and `<idir>/lbls.pickle`, keeps the configured
    feature columns, and returns the per-column mean / standard deviation of
    features and labels, the label column names, and a compiled model sized
    for the selected features.

    Args:
        config: Mapping with the keys 'idir' and 'good_features'.

    Returns:
        dict with keys 'ftrs_avg', 'ftrs_std', 'lbls_avg', 'lbls_std',
        'param_names' and 'model'.
    """
    idir = config['idir']
    # NOTE(review): pd.read_pickle can execute code embedded in the file;
    # only load pickles from a trusted source.
    inp_ftrs = pd.read_pickle(f'{idir}/ftrs.pickle')
    inp_lbls = pd.read_pickle(f'{idir}/lbls.pickle')
    print(inp_ftrs.shape, inp_lbls.shape)

    inp_ftrs = inp_ftrs[config['good_features']].astype(float)
    print(inp_ftrs.shape, inp_lbls.shape)

    return dict(
        ftrs_avg=inp_ftrs.mean(),
        ftrs_std=inp_ftrs.std(),
        lbls_avg=inp_lbls.mean(),
        lbls_std=inp_lbls.std(),
        param_names=list(inp_lbls.columns),
        # Weights are (re)loaded before every prediction, so a single model
        # instance can be shared by all files of this configuration.
        model=build_and_compile_model(inp_ftrs.shape[1]),
    )


for rgps_ifile in rgps_ifiles:
    for config in configs:
        idir = config['idir']
        good_features = config['good_features']
        conf_name = idir.split('/')[-1].split('_')[0]
        rgps_ofile = rgps_ifile.replace('ftrs.pickle', f'inference_{conf_name}.npz')
        if os.path.exists(rgps_ofile) and not force:
            continue
        print(rgps_ofile)

        # Load the configuration lazily: nothing is read when every output
        # of this configuration already exists.
        cache_key = (idir, tuple(good_features))
        if cache_key not in _config_cache:
            _config_cache[cache_key] = _prepare_config(config)
        state = _config_cache[cache_key]
        model = state['model']

        # NOTE(review): same pickle trust caveat as for the training tables.
        inp_rgps = pd.read_pickle(rgps_ifile)
        inp_date = inp_rgps['date']
        inp_rgps = inp_rgps[good_features].astype(float)
        print(inp_rgps.shape)

        # Normalise with the statistics of the training features, then convert
        # to an array once instead of on every predict() call.
        inp_rgps = ((inp_rgps - state['ftrs_avg']) / state['ftrs_std']).to_numpy()

        results = {'date': inp_date.to_numpy()}
        for param_name in state['param_names']:
            lbl_std = state['lbls_std'][param_name]
            lbl_avg = state['lbls_avg'][param_name]
            members = []
            for model_n in range(10):
                model_file = f'{idir}/model_{param_name}_{model_n:02}.hdf5'
                model.load_weights(model_file)
                out_rgps = model.predict(inp_rgps)
                # Undo the label normalisation.
                out_rgps = out_rgps * lbl_std + lbl_avg
                members.append(out_rgps.flatten())
            # One row per stored model, one column per input sample.
            results[param_name] = np.vstack(members)

        # Write to a temporary file and move it into place, so an interrupted
        # run cannot leave a truncated .npz that the exists-check above would
        # skip on the next run.
        tmp_ofile = f'{rgps_ofile}.tmp'
        with open(tmp_ofile, 'wb') as fid:
            np.savez(fid, **results)
        os.replace(tmp_ofile, rgps_ofile)