# Load Packages

In [None]:

%load_ext autoreload
%autoreload 2

import sys
from os.path import join
from tqdm.auto import tqdm
import joblib
sys.path.append("../../")

from src.file_manager.load_data import load_split_dict
from src.models.rue.save_load_model import load_model
from src.models.infernoise.tuning import tune_infernoise
from src.models.infernoise.predicting import infernoise_test_predictions
from src.misc import create_folder
from seed_file import seed

# Run configuration.
# `seed` (imported from seed_file) selects the per-seed model and prediction
# folders below; `tuning_seed` selects the tuning folder, so that folder does
# not change with the seed currently being run.
tuning_seed = 2023
data_label = "physionet"
batch_size = 64

# File paths (all relative)
fp_notebooks_folder = "../"
fp_project_folder = join(fp_notebooks_folder, "../")
fp_processed_data_folder = join(fp_project_folder, "processed_data")
# Derived from data_label (instead of repeating the dataset name) so the data
# folder and the checkpoint folder always refer to the same dataset.
fp_output_data_folder = join(fp_processed_data_folder, data_label)
fp_checkpoint_folder = join(fp_project_folder, "checkpoints")
fp_project_checkpoints = join(fp_checkpoint_folder, data_label)
fp_tuning = join(fp_project_checkpoints, "tuning")
fp_models = join(fp_project_checkpoints, "models")
fp_predictions = join(fp_project_checkpoints, "predictions")

# Seed filepaths: tuning artefacts are keyed by tuning_seed, while models and
# predictions are keyed by the current seed.
fp_cur_tune_folder = join(fp_tuning, str(tuning_seed))
create_folder(fp_cur_tune_folder)
fp_cur_model_folder = join(fp_models, str(seed))
create_folder(fp_cur_model_folder)
fp_cur_predictions_folder = join(fp_predictions, str(seed))
create_folder(fp_cur_predictions_folder)

# Load Data

In [None]:
# Load the pre-split dataset from the processed-data folder. Later cells read
# its "target_cols", "feat_cols", "valid_df" and "test_df" entries.
split_dict = load_split_dict(fp_output_data_folder)

# Tuning

In [None]:
# Tune the InferNoise noise level (stddev) once per target group. This only
# runs when the current seed is the designated tuning seed; the prediction
# step reads the saved result from the tuning folder.
if seed == tuning_seed:
    # Candidate noise standard deviations tried for every target group.
    stddev_candidates = (
        0.00001, 0.00005, 0.0001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5,
    )
    best_hp_path = join(
        fp_cur_tune_folder, "all_infernoise_predictor_best_hp.joblib"
    )

    infernoise_best_hp_dict = {}
    for time_label, target_cols in tqdm(split_dict["target_cols"].items()):
        ae_regressor = load_model(
            name=f"rue_{time_label}",
            fp_checkpoints=fp_cur_model_folder,
        )
        infernoise_tuning_df = tune_infernoise(
            ae_regressor,
            # Fresh list per call, exactly as a list literal would give.
            stddev_list=list(stddev_candidates),
            valid_df=split_dict["valid_df"],
            inputs=split_dict["feat_cols"],
            outputs=target_cols,
            seed=seed,
            T=10,
            regressor_label="_" + time_label,
        )
        display(infernoise_tuning_df)

        tuning_csv_path = join(
            fp_cur_tune_folder, f"tuning_infernoise_{time_label}.csv"
        )
        infernoise_tuning_df.to_csv(tuning_csv_path)

        # Keep the first-column value of the row with the smallest loss.
        # NOTE(review): presumably column 0 holds the stddev - confirm
        # against tune_infernoise's output layout.
        best_row = infernoise_tuning_df["loss"].argmin()
        infernoise_best_hp_dict[time_label] = infernoise_tuning_df.iloc[best_row, 0]

    joblib.dump(infernoise_best_hp_dict, best_hp_path)
    display(infernoise_best_hp_dict)

# Prediction

In [None]:
# Produce InferNoise predictions on the validation and test splits for every
# target group, using the stddev selected during tuning.
best_hp_path = join(fp_cur_tune_folder, "all_infernoise_predictor_best_hp.joblib")
infernoise_best_hp_dict = joblib.load(best_hp_path)

for time_label, target_cols in tqdm(split_dict["target_cols"].items()):
    ae_regressor = load_model(
        name=f"rue_{time_label}",
        fp_checkpoints=fp_cur_model_folder,
    )

    # Arguments shared by the validation and test prediction calls.
    shared_kwargs = dict(
        inputs=split_dict["feat_cols"],
        outputs=target_cols,
        regressor_label="_" + time_label,
        seed=seed,
        T=10,
        stddev=infernoise_best_hp_dict[time_label],
    )
    infernoise_valid_df = infernoise_test_predictions(
        ae_regressor, test_df=split_dict["valid_df"], **shared_kwargs
    )
    infernoise_test_df = infernoise_test_predictions(
        ae_regressor, test_df=split_dict["test_df"], **shared_kwargs
    )

    # Output files are named by the LAST character of the label only.
    # NOTE(review): labels sharing a final character would overwrite each
    # other's files - confirm the labels are unique in that position.
    valid_csv_path = join(
        fp_cur_predictions_folder, f"infernoise_valid_{time_label[-1]}.csv"
    )
    test_csv_path = join(
        fp_cur_predictions_folder, f"infernoise_test_{time_label[-1]}.csv"
    )
    infernoise_valid_df.to_csv(valid_csv_path)
    infernoise_test_df.to_csv(test_csv_path)