# Load Packages

In [None]:
%load_ext autoreload
%autoreload 2

import sys
from os.path import join
from tqdm.auto import tqdm
import joblib
sys.path.append("../../")

from src.file_manager.load_data import load_split_dict
from src.models.rue.tuning import model_tuning_regressor, model_tuning_decoder
from src.models.rue.training import model_training_predictor, model_training_decoder
from src.models.rue.save_load_model import save_model, load_model
from src.models.rue.predicting import model_test_predictions
from src.misc import create_folder

# --- Run configuration ------------------------------------------------------
# `seed` is passed to training/prediction and names the model/prediction folders;
# `tuning_seed` names the folder that tuning results are written to and read from.
# The tuning cells only execute when the two seeds are equal.
data_label = "mimic"
batch_size = 64
seed = 2023
tuning_seed = 2023

# --- Directory layout (all paths are relative to this notebook) -------------
fp_notebooks_folder = "../"
fp_project_folder = join(fp_notebooks_folder, "../")
fp_data_folder = join(fp_project_folder, "../", "data")
fp_output_data_folder = join(fp_data_folder, data_label)
fp_processed_folder = join(fp_project_folder, "processed_data", data_label)
fp_checkpoint_folder = join(fp_project_folder, "checkpoints")
fp_project_checkpoints = join(fp_checkpoint_folder, data_label)
fp_tuning, fp_models, fp_predictions = (
    join(fp_project_checkpoints, sub_folder)
    for sub_folder in ("tuning", "models", "predictions")
)

# --- Seed-specific folders --------------------------------------------------
fp_cur_tune_folder = join(fp_tuning, str(tuning_seed))
fp_cur_model_folder = join(fp_models, str(seed))
fp_cur_predictions_folder = join(fp_predictions, str(seed))
for _seed_folder in (fp_cur_tune_folder, fp_cur_model_folder, fp_cur_predictions_folder):
    create_folder(_seed_folder)

# Load Data

In [None]:
# Load the pre-computed split dictionary. Later cells iterate its items as
# (time_label, info) pairs and read the "train_df", "valid_df", "test_df" and
# "outputs" entries of each info dict.
# NOTE: joblib.load unpickles the file, so only load files from a trusted source.
fp_split_dict = join(fp_processed_folder, "mimic_split_dict.joblib")
split_dict = joblib.load(fp_split_dict)
# Input feature columns, named "<signal>_<stat> Min<k>".
# Order: for each k in 0..4, all signals with "mean", then all signals with "std".
_predictor_signals = (
    "ABPdias (mmHg)",
    "RESP (bpm)",
    "ABPmean (mmHg)",
    "ABPsys (mmHg)",
    "SpO2 (%)",
    "HR (bpm)",
)
_predictor_stats = ("mean", "std")
predictors = [
    f"{signal}_{stat} Min{minute}"
    for minute in range(5)
    for stat in _predictor_stats
    for signal in _predictor_signals
]

# Tune and Train Predictor

## Tune

In [None]:
# Grid-search the predictor hyper-parameters for every split.
# Runs only when this seed is the tuning seed; the training cell reads the
# saved result back from fp_cur_tune_folder.
if tuning_seed == seed:
    all_rue_predictor_best_hp = {}
    for time_label, split_info in tqdm(split_dict.items(), total=len(split_dict)):
        # Encoder size is searched here; decoder size is held at a single value.
        # NOTE(review): 1028 may be a typo for 1024 — confirm before changing.
        predictor_grid = {
            "encoder_width": [256, 512, 1028],
            "encoder_depth": [1, 2, 3],
            "decoder_width": [128],
            "decoder_depth": [2],
        }
        tuning_results_df, best_hp = model_tuning_regressor(
            param_grid=predictor_grid,
            predictors=predictors,
            pred_cols=split_info["outputs"],
            train_df=split_info["train_df"],
            valid_df=split_info["valid_df"],
            seed=seed,
            batch_size=batch_size,
            max_epochs=10000,
            verbose=1,
            patience=20,
        )
        display(tuning_results_df)

        # Keep the full tuning table per split, and remember the winning setting.
        fp_tuning_csv = join(fp_cur_tune_folder, f"tuning_rue_{time_label}.csv")
        tuning_results_df.to_csv(fp_tuning_csv)
        all_rue_predictor_best_hp[time_label] = best_hp

    # Persist the best hyper-parameters of all splits in one file.
    joblib.dump(
        all_rue_predictor_best_hp,
        join(fp_cur_tune_folder, "all_rue_predictor_best_hp.joblib"),
    )

## Train

In [None]:
# Train the predictor for every split with the tuned hyper-parameters and
# save each trained model as "rue_predictor_<time_label>".
all_rue_predictor_best_hp = joblib.load(join(fp_cur_tune_folder, "all_rue_predictor_best_hp.joblib"))
for time_label, time_info_dict in tqdm(split_dict.items(), total=len(split_dict)):
    best_predictor_hp = all_rue_predictor_best_hp[time_label]
    ae_regressor = model_training_predictor(
        best_predictor_hp, predictors=predictors, pred_cols=time_info_dict["outputs"],
        train_df=time_info_dict["train_df"], valid_df=time_info_dict["valid_df"], seed=seed,
        batch_size=batch_size, max_epochs=10000, verbose=1, patience=20
    )
    # fp_cur_model_folder already ends in str(seed). Saving into a nested
    # "<seed>/<seed>" folder would not match the decoder cells, which load
    # "rue_predictor_<time_label>" from fp_cur_model_folder itself.
    save_model(model=ae_regressor, name=f"rue_predictor_{time_label}",
               fp_checkpoints=fp_cur_model_folder, override=True)

# Tune and Train Decoder

## Tune

In [None]:
# Grid-search the decoder hyper-parameters for every split.
# Runs only when this seed is the tuning seed. Relies on
# all_rue_predictor_best_hp having been loaded by the predictor training cell.
if seed == tuning_seed:
    all_rue_decoder_best_hp = {}
    for time_label, time_info_dict in tqdm(split_dict.items(), total=len(split_dict)):
        best_predictor_hp = all_rue_predictor_best_hp[time_label]
        # The saved predictor is handed to the tuner as prev_model.
        prev_model = load_model(name=f"rue_predictor_{time_label}", fp_checkpoints=fp_cur_model_folder)
        rue_tuning_df, rue_best_hp = model_tuning_decoder(
            param_grid=dict(
                # Encoder size is pinned to the tuned predictor's values.
                encoder_width=[best_predictor_hp["encoder_width"]],
                encoder_depth=[best_predictor_hp["encoder_depth"]],
                # Only the decoder size is searched.
                # NOTE(review): 1028 may be a typo for 1024 — confirm before changing.
                decoder_width=[256, 512, 1028],
                decoder_depth=[1, 2, 3]
            ), predictors=predictors, pred_cols=time_info_dict["outputs"],
            train_df=time_info_dict["train_df"], valid_df=time_info_dict["valid_df"], seed=seed,
            # batch_size is passed explicitly so tuning uses the same batch size
            # as the other tuning/training calls in this notebook.
            batch_size=batch_size, max_epochs=10000, verbose=1, patience=20, prev_model=prev_model
        )
        display(rue_tuning_df)
        rue_tuning_df.to_csv(join(fp_cur_tune_folder, f"tuning_rue_decoder_{time_label}.csv"))
        all_rue_decoder_best_hp[time_label] = rue_best_hp
    # Persist the best decoder hyper-parameters of all splits in one file.
    joblib.dump(all_rue_decoder_best_hp, join(fp_cur_tune_folder, "all_rue_decoder_best_hp.joblib"))

## Train

In [None]:
# Train the decoder for every split, passing the previously saved predictor as
# prev_model, and save the result as "rue_<time_label>".
fp_decoder_hp = join(fp_cur_tune_folder, "all_rue_decoder_best_hp.joblib")
all_rue_decoder_best_hp = joblib.load(fp_decoder_hp)
for time_label, split_info in tqdm(split_dict.items(), total=len(split_dict)):
    predictor_model = load_model(
        name=f"rue_predictor_{time_label}",
        fp_checkpoints=fp_cur_model_folder,
    )
    decoder_hp = all_rue_decoder_best_hp[time_label]
    ae_regressor = model_training_decoder(
        decoder_hp,
        predictors=predictors,
        pred_cols=split_info["outputs"],
        train_df=split_info["train_df"],
        valid_df=split_info["valid_df"],
        seed=seed,
        prev_model=predictor_model,
        batch_size=batch_size,
        max_epochs=10000,
        verbose=1,
        patience=20,
    )
    save_model(
        model=ae_regressor,
        name=f"rue_{time_label}",
        fp_checkpoints=fp_cur_model_folder,
        override=True,
    )

# Prediction

In [None]:
# Produce validation and test predictions with the final model of every split
# and write them to CSV.
for time_label, split_info in tqdm(split_dict.items(), total=len(split_dict)):
    ae_regressor = load_model(name=f"rue_{time_label}", fp_checkpoints=fp_cur_model_folder)

    # The last character of the label is used both as pred_min and in the file names.
    # NOTE(review): assumes every time_label ends in a single digit — confirm.
    minute_suffix = time_label[-1]

    # Arguments shared by the validation and test prediction calls.
    shared_kwargs = dict(
        df_train=split_info["train_df"],
        pred_cols=split_info["outputs"],
        predictors=predictors,
        regressor_label="_" + time_label,
        pred_min=int(minute_suffix),
        T=10,
        seed=seed,
    )
    rue_valid_df = model_test_predictions(ae_regressor, df_test=split_info["valid_df"], **shared_kwargs)
    rue_test_df = model_test_predictions(ae_regressor, df_test=split_info["test_df"], **shared_kwargs)

    # NOTE(review): files go to the shared fp_predictions folder, not the
    # seed-specific fp_cur_predictions_folder created earlier, so runs with
    # different seeds overwrite each other — confirm whether this is intended.
    rue_valid_df.to_csv(join(fp_predictions, f"rue_valid_{minute_suffix}.csv"))
    rue_test_df.to_csv(join(fp_predictions, f"rue_test_{minute_suffix}.csv"))
