In [None]:
%load_ext autoreload
%autoreload 2

import sys
from os.path import join
from tqdm.auto import tqdm
sys.path.append("../../")

from src.file_manager.load_data import load_split_dict
from src.models.sgpr.training import model_training_gpr
from src.models.sgpr.save_load_model import save_model_gpr, load_model_gpr
from src.models.sgpr.predicting import model_test_predictions_gpr
from src.misc import create_folder
from seed_file import seed

# Run configuration.
# `seed` is imported from seed_file (see imports) and selects the model and
# prediction folders; `tuning_seed` independently selects the tuning folder.
tuning_seed = 2023
data_label = "physionet"
batch_size = 64  # NOTE(review): not referenced in the cells visible here

# File paths (relative to the notebook's working directory)
fp_notebooks_folder = "../"
fp_project_folder = join(fp_notebooks_folder, "../")
fp_processed_data_folder = join(fp_project_folder, "processed_data")
# Derived from data_label (previously a hard-coded "physionet") so the data
# that is loaded and the checkpoints that are written always refer to the
# same dataset.
fp_output_data_folder = join(fp_processed_data_folder, data_label)
fp_checkpoint_folder = join(fp_project_folder, "checkpoints")
fp_project_checkpoints = join(fp_checkpoint_folder, data_label)
fp_tuning = join(fp_project_checkpoints, "tuning")
fp_models = join(fp_project_checkpoints, "models")
fp_predictions = join(fp_project_checkpoints, "predictions")

# Seed filepaths: one sub-folder per seed value
fp_cur_tune_folder = join(fp_tuning, str(tuning_seed))
fp_cur_model_folder = join(fp_models, str(seed))
fp_cur_predictions_folder = join(fp_predictions, str(seed))
# presumably create_folder makes the directory if needed -- verify in src.misc
for fp_folder in (
    fp_cur_tune_folder,
    fp_cur_model_folder,
    fp_cur_predictions_folder,
):
    create_folder(fp_folder)

# Load Data

In [None]:
split_dict = load_split_dict(fp_output_data_folder)

# Fail fast if an entry indexed by the training/prediction cells is absent,
# instead of hitting a KeyError part-way through a long run.
required_keys = ("feat_cols", "target_cols", "train_df", "valid_df", "test_df")
missing_keys = [key for key in required_keys if key not in split_dict]
if missing_keys:
    raise KeyError(
        f"split_dict loaded from {fp_output_data_folder!r} "
        f"is missing keys: {missing_keys}"
    )

# Train SGPR

In [None]:
# Train one model per entry of split_dict["target_cols"] and checkpoint it as
# "gpr_<time_label>" inside the seed-specific model folder.
feat_cols = split_dict["feat_cols"]
train_df = split_dict["train_df"]
valid_df = split_dict["valid_df"]

for time_label, target_cols in tqdm(split_dict["target_cols"].items()):
    gpr = model_training_gpr(
        predictors=feat_cols,
        pred_cols=target_cols,
        train_df=train_df,
        valid_df=valid_df,
        seed=seed,
        prop_inducing=0.01,
    )
    save_model_gpr(
        model=gpr,
        name=f"gpr_{time_label}",
        fp_checkpoints=fp_cur_model_folder,
        predictors=feat_cols,
    )

# Prediction

In [None]:
# For every entry of split_dict["target_cols"], reload the matching checkpoint
# and write its validation and test predictions to the seed-specific
# predictions folder as "gpr_<split>_<pred_min>.csv".
for time_label, target_cols in tqdm(split_dict["target_cols"].items()):
    # Parse the complete run of trailing digits rather than only the last
    # character, so a multi-digit suffix (e.g. "...15") is not truncated to 5,
    # which would pass the wrong pred_min and let output files overwrite each
    # other. Single-digit labels give the same value as before; a label with
    # no trailing digit still raises ValueError.
    # presumably pred_min is the prediction horizon in minutes -- TODO confirm
    label_stem = time_label.rstrip("0123456789")
    pred_min = int(time_label[len(label_stem):])

    gpr = load_model_gpr(
        name=f"gpr_{time_label}", fp_checkpoints=fp_cur_model_folder)

    # Same prediction/export steps for both held-out splits.
    for split_name in ("valid", "test"):
        split_pred = model_test_predictions_gpr(
            gpr=gpr, df_test=split_dict[f"{split_name}_df"],
            pred_cols=target_cols,
            predictors=split_dict["feat_cols"],
            regressor_label=f"_{time_label}", pred_min=pred_min, seed=seed)
        split_pred.to_csv(
            join(fp_cur_predictions_folder, f"gpr_{split_name}_{pred_min}.csv"))