In [2]:
import sys
from pathlib import Path
import pandas as pd
import pickle

from sklearn.ensemble import RandomForestClassifier

from stepshift.views import StepshiftedModels
from views_runs import DataPartitioner, ViewsRun

PATH = Path.cwd()

# Make <...>/views_pipeline/common_utils importable: keep every component of
# the working directory up to and including the first "views_pipeline" entry.
# `tuple.index` raises ValueError when the notebook is started outside a
# views_pipeline checkout.
sys.path.insert(
    0,
    str(Path(*PATH.parts[: PATH.parts.index("views_pipeline") + 1]) / "common_utils"),
)

from set_path import setup_project_paths, setup_artifacts_paths, setup_data_paths

# Per the original note, this registers the project's remaining paths on sys.path.
setup_project_paths(PATH)

from config_data_partitions import get_data_partitions #change to common_utils/set_partition.py
from config_hyperparameters import get_hp_config
from config_model import get_model_config

In [5]:
def train(model_config, hp_config, data_partitions, artifacts_path=None, dataset=None):
    """Train, or load from cache, the calibration and future partition models.

    When both pickle files are already present in the artifacts directory they
    are loaded and returned without retraining. Otherwise one random-forest
    based ``StepshiftedModels`` is fitted per partition on its ``'train'``
    timespan, and both fitted ``ViewsRun`` objects are pickled into the
    artifacts directory before being returned.

    Args:
        model_config: Mapping providing ``"steps"`` and ``"depvar"``.
        hp_config: Mapping providing ``"n_estimators"`` and ``"n_jobs"``.
        data_partitions: Mapping providing ``"calib_partitioner_dict"`` and
            ``"future_partitioner_dict"``.
        artifacts_path: Directory holding the model pickles (str or Path).
            Defaults to the notebook's placeholder
            ``"your_path_to_artifacts_directory"``, relative to the working
            directory.
        dataset: Either an already-loaded dataset, or a str/Path pointing at a
            parquet file. Defaults to
            ``"models/electric_relaxation/data/raw/raw.parquet"``.

    Returns:
        Tuple ``(model_calibration_partition, model_future_partition)``.

    Raises:
        KeyError: If a required key is missing from one of the config mappings.
    """
    print("Training...")

    if artifacts_path is None:
        # Placeholder kept from the original notebook; pass `artifacts_path` to override.
        artifacts_path = "your_path_to_artifacts_directory"
    artifacts_path = Path(artifacts_path)

    calib_pickle_path = artifacts_path / "model_calibration_partition.pkl"
    future_pickle_path = artifacts_path / "model_future_partition.pkl"

    if calib_pickle_path.exists() and future_pickle_path.exists():
        print("Pickle files already exist. Loading models from pickle files...")
        # SECURITY: unpickling can execute arbitrary code. Only point
        # `artifacts_path` at files this pipeline wrote itself.
        with open(calib_pickle_path, 'rb') as file:
            model_calibration_partition = pickle.load(file)
        with open(future_pickle_path, 'rb') as file:
            model_future_partition = pickle.load(file)
        return model_calibration_partition, model_future_partition

    # Resolve every config value up front so a missing key fails before any
    # (potentially long) model fitting starts.
    n_estimators = hp_config["n_estimators"]
    n_jobs = hp_config["n_jobs"]
    steps = model_config["steps"]
    depvar = model_config["depvar"]
    calib_partition = DataPartitioner({'calib': data_partitions["calib_partitioner_dict"]})
    future_partition = DataPartitioner({'future': data_partitions["future_partitioner_dict"]})

    if dataset is None:
        dataset = "models/electric_relaxation/data/raw/raw.parquet"
    if isinstance(dataset, (str, Path)):
        # The original handed this path string directly to `fit`; read the
        # parquet file so the runs receive the data itself.
        dataset = pd.read_parquet(dataset)

    # Create the output directory now so an unwritable location is detected
    # before training rather than after it.
    artifacts_path.mkdir(parents=True, exist_ok=True)

    def _fit_partition(name, partitioner):
        """Fit a fresh stepshifted random forest on the 'train' span of `name`."""
        # Each partition gets its own estimator and StepshiftedModels instance.
        # The original shared a single instance between both runs, so fitting
        # the future partition would presumably refit the object the
        # calibration run also holds.
        # NOTE(review): confirm against stepshift / views_runs internals.
        base_model = RandomForestClassifier(n_estimators=n_estimators, n_jobs=n_jobs)
        stepshifter_def = StepshiftedModels(base_model, steps, depvar)
        run = ViewsRun(partitioner, stepshifter_def)
        run.fit(name, 'train', dataset)
        return run

    model_calibration_partition = _fit_partition('calib', calib_partition)
    model_future_partition = _fit_partition('future', future_partition)

    with open(calib_pickle_path, 'wb') as file:
        pickle.dump(model_calibration_partition, file)
    with open(future_pickle_path, 'wb') as file:
        pickle.dump(model_future_partition, file)

    print("Models trained and saved in artifacts folder!")

    return model_calibration_partition, model_future_partition
