In [None]:
import os
import sys
from pathlib import Path

import datarobot as dr
from dotenv import load_dotenv

# The notebook must run from the project root so that `infra.*` / `starter.*`
# imports and relative paths resolve. The `_correct_path` flag makes this cell
# safe to re-run: without it, every re-execution would move one more directory up.
if "_correct_path" not in locals():
    os.chdir("..")
    sys.path.append(".")
    # Report the resolved working directory so a wrong starting location is obvious.
    print(f"changed dir to {Path('.').resolve()}")
    _correct_path = True

# Load environment variables from a .env file (if present) before the
# DataRobot client is constructed.
load_dotenv()
client = dr.Client()

In [3]:
from datarobotx.idp.use_cases import get_or_create_use_case

from infra.settings_main import use_case_args

# An explicitly configured use case (via the DATAROBOT_DEFAULT_USE_CASE
# environment variable) takes precedence; only when the variable is absent do we
# call get_or_create_use_case with the name/description from the project settings.
use_case_id = os.environ.get("DATAROBOT_DEFAULT_USE_CASE")
if use_case_id is None:
    use_case_id = get_or_create_use_case(
        endpoint=client.endpoint,
        token=client.token,
        name=use_case_args.resource_name,
        description=use_case_args.description,
    )

# Data Ingest and Preparation

In [None]:
import pandas as pd

from infra.settings_datasets import training_dataset


def preprocess_dataset(dataset: pd.DataFrame) -> pd.DataFrame:
    """Hook for custom preparation of the training data.

    As shipped, this is a pass-through: the frame handed in is returned
    as-is (same object, no copy). Add project-specific feature engineering
    or column clean-up in the marked section below.

    Parameters
    ----------
    dataset : pd.DataFrame
        The raw training data to prepare

    Returns
    -------
    pd.DataFrame :
        The prepared dataset
    """
    # Put any required preprocessing here, for example:
    # dataset["balance_as_percent_of_income"] = (
    #     dataset["revol_bal"] / dataset["annual_inc"]
    # )
    # dataset.drop(columns=["mths_since_last_major_derog", "policy_code"], inplace=True)
    prepared = dataset

    return prepared


# Swap in your own ingest and/or preparation logic here if the data does not
# come from the CSV configured in `training_dataset`.
df = pd.read_csv(training_dataset.file_path).pipe(preprocess_dataset)

In [None]:
from datarobotx.idp.datasets import get_or_create_dataset_from_df

# Send the prepared DataFrame to DataRobot and keep the returned dataset ID;
# it is passed to the Autopilot run and written to the exported app settings.
print("Uploading training data to AI Catalog...")
training_dataset_id = get_or_create_dataset_from_df(
    endpoint=client.endpoint,
    token=client.token,
    data_frame=df,
    name=training_dataset.resource_name,
    # NOTE(review): presumably links the dataset to the use case resolved
    # earlier in the notebook — confirm against the datarobotx documentation.
    use_cases=use_case_id,
)

# Model Training

In [None]:
from infra.common.schema import (
    AdvancedOptionsArgs,
    AnalyzeAndModelArgs,
    AutopilotRunArgs,
)
from infra.settings_main import project_name

# Autopilot configuration. It is expanded into keyword arguments with
# `autopilotrun_args.model_dump()` when the Autopilot run is created, and its
# target is also written to the exported app settings.
autopilotrun_args = AutopilotRunArgs(
    name=f"Predictive AI Starter Project [{project_name}]",
    # Fixed seed — presumably for reproducible modeling runs; TODO confirm.
    advanced_options_config=AdvancedOptionsArgs(seed=42),
    analyze_and_model_config=AnalyzeAndModelArgs(
        metric="RMSE",
        mode=dr.enums.AUTOPILOT_MODE.QUICK,
        # NOTE(review): hard-coded target column; it must exist in the training
        # data, so update it together with the dataset.
        target="Sales",
        # NOTE(review): -1 presumably requests the maximum available workers —
        # confirm against the DataRobot client documentation.
        worker_count=-1,
    ),
)

# Name passed as `registered_model_name` when the model is registered, and
# stored in the exported app settings.
registered_model_name = f"Predictive AI Starter Registered Model [{project_name}]"

In [None]:
from datarobotx.idp.autopilot import get_or_create_autopilot_run
from datarobotx.idp.registered_model_versions import (
    get_or_create_registered_leaderboard_model_version,
)

# Start (or look up) the Autopilot run on the uploaded training dataset, using
# the configuration assembled in `autopilotrun_args`.
print("Running Autopilot...")
project_id = get_or_create_autopilot_run(
    endpoint=client.endpoint,
    token=client.token,
    dataset_id=training_dataset_id,
    use_case=use_case_id,
    **autopilotrun_args.model_dump(),
)

# Use the model DataRobot recommends for this project.
model_id = dr.ModelRecommendation.get(project_id).model_id

# This message is printed before the registration call runs, so it is phrased
# as in-progress, matching the other progress messages in the notebook.
print("Registering recommended model...")
registered_model_version_id = get_or_create_registered_leaderboard_model_version(
    endpoint=client.endpoint,
    token=client.token,
    model_id=model_id,
    registered_model_name=registered_model_name,
)

# Export settings for provisioning app, other dependent resources

In [None]:
import yaml

from infra.settings_main import model_training_output_path
from starter.i18n import gettext
from starter.schema import AppSettings

# Collect every identifier produced above into one settings object so that the
# frontend application and other dependent resources can be provisioned from it.
print("Capturing settings required to deploy the frontend...")
app_settings = AppSettings(
    registered_model_version_id=registered_model_version_id,
    registered_model_name=registered_model_name,
    use_case_id=use_case_id,
    project_id=project_id,
    model_id=model_id,
    target=autopilotrun_args.analyze_and_model_config.target,
    training_dataset_id=training_dataset_id,
    page_title=gettext("Predictive AI Starter"),
    page_description=gettext(
        "An application designed to simplify interactions with predictions while providing clear insights into the key drivers behind those predictions."
    ),
)

# `allow_unicode=True` makes PyYAML emit non-ASCII characters (e.g. translated
# gettext strings) verbatim, so the file must be opened as UTF-8 explicitly.
# Relying on the platform default encoding can raise UnicodeEncodeError or
# produce a non-UTF-8 YAML file on systems whose default is not UTF-8.
# NOTE(review): code that reads this file should open it as UTF-8 as well.
with open(model_training_output_path, "w", encoding="utf-8") as f:
    yaml.dump(app_settings.model_dump(), f, allow_unicode=True)