In [3]:
from sklearn.linear_model import QuantileRegressor
import numpy as np 
import pandas as pd 
import category_encoders.ordinal
import numpy
import pandas
import sklearn
import sklearn.impute
import xgboost
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer, make_column_selector

# Load the training table; every column except the "charges" target is a feature.
df = pd.read_csv("./data/training_data.csv")
X = df.drop(["charges"], axis=1)
y = df["charges"]

# 19 evenly spaced quantile levels from 0.05 to 0.95. Rounding strips floating-point
# noise so each level makes a clean dictionary key and folder name later on.
quantiles = np.linspace(0.05, 0.95, 19)
quantiles = [np.round(x, 3) for x in quantiles]

# Fit one independent preprocessing + QuantileRegressor pipeline per quantile level.
# `models` maps quantile level -> {"model": fitted pipeline}.
models = {}
for quantile in quantiles:
    preprocessor = ColumnTransformer([
        # pd.read_csv loads text columns as `object`, never `category`, so selecting only
        # 'category' matched no columns and (with ColumnTransformer's default
        # remainder="drop") every text feature was silently discarded before fitting.
        # Selecting both dtypes routes those columns through the ordinal encoder.
        (
            "CAT_ORDCAT2_1",
            Pipeline([("ORDCAT2_1", category_encoders.ordinal.OrdinalEncoder())]),
            make_column_selector(dtype_include=["category", "object"]),
        ),
        # Numeric columns: fill missing values with the column median.
        (
            "NUM_PNI2_2",
            Pipeline([("PNI2_2", sklearn.impute.SimpleImputer(strategy='median'))]),
            make_column_selector(dtype_include=numpy.number),
        ),
    ])

    # alpha=0 is passed as the regressor's regularization constant (no penalty term).
    estimator = Pipeline([
        ("preprocessor", preprocessor),
        (f"quantile_{quantile}", QuantileRegressor(quantile=quantile, alpha=0)),
    ])
    estimator.fit(X, y)
    models[quantile] = {"model": estimator}

In [4]:
from pathlib import Path 
import pickle
# Write one artifact folder per quantile: the pickled pipeline plus the pinned
# requirements file that accompanies it.
for quantile, entry in models.items():
    artifact_dir = Path(f"./models/quantile-{quantile}")
    artifact_dir.mkdir(parents=True, exist_ok=True)
    absolute_dir = artifact_dir.absolute()

    model_path = str(absolute_dir / "model.pkl")
    with open(model_path, "wb") as model_file:
        pickle.dump(entry["model"], model_file)

    requirements_path = str(absolute_dir / "requirements.txt")
    with open(requirements_path, "w") as requirements_file:
        # No trailing newline after the last pin, matching the file layout as written.
        requirements_file.writelines([
            "category_encoders==2.6.0\n",
            "scikit-learn==1.6.1",
        ])
    
    

In [1]:
import datarobot as dr 
# Shared DataRobot API client; later cells call `client.post(...)` on it directly.
# No endpoint or token is passed here — presumably they are picked up from the
# environment or a config file; TODO confirm for the target runtime.
client = dr.Client()

In [26]:
# Fetch the already-uploaded datasets by ID. To upload from disk instead, use:
#   dr.Dataset.create_from_file("/home/notebooks/storage/custom-model-simple/training_data.csv")
#   dr.Dataset.create_from_file("/home/notebooks/storage/custom-model-simple/test_data.csv")
training_data = dr.Dataset.get("67bf7d05ad1d7cda48cc2453")
cm_test_data = dr.Dataset.get("67bf4791518c626964cc2087")

# Use the last execution environment returned by the "scikit" search.
environment = dr.ExecutionEnvironment.list("scikit")[-1]

# Keep only serverless prediction environments, then pick one of them.
serverless_environments = [
    candidate
    for candidate in dr.PredictionEnvironment.list()
    if candidate.platform == "datarobotServerless"
]
# NOTE(review): index 2 depends on the order the API returns environments in and
# needs at least three serverless environments — confirm this is the intended one.
prediction_environment = serverless_environments[2]
prediction_environment

PredictionEnvironment('668401df486fcb136bf056d1', 'DataRobot Serverless Predictions', 'datarobotServerless', 'DataRobot Serverless Predictions')

In [10]:

import time
from pathlib import Path 
import pickle
import requests

def create_custom_model(name, artifact_folder, training_dataset_id, environment_id):
    """Create a regression custom inference model and its first version.

    Args:
        name: Display name of the custom model.
        artifact_folder: Folder whose contents are uploaded as the version's files.
        training_dataset_id: Accepted for interface compatibility; it is currently
            not forwarded to DataRobot.
        environment_id: ID of the base execution environment for the version.

    Returns:
        Tuple ``(custom_model, custom_model_version)``.
    """
    regression_type = dr.enums.TARGET_TYPE.REGRESSION
    custom_model = dr.CustomInferenceModel.create(
        name,
        target_name="charges",
        target_type=regression_type,
    )

    version_options = {
        "base_environment_id": environment_id,
        "folder_path": artifact_folder,
    }
    custom_model_version = dr.CustomModelVersion.create_clean(
        custom_model.id, **version_options
    )
    return custom_model, custom_model_version

def build_custom_model_environment(cm, cmv):
    """Request a dependency build for a custom model version and fetch its build info.

    The build is started by POSTing to the version's ``dependencyBuild`` route through
    the module-level ``client``; the build info is read back immediately afterwards,
    without waiting for the build to finish.

    Args:
        cm: Custom model (its ``id`` is used).
        cmv: Custom model version (its ``id`` is used).

    Returns:
        Tuple ``(cm, cmv, build_info)``.
    """
    model_id, version_id = cm.id, cmv.id
    endpoint = f"customModels/{model_id}/versions/{version_id}/dependencyBuild/"
    client.post(endpoint)  # response is not needed; status is read from build_info
    build_info = dr.CustomModelVersionDependencyBuild.get_build_info(model_id, version_id)
    return cm, cmv, build_info

def test_custom_model(cm, cmv, test_dataset_id):
    custom_model_test = dr.CustomModelTest.create(
        cm.id, 
        cmv.id, 
        dataset_id = testing_dataset_id, 
        network_egress_policy = dr.enums.NETWORK_EGRESS_POLICY.PUBLIC) 
    return cm, cmv
    
def register_custom_model(cm, cmv):
    """Register a custom model version as a registered model version.

    The custom model's name is reused as the version name, the registered model
    name and the description.

    Args:
        cm: Custom model (its ``name`` is used).
        cmv: Custom model version (its ``id`` is used).

    Returns:
        The object returned by
        ``dr.RegisteredModelVersion.create_for_custom_model_version``.
    """
    label = cm.name
    registration = {
        "custom_model_version_id": cmv.id,
        "name": label,
        "registered_model_name": label,
        "description": label,
    }
    return dr.RegisteredModelVersion.create_for_custom_model_version(**registration)

def create_deployment(registered_model_version, prediction_environment_id):
    """Deploy a registered model version and configure its association ID column.

    The deployment is labelled with the registered model version's name. Drift
    tracking is not configured here.

    Args:
        registered_model_version: Registered model version (``id`` and ``name`` are used).
        prediction_environment_id: ID of the prediction environment to deploy into.

    Returns:
        The newly created deployment.
    """
    new_deployment = dr.Deployment.create_from_registered_model_version(
        registered_model_version.id,
        prediction_environment_id=prediction_environment_id,
        label=registered_model_version.name,
    )
    # Declare the association ID column without requiring it on every request.
    association_columns = ["ASSOCIATION_ID"]
    new_deployment.update_association_id_settings(
        association_columns,
        required_in_prediction_requests=False,
    )
    return new_deployment

def update_deployment_settings(deployment):
    """Apply the standard monitoring settings to a deployment.

    Sets ``ASSOCIATION_ID`` as the (optional) association ID column, then enables
    target drift tracking and disables feature drift tracking.

    Args:
        deployment: Deployment object to update.
    """
    association_columns = ["ASSOCIATION_ID"]
    deployment.update_association_id_settings(
        association_columns,
        required_in_prediction_requests=False,
    )

    drift_flags = {"target_drift_enabled": True, "feature_drift_enabled": False}
    deployment.update_drift_tracking_settings(**drift_flags)


In [27]:
# Create one custom model (plus its first version) per trained quantile, uploading the
# artifact folder written for that quantile. Each entry is a (model, version) tuple.
custom_models = []
for quantile in models:
    name = f"quantile-{quantile}"
    folder = Path(f"./models/quantile-{quantile}")
    custom_models.append(
        create_custom_model(name, folder, training_data.id, environment.id)
    )

In [32]:
# NOTE(review): ad-hoc status probe. `build` is only assigned further down (as the loop
# variable of the build-polling cell and again in the umbrella-model cell), so this
# cell raises NameError when the notebook is run top to bottom on a fresh kernel.
build[2].build_status

'submitted'

In [47]:
# Start a dependency build for every (model, version) pair, then wait for each build
# to leave its in-flight states and report any that did not succeed.
BUILD_POLL_SECONDS = 5  # pause between status checks

builds = [build_custom_model_environment(cm, cmv) for cm, cmv in custom_models]
for build in builds:
    # build[2] is the build-info object returned by build_custom_model_environment.
    # The loops used to call refresh() back-to-back with no delay, hammering the API
    # for the whole duration of the build; sleep between polls instead.
    while build[2].build_status in ("submitted", "processing"):
        time.sleep(BUILD_POLL_SECONDS)
        build[2].refresh()
    if build[2].build_status != "success":
        print(build)

In [20]:
# Re-pair every custom model with its latest version.
# `custom_models` holds (model, version) tuples after the creation cell, but this cell
# used to treat each entry as a bare model, so it failed with AttributeError on the
# tuples and could not be re-run. Accept both shapes so the cell is idempotent.
custom_models = [
    (cm, cm.latest_version)
    for cm in (
        entry[0] if isinstance(entry, tuple) else entry
        for entry in custom_models
    )
]

In [26]:
# Register each (custom model, version) pair; results keep the order of `custom_models`.
registered_models = [register_custom_model(*pair) for pair in custom_models]
    

registering custom model CustomModelVersion('v1.0')
registering custom model CustomModelVersion('v1.0')
registering custom model CustomModelVersion('v1.0')
registering custom model CustomModelVersion('v1.0')
registering custom model CustomModelVersion('v1.0')
registering custom model CustomModelVersion('v1.0')
registering custom model CustomModelVersion('v1.0')
registering custom model CustomModelVersion('v1.0')
registering custom model CustomModelVersion('v1.0')


In [0]:
# Cleanup: delete every custom model created by this notebook.
# NOTE(review): in top-to-bottom order this runs before the deployments are created —
# confirm it is meant to be run manually only.
# A plain loop replaces the side-effect-only list comprehension, which built a throwaway
# list and echoed `[None, ...]` as the cell output.
for cm, _cmv in custom_models:
    cm.delete()

In [27]:
# registered_models = [ r for r in dr.RegisteredModel.list() if "quantile-" in str(r) ]
# registered_models = [r.list_versions()[0] for r in registered_models]

In [2]:
# import datarobot as dr 
# deployments = [d for d in dr.Deployment.list() if 'quantile-0' in str(d)]

In [43]:
# Deploy every registered quantile model into the selected prediction environment.
target_environment_id = prediction_environment.id
deployments = [
    create_deployment(version, target_environment_id)
    for version in registered_models
]

creating deployment quantile-0.05
creating deployment quantile-0.1
creating deployment quantile-0.15
creating deployment quantile-0.2
creating deployment quantile-0.25
creating deployment quantile-0.3
creating deployment quantile-0.35
creating deployment quantile-0.4
creating deployment quantile-0.45
creating deployment quantile-0.5
creating deployment quantile-0.55
creating deployment quantile-0.6
creating deployment quantile-0.65
creating deployment quantile-0.7
creating deployment quantile-0.75
creating deployment quantile-0.8
creating deployment quantile-0.85
creating deployment quantile-0.9
creating deployment quantile-0.95


In [0]:
# Apply the shared association-ID and drift settings to every quantile deployment.
# A plain loop replaces the side-effect-only list comprehension, which built a throwaway
# list and echoed `[None, ...]` as the cell output.
for d in deployments:
    update_deployment_settings(d)

In [None]:
import yaml

# Build one routing entry per quantile deployment: its IDs, its label (used as the
# routing tag) and the prediction URL derived from the deployment ID.
routing_conf = []
for deployment in deployments:
    # The commas after `deployment_id` and `model_id` were missing, which made the
    # whole cell a SyntaxError.
    data = dict(
        deployment_id=deployment.id,
        model_id=deployment.model["id"],
        tag=deployment.label,
        target_type="Regression",
        url=f"https://app.datarobot.com/api/v2/deployments/{deployment.id}/predictions",
    )
    routing_conf.append(data)

# NOTE(review): this writes under ./models/master-model, while the umbrella model
# version is created from folder_path "./master-model" — confirm which one is intended.
with open("./models/master-model/routing_config.yaml", "w") as f:
    f.write(yaml.dump(routing_conf))

## Deploy umbrella model

In [18]:
# Resolve the execution and prediction environments for the umbrella model.
scikit_environments = dr.ExecutionEnvironment.list("scikit")
environment = scikit_environments[-1]  # last match of the "scikit" search

serverless_only = [
    env
    for env in dr.PredictionEnvironment.list()
    if env.platform == "datarobotServerless"
]
# NOTE(review): the fixed index 2 relies on the API's listing order — confirm it still
# points at the intended serverless environment.
prediction_environment = serverless_only[2]
prediction_environment

PredictionEnvironment('668401df486fcb136bf056d1', 'DataRobot Serverless Predictions', 'datarobotServerless', 'DataRobot Serverless Predictions')

In [27]:
# Create the umbrella custom model through the raw REST route, so that the
# `isUnstructuredModelKind` / `isProxyModel` flags can be set in the payload.
response = client.post(
    "customModels",
    data={
        "customModelType": "inference",
        "isProxyModel": False,
        "isUnstructuredModelKind": True,
        "name": 'Rental Calc Umbrella Model v2',
        "targetName": "charges",
        "targetType": "Regression",
    },
)
umbrella_custom_model = dr.CustomInferenceModel.get(response.json()["id"])

# NOTE(review): the routing config is written to ./models/master-model, but the version
# is built from "./master-model" — confirm the folder contains the intended config.
umbrella_custom_model_version = dr.CustomModelVersion.create_clean(
    umbrella_custom_model.id,
    base_environment_id=environment.id,
    folder_path="./master-model",
    training_dataset_id=training_data.id,
)
print("version created")

# Start the dependency build and wait for it to leave its in-flight states.
# The loops used to call refresh() back-to-back with no delay; sleep between polls.
BUILD_POLL_SECONDS = 5
build = build_custom_model_environment(umbrella_custom_model, umbrella_custom_model_version)
while build[2].build_status in ("submitted", "processing"):
    time.sleep(BUILD_POLL_SECONDS)
    build[2].refresh()
if build[2].build_status != "success":
    # Only reported on a non-successful build (message typo "comleted" fixed).
    print("build completed, status:")
    print(build)

# Registration is attempted regardless of the build outcome, as before.
registered_model_version = register_custom_model(umbrella_custom_model, umbrella_custom_model_version)
print("version registered")

version created
version registered


In [29]:
# Deploy the registered umbrella model version.
deployment = create_deployment(
    registered_model_version,
    prediction_environment.id,
)
print("version deployed")

# Association ID stays optional on requests; for the umbrella deployment feature drift
# is tracked and target drift is not.
association_columns = ["ASSOCIATION_ID"]
deployment.update_association_id_settings(
    association_columns,
    required_in_prediction_requests=False,
)
drift_flags = {"target_drift_enabled": False, "feature_drift_enabled": True}
deployment.update_drift_tracking_settings(**drift_flags)
print("feature drift enabled")

version deployed
feature drift enabled


In [0]:
# Display the ID of the registered umbrella model version.
registered_model_version.id

In [0]:
# Display the ID of the umbrella deployment.
deployment.id