In [18]:
from sklearn.linear_model import QuantileRegressor
import numpy as np 
import pandas as pd 
import category_encoders.ordinal
import numpy
import pandas
import sklearn
import sklearn.impute
import xgboost
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer, make_column_selector

# Read the training table; "charges" is the regression target and every other
# column is passed to the pipeline as a candidate feature.
df = pd.read_csv("./data/training_data.csv")
y = df["charges"]
X = df.drop(columns=["charges"])

# 19 evenly spaced quantile levels from 0.05 to 0.95. Rounding to 3 decimals
# strips linspace's floating-point noise so the levels make clean dict keys.
quantiles = list(np.round(np.linspace(0.05, 0.95, 19), 3))

# One independently fitted pipeline per quantile level, keyed by that level.
models = {}
for quantile in quantiles:
    # NOTE(review): this branch only selects columns whose dtype is
    # 'category'. Nothing in this cell casts columns to category, so it
    # presumably selects no columns, and ColumnTransformer drops unselected
    # columns by default -- confirm whether text features were meant to be
    # ordinal-encoded here.
    categorical_branch = (
        "CAT_ORDCAT2_1",
        Pipeline([("ORDCAT2_1", category_encoders.ordinal.OrdinalEncoder())]),
        make_column_selector(dtype_include='category'),
    )
    # Numeric columns get median imputation.
    numeric_branch = (
        "NUM_PNI2_2",
        Pipeline([("PNI2_2", sklearn.impute.SimpleImputer(strategy='median'))]),
        make_column_selector(dtype_include=numpy.number),
    )
    preprocessor = ColumnTransformer([categorical_branch, numeric_branch])

    # alpha=0 switches off the regressor's regularisation penalty.
    regressor = QuantileRegressor(quantile=quantile, alpha=0)
    estimator = Pipeline([
        ("preprocessor", preprocessor),
        (f"quantile_{quantile}", regressor),
    ])
    estimator.fit(X, y)
    models[quantile] = {"model": estimator}

In [20]:
from pathlib import Path 
import pickle
# Write one artifact folder per quantile level: the pickled pipeline plus the
# requirements file listing the packages needed to load it.
for quantile, entry in models.items():
    folder = Path(f"./models/quantile-{quantile}")
    folder.mkdir(parents=True, exist_ok=True)
    artifact_dir = folder.absolute()

    with open(artifact_dir / "model.pkl", "wb") as f:
        pickle.dump(entry["model"], f)

    # Same two pinned lines as before, written in a single call.
    with open(artifact_dir / "requirements.txt", "w") as f:
        f.write("category_encoders==2.6.0\n" "scikit-learn==1.6.1")

In [28]:
# Show how the first quantile key renders as a string (used in names and paths).
str(list(models)[0])

'0.05'

In [29]:
import datarobot as dr
import requests
EXTERNAL_PREDICTION_ENV = '65fb005b00e2ba108b8758d0'
# REST endpoint used below to attach a file to a model package as a key-value artifact.
url = "https://app.datarobot.com/api/v2/keyValues/fromFile/"
client = dr.Client()
# Bearer-token header for the raw REST calls; the token comes from the
# configured DataRobot client.
headers = {
    'Authorization': f'Bearer {client.token}'
}


def _upload_artifact(model_package_id, folder, file_name, value_type):
    """Attach one file from ``folder`` to a model package as an artifact.

    Args:
        model_package_id: id of the registered model version to attach to.
        folder: directory that contains the file.
        file_name: name of the file; also used as the artifact name.
        value_type: value sent as ``valueType`` ("pickle" or "binary" here).

    Returns:
        The ``requests.Response`` of the upload.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    payload = dict(
        entityId=model_package_id,
        entityType="modelPackage",
        category="artifact",
        valueType=value_type,
        name=file_name,
    )
    # The context manager closes the handle once the request has been sent;
    # the previous inline open() calls were never closed.
    with open(f"{folder}/{file_name}", "rb") as artifact:
        files = [("file", (file_name, artifact, "application/octet-stream"))]
        response = requests.request("POST", url, headers=headers, data=payload, files=files)
    # Fail loudly: a registered version without its artifacts is unusable, and
    # the response was previously ignored.
    response.raise_for_status()
    return response


# Register every quantile model as an external registered model version and
# attach its pickled pipeline and requirements file.
registerd_external_models = []
for quantile, model in models.items():
    quantile = str(quantile)
    ext_reg_model = dr.RegisteredModelVersion.create_for_external(
        name=f"quantile {quantile}",
        target={"type": "Regression", "name": "charges"},
        # datasets = {"trainingDataCatalogId": "67bf7d05ad1d7cda48cc2453"},
        registered_model_name=f"external quantile {quantile}",
        registered_model_description=f"quantile {quantile} model that has been packaged with umbrella model",
    )
    artifact_folder = f"./models/quantile-{quantile}"
    ## model pkl
    response = _upload_artifact(ext_reg_model.id, artifact_folder, "model.pkl", "pickle")
    ## requirements.txt
    response = _upload_artifact(ext_reg_model.id, artifact_folder, "requirements.txt", "binary")
    registerd_external_models.append(ext_reg_model)


In [35]:
# Create one deployment per registered quantile model and configure its
# monitoring settings; the resulting deployments are collected in order.
deployments = []
for model_version in registerd_external_models:
    version_name = model_version.name
    deployment = dr.Deployment.create_from_registered_model_version(
        model_package_id=model_version.id,
        label=version_name,
        description=f"external model deployment for {version_name}",
        prediction_environment_id='65f08b280c919297b297039c',
    )
    # Track target drift only; feature drift stays off for these deployments.
    deployment.update_drift_tracking_settings(
        target_drift_enabled=True,
        feature_drift_enabled=False,
    )
    # Accuracy: register the association id column (optional in requests).
    deployment.update_association_id_settings(
        column_names=["ASSOCIATION_ID"],
        required_in_prediction_requests=False,
    )
    # Challengers: turn on prediction data collection.
    deployment.update_predictions_data_collection_settings(enabled=True)
    deployments.append(deployment)




In [38]:
import yaml 
# One routing entry per quantile deployment: ids, the deployment label as the
# tag, and the deployment's prediction URL.
routing_conf = [
    dict(
        deployment_id=dep.id,
        model_id=dep.model["id"],
        tag=dep.label,
        target_type="Regression",
        url=f"https://app.datarobot.com/api/v2/deployments/{dep.id}/predictions",
    )
    for dep in deployments
]

# Serialise the entries as YAML into the routing config file.
with open("models/master-model-v2/routing_config.yaml", "w") as f:
    yaml.dump(routing_conf, f)

## Deployment of the Umbrella Model

In [None]:

import time
from pathlib import Path 
import pickle
import requests

def create_custom_model(name, artifact_folder, training_dataset_id, environment_id):
    """Create a regression custom inference model and its first version.

    Args:
        name: name given to the custom model.
        artifact_folder: local folder uploaded as the version's contents.
        training_dataset_id: accepted but not used by this function.
        environment_id: id of the base execution environment for the version.

    Returns:
        Tuple ``(custom_model, custom_model_version)``.
    """
    custom_model = dr.CustomInferenceModel.create(
        name,
        target_type=dr.enums.TARGET_TYPE.REGRESSION,
        target_name="charges",
    )
    version = dr.CustomModelVersion.create_clean(
        custom_model.id,
        folder_path=artifact_folder,
        base_environment_id=environment_id,
    )
    return custom_model, version

def build_custom_model_environment(cm, cmv):
    """Request a dependency build for a custom model version.

    Posts to the version's ``dependencyBuild`` route through the module-level
    ``client`` and then fetches the build info for the same version.

    Args:
        cm: custom model (its ``id`` is used).
        cmv: custom model version (its ``id`` is used).

    Returns:
        Tuple ``(cm, cmv, build_info)``.
    """
    # The POST response itself is not needed; the build is tracked through the
    # build info object fetched next.
    client.post(f"customModels/{cm.id}/versions/{cmv.id}/dependencyBuild/")
    info = dr.CustomModelVersionDependencyBuild.get_build_info(cm.id, cmv.id)
    return cm, cmv, info

def test_custom_model(cm, cmv, test_dataset_id):
    custom_model_test = dr.CustomModelTest.create(
        cm.id, 
        cmv.id, 
        dataset_id = testing_dataset_id, 
        network_egress_policy = dr.enums.NETWORK_EGRESS_POLICY.PUBLIC) 
    return cm, cmv
    
def register_custom_model(cm, cmv):
    """Register a custom model version in the model registry.

    The custom model's name is reused as the version name, the registered
    model name and the description.

    Args:
        cm: custom model (its ``name`` is used).
        cmv: custom model version (its ``id`` is used).

    Returns:
        The created registered model version.
    """
    display_name = cm.name
    return dr.RegisteredModelVersion.create_for_custom_model_version(
        custom_model_version_id=cmv.id,
        name=display_name,
        registered_model_name=display_name,
        description=display_name,
    )

def create_deployment(registered_model_version, prediction_environment_id):
    """Deploy a registered model version and set its association id column.

    Args:
        registered_model_version: version to deploy (``id`` and ``name`` are
            used; the name becomes the deployment label).
        prediction_environment_id: id of the target prediction environment.

    Returns:
        The created deployment.
    """
    new_deployment = dr.Deployment.create_from_registered_model_version(
        registered_model_version.id,
        label=registered_model_version.name,
        prediction_environment_id=prediction_environment_id,
    )
    # Association id is configured but not required in prediction requests.
    new_deployment.update_association_id_settings(
        ["ASSOCIATION_ID"],
        required_in_prediction_requests=False,
    )
    return new_deployment

def update_deployment_settings(deployment):
    """Apply the standard monitoring settings to a deployment.

    Sets the association id column (not required in prediction requests) and
    enables target drift tracking while leaving feature drift tracking off.

    Args:
        deployment: deployment object to configure.
    """
    deployment.update_association_id_settings(
        ["ASSOCIATION_ID"],
        required_in_prediction_requests=False,
    )
    deployment.update_drift_tracking_settings(
        target_drift_enabled=True,
        feature_drift_enabled=False,
    )


In [None]:
# Base execution environment: the last entry returned for the "scikit" search.
environment = dr.ExecutionEnvironment.list("scikit")[-1]

# Prediction environment: filter to the serverless platform, then pick one.
# NOTE(review): index 2 depends on the order and number of serverless
# environments in this account -- confirm it still points at the intended one.
serverless_environments = [
    candidate
    for candidate in dr.PredictionEnvironment.list()
    if candidate.platform == "datarobotServerless"
]
prediction_environment = serverless_environments[2]
prediction_environment

In [None]:
# Create the umbrella custom model through the raw REST route, then load it
# as a client object by id.
response = client.post(
    "customModels",
    data={
        "customModelType": "inference",
        "isProxyModel": False,
        "isUnstructuredModelKind": True,
        "name": 'Rental Calc Umbrella Model v2',
        "targetName": "charges",
        "targetType": "Regression",
    },
)
umbrella_custom_model = dr.CustomInferenceModel.get(response.json()["id"])

# NOTE(review): `training_data` is not defined in the visible cells -- confirm
# it is created before this cell runs. Also confirm that "./master-model" is
# the folder holding the routing config, which the earlier cell writes to
# "models/master-model-v2/".
umbrella_custom_model_version = dr.CustomModelVersion.create_clean(
    umbrella_custom_model.id,
    base_environment_id=environment.id,
    folder_path="./master-model",
    training_dataset_id=training_data.id,
)
print("version created")

# Start the dependency build and wait for it to leave the in-progress states.
# Sleeping between refreshes avoids hammering the API in a tight loop.
build = build_custom_model_environment(umbrella_custom_model, umbrella_custom_model_version)
build_info = build[2]
while build_info.build_status in ("submitted", "processing"):
    time.sleep(5)
    build_info.refresh()
print("build completed, status:")
print(build_info.build_status)
if build_info.build_status != "success":
    # Do not register or deploy a version whose dependency build did not succeed.
    raise RuntimeError(
        f"dependency build finished with status {build_info.build_status!r}"
    )

registered_model_version = register_custom_model(umbrella_custom_model, umbrella_custom_model_version)
print("version registered")
deployment = create_deployment(registered_model_version, prediction_environment.id)
print("version deployed")
deployment.update_association_id_settings(["ASSOCIATION_ID"], required_in_prediction_requests=False)
# The umbrella deployment tracks feature drift only (target drift stays off).
deployment.update_drift_tracking_settings(target_drift_enabled=False, feature_drift_enabled=True)
print("feature drift enabled")


## tag deployments

In [None]:
# Value of the "Umbrella Model" tag attached to the umbrella deployment and to
# every deployment listed in the routing config.
MODEL_TAG = "Rental Calc Umbrella Model V3"

In [None]:
# Tag the umbrella deployment itself with the shared "Umbrella Model" tag.
client.post(f"deployments/{deployment.id}/tags", data = {"name": "Umbrella Model", "value": MODEL_TAG})

In [43]:
# Read the routing entries back from disk using the safe YAML loader.
with open("models/master-model-v2/routing_config.yaml") as f:
    model_config = yaml.safe_load(f)

In [None]:
# Tag every routed deployment with the shared "Umbrella Model" tag. Tagging is
# best-effort: a failure (for example the tag already existing) is printed and
# the loop moves on to the next deployment.
for dep_id in (entry["deployment_id"] for entry in model_config):
    try:
        client.post(
            f"deployments/{dep_id}/tags",
            data={"name": "Umbrella Model", "value": MODEL_TAG},
        )
    except Exception as e:
        print(e)

409 client error: {'message': 'The name is already in use for this deployment.'}
