## Create Registered Model Versions

Since we want to monitor predictions, and possibly accuracy, we need to register each sub model with DataRobot.  This can take a little while.  In addition to registering each model, we'll attach each sub model's artifacts for safekeeping.

In [24]:
# deployments = dr.Deployment.list() 
# q_deployments = [q for q in deployments if "ADJ_PRED_RENTAL_DAYS" in str(q)]
# for d in q_deployments:
#     d.delete() 
#     dr.RegisteredModel.archive(d.model_package["registered_model_id"])

In [1]:
from pathlib import Path
import yaml
import time
import numpy as np
import datarobot as dr 
import pandas as pd 
import datarobot as dr 
# Shared DataRobot REST client; later cells use it for raw API calls
# (client.get / client.delete / client.patch).
# NOTE(review): credentials/endpoint are presumably resolved from the
# environment or a config file -- confirm for the runtime in use.
client = dr.Client() 

# Load the persisted deployment configuration, or bootstrap a fresh one
# holding one entry per quantile sub model (0.05 through 0.95, 19 values).
deployment_conf_path = Path("./deployment_conf.yaml")
if not deployment_conf_path.exists():
    print("deployment_conf.yaml does not exists, making a new one")
    # Rounding removes floating point noise from linspace so the column
    # names come out as e.g. "..._Q_0_15" rather than "..._Q_0_15000000000000002".
    quantiles = list(np.round(np.linspace(0.05, 0.95, 19), 3))
    prediction_columns = [
        f"ADJ_PRED_RENTAL_DAYS_Q_{q}".replace(".", "_") for q in quantiles
    ]
    deployment_conf = {
        "deployments": [{"prediction_column": col} for col in prediction_columns]
    }
else:
    print("deployment_conf.yaml exists")
    with open(deployment_conf_path, "r") as f:
        deployment_conf = yaml.safe_load(f)

deployment_conf.yaml does not exists, making a new one


In [2]:
# Catalog ID of the training dataset recorded by an earlier run, if any;
# None when the configuration has no such entry yet.
training_data_path = "./data/training_data.csv"
training_dataset_id = deployment_conf.get("training_dataset_id")

# Training data is read locally and uploaded from memory further below.
training_df = pd.read_csv(training_data_path)
## we are limiting to 100 versions of a dataset, so we need to check the
## number of versions and delete old ones
def purge_old_dataset_version(dataset_url):
    """Walk a paginated dataset-version listing and delete overflow versions.

    The listing at ``dataset_url`` is fetched through the notebook-level
    ``client``. Versions are deleted only when the URL being processed
    contains ``"offset"``, i.e. on every page after the first; the versions
    on the first page are left alone. The function then follows the
    response's ``next`` link recursively until there is none.

    NOTE(review): which versions land on the first page (newest or oldest)
    depends on the API's ordering -- confirm before relying on this to keep
    the most recent versions.

    Args:
        dataset_url: Relative API path of the versions listing, optionally
            with a query string, e.g. ``"datasets/<id>/versions?limit=99"``.
    """
    dataset_versions = client.get(dataset_url).json()
    if "offset" in dataset_url:
        print("off set present, deleting old versions")
        for d in dataset_versions["data"]:
            version_url = f"datasets/{d['datasetId']}/versions/{d['versionId']}/"
            print(version_url)
            delete_req = client.delete(version_url)
            print(delete_req)
    # `next_page` rather than `next`, so the builtin is not shadowed.
    if next_page := dataset_versions.get("next"):
        print(next_page)
        print(dataset_url)
        # Keep only the trailing "?limit=..&offset=.." segment of the
        # `next` link and re-attach it to the relative path used here.
        query_parameters = next_page.split("/")[-1]
        base_url = dataset_url.split("?")[0]
        # Plain string joining: the previous os.path.join call relied on an
        # `os` import that does not exist in this notebook (NameError) and
        # is not meant for URLs (it would insert a backslash on Windows).
        next_url = f"{base_url.rstrip('/')}/{query_parameters}"
        print(next_url)
        purge_old_dataset_version(next_url)

# First run: create the catalog dataset. Later runs: trim old versions, then
# upload the current training data as a new version of the existing dataset.
if not training_dataset_id:
    print("register dataset")
    training_dataset = dr.Dataset.create_from_in_memory_data(
        training_df,
        fname="Rental Calc Training Data",
    )
    print("done registering dataset ")
else:
    print("purging old versions if number of versions >= 100")
    purge_old_dataset_version(f"datasets/{training_dataset_id}/versions?limit=99")
    print("register new dataset version")
    training_dataset = dr.Dataset.create_version_from_in_memory_data(
        training_dataset_id,
        training_df,
    )
    print("done registereing dataset version")

# Record the dataset ID in the configuration for later cells and runs.
deployment_conf["training_dataset_id"] = training_dataset.id

register dataset
done registering dataset 


In [3]:
# Get-or-create the prediction environment that the deployments are attached to.
pred_env_id = deployment_conf.get("prediction_environment_id")
if not pred_env_id:
    print("prediction environment doesn't exist, creating one")
    prediction_environment = dr.PredictionEnvironment.create(
        name="Rental Calc External Prediction Environment",
        platform=dr.enums.PredictionEnvironmentPlatform.OTHER,
        description="DataRobot Codespace Running Scheduled Notebooks",
    )
    # Store the new ID in the configuration so it can be looked up next time.
    deployment_conf["prediction_environment_id"] = prediction_environment.id
else:
    print("prediction environment exists")
    prediction_environment = dr.PredictionEnvironment.get(pred_env_id)

prediction environment doesn't exist, creating one


In [4]:
import datetime

# Every registered model version created during this run.
registerd_external_models = []
# One timestamp shared by the names of all registered models created now.
ts = datetime.datetime.now()
for model in deployment_conf["deployments"]:
    print(model)
    quantile = model["prediction_column"]
    registered_model_id = model.get("registered_model_id")
    # A registered model name is only supplied when no registered model ID
    # is recorded yet; otherwise the version is added under the existing ID.
    registered_model_name = (
        None if registered_model_id else f"external {quantile} {ts}"
    )
    ext_reg_model = dr.RegisteredModelVersion.create_for_external(
        name=quantile,
        registered_model_id=registered_model_id,
        registered_model_name=registered_model_name,
        registered_model_description=f"{quantile} model that has been packaged with umbrella model",
        target={"type": "Regression", "name": "charges"},
        datasets={"trainingDataCatalogId": deployment_conf["training_dataset_id"]},
    )
    # Write the resulting IDs back onto the configuration entry.
    model.update(
        registered_model_id=ext_reg_model.registered_model_id,
        registered_model_version_id=ext_reg_model.id,
    )
    registerd_external_models.append(ext_reg_model)
    print(model)
    # Pause between registrations so the server is not overwhelmed.
    time.sleep(5)

{'prediction_column': 'ADJ_PRED_RENTAL_DAYS_Q_0_05'}
{'prediction_column': 'ADJ_PRED_RENTAL_DAYS_Q_0_05', 'registered_model_id': '6835d9a5286522231216decc', 'registered_model_version_id': '6835d9a5286522231216dece'}
{'prediction_column': 'ADJ_PRED_RENTAL_DAYS_Q_0_1'}
{'prediction_column': 'ADJ_PRED_RENTAL_DAYS_Q_0_1', 'registered_model_id': '6835d9ad1cd34a250016deee', 'registered_model_version_id': '6835d9ad1cd34a250016def0'}
{'prediction_column': 'ADJ_PRED_RENTAL_DAYS_Q_0_15'}
{'prediction_column': 'ADJ_PRED_RENTAL_DAYS_Q_0_15', 'registered_model_id': '6835d9b5a36c3ebef2397c0b', 'registered_model_version_id': '6835d9b6a36c3ebef2397c0d'}
{'prediction_column': 'ADJ_PRED_RENTAL_DAYS_Q_0_2'}
{'prediction_column': 'ADJ_PRED_RENTAL_DAYS_Q_0_2', 'registered_model_id': '6835d9be8a5772804870a7de', 'registered_model_version_id': '6835d9bf8a5772804870a7e0'}
{'prediction_column': 'ADJ_PRED_RENTAL_DAYS_Q_0_25'}
{'prediction_column': 'ADJ_PRED_RENTAL_DAYS_Q_0_25', 'registered_model_id': '6835d9c728

In [5]:
# Create a deployment per sub model, or swap the new registered model version
# into the deployment recorded from a previous run, then configure monitoring.
deployments = []
for model in deployment_conf["deployments"]:
    deployment_id = model.get("deployment_id")
    if not deployment_id:
        print("deploying model")
        deployment = dr.Deployment.create_from_registered_model_version(
            model_package_id=model["registered_model_version_id"],
            label=model["prediction_column"],
            description=f"external model deployment for {model['prediction_column']}",
            prediction_environment_id=prediction_environment.id,
        )
    else:
        print("replacing model")
        deployment = dr.Deployment.get(deployment_id)
        deployment.replace(
            new_registered_model_version_id=model["registered_model_version_id"],
            reason="SCHEDULED_REFRESH",
        )

    # Association ID column (per the original author: enables accuracy tracking).
    deployment.update_association_id_settings(
        column_names=["ASSOCIATION_ID"],
        required_in_prediction_requests=False,
    )
    # Prediction data collection (per the original author: enables challengers).
    deployment.update_predictions_data_collection_settings(enabled=True)
    # Target and feature drift are switched on with a direct settings PATCH;
    # the author found deployment.update_drift_tracking_settings(
    # target_drift_enabled=True, feature_drift_enabled=True) to be very slow.
    drift_settings = {"targetDrift":{"enabled":True},"featureDrift":{"enabled":True,"featureSelection":"auto","trackedFeatures":[]}}
    dep_patch = client.patch(f"deployments/{deployment.id}/settings/", data=drift_settings)

    # Record the deployment details on the configuration entry.
    model["deployment_id"] = deployment.id
    model["target_type"] = "Regression"
    model["url"] = f"https://app.datarobot.com/console-nextgen/deployments/{deployment.id}"
    deployments.append(deployment)

deploying model
deploying model
deploying model
deploying model
deploying model
deploying model
deploying model
deploying model
deploying model
deploying model
deploying model
deploying model
deploying model
deploying model
deploying model
deploying model
deploying model
deploying model
deploying model


In [6]:
# Persist the updated configuration (dataset, environment, model and
# deployment IDs) so the next run picks up where this one left off.
with open("deployment_conf.yaml", mode="w") as conf_file:
    yaml.dump(deployment_conf, conf_file)