## Create sub models

This is just a toy example using open source data to generate sub models that will be utilized by one umbrella model

In [2]:
from sklearn.linear_model import QuantileRegressor
import numpy as np 
import pandas as pd 
import category_encoders.ordinal
import numpy
import pandas
import sklearn
import sklearn.impute
import xgboost
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer, make_column_selector
import datetime 

df = pd.read_csv("./data/training_data.csv")
X = df.drop(["charges"], axis=1)
y = df["charges"]

quantiles = np.linspace(0.05, 0.95, 19)
quantiles = [np.round(x,3) for x in quantiles]
models = {}
for quantile in quantiles:
    preprocessor = ColumnTransformer([
    ("CAT_ORDCAT2_1", Pipeline([("ORDCAT2_1", category_encoders.ordinal.OrdinalEncoder())]), make_column_selector(dtype_include='category')),
    ("NUM_PNI2_2", Pipeline([("PNI2_2", sklearn.impute.SimpleImputer(strategy='median'))]), make_column_selector(dtype_include=numpy.number)),
    ])
    estimator = Pipeline([("preprocessor", preprocessor), (f"quantile_{quantile}", QuantileRegressor(quantile=quantile, alpha=0))])
    estimator.fit(X, y)
    models[quantile] = {"model": estimator}

In [3]:
from pathlib import Path 
import pickle
for quantile, model in models.items():
    folder = Path(f"./custom-model/models/quantile-{quantile}")
    folder.mkdir(exist_ok=True, parents = True)
    with open(str(folder.absolute() / "model.pkl"), "wb") as f:
        pickle.dump(model["model"], f)
    with open(str(folder.absolute() / "requirements.txt"), "w") as f:
        f.write("category_encoders==2.6.0\n")
        f.write("scikit-learn==1.6.1")

## Create Registered Model Versions

Since we want to monitor predictions and maybe accuracy, we will want to register each sub model to DataRobot.  This could take a little bit of time.  In addition to registered the model, we'll add each submodel artifacts for safe keepping

In [4]:
import datarobot as dr
import requests
EXTERNAL_PREDICTION_ENV = '65fb005b00e2ba108b8758d0'
url = "https://app.datarobot.com/api/v2/keyValues/fromFile/"
client = dr.Client() 
headers = {
  'Authorization': f'Bearer {client.token}'
}
registerd_external_models = []
## this takes 
ts = datetime.datetime.now()
for quantile, model in models.items():
    quantile = str(quantile)
    ext_reg_model = dr.RegisteredModelVersion.create_for_external(
        name = f"quantile-{quantile}", 
        target = {"type": "Regression", "name": "charges"},
        # datasets = {"trainingDataCatalogId": "682de398a9f154ece8d25a8d"}, 
        registered_model_name = f"external quantile {quantile} {ts}",
        registered_model_description=f"quantile {quantile} model that has been packaged with umbrella model"
    )
    ## model pkl
    payload = dict(entityId = ext_reg_model.id, entityType = "modelPackage", category="artifact", valueType = "pickle", name = "model.pkl")
    files=[('file',('model.pkl',open(f'./custom-model/models/quantile-{quantile}/model.pkl','rb'),'application/octet-stream'))  ]
    response = requests.request("POST", url, headers=headers, data=payload, files=files)
    ## requirements.txt
    payload = dict(entityId = ext_reg_model.id, entityType = "modelPackage", category="artifact", valueType = "binary", name = "requirements.txt")
    files=[
      ('file',('requirements.txt',open(f'./custom-model/models/quantile-{quantile}/requirements.txt','rb'),'application/octet-stream'))
    ]
    response = requests.request("POST", url, headers=headers, data=payload, files=files)
    registerd_external_models.append(ext_reg_model)


## Deploy each sub model

Even though datarobot won't host the model, we are creating a special kind of deployment in DataRobot - an external deployment.  We will be reporting stuff back to this external deployment in datarobot so datarobot can keep track of feature drift, prediction tracking as well as model accuracy (if we ever provide actuals).  

For each deployment we need to record the deployment id and the registered model version id (aka model package id).  This detail will be used later when we need to report stuff back to datarobot. 

In [5]:
deployments = []
for registerd_external_model in registerd_external_models:
    deployment = dr.Deployment.create_from_registered_model_version(
        model_package_id = registerd_external_model.id,
        label=registerd_external_model.name,
        description=f"external model deployment for {registerd_external_model.name}",
        # default_prediction_server_id=PREDICTION_SERVER.id,
        # importance="HIGH",
        prediction_environment_id='65f08b280c919297b297039c'
    )
    deployment.update_drift_tracking_settings(target_drift_enabled=True, feature_drift_enabled=False)
    # Enabling Accuracy
    deployment.update_association_id_settings(column_names=["ASSOCIATION_ID"], required_in_prediction_requests=False)
    # Enabling Challenger
    deployment.update_predictions_data_collection_settings(enabled=True)
    deployments.append(deployment)




## Record the info

The recorded detail will be used in our umbrella model

In [6]:
import yaml 
routing_conf = []
for deployment in deployments:
    data = dict(
        deployment_id = deployment.id ,
        model_id = deployment.model["id"],
        tag = deployment.label, 
        target_type = "Regression", 
        url = f"https://app.datarobot.com/console-nextgen/deployments/{deployment.id}"
    )
    routing_conf.append(data)
with open("custom-model/routing_config.yaml", "w") as f:
    f.write(yaml.dump(routing_conf))

## Test out the umbrella model in Codespace before taking it to DataRobot

I ALWAYS test models out locally before taking them to DataRobot, especially unstructured models.  We will compelte this testing using DataRobot User Models (aka DRUM).  This will let us start an inference server locally and interact with the models exactly as we would interact with it when hosted in DR, or deployed to AKS via DR management agents.


This approach allows me to
* Test the exact payload i would send to the production deployment
* log, log, and log some more -> all will be visible in the codespace terminal.

In order to start the inference server, 
* open terminal in the dr codespace
* run ./start_server.sh
* Review the start up logs

IMPORTANT anytime you change your custom.py or associated files, you will need to stop and start the prediction server. Kill the prediction server in terminal with CTRL+C

the `./start_server.sh` script will set several environment variable to mock mock the reporting of prediction and service data back to datarobot (by utilizing the FS Spooler via DataRobot MLOps client).  If you review the `start_server.sh` in the `custom-model` folder, you will see different environment variables, which will utilize event hub for curating statistics.  





In [7]:
import pandas as pd 
df = pd.read_csv("./data/test_data.csv")

In [8]:
import requests
import pprint 
try: 
    pprint.pprint(requests.get("http://0.0.0.0:12345/info").json())
except:
    print("did you run `./start_server.sh` in cli?")

{'codeDir': '/home/notebooks/storage/custom-model',
 'drumServer': 'flask',
 'drumVersion': '1.16.3',
 'language': 'python',
 'modelMetadata': {'name': 'Rental Calc Master',
                   'runtimeParameterDefinitions': [{'credentialType': 'api_token',
                                                    'description': 'DataRobot '
                                                                   'API Token',
                                                    'fieldName': 'DATAROBOT_API_TOKEN',
                                                    'type': 'credential'}],
                   'targetType': 'unstructured',
                   'type': 'inference'},
 'predictor': None,
 'targetType': 'unstructured'}


In [9]:
response = requests.post("http://0.0.0.0:12345/predictUnstructured", headers = {"Content-Type": "application/text"}, data = df.to_csv(index = False))
pd.DataFrame(response.json())

Unnamed: 0,quantile-0.05,quantile-0.1,quantile-0.15,quantile-0.2,quantile-0.25,quantile-0.3,quantile-0.35,quantile-0.4,quantile-0.45,quantile-0.5,quantile-0.55,quantile-0.6,quantile-0.65,quantile-0.7,quantile-0.75,quantile-0.8,quantile-0.85,quantile-0.9,quantile-0.95
0,12503.586519,12775.434805,12869.411840,12998.213481,13060.075630,13206.862419,13310.110873,13473.861308,13634.031648,13749.553988,13915.441204,14084.625732,14424.821033,15019.760050,24131.744158,30808.644178,34412.034860,38023.057429,40604.791502
1,10178.359425,10410.703281,10508.447772,10651.402353,10748.259786,10907.633850,11027.417515,11189.075384,11359.919913,11474.704808,11651.276214,11814.856327,12099.022179,12559.703423,22084.073808,30156.926026,34839.891988,38695.283152,41886.072633
2,6304.556112,6484.504362,6595.293705,6725.987972,6842.534145,6978.093346,7104.548122,7237.313440,7371.523925,7473.698931,7631.935627,7779.690485,8071.328253,8503.775014,18716.920185,24192.602759,27666.480634,31534.640269,35120.384638
3,3913.250994,4066.654767,4180.300142,4286.268661,4381.201463,4496.143977,4615.924871,4727.555200,4849.812433,4928.525483,5065.445618,5217.410659,5629.419304,6248.355645,19136.039225,24353.443205,28580.936715,32797.399365,35919.509267
4,1083.341267,1190.806742,1311.529900,1429.296487,1563.631994,1684.411646,1821.294344,1924.988933,2045.610439,2123.482210,2266.663986,2407.853378,2755.195186,3221.127993,16219.792219,21746.739952,26321.260732,30683.352609,34524.332250
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129,6227.861797,6412.251280,6516.784818,6630.698544,6712.360404,6844.515666,6960.557984,7091.152138,7241.462979,7326.451745,7475.990438,7639.954374,8055.359185,8703.489241,21499.539058,28880.585003,34300.824762,38591.777257,41430.168487
130,2557.746338,2698.707916,2812.100878,2896.443372,2964.932431,3064.766540,3176.018506,3273.749419,3392.072808,3451.058248,3571.085013,3730.758985,4258.875985,5068.301033,20354.188619,25831.495566,31000.402127,35538.095387,38104.716852
131,8153.666683,8351.973701,8458.233466,8613.583251,8750.496198,8911.262698,9045.982279,9200.355125,9357.765180,9478.293902,9659.699358,9807.784135,10000.975369,10275.712967,18760.434177,25755.937738,29556.156676,33295.915159,37273.794241
132,9089.269707,9305.022623,9398.795488,9541.895900,9635.551632,9805.528032,9928.621903,10092.016272,10288.305271,10392.246586,10573.308677,10747.185926,11079.030160,11603.227748,23176.153990,34066.313123,41136.317759,45471.441634,48496.511112


Everything looks a-ok.  Now we need to take the model into DataRobot.  We'll mostly follow the same process as before we a few more steps. 

# Deployment of the Umbrella Model

## Create a Custom Model In the Custom Model Workshop

In [10]:

import time
from pathlib import Path 
import pickle
import requests
import datarobot as dr

def build_custom_model_environment(cm, cmv):
    url = f"customModels/{cm.id}/versions/{cmv.id}/dependencyBuild/"
    build_req = client.post(url)
    build_info = dr.CustomModelVersionDependencyBuild.get_build_info(cm.id, cmv.id)
    # build = dr.CustomModelVersionDependencyBuild.start_build(cm.id, cmv.id, max_wait = 1200)
    return cm, cmv, build_info
    
def register_custom_model(cm, cmv):
    registered_model_version = dr.RegisteredModelVersion.create_for_custom_model_version(
        custom_model_version_id =  cmv.id, 
        name = cm.name, 
        registered_model_name=  cm.name,
        description = cm.name,
    )
    return registered_model_version

def create_deployment(registered_model_version, prediction_environment_id):
    deployment = dr.Deployment.create_from_registered_model_version(
        registered_model_version.id,
        prediction_environment_id=prediction_environment_id,
        label = registered_model_version.name,
    )
    deployment.update_association_id_settings(["ASSOCIATION_ID"], required_in_prediction_requests=False)
    # deployment.update_drift_tracking_settings(target_drift_enabled=True, feature_drift_enabled=True)
    return deployment

def update_deployment_settings(deployment):
    deployment.update_association_id_settings(["ASSOCIATION_ID"], required_in_prediction_requests=False)
    deployment.update_drift_tracking_settings(target_drift_enabled=True, feature_drift_enabled=False)


In [11]:
environment = dr.ExecutionEnvironment.list("scikit").pop()
prediction_environment = [ pe for pe in dr.PredictionEnvironment.list() if pe.platform == "datarobotServerless"]
prediction_environment = prediction_environment[2]
prediction_environment

PredictionEnvironment('668401df486fcb136bf056d1', 'DataRobot Serverless Predictions', 'datarobotServerless', 'DataRobot Serverless Predictions')

In [0]:
training_data = dr.Dataset.create_from_file("./data/training_data.csv")

In [14]:
response = client.post("customModels", 
                       data = {
                        "customModelType": "inference",
                        "isProxyModel": False,
                        "isUnstructuredModelKind": True,
                        "name": f'Rental Calc Umbrella Model {ts}',
                        "targetName": "charges",
                        "targetType": "Regression",
                        }
                    )
umbrella_custom_model = dr.CustomInferenceModel.get(response.json()["id"])

umbrella_custom_model_version = dr.CustomModelVersion.create_clean(umbrella_custom_model.id, 
                                            base_environment_id = environment.id,
                                            folder_path = "./custom-model",   
                                            training_dataset_id=training_data.id
                                            )
print("version created")
build = build_custom_model_environment(umbrella_custom_model, umbrella_custom_model_version)
while build[2].build_status == "submitted":
    build[2].refresh()
while build[2].build_status == "processing":
    build[2].refresh()
if build[2].build_status != "success":
    print("build comleted, status:")
    print(build)
registered_model_version = register_custom_model(umbrella_custom_model, umbrella_custom_model_version)
print("version registered")



ERROR! Session/line number was not unique in database. History logging moved to new session 8
version created
version registered


In [15]:
deployment = create_deployment(registered_model_version, prediction_environment.id)
print("version deployed")
deployment.update_association_id_settings(["ASSOCIATION_ID"], required_in_prediction_requests=False)
deployment.update_drift_tracking_settings(target_drift_enabled=False, feature_drift_enabled=True)
print("feature drift enabled")

ERROR! Session/line number was not unique in database. History logging moved to new session 9
version deployed
feature drift enabled


## tag deployments

In [16]:
MODEL_TAG = "Rental Calc Umbrella Model"

In [17]:
client.post(f"deployments/{deployment.id}/tags", data = {"name": "Umbrella Model", "value": MODEL_TAG})

<Response [201]>

In [18]:
with open("custom-model/routing_config.yaml", "r") as f:
    model_config = yaml.load(f, Loader = yaml.SafeLoader)
for model in model_config:
    dep_id = model["deployment_id"]
    try:
        client.post(f"deployments/{dep_id}/tags", data = {"name": "Umbrella Model", "value": MODEL_TAG})
    except Exception as e:
        print(e)

In [24]:
import sys
import json
import requests
 
API_URL = 'https://app.datarobot.com/api/v2/deployments/{deployment_id}/predictionsUnstructured'
 
headers = {
    'Content-Type': '{};charset={}'.format("application/text", "UTF-8"),
    'Authorization': 'Bearer {}'.format(os.environ["DATAROBOT_API_TOKEN"]),
}
 
url = API_URL.format(deployment_id=deployment.id)

    # Make API request for predictions
predictions_response = requests.post(
    url,
    data=df.to_csv(index = False),
    headers=headers,
)

In [25]:
pd.DataFrame(predictions_response.json())

Unnamed: 0,quantile-0.05,quantile-0.1,quantile-0.15,quantile-0.2,quantile-0.25,quantile-0.3,quantile-0.35,quantile-0.4,quantile-0.45,quantile-0.5,quantile-0.55,quantile-0.6,quantile-0.65,quantile-0.7,quantile-0.75,quantile-0.8,quantile-0.85,quantile-0.9,quantile-0.95
0,12503.586519,12775.434805,12869.411840,12998.213481,13060.075630,13206.862419,13310.110873,13473.861308,13634.031648,13749.553988,13915.441204,14084.625732,14424.821033,15019.760050,24131.744158,30808.644178,34412.034860,38023.057429,40604.791502
1,10178.359425,10410.703281,10508.447772,10651.402353,10748.259786,10907.633850,11027.417515,11189.075384,11359.919913,11474.704808,11651.276214,11814.856327,12099.022179,12559.703423,22084.073808,30156.926026,34839.891988,38695.283152,41886.072633
2,6304.556112,6484.504362,6595.293705,6725.987972,6842.534145,6978.093346,7104.548122,7237.313440,7371.523925,7473.698931,7631.935627,7779.690485,8071.328253,8503.775014,18716.920185,24192.602759,27666.480634,31534.640269,35120.384638
3,3913.250994,4066.654767,4180.300142,4286.268661,4381.201463,4496.143977,4615.924871,4727.555200,4849.812433,4928.525483,5065.445618,5217.410659,5629.419304,6248.355645,19136.039225,24353.443205,28580.936715,32797.399365,35919.509267
4,1083.341267,1190.806742,1311.529900,1429.296487,1563.631994,1684.411646,1821.294344,1924.988933,2045.610439,2123.482210,2266.663986,2407.853378,2755.195186,3221.127993,16219.792219,21746.739952,26321.260732,30683.352609,34524.332250
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129,6227.861797,6412.251280,6516.784818,6630.698544,6712.360404,6844.515666,6960.557984,7091.152138,7241.462979,7326.451745,7475.990438,7639.954374,8055.359185,8703.489241,21499.539058,28880.585003,34300.824762,38591.777257,41430.168487
130,2557.746338,2698.707916,2812.100878,2896.443372,2964.932431,3064.766540,3176.018506,3273.749419,3392.072808,3451.058248,3571.085013,3730.758985,4258.875985,5068.301033,20354.188619,25831.495566,31000.402127,35538.095387,38104.716852
131,8153.666683,8351.973701,8458.233466,8613.583251,8750.496198,8911.262698,9045.982279,9200.355125,9357.765180,9478.293902,9659.699358,9807.784135,10000.975369,10275.712967,18760.434177,25755.937738,29556.156676,33295.915159,37273.794241
132,9089.269707,9305.022623,9398.795488,9541.895900,9635.551632,9805.528032,9928.621903,10092.016272,10288.305271,10392.246586,10573.308677,10747.185926,11079.030160,11603.227748,23176.153990,34066.313123,41136.317759,45471.441634,48496.511112
