## Generate some test data

This is an example of scoring data and monitoring with datarobot

In [1]:
import json
import pandas as pd 
import yaml
import datarobot as dr
from datarobot_mlops.mlops import MLOps
import time
import datarobot as dr 
import datetime
import requests 
import json 

example1 = { 
    "claimNumber": 12341234,
    "exposureType": "exposure type a",
    "modelConfidence": "low",  ## high, low, medium
    "claimantInfo": " [\{\}] "
}

import numpy as np 
def generate_data(n = 100):
    arr = ["low", "medium", "high"]
    modelConfidence = np.random.choice(["low", "medium", "high"], size=n, p = [0.6, 0.3, 0.1], replace=True)
    exposureType = np.random.choice(["exposure type a", "exposure type b", "exposure type c"], size=n, p = [1/3 ,1/3, 1/3], replace=True)
    claimNumber = np.random.randint(11111, 99999, size = n)
    
    payload = pd.DataFrame( dict( claimNumber = claimNumber, exposureType = exposureType, modelConfidence = modelConfidence))
    payload["claimantInfo"] = json.dumps( dict( field1 = "field1", field2 = "field2"))
    return payload

def ohe_prediction(x):
    if x == "low":
        return [1.0, 0.0, 0.0]
    elif x == "medium":
        return [0.0, 1.0, 0.0]
    elif x == "high":
        return [0.0, 0.0, 1.0]
    else:
        return [1/3, 1/3, 1/3]

In [2]:
client = dr.Client() 
payload = generate_data(n = 10000)
preds = payload["modelConfidence"].apply(ohe_prediction).values
preds_df = pd.DataFrame(list(preds), columns = ["PREDICTION_LOW", "PREDICTION_MEDIUM", "PREDICTION_HIGH"])
ts = datetime.datetime.now().isoformat()
# print(preds_df.describe() )
payload = payload.join(preds_df)
payload

Unnamed: 0,claimNumber,exposureType,modelConfidence,claimantInfo,PREDICTION_LOW,PREDICTION_MEDIUM,PREDICTION_HIGH
0,72046,exposure type a,low,"{""field1"": ""field1"", ""field2"": ""field2""}",1.0,0.0,0.0
1,45479,exposure type c,low,"{""field1"": ""field1"", ""field2"": ""field2""}",1.0,0.0,0.0
2,81315,exposure type c,medium,"{""field1"": ""field1"", ""field2"": ""field2""}",0.0,1.0,0.0
3,75508,exposure type c,low,"{""field1"": ""field1"", ""field2"": ""field2""}",1.0,0.0,0.0
4,12599,exposure type c,low,"{""field1"": ""field1"", ""field2"": ""field2""}",1.0,0.0,0.0
...,...,...,...,...,...,...,...
9995,14387,exposure type b,low,"{""field1"": ""field1"", ""field2"": ""field2""}",1.0,0.0,0.0
9996,67155,exposure type c,low,"{""field1"": ""field1"", ""field2"": ""field2""}",1.0,0.0,0.0
9997,53701,exposure type b,low,"{""field1"": ""field1"", ""field2"": ""field2""}",1.0,0.0,0.0
9998,39808,exposure type b,low,"{""field1"": ""field1"", ""field2"": ""field2""}",1.0,0.0,0.0


## Recommended Approach 

1.  Register Payload to DataRobot Dataset Register
2.  Create and trigger a monitoring job - DataRobot will use the uploaded dataset to monitoring inputs and predictions

## get deployment conf

this would have been created in the 01 notebook

In [3]:
with open("deployment.yaml", "r") as f:
    deployment_conf = yaml.load(f, Loader = yaml.SafeLoader)


deployment = dr.Deployment.get( deployment_conf.get("deployment_id"))
dataset_id = deployment_conf.get("dataset_id")
batch_monitoring_job_id = deployment_conf.get("batch_monitoring_job_id")

## delete old versions of prediction data if necesary

In [4]:
## check number of versions and delete old ones
def purge_old_dataset_version(dataset_url):
    dataset_versions = client.get(dataset_url).json()
    if "offset" in dataset_url:
        print("off set present, deleting old versions")
        for d in dataset_versions["data"]:
            print(f"datasets/{d['datasetId']}/versions/{d['versionId']}/")
            delete_req = client.delete(f"datasets/{d['datasetId']}/versions/{d['versionId']}/")
            print(delete_req)
    if next := dataset_versions.get("next"):
        print(next)
        print(dataset_url)
        query_parameters = next.split("/")[-1]
        next_url = os.path.join( dataset_url.split("?")[0], query_parameters)
        print(next_url)
        purge_old_dataset_version(next_url)          

## this will maintain the 99 latest versions of a dataset, so plus the new version from the next cell, that will put us at 100 versions
if dataset_id:
    purge_old_dataset_version(f"datasets/{dataset_id}/versions?limit=99")

## register new version (or dataset if necessary)

In [5]:
if dataset_id:
    print("register dataset version")
    dataset = dr.Dataset.create_version_from_in_memory_data(dataset_id, payload)
else:
    print("register dataset")
    dataset = dr.Dataset.create_from_in_memory_data(payload, fname = f"SUBRO monitoring dataset {datetime.datetime.now()}.csv")

register dataset version


## create a monitoring job if necessary

In [6]:
if batch_monitoring_job_id:
    print("monitoring job exists")
else: 
    print("creating monitoring job")
    monitoring_job_payload = {
        "deploymentId":deployment.id,
        "monitoringAggregation": None,
        "intakeSettings":{"type":"dataset","datasetId":dataset.id},
        "name":f"Subro model Job {ts}1 (UTC)",
        "enabled":False,
        "monitoringColumns": {
            "predictionsColumns":[
                {"className":"high","columnName":"PREDICTION_HIGH"},
                {"className":"medium","columnName":"PREDICTION_MEDIUM"},
                {"className":"low","columnName":"PREDICTION_LOW"}
            ]}}
    monitoring_job_response = client.post("batchMonitoringJobDefinitions/", data = monitoring_job_payload)
    monitoring_job_response.raise_for_status()
    payload_patch = {"monitoringAggregation":None}
    batch_monitoring_job_id = monitoring_job_response.json()["id"]
    patch_response = requests.patch( f"{client.endpoint}/batchMonitoringJobDefinitions/{batch_monitoring_job_id}/", 
                  headers = { 
                      "Authorization": f"Bearer {client.token}",
                      'Content-Type': "application/json" 
                  },
                  data = json.dumps(payload_patch))

monitoring job exists


## update the deployment config yaml

In [7]:
deployment_conf["dataset_id"] = dataset.id
deployment_conf["batch_monitoring_job_id"] = batch_monitoring_job_id
with open("deployment.yaml", "w") as f:
    f.write(yaml.dump( deployment_conf ))

## run the batch monitoring job

In [8]:
print("running monitoring job")
job_run_payload = {"jobDefinitionId":batch_monitoring_job_id}
job_run_response = client.post("batchJobs/fromJobDefinition/", data = job_run_payload)
job_run_response.raise_for_status()

running monitoring job


## Approach 2: Use the Filesystem as Spooler



In [115]:
service_stats = deployment.get_service_stats()
prediction_count = service_stats.metrics.get("totalPredictions")
print(prediction_count)

0


In [116]:
from pathlib import Path
from datarobot_mlops.mlops import MLOps
import os 
import glob 
import subprocess   
import time 

## thia is the spooler director that we are creating on the fly
spooler_dir = Path("/tmp/ta")
spooler_dir.mkdir(exist_ok = True)
## the environment variables are a must for the client created on line 20 of this cell
## and for the agent that gets started in a few cells
os.environ["MLOPS_SERVICE_URL"] = "https://app.datarobot.com"
os.environ['MLOPS_API_TOKEN'] = os.environ["DATAROBOT_API_TOKEN"]
os.environ['MLOPS_AGENT_VERIFY_SSL'] = "true"
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-11-openjdk/"
os.environ["MLOPS_SPOOLER_TYPE"]="FILESYSTEM"
os.environ["MLOPS_FILESYSTEM_DIRECTORY"] = "/tmp/ta"
os.environ["MLOPS_DEPLOYMENT_ID"] = deployment.id
os.environ["MLOPS_MODEL_ID"] = deployment.model.get("id")


In [117]:
from pathlib import Path
from datarobot_mlops.mlops import MLOps
import os 
import glob 
import subprocess   
import time 
import json

feature_types = [{ "name": "claimNumber", "feature_type": "number"}, {"name":"exposureType", "feature_type":"categorical"}, {"name": "modelConfidence", "feature_type": "categorical"}, {"name": "claimantInfo", "feature_type": "text"}]
## thia is the spooler director that we are creating on the fly
spooler_dir = Path("/tmp/ta")
spooler_dir.mkdir(exist_ok = True)
## the environment variables are a must for the client created on line 20 of this cell
## and for the agent that gets started in a few cells
os.environ["MLOPS_SERVICE_URL"] = "https://app.datarobot.com"
os.environ['MLOPS_API_TOKEN'] = os.environ["DATAROBOT_API_TOKEN"]
os.environ['MLOPS_AGENT_VERIFY_SSL'] = "true"
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-11-openjdk/"
os.environ["MLOPS_SPOOLER_TYPE"]="FILESYSTEM"
os.environ["MLOPS_FILESYSTEM_DIRECTORY"] = "/tmp/ta"
os.environ["MLOPS_FEATURE_TYPES_JSON"] = json.dumps(feature_types)
os.environ["MLOPS_DEPLOYMENT_ID"] = deployment.id
os.environ["MLOPS_MODEL_ID"] = deployment.model.get("id")

mlops = MLOps().init()
start = time.time() 
payload = generate_data(2)
predictions = payload["modelConfidence"].apply(ohe_prediction).tolist()
time.sleep(5)
end = time.time()
## score date 
mlops.report_deployment_stats(payload.shape[0], (end - start)*1000, deployment_id = deployment.id, model_id = deployment.model["id"]) 
mlops.report_predictions_data(predictions = [[0.0,1.0], [1.0,0.0]], class_names = ["0", "1"], deployment_id = deployment.id, model_id = deployment.model["id"])
mlops.shutdown()




In [118]:
!cat /tmp/ta/fs_spool.1

{"header":{"id":"11a7c1b5-e964-4071-891a-12d0429a02a9","deploymentId":"67e6b544ded4d5efd10694b2","dataType":"DEPLOYMENT_STATS","dataFormat":"json","dataLen":null,"version":2,"language":"python","libVersion":"11.0.0","reserved":""},"data":"{\"timestamp\":\"2025-05-20 15:55:42.930+0000\",\"modelId\":\"67e6b5445c34029dd5ea4e8c\",\"numPredictions\":2,\"executionTime\":5007.166624069214,\"userError\":false,\"systemError\":false}"}
{"header":{"id":"1dcb44f3-9a50-4f32-ba1c-a6f627cf1791","deploymentId":"67e6b544ded4d5efd10694b2","dataType":"PREDICTIONS_DATA","dataFormat":"json","dataLen":null,"version":2,"language":"python","libVersion":"11.0.0","reserved":""},"data":"{\"timestamp\":\"2025-05-20 15:55:42.931+0000\",\"modelId\":\"67e6b5445c34029dd5ea4e8c\",\"predictions\":[[0.0,1.0],[1.0,0.0]],\"classNames\":[\"0\",\"1\"]}"}


In [119]:
mlops.report_predictions_data??

[0;31mSignature:[0m
[0mmlops[0m[0;34m.[0m[0mreport_predictions_data[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mfeatures_df[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mpredictions[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0massociation_ids[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mclass_names[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdeployment_id[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmodel_id[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mskip_drift_tracking[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mskip_accuracy_tracking[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbatch_id[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m   
    [0;32mdef[0m [0mreport_prediction

In [120]:
mlops = MLOps().init() 
start = time.time() 
####
payload = generate_data(10000)
## pump to kafka
####
predictions = payload["modelConfidence"].apply(ohe_prediction).tolist()
time.sleep(5)
end = time.time()
## score date 

mlops.report_deployment_stats(payload.shape[0], (end - start)*1000) 
mlops.report_predictions_data(features_df = payload, predictions = predictions, class_names = ["low", "medium", "high"])
mlops.shutdown()

In [121]:
## start the agent!  this will use the environments variables set up above
agents_dir = glob.glob("./datarobot_mlops*").pop(0)
try: 
    os.remove(os.path.join(agents_dir, "bin", "PID.agent"))
except Exception as e:
    print(e)

subprocess.call("{}/bin/start-agent.sh".format(agents_dir))

[Errno 2] No such file or directory: './datarobot_mlops_package-11.0.1/bin/PID.agent'
/usr/bin/java
INFO: MLOPS_AGENT_CONFIG_YAML=/home/notebooks/storage/datarobot_mlops_package-11.0.1/conf/mlops.agent.conf.yaml
INFO: MLOPS_AGENT_LOG_PROPERTIES=/home/notebooks/storage/datarobot_mlops_package-11.0.1/conf/mlops.log4j2.properties
INFO: MLOPS_AGENT_JVM_OPT=-Xmx1G
INFO: AGENT_CLASSPATH='/home/notebooks/storage/datarobot_mlops_package-11.0.1/lib/spooler-kafka-11.0.1.jar:/home/notebooks/storage/datarobot_mlops_package-11.0.1/lib/spooler-pubsub-11.0.1.jar:/home/notebooks/storage/datarobot_mlops_package-11.0.1/lib/spooler-rabbitmq-11.0.1.jar:/home/notebooks/storage/datarobot_mlops_package-11.0.1/lib/spooler-sqs-11.0.1.jar:/home/notebooks/storage/datarobot_mlops_package-11.0.1/lib/mlops-agent-11.0.1.jar'
INFO: AGENT_LOG_PATH=/home/notebooks/storage/datarobot_mlops_package-11.0.1/logs/mlops.agent.log

Running MLOps-Agent as a service


DataRobot MLOps-Agent is running.


0

## Give it some time to report back predictions

in a scheduled run, we need to block to make sure all predictions are reported back, otherwise the job will terminate before the spooler flushes all records to datarobot

In [122]:
predictions_reported = service_stats.metrics.get("totalPredictions") - prediction_count 
# predictions_reported != payload.shape[0]
while predictions_reported != payload.shape[0]:
    service_stats = deployment.get_service_stats()
    predictions_reported = service_stats.metrics.get("totalPredictions") - prediction_count 
print("all predictions reported")
print(prediction_count)
print(service_stats.metrics.get("totalPredictions"))


KeyboardInterrupt: 

## Using API as spooler

Recommended when you must want to aggregate statistcs and report.  not meant for sending through big payloads

In [4]:
!pip install datarobot-mlops-stats-aggregator

In [46]:
import pandas as pd 
import numpy as np 

# pd.DataFrame(dict( x = [1, 1, 1, 1], y = [2,2]))

payload_new = dict( x = [1, 1, 1, 1], y = [2,2, 2] )


np.array([payload_new[feat] for feat in ["x", "y"]])
# x = np.array(
#     [payload_new[feat] for feat in ["x", "y"]], dtype=np.float64
#     ).reshape(1, -1)


# pd.DataFrame(x)


Unnamed: 0,f1,f2,f3,f4
0,1,string,"[some, values]","[{'d1_f1': 'd1', 'd1_f2': 2.0}, {'d2_f1': 'd2'..."


In [0]:
array of dicts 
array of string 


In [10]:
from pathlib import Path
import vcr

from datarobot_mlops.mlops import MLOps
import os 
import glob 
import subprocess   
import time 
import json

feature_types = [{ "name": "claimNumber", "feature_type": "number"}, {"name":"exposureType", "feature_type":"categorical"}, {"name": "modelConfidence", "feature_type": "categorical"}, {"name": "claimantInfo", "feature_type": "text"}]
## thia is the spooler director that we are creating on the fly
spooler_dir = Path("/tmp/ta")
spooler_dir.mkdir(exist_ok = True)
## the environment variables are a must for the client created on line 20 of this cell
## and for the agent that gets started in a few cells
os.environ["MLOPS_SERVICE_URL"] = "https://app.datarobot.com"
os.environ['MLOPS_API_TOKEN'] = os.environ["DATAROBOT_API_TOKEN"]
os.environ['MLOPS_AGENT_VERIFY_SSL'] = "true"
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-11-openjdk/"
os.environ["MLOPS_SPOOLER_TYPE"]="API"
os.environ["MLOPS_FILESYSTEM_DIRECTORY"] = ""
os.environ["MLOPS_FEATURE_TYPES_JSON"] = json.dumps(feature_types)
os.environ["MLOPS_DEPLOYMENT_ID"] = "67eb011001e72224dd04e92e"
os.environ["MLOPS_MODEL_ID"] = "67eb00e92eec999f2c38737d"

mlops = MLOps().init()
start = time.time() 
payload = generate_data(5)
predictions = payload["modelConfidence"].apply(ohe_prediction).tolist()
time.sleep(3)
end = time.time()
## score date 


# x = np.random.rand( 1,69)
# search_features = [f"x{i}" for i in range(69)]
# x_df = pd.DataFrame( x, columns=search_features)

## provided x.shape == [1, 69] and len(search_features) == 69



mlops.report_deployment_stats( payload.shape[0], (end - start)*1000) 

mlops.report_predictions_data(
                              predictions = [ [0.956, 0.0944], [0.2, 0.8], [0.1, 0.9]], 
                              # association_ids = ["uid1", "uid2", "uid3", "uid4", "uid5"], 
                              class_names = ['0', '1'],     skip_drift_tracking=False,
skip_accuracy_tracking=False)

# mlops.shutdown()




In [20]:
predictions = [ [0.956, 0.0944], [0.2, 0.8], [0.1, 0.9]] 

[ p[::-1]  for p in predictions]

In [14]:
probabilities = np.array([
    [0.956, 0.0944], 
])

pred = probabilities[:, 1][0]


if pred > 0.5:
    print('postive')
else:
    print("negative")

In [11]:
(1+0.9+0.8)/3.0

In [80]:
if x.shape !=(1, 69) or len(search_features) != 69:
    raise Exception("something is wrong")

In [105]:
import pprint

payload = dict( f1 = 1, f2 = "string", f3 = ["some", "values"], f4 = [dict(d1_f1 = "d1", d1_f2 = 2.0), dict(d2_f1 = "d2", d2_f2 = 3.0)])
## scoring ##
payload_df = pd.DataFrame([payload])
for column in payload_df.select_dtypes(exclude=['number']).columns:
    payload_df[column] = payload_df[column].apply(json.dumps)
mlops.report_deployment_stats( payload_df.shape[0], (end - start)*1000) 
mlops.report_predictions_data(features_df = payload_df, 
                              predictions = predictions, 
                              )
# or 
x = np.random.rand( 1,69)
search_features = [f"x{i}" for i in range(69)]
## scoring ##
x_df = pd.DataFrame( x, columns=search_features)
mlops.report_deployment_stats( x_df.shape[0], (end - start)*1000) 
mlops.report_predictions_data(features_df = x_df, 
                              predictions = predictions, 
                              )

[dtype('int64'), dtype('O'), dtype('O'), dtype('O')]

In [92]:
if payload_df.dtypes.f2 == np.dtype("O"):
    print(True)
else:
    print(False)

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,x51,x52,x53,x54,x55,x56,x57,x58,x59,x60
0,0.73097,0.678719,0.13506,0.695682,0.669662,0.026254,0.067041,0.764567,0.532572,0.12857,...,0.772635,0.065903,0.962612,0.959637,0.339789,0.859691,0.959601,0.502366,0.03464,0.091775


In [72]:
pd.DataFrame( [np.random.rand( 1,61)], columns=[f"x{i}" for i in range(61)])

In [61]:
pd.DataFrame([payload])

In [59]:
pd.DataFrame(payload)

In [3]:
import vcr
with vcr.use_cassette('fixtures/vcr_cassettes/register_data.yaml'):
    dataset =  dr.Dataset.create_from_in_memory_data(generate_data(10), fname = f"SUBRO monitoring dataset.csv")

In [19]:
import requests 
from io import StringIO

csv_data = StringIO()
generate_data(10).to_csv(csv_data, index=False)
csv_data.seek(0)  # Rewind to the start

files = {
    'file': ('data.csv', csv_data, 'text/csv')  # Send CSV as file
}

req = requests.post("https://app.datarobot.com/api/v2/datasets/fromFile/", 
            headers = {
                "Authorization":f"Bearer {os.environ['DATAROBOT_API_TOKEN']}",
            }, 
            files =  {"file": ("data.csv", csv_data, "text.csv")}
        )



In [30]:
check_status = f"https://app.datarobot.com/api/v2/status/{req.json()['statusId']}/"
c = requests.get(check_status, headers = {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"})

try:
    c.json()["status"]


'RUNNING'

In [14]:
req.json()

In [8]:
req.json()

In [16]:
mlops.report_predictions_data?

In [1]:
import yaml 
import json
with open("/home/notebooks/storage/fixtures/vcr_cassettes/report_prediction_data.yaml", "r") as f:
    report_preds = yaml.load(f, Loader = yaml.SafeLoader)
with open("/home/notebooks/storage/fixtures/vcr_cassettes/report_deployment_stats.yaml") as f:
    report_stats = yaml.load(f, Loader = yaml.SafeLoader)

In [35]:
json.loads(report_stats["interactions"][0]["request"]["body"])

In [7]:
report_stats["interactions"][0]["request"]["uri"]

In [22]:
json.loads(report_stats["interactions"][0]["request"]["body"])

In [21]:
json.loads(report_preds["interactions"][0]["request"]["body"])

In [0]:


predictions_data_dict = json.loads(report_preds["interactions"][0]["request"]["body"])

In [136]:
payload = generate_data(5)
list(map( lambda x: {"name": x[0], "values": x[1]} , payload.to_dict(orient = "list").items()))

In [137]:
import pprint 
pprint.pprint(predictions_data_dict)

In [47]:
data = """
{"data":[{"timestamp":"2025-03-27 17:13:29.362+0000","modelId":"67e32c08babf26d876865a84","features":[{"name":"claimNumber","values":[58928,81888,33617,63454,25663]},{"name":"exposureType","values":["exposure
      type a","exposure type b","exposure type a","exposure type c","exposure type
      a"]},{"name":"modelConfidence","values":["low","low","low","low","low"]},{"name":"claimantInfo","values":["{\"field1\":
      \"field1\", \"field2\": \"field2\"}","{\"field1\": \"field1\", \"field2\": \"field2\"}","{\"field1\":
      \"field1\", \"field2\": \"field2\"}","{\"field1\": \"field1\", \"field2\": \"field2\"}","{\"field1\":
      \"field1\", \"field2\": \"field2\"}"]}],"predictions":[[1.0,0.0,0.0],[1.0,0.0,0.0],[1.0,0.0,0.0],[1.0,0.0,0.0],[1.0,0.0,0.0]],"associationIds":["uid1","uid2","uid3","uid4","uid5"],"classNames":["low","medium","high"]}]}"""
   

In [16]:

print(now.isoformat())
print('2025-03-27 17:13:29.362+0000\n')
print(now.strftime("%Y-%m-%d %H:%M:%t.%s+0000"))
print(now.strftime("%Y-%m-%d %H:%M:%S.%f"))

print(formatted_dt)

In [58]:
import pprint
pprint.pprint(yaml.dump(predictions_data_dict))

In [28]:
deployment.model.get("id")

There are two post methods below.  One to report inference data (`predictionInputs/fromJson`) and predictions and the other to report number of predictions and latency (in microseconds)

These are both undocumented endpoints, so not meant to be publically facing.  
For `predictionsInputs/fromJson`, 
* associationsId: list[str] => optional
* classNames: list[str] => required if multiclass problem
* features: pd.DataFrame => required if you want to track feature drift.  the fucntion `inference_dat_to_json` can be used to get the input features ready.
* predictions: Union[list[float], list[list[float]] => if you want prediciton monitoring.  this is a list of predicitions.  in the example below, it is multiclass predictions, so it is a list of lists which sum to one
* modelId: str => REQUIRED. via dr client get this from dr.Deployment.get(deployment_id).model.get("id")
* timestamp: str => REQUIRED.  if your time stamp is dt = datetime.datetime.now(), you can format it with dt.strftime('%Y-%m-%d %H:%M:%S.') + f'{now.microsecond // 1000:03d}+0000'


For `predictionsRequests/fromJson`, 
* modelId: str => REQUIRED. via dr client get this from dr.Deployment.get(deployment_id).model.get("id")
* timestamp: str => REQUIRED.  if your time stamp is dt = datetime.datetime.now(), you can format it with dt.strftime('%Y-%m-%d %H:%M:%S.') + f'{now.microsecond // 1000:03d}+0000'
* numPredictions: int => number of predictions made
* executionTime: float => latency in milliseconds
* useError: bool => unknown, couldn't find documentation
* systemError: bool => unknown, couldnt' find documentation 

In [52]:
import requests 
import os

ENDPOINT = "https://app.datarobot.com/api/v2/deployments"

PREDICTION_INPUT_FROM_JSON = "predictionInputs/fromJSON"

inference_data_to_json = lambda df: list(map( lambda x: {"name": x[0], "values": x[1]} , df.to_dict(orient = "list").items()))

{'data': [{'associationIds': ['uid1', 'uid2', 'uid3', 'uid4', 'uid5'],
           'classNames': ['low', 'medium', 'high'],
           'features': [{'name': 'claimNumber',
                         'values': [58928, 81888, 33617, 63454, 25663]},
                        {'name': 'exposureType',
                         'values': ['exposure type a',
                                    'exposure type b',
                                    'exposure type a',
                                    'exposure type c',
                                    'exposure type a']},
                        {'name': 'modelConfidence',
                         'values': ['low', 'low', 'low', 'low', 'low']},
                        {'name': 'claimantInfo',
                         'values': ['{"field1": "field1", "field2": "field2"}',
                                    '{"field1": "field1", "field2": "field2"}',
                                    '{"field1": "field1", "field2": "field2"}',
                                    '{"field1": "field1", "field2": "field2"}',
                                    '{"field1": "field1", "field2": '
                                    '"field2"}']}],  
           'modelId': '67e32c08babf26d876865a84',
           'predictions': [[1.0, 0.0, 0.0],
                           [1.0, 0.0, 0.0],
                           [1.0, 0.0, 0.0],
                           [1.0, 0.0, 0.0],
                           [1.0, 0.0, 0.0]],
           'timestamp': '2025-03-27 17:13:29.362+0000'}]}

response = requests.post( os.path.join(ENDPOINT, deployment.id, PREDICTION_INPUT_FROM_JSON), 
            headers = {
                "Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}",
                "Content-Type": "application/json"
            },
            data = json.dumps(predictions_data_dict))


## report deployment stats

import requests 
import os
ENDPOINT = "https://app.datarobot.com/api/v2/deployments"
PREDICTION_REQUESTS_FROM_JSON = "predictionRequests/fromJSON"  

import datetime 
now = datetime.datetime.now()
formatted_dt = now.strftime('%Y-%m-%d %H:%M:%S.') + f'{now.microsecond // 1000:03d}+0000'
payload = {'data': [{'timestamp': formatted_dt,
                     'modelId': '67e32c08babf26d876865a84',  ## get this from dr.Deployment.get(deployment_id).model.get("id")
                     'numPredictions': 5,  ## reprot number of predictions in payload 
                     'executionTime': 5002.68292427063, ## report amount of time in microseconds
                     'userError': False,
                     'systemError': False}]}

response = requests.post( os.path.join(ENDPOINT, deployment.id, PREDICTION_INPUT_FROM_JSON), 
            headers = {
                "Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}",
                "Content-Type": "application/json"
            },
            data = json.dumps(predictions_data_dict))

{'message': 'ok'}

In [26]:
mlops.report_predictions_data?