## Generate some test data

This is an example of scoring data and monitoring the predictions with DataRobot.

In [1]:
import json
import pandas as pd 
import yaml
import datarobot as dr
from datarobot_mlops.mlops import MLOps
import time

## a single example record, with the same fields that generate_data produces
example1 = {
    "claimNumber": 12341234,
    "exposureType": "exposure type a",
    "modelConfidence": "low",  ## high, low, medium
    ## JSON text, like the claimantInfo column built with json.dumps in generate_data;
    ## the braces must not be backslash-escaped ("\{" is an invalid escape sequence
    ## in Python and would also leave stray backslashes that break JSON parsing)
    "claimantInfo": " [{}] ",
}

import numpy as np 
def generate_data(n = 100, seed = None):
    """Build a synthetic payload of ``n`` rows for exercising the monitoring flow.

    Parameters
    ----------
    n : int
        Number of rows to generate.
    seed : int, optional
        When given, all draws come from a dedicated ``np.random.RandomState(seed)``
        so the payload is reproducible. When omitted, the global NumPy random
        state is used, exactly as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``claimNumber`` (random integers in [11111, 99999)),
        ``exposureType`` (uniform over three labels), ``modelConfidence``
        (low/medium/high drawn with probabilities 0.6/0.3/0.1) and
        ``claimantInfo`` (the same JSON string repeated on every row).
    """
    # np.random (module) and RandomState expose the same choice/randint API
    rng = np.random if seed is None else np.random.RandomState(seed)

    confidence_levels = ["low", "medium", "high"]
    exposure_types = ["exposure type a", "exposure type b", "exposure type c"]

    modelConfidence = rng.choice(confidence_levels, size=n, p=[0.6, 0.3, 0.1], replace=True)
    exposureType = rng.choice(exposure_types, size=n, p=[1/3, 1/3, 1/3], replace=True)
    claimNumber = rng.randint(11111, 99999, size=n)

    payload = pd.DataFrame(
        dict(claimNumber=claimNumber, exposureType=exposureType, modelConfidence=modelConfidence)
    )
    # scalar assignment broadcasts the same JSON string to every row
    payload["claimantInfo"] = json.dumps(dict(field1="field1", field2="field2"))
    return payload

def ohe_prediction(x):
    """Turn a confidence label into a three-element probability vector.

    The positions correspond to "low", "medium" and "high" in that order.
    A recognised label gets 1.0 in its own position and 0.0 elsewhere; any
    other value yields a uniform vector of thirds.
    """
    labels = ("low", "medium", "high")
    for position, label in enumerate(labels):
        if x == label:
            return [1.0 if slot == position else 0.0 for slot in range(len(labels))]
    return [1/3 for _ in labels]


In [2]:
## read the deployment id from the local config file, then fetch that deployment
with open("deployment.yaml", "r") as config_file:
    deployment_conf = yaml.safe_load(config_file)
deployment = dr.Deployment.get(deployment_conf.get("deployment_id"))

In [3]:
## snapshot the deployment's service stats before anything is reported in this run
service_stats = deployment.get_service_stats()
## baseline total; the final cell subtracts it from the latest total to count
## how many predictions this run has added
prediction_count = service_stats.metrics.get("totalPredictions")
print(prediction_count)

130200


## Use the Filesystem as Spooler



In [4]:
from pathlib import Path
from datarobot_mlops.mlops import MLOps
import os 
import glob 
import subprocess   
import time 

## this is the spooler directory that we are creating on the fly
spooler_dir = Path("/tmp/ta")
spooler_dir.mkdir(parents = True, exist_ok = True)
## the environment variables are a must for the MLOps client created in the next cell
## and for the agent that gets started in a few cells
os.environ["MLOPS_SERVICE_URL"] = "https://app.datarobot.com"
## the API token is taken from the environment, never hard-coded;
## this raises KeyError if DATAROBOT_API_TOKEN is not set
os.environ['MLOPS_API_TOKEN'] = os.environ["DATAROBOT_API_TOKEN"]
os.environ['MLOPS_AGENT_VERIFY_SSL'] = "true"
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-11-openjdk/"
os.environ["MLOPS_SPOOLER_TYPE"]="FILESYSTEM"
## derived from spooler_dir so the directory created above and the one
## advertised to the client/agent can never drift apart
os.environ["MLOPS_FILESYSTEM_DIRECTORY"] = str(spooler_dir)
os.environ["MLOPS_DEPLOYMENT_ID"] = deployment.id
os.environ["MLOPS_MODEL_ID"] = deployment.model.get("id")


In [5]:
## create the reporting client; it is configured through the MLOPS_* environment variables
mlops = MLOps().init()
try:
    start = time.time()
    payload = generate_data(10000)
    ## the "prediction" is just the one-hot encoding of the modelConfidence column
    predictions = payload["modelConfidence"].apply(ohe_prediction).tolist()
    ## stand-in for real scoring time, so the reported duration is not ~0
    time.sleep(5)
    end = time.time()

    ## report the number of rows scored and the elapsed time converted from seconds to milliseconds
    mlops.report_deployment_stats(payload.shape[0], (end - start)*1000)
    ## report the scored features together with their predictions
    mlops.report_predictions_data(features_df = payload, predictions = predictions)
finally:
    ## always shut the client down, even when generating or reporting the data fails
    mlops.shutdown()

In [6]:
## start the agent!  this will use the environment variables set up above
## only directories qualify: the glob can also match e.g. a downloaded archive
agent_candidates = [p for p in glob.glob("./datarobot_mlops*") if os.path.isdir(p)]
if not agent_candidates:
    raise FileNotFoundError(
        "no ./datarobot_mlops* directory found; unpack the MLOps agent package in the working directory"
    )
agents_dir = agent_candidates[0]

## best-effort removal of the agent's PID file before starting
## NOTE(review): presumably a leftover PID file makes start-agent.sh believe
## the agent is already running -- confirm against the agent scripts
try:
    os.remove(os.path.join(agents_dir, "bin", "PID.agent"))
except OSError as e:
    ## typically "file not found" on a first run; report it and carry on
    print(e)

agent_exit_code = subprocess.call("{}/bin/start-agent.sh".format(agents_dir))
if agent_exit_code != 0:
    ## fail here rather than waiting later for predictions that will never be delivered
    raise RuntimeError("start-agent.sh exited with code {}".format(agent_exit_code))
agent_exit_code

INFO: MLOPS_AGENT_CONFIG_YAML=/home/notebooks/storage/datarobot_mlops_package-11.0.1/conf/mlops.agent.conf.yaml
INFO: MLOPS_AGENT_LOG_PROPERTIES=/home/notebooks/storage/datarobot_mlops_package-11.0.1/conf/mlops.log4j2.properties
INFO: MLOPS_AGENT_JVM_OPT=-Xmx1G
INFO: AGENT_CLASSPATH='/home/notebooks/storage/datarobot_mlops_package-11.0.1/lib/spooler-kafka-11.0.1.jar:/home/notebooks/storage/datarobot_mlops_package-11.0.1/lib/spooler-pubsub-11.0.1.jar:/home/notebooks/storage/datarobot_mlops_package-11.0.1/lib/spooler-rabbitmq-11.0.1.jar:/home/notebooks/storage/datarobot_mlops_package-11.0.1/lib/spooler-sqs-11.0.1.jar:/home/notebooks/storage/datarobot_mlops_package-11.0.1/lib/mlops-agent-11.0.1.jar'
INFO: AGENT_LOG_PATH=/home/notebooks/storage/datarobot_mlops_package-11.0.1/logs/mlops.agent.log

Running MLOps-Agent as a service


DataRobot MLOps-Agent is running.


0

## Give it some time to report back predictions

In a scheduled run, we need to block until all predictions are reported back; otherwise the job will terminate before the spooler flushes all records to DataRobot.

In [7]:
## poll the deployment until every row reported in this run shows up in its service stats
POLL_INTERVAL_SECONDS = 5   ## pause between polls so the API is not hammered
MAX_WAIT_SECONDS = 600      ## give up after this long instead of blocking forever; tune as needed

expected_predictions = payload.shape[0]
deadline = time.monotonic() + MAX_WAIT_SECONDS

## refresh first: the service_stats captured earlier predates the reporting
service_stats = deployment.get_service_stats()
predictions_reported = service_stats.metrics.get("totalPredictions") - prediction_count
## "<" rather than "!=": if the total overshoots (e.g. other traffic hits the
## same deployment) an inequality test would never terminate
while predictions_reported < expected_predictions:
    if time.monotonic() > deadline:
        raise TimeoutError(
            "only {} of {} predictions reported after {} seconds".format(
                predictions_reported, expected_predictions, MAX_WAIT_SECONDS
            )
        )
    time.sleep(POLL_INTERVAL_SECONDS)
    service_stats = deployment.get_service_stats()
    predictions_reported = service_stats.metrics.get("totalPredictions") - prediction_count
print("all predictions reported")
print(prediction_count)
print(service_stats.metrics.get("totalPredictions"))


all predictions reported
130200
140200
