In [54]:
import pandas as pd
import numpy as np
import datarobot as dr
import os
import time
import requests
pd.options.display.max_colwidth = 200
pd.options.display.max_columns = 200

USERNAME = os.environ['DATAROBOT_USERNAME']
API_KEY = os.environ['DATAROBOT_API_TOKEN']
DATAROBOT_KEY = os.environ['DATAROBOT_KEY']
API_URL = 'https://mlops.dynamic.orm.datarobot.com/predApi/v1.0/deployments/{deployment_id}/predictions'
ENDPOINT = 'https://app.datarobot.com/api/v2'

dr.Client(token=API_KEY, endpoint=ENDPOINT)  

<datarobot.rest.RESTClientObject at 0x7f8350835580>

In [30]:
# Deployment for the single series store sales demo dataset
deployment = dr.Deployment.get('62181396a59f117c303f2192')

model_id = deployment.model.get('id')
project_id = deployment.model.get('project_id')

model = dr.Model.get(project_id, model_id)
project = dr.Project.get(project_id)

In [42]:
# Files
filename = './data/Store_Sales_Single_Series_Forecast_New-orig no holiday features-pred.csv'

df_pred = pd.read_csv(filename)
data_json = df_pred.to_json(orient='records')  # if submit predictions as a datframe.  Good for inline code.

data = open(filename, 'rb').read()  # if submit predictions freom a file.

df_pred

Unnamed: 0,Date,Num_Employees,Returns_Pct,Sales,Num_Customers,Pct_On_Sale,Marketing,DestinationEvent,Pct_Promotional,Econ_ChangeGDP,EconJobsChange,AnnualizedCPI
0,2014-05-17,7.0,0.899978,15952.44,67.0,12.658062,ID33.468 In Store Credit Card Signup Discount; Ready for Summer Campaign; Memorial Day Email,No,0.050355,,-250.0,
1,2014-05-18,7.0,0.96921,14220.27,53.0,11.964098,ID89.1782 May In Store Credit Card Signup Discount; Ready for Summer Campaign; Memorial Day Email,No,0.05129,,,
2,2014-05-19,4.0,0.114602,12930.11,35.0,12.2559,ID5.6386 May In Store Credit Card Signup Discount; Ready for Summer Campaign; Memorial Day Email,No,0.05129,,,
3,2014-05-20,4.0,0.70842,13315.2,35.0,9.387212,ID69.0117 May In Store Credit Card Signup Discount; Ready for Summer Campaign; Memorial Day Email,No,0.05129,,,
4,2014-05-21,5.0,1.019298,14386.56,39.0,10.959396,ID46.0198 May In Store Credit Card Signup Discount; Ready for Summer Campaign; Memorial Day Email,No,0.05129,,,
5,2014-05-22,7.0,0.582042,13178.32,40.0,11.012531,ID102.6724 May In Store Credit Card Signup Discount; Ready for Summer Campaign; Memorial Day Email,No,0.05129,,,
6,2014-05-23,7.0,1.082894,14329.64,52.0,9.884081,ID26.4268 May In Store Credit Card Signup Discount; Ready for Summer Campaign; Memorial Day Email,No,0.05129,,,
7,2014-05-24,21.0,0.575577,29507.07,281.0,11.580541,ID44.9422 May In Store Credit Card Signup Discount; Ready for Summer Campaign; Memorial Day Email,Yes,0.05129,,-9000.0,
8,2014-05-25,21.0,0.709947,13530.83,229.0,11.993482,ID117.3612 May In Store Credit Card Signup Discount; Ready for Summer Campaign; Memorial Day Email,Yes,0.053888,,,
9,2014-05-26,16.0,0.536938,12324.85,216.0,14.57553,ID113.6161 May In Store Credit Card Signup Discount; Ready for Summer Campaign; Memorial Day Email,Yes,0.053888,,,


## Real time Predictions
### Docs: https://docs.datarobot.com/en/docs/predictions/predapi/time-pred.html#time-series-predictions-for-deployments

In [46]:
import argparse
import json
import requests
import sys

# parser = argparse.ArgumentParser()
# parser.add_argument('filename', help='The input data file')
# parser.add_argument('--forecast_point', help='The forecast point')
# parser.add_argument('--predictions_start_date', help='Start date for historical predictions')
# parser.add_argument('--predictions_end_date', help='End date for historical predictions')

API_URL = 'https://cfds-ccm-prod.orm.datarobot.com/predApi/v1.0/deployments/{deployment_id}/predictions'    # noqa

DEPLOYMENT_ID = '62180519f9672b7dfdeff310'

# Don't change this. It is enforced server-side too.
MAX_PREDICTION_FILE_SIZE_BYTES = 52428800  # 50 MB


class DataRobotPredictionError(Exception):
    """Raised if there are issues getting predictions from DataRobot"""


def make_datarobot_deployment_predictions(
        data,
        deployment_id,
        forecast_point=None,
        predictions_start_date=None,
        predictions_end_date=None,
):
    """
    Make predictions on data provided using DataRobot deployment_id provided.
    See docs for details:
         https://app.datarobot.com/docs/predictions/api/dr-predapi.html

    Parameters
    ----------
    data : str
        Feature1,Feature2
        numeric_value,string
    deployment_id : str
        Deployment ID to make predictions with.
    forecast_point : str, optional
        Forecast point as timestamp in ISO format
    predictions_start_date : str, optional
        Start of predictions as timestamp in ISO format
    predictions_end_date : str, optional
        End of predictions as timestamp in ISO format

    Returns
    -------
    Response schema:
        https://app.datarobot.com/docs/predictions/api/dr-predapi.html#response-schema

    Raises
    ------
    DataRobotPredictionError if there are issues getting predictions from DataRobot
    """
    # Set HTTP headers. The charset should match the contents of the file.
    headers = {
        'Content-Type': 'text/plain; charset=UTF-8',  # for file
        'Authorization': 'Bearer {}'.format(API_KEY),
        'DataRobot-Key': DATAROBOT_KEY,
    }
    
    # for a dataframe orient json
    headers = {
        'Content-Type': 'application/json; charset=UTF-8', 
        'Authorization': 'Bearer {}'.format(API_KEY),
        'datarobot-key': DATAROBOT_KEY}

    url = API_URL.format(deployment_id=deployment_id)

    # Prediction Explanations:
    # See the documentation for more information:
    # https://app.datarobot.com/docs/predictions/api/dr-predapi.html#request-pred-explanations
    # Should you wish to include Prediction Explanations or Prediction Warnings in the result,
    # Change the parameters below accordingly, and remove the comment from the params field below:

    params = {
        'forecastPoint': forecast_point,
        'predictionsStartDate': predictions_start_date,
        'predictionsEndDate': predictions_end_date,
        # If explanations are required, uncomment the line below
        # 'maxExplanations': 3,
        # 'thresholdHigh': 0.5,
        # 'thresholdLow': 0.15,
        # Uncomment this for Prediction Warnings, if enabled for your deployment.
        # 'predictionWarningEnabled': 'true',
    }

    # Make API request for predictions
    predictions_response = requests.post(url, data=data, headers=headers, params=params)
    _raise_dataroboterror_for_status(predictions_response)
    # Return a Python dict following the schema in the documentation
    return predictions_response.json()


def _raise_dataroboterror_for_status(response):
    """Raise DataRobotPredictionError if the request fails along with the response returned"""
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        err_msg = '{code} Error: {msg}'.format(
            code=response.status_code, msg=response.text)
        raise DataRobotPredictionError(err_msg)

try:
    predictions = make_datarobot_deployment_predictions(
        data_json,
#         data,
        deployment.id # ,
#         forecast_point,
#         predictions_start_date,
#         predictions_end_date,
    )
except DataRobotPredictionError as exc:
    print(exc)
print(json.dumps(predictions))


{"data": [{"rowId": 29, "prediction": 13659.6498587112, "predictionValues": [{"label": "Sales (actual)", "value": 13659.6498587112}], "timestamp": "2014-06-15T00:00:00.000000Z", "forecastDistance": 1, "forecastPoint": "2014-06-14T00:00:00Z", "seriesId": null, "deploymentApprovalStatus": "APPROVED"}, {"rowId": 30, "prediction": 12445.6180366337, "predictionValues": [{"label": "Sales (actual)", "value": 12445.6180366337}], "timestamp": "2014-06-16T00:00:00.000000Z", "forecastDistance": 2, "forecastPoint": "2014-06-14T00:00:00Z", "seriesId": null, "deploymentApprovalStatus": "APPROVED"}, {"rowId": 31, "prediction": 12783.0989944951, "predictionValues": [{"label": "Sales (actual)", "value": 12783.0989944951}], "timestamp": "2014-06-17T00:00:00.000000Z", "forecastDistance": 3, "forecastPoint": "2014-06-14T00:00:00Z", "seriesId": null, "deploymentApprovalStatus": "APPROVED"}, {"rowId": 32, "prediction": 14063.983171474, "predictionValues": [{"label": "Sales (actual)", "value": 14063.98317147

## Batch Predictions

In [58]:
t0 = time.time()

job = dr.BatchPredictionJob.score(
    deployment.id,
    intake_settings={
        'type': 'localFile',
        'file': filename
    },
    output_settings={
        'type': 'localFile',
    },
)

while job.get_status().get('status') == "RUNNING":
    time.sleep(10)

print('- Batch prediction file upload done: %.3f min' % ((time.time() - t0)/60))

job.get_status()

- Batch prediction file upload done: 0.039 min


{'id': '6218226ec921b6934d0eb7d4',
 'status': 'INITIALIZING',
 'percentage_completed': 0.0,
 'elapsed_time_sec': 1,
 'links': {'self': 'https://app.datarobot.com/api/v2/batchPredictions/6218226ec921b6934d0eb7d4/',
  'csv_upload': 'https://app.datarobot.com/api/v2/batchPredictions/6218226ec921b6934d0eb7d4/csvUpload/'},
 'job_spec': {'num_concurrent': 3,
  'deployment_id': '62181396a59f117c303f2192',
  'max_explanations': 0,
  'intake_settings': {'type': 'localFile'},
  'output_settings': {'type': 'localFile'},
  'timeseries_settings': {'type': 'forecast'},
  'chunk_size': 'auto',
  'csv_settings': {'delimiter': ',', 'quotechar': '"', 'encoding': 'utf-8'},
  'abort_on_error': True,
  'skip_drift_tracking': False,
  'include_probabilities': True,
  'include_prediction_status': False,
  'include_probabilities_classes': [],
  'disable_row_level_error_handling': False},
 'status_details': 'Job created by matthew.cohen@datarobot.com at 2022-02-25 00:27:26.482000',
 'created': '2022-02-25T00:2

In [57]:
t0 = time.time()

cnt = 1
while (job.get_status().get('status') == "INITIALIZING") or (job.get_status().get('status') == "RUNNING"):
    print(cnt)
    cnt += 1
    print(job.get_status())
    time.sleep(60)
print('- status change -')

print('- Predition request complete: %.3f min' % ((time.time() - t0)/60))

print(job.get_status())

t0 = time.time()

with open('./predictions.csv', 'wb') as f:
    job.download(f, timeout=1200)

print('- Prediction download complete')

1
{'id': '62181e727835bd5b3688b3bb', 'status': 'INITIALIZING', 'percentage_completed': 0.0, 'elapsed_time_sec': 417, 'links': {'self': 'https://app.datarobot.com/api/v2/batchPredictions/62181e727835bd5b3688b3bb/', 'csv_upload': 'https://app.datarobot.com/api/v2/batchPredictions/62181e727835bd5b3688b3bb/csvUpload/'}, 'job_spec': {'num_concurrent': 3, 'deployment_id': '62181396a59f117c303f2192', 'max_explanations': 0, 'intake_settings': {'type': 'localFile'}, 'output_settings': {'type': 'localFile'}, 'timeseries_settings': {'type': 'forecast'}, 'chunk_size': 'auto', 'csv_settings': {'delimiter': ',', 'quotechar': '"', 'encoding': 'utf-8'}, 'abort_on_error': True, 'skip_drift_tracking': False, 'include_probabilities': True, 'include_prediction_status': False, 'include_probabilities_classes': [], 'disable_row_level_error_handling': False}, 'status_details': 'Job created by matthew.cohen@datarobot.com at 2022-02-25 00:10:26.216000', 'created': '2022-02-25T00:10:26.216000Z', 'created_by': {'

- status change -
- Predition request complete: 7.054 min
{'id': '62181e727835bd5b3688b3bb', 'status': 'COMPLETED', 'percentage_completed': 100.0, 'elapsed_time_sec': 836, 'links': {'download': 'https://app.datarobot.com/api/v2/batchPredictions/62181e727835bd5b3688b3bb/download/', 'self': 'https://app.datarobot.com/api/v2/batchPredictions/62181e727835bd5b3688b3bb/', 'csv_upload': 'https://app.datarobot.com/api/v2/batchPredictions/62181e727835bd5b3688b3bb/csvUpload/'}, 'job_spec': {'num_concurrent': 3, 'deployment_id': '62181396a59f117c303f2192', 'max_explanations': 0, 'intake_settings': {'type': 'localFile'}, 'output_settings': {'type': 'localFile'}, 'timeseries_settings': {'type': 'forecast'}, 'chunk_size': 'auto', 'csv_settings': {'delimiter': ',', 'quotechar': '"', 'encoding': 'utf-8'}, 'abort_on_error': True, 'skip_drift_tracking': False, 'include_probabilities': True, 'include_prediction_status': False, 'include_probabilities_classes': [], 'disable_row_level_error_handling': False