# Concept Drift - Deployer
Deploy a streaming Concept Drift detector on a labeled stream.  

This function is the deployment step of the streaming concept drift detector. It initializes the selected drift detectors with the statistics of `base_dataset` and deploys the [concept_drift_streaming serverless Nuclio function](../concept_drift_streaming/concept_drift_streaming.ipynb) with them, so that concept drift is detected on top of a labeled stream.

### **Data exploration**

In [1]:
import pandas as pd
# URLs of the sample CSV files used throughout this notebook.
data_path = 'https://s3.wasabisys.com/iguazio/data/function-marketplace-data/concept_drift/mixed_0101_abrupto.csv'
predicted_train_data_path = 'https://s3.wasabisys.com/iguazio/data/function-marketplace-data/concept_drift/predicted_abrupto_train.csv'
predicted_test_data_path = 'https://s3.wasabisys.com/iguazio/data/function-marketplace-data/concept_drift/predicted_abrupto_test.csv'
# Load the file behind `data_path` and preview its first rows.
original_data = pd.read_csv(data_path)
original_data.head()

Unnamed: 0,X1,X2,X3,X4,class
0,0.0,1.0,0.460101,0.592744,1.0
1,1.0,1.0,0.588788,0.574984,0.0
2,0.0,0.0,0.401641,0.679325,1.0
3,1.0,1.0,0.306076,0.182108,0.0
4,0.0,0.0,0.962847,0.579245,1.0


In [2]:
# Load the test file; the preview shows a `predicted_col` column next to the `class` label.
# `predicted_test` is reused further down to build the stream records.
predicted_test = pd.read_csv(predicted_test_data_path)
predicted_test.tail()

Unnamed: 0,X1,X2,X3,X4,class,predicted_col
34995,0.0,0.0,0.010106,0.647269,0.0,1.0
34996,1.0,1.0,0.293651,0.737291,1.0,0.0
34997,0.0,0.0,0.848546,0.552337,0.0,1.0
34998,1.0,1.0,0.614754,0.859896,1.0,0.0
34999,1.0,0.0,0.265306,0.843716,0.0,1.0


### **Set up function parameters**

### **Importing the function**

In [3]:
# Importing the function
import mlrun
# Work inside the 'function-marketplace' MLRun project.
mlrun.set_environment(project='function-marketplace')


# Import the concept_drift function from the hub and apply MLRun's auto-mount modifier to it.
fn = mlrun.import_function("hub://concept_drift")
fn.apply(mlrun.auto_mount())

> 2021-10-21 05:31:11,291 [info] created and saved project function-marketplace


<mlrun.runtimes.kubejob.KubejobRuntime at 0x7fb3f374c2d0>

In [4]:
# Base dataset passed to the drift job below; this is the same URL as `predicted_train_data_path` from the data-exploration cell.
predicted_train_path = 'https://s3.wasabisys.com/iguazio/data/function-marketplace-data/concept_drift/predicted_abrupto_train.csv'

In [5]:
import v3io.dataplane
import os 

# Container name: the first path segment of V3IO_HOME, prefixed with '/'.
container = os.path.join('/',os.environ['V3IO_HOME'].split('/')[0])
user = os.environ["V3IO_USERNAME"]
# Drops the first 6 characters of the working directory.
# NOTE(review): presumably this strips a '/User/' mount prefix; confirm for other environments.
rel_path = os.getcwd()[6:] + '/artifacts'
cwd = os.path.join(container,user,rel_path)
# `base_*` paths are relative to the container; the un-prefixed variants include the container.
base_input_stream = os.path.join(user,rel_path) + "/inputs_stream"
input_stream = os.path.join(container,base_input_stream)
base_output_stream = os.path.join(user,rel_path) + "/output_stream"
output_stream = os.path.join(container,user,rel_path) + "/output_stream"
tsdb_path = os.path.join(container,user,rel_path) + "/output_tsdb"
stream_consumer_group = 'cg'
# Input stream URL in the form http://<V3IO_API><stream path>@<consumer group>.
http_input_stream = f'http://{os.environ["V3IO_API"]}{input_stream}@{stream_consumer_group}'


# Create the input stream with a single shard. Automatic status checking is disabled
# so that the accepted status codes can be listed explicitly on the next call.
client = v3io.dataplane.Client()
response = client.stream.create(container = container,
                                stream_path=base_input_stream,
                                shard_count=1,
                                raise_for_status = v3io.dataplane.RaiseForStatus.never)
# Accept only 409 and 204.
# NOTE(review): presumably 409 means the stream already exists, which makes this cell re-runnable; confirm.
response.raise_for_status([409, 204])

In [81]:
# Parameters handed to the concept_drift job.
drift_params = {
    'input_stream': http_input_stream,
    'output_stream': output_stream,
    'output_tsdb': tsdb_path,
    'tsdb_batch_size': 1,
    'models': ['ddm', 'eddm', 'pagehinkley'],  # defaults
    'label_col': 'class',
    'prediction_col': 'predicted_col',
    'hub_url': '/User/test/functions/{name}/function.yaml',
}
# The base dataset is the prediction-augmented training file.
drift_inputs = {'base_dataset': predicted_train_path}

# Run the job non-locally, writing artifacts under ./artifacts.
drift_run = fn.run(
    name='concept_drift',
    params=drift_params,
    inputs=drift_inputs,
    artifact_path=os.path.join(os.getcwd(), 'artifacts'),
    local=False,
)

> 2021-10-21 06:15:41,904 [info] starting run concept_drift uid=f7d3d58d83b94eaa810ed406f7dcd455 DB=http://mlrun-api:8080
> 2021-10-21 06:15:42,041 [info] Job is running in the background, pod: concept-drift-4xtgp
> 2021-10-21 06:15:48,336 [info] Loading base dataset
> 2021-10-21 06:15:49,671 [info] Creating models
> 2021-10-21 06:15:49,671 [info] Streaming data to models
> 2021-10-21 06:15:49,788 [info] Logging ready models
> 2021-10-21 06:15:49,969 [info] Deploying Concept Drift Streaming function
> 2021-10-21 06:15:49,972 [info] Starting remote function deploy
2021-10-21 06:15:50  (info) Deploying function
2021-10-21 06:15:50  (info) Building
2021-10-21 06:15:50  (info) Staging files and preparing base images
2021-10-21 06:15:50  (info) Building processor image
2021-10-21 06:15:55  (info) Build complete
2021-10-21 06:16:01  (info) Function deploy complete
> 2021-10-21 06:16:01,897 [info] successfully deployed function: {'internal_invocation_urls': ['nuclio-function-marketplace-conce

project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
function-marketplace,...f7dcd455,0,Oct 21 06:15:48,completed,concept_drift,v3io_user=danikind=jobowner=danihost=concept-drift-4xtgp,base_dataset,"input_stream=http://v3io-webapi.default-tenant.svc:8081/users/dani/test/functions/concept_drift/artifacts/inputs_stream@cgoutput_stream=/users/dani/test/functions/concept_drift/artifacts/output_streamoutput_tsdb=/users/dani/test/functions/concept_drift/artifacts/output_tsdbtsdb_batch_size=1models=['ddm', 'eddm', 'pagehinkley']label_col=classprediction_col=predicted_colhub_url=/User/test/functions/{name}/function.yaml",,eddm_concept_driftpagehinkley_concept_driftddm_concept_drift





> 2021-10-21 06:16:10,412 [info] run executed, status=completed


In [137]:
import json
# Hand-built sample event with the same top-level fields as the stream records
# built later in this notebook: class, request.instances, resp, when, model.
event_data = {"class": 1.0,
              "request": {"instances": [{"X1": 0.0, "X2": 1.0, "X3": 0.0, "X4": 0.0}]},
              "resp": [0],
              "when": ["2021-10-21 05:45:56.358580"],
              "model": ["sababa"]}

In [138]:
import requests
# PUT the sample event to a hard-coded, environment-specific endpoint and print the response text.
# NOTE(review): `json=` already serializes its argument, so passing json.dumps(...) sends a
# JSON-encoded string rather than an object; confirm the receiving function expects that.
print(requests.put('http://default-tenant.app.dev39.lab.iguazeng.com:31079',json=json.dumps(event_data)).text)




In [30]:
# Inspect one prepared stream record. `records` is built in a cell further down, so this only works after that cell has run.
records[40]

{'data': '{"class": 1.0, "request": {"instances": [{"X1": 0.0, "X2": 0.0, "X3": 0.9780597499, "X4": 0.4262973477}]}, "resp": [1]}'}

In [15]:
# Display the run object returned by the concept_drift job.
drift_run

<mlrun.model.RunObject at 0x7fb3f3fe2c10>

In [None]:
# Project a single record onto the event fields of interest.
# NOTE(review): `record` is not assigned at notebook scope in any visible cell.
{field: record[field] for field in ["when", "class", "model", "resp", "request"]}

In [39]:
import datetime
# Print the current timestamp; the value captured below matches the hard-coded "when" field used for the events in this notebook.
print(datetime.datetime.now())

2021-10-21 05:45:56.358580


In [141]:
# Decode the JSON payload of one prepared record to check its structure.
print(json.loads(records[1]['data']))

{'class': 0.0, 'request': {'instances': [{'X1': 1.0, 'X2': 1.0, 'X3': 0.2862386695, 'X4': 0.9841967075}]}, 'resp': [0], 'when': ['2021-10-21 05:45:56.358580'], 'model': ['ddm']}


In [9]:
# NOTE(review): undefined name, raises NameError. Presumably a deliberate stopper so that "Run All" halts here; remove before publishing.
sssssss

NameError: name 'sssssss' is not defined

In [149]:
# Display the container-relative path of the input stream.
base_input_stream

'dani/test/functions/concept_drift/artifacts/inputs_stream'

In [146]:
import json

def restructure_stream_event(context, event):
    """Reshape a flat prediction record into the nested event layout used on the stream.

    Every key of ``event`` that is not one of the reserved fields is moved into
    ``event['request']['instances'][0]``. ``predicted_col`` is converted to an
    int and moved to ``event['resp']``. ``when`` and ``model`` are overwritten
    with fixed values.

    The feature keys are taken from the event itself, so the function does not
    depend on any notebook-level DataFrame.

    :param context: unused; kept so the signature stays handler-like.
    :param event:   dict holding one record; it is modified in place.
    :returns: the same ``event`` dict, restructured.
    :raises KeyError: if ``event`` has no ``predicted_col`` key.
    """
    reserved = ('when', 'class', 'model', 'worker', 'hostname', 'predicted_col')
    # Snapshot the keys first: popping while iterating over the dict itself would fail.
    feature_keys = [key for key in event if key not in reserved]
    instances = [{key: event.pop(key) for key in feature_keys}]
    event['request'] = {'instances': instances}
    event['resp'] = [int(event.pop('predicted_col'))]
    # Fixed timestamp and model name used for all replayed records.
    event['when'] = ["2021-10-21 05:45:56.358580"]
    event['model'] = ['real_records']
    return event
    
    
# Turn every row of the predicted test set into a plain dict via a JSON round trip.
raw_rows = json.loads(predicted_test.to_json(orient='records'))
# restructure_stream_event does not use its `context` argument and no `context`
# object is defined in this notebook, so pass None instead of an undefined name.
records = [{'data': json.dumps(restructure_stream_event(None, row))} for row in raw_rows]

In [150]:
# Push the prepared records to the input stream in batches of `step` records.
# Only the response of the last call remains in `response`; `idx` and `step`
# also stay defined afterwards and are reused by a later cell.

v3io_client = v3io.dataplane.Client()
step = 1
for idx in range(0, len(records), step):
    response = v3io_client.put_records(container=container,
                                       path=base_input_stream, 
                                       records=records[idx:idx+step])

In [None]:
# Display the full path of the output stream.
output_stream

In [127]:
# Inspect a one-element slice of the prepared records.
records[2:3]

[{'data': '{"class": 1.0, "request": {"instances": [{"X1": 0.0, "X2": 0.0, "X3": 0.8317721351, "X4": 0.7652358167}]}, "resp": [1], "when": ["2021-10-21 05:45:56.358580"], "model": ["ddm"]}'}]

In [126]:
# Send a single batch and inspect the raw response body.
# Relies on `idx`, `step` and `v3io_client` left over from the batch loop cell.
response = v3io_client.put_records(container=container,
                                       path=base_input_stream, 
                                       records=records[idx:idx+step])
response.body

b'{ "FailedRecordCount":0,"Records": [{ "SequenceNumber":85904,"ShardId":0 } ] }'

b'{ "FailedRecordCount":0,"Records": [{ "SequenceNumber":85903,"ShardId":0 } ] }'

In [None]:
## Getting a model to monitor concept drift on
import mlrun
# Work inside the 'function-marketplace' MLRun project.
mlrun.set_environment(project='function-marketplace')

# Import the sklearn_classifier function from the hub to train a model.
# NOTE(review): this rebinds `fn`, which earlier held the concept_drift function.
fn = mlrun.import_function("hub://sklearn_classifier")
fn.apply(mlrun.auto_mount())

In [None]:
# Train a RandomForestClassifier on the dataset, with `class` as the label column.
# NOTE(review): `train_data_path` is not assigned in any visible cell; define it before running this cell.

classifier_run = fn.run(name= "sklearn_classifier",
                        params= {"sample"             : -1,
                                 "test_size"          : 0.05,
                                 "train_val_split"    : 0.95,
                                 "random_state"       : 1,
                                 "n_jobs"             : -1,
                                 "label_column"       : 'class',
                                 "model_pkg_class"    : "sklearn.ensemble.RandomForestClassifier"},
                        inputs={"dataset"  : train_data_path},
                        local=True)

## Local test
A use-case-based run example.

In [None]:
from mlrun import run_local, NewTask

In [None]:
# Local-test configuration.
# NOTE: this reassigns `container` and `stream_consumer_group`, which earlier
# cells set for the deployment example.
container = 'bigdata'
base_table = os.path.join('/', container, 'network-operations')
stream_consumer_group = 'cd'
artifacts_path = os.path.join(os.getcwd(), 'artifacts')

# Resolve the stream path before formatting the URL: reusing single quotes
# inside a single-quoted f-string is a SyntaxError before Python 3.12.
inference_stream = os.path.join(base_table, 'inference_stream')

# NOTE(review): `concept_drift_deployer` is not defined in any visible cell;
# the handler must be in scope before this cell runs.
task = NewTask(name='concept_drift_deployer',
        project='network-operations',
        handler=concept_drift_deployer,
        params={'models': ['ddm', 'eddm', 'pagehinkley'],
                'label_col': 'is_error',
                'prediction_col': 'yscore',
                'output_tsdb': os.path.join(base_table, 'drift_tsdb'),
                'input_stream': f'http://{os.environ["V3IO_API"]}{inference_stream}@{stream_consumer_group}',
                'output_stream': os.path.join(base_table, 'drift_stream')},
        inputs={'base_dataset': 'store://network-operations/test_test_set_preds'},
        artifact_path=artifacts_path)

In [None]:
# Recompute and display the artifacts directory under the current working directory.
artifacts_path = os.path.join(os.getcwd(), 'artifacts')
artifacts_path

In [None]:
# Execute the task defined in the local-test cell with MLRun's run_local.
run_local(task)

## Save function yaml

In [None]:
from os import path
from mlrun import run_local, NewTask, mlconf, import_function, mount_v3io, code_to_function
# Keep an already configured DB path; otherwise default to 'http://mlrun-api:8080'.
mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'

In [None]:
# Create a 'job' function object named "concept_drift" from this notebook's code,
# with the code embedded in the function spec.
fn = code_to_function("concept_drift", 
                      kind='job',
                      with_doc=True,
                      embed_code=True)

# Add metadata (for templates and reuse): default handler, description, categories and labels.
fn.spec.default_handler = "concept_drift_deployer"
fn.spec.description = "Deploy a streaming Concept Drift detector on a labeled stream"
fn.metadata.categories = ["ml", "serve"]
fn.metadata.labels = {"author": "orz", "framework": "sklearn"}
# Write the function definition to function.yaml in the working directory.
fn.export("function.yaml")

In [None]:
# Apply the v3io mount modifier to the function object.
fn.apply(mount_v3io())

## Stream testing

In [None]:
# Deploy the function object created by code_to_function.
fn.deploy()

In [None]:
# Run the task defined in the local-test section through the function object.
fn.run(task)