# Concept Drift - Deployer
Deploy a streaming Concept Drift detector on a labeled stream.  

This function is the Deployment step for the Streaming Concept Drift Detector.  It will initialize the selected drift detectors with the base_dataset's statistics and deploy the [concept_drift_streaming serverless Nuclio function](../concept_drift_streaming/concept_drift_streaming.ipynb) with them for streaming concept-drift detection on top of a labeled stream.

### **Data exploration**

In [1]:
import pandas as pd
data_path = 'https://s3.wasabisys.com/iguazio/data/function-marketplace-data/concept_drift/mixed_0101_abrupto.csv'
predicted_train_data_path = 'https://s3.wasabisys.com/iguazio/data/function-marketplace-data/concept_drift/predicted_abrupto_train.csv'
predicted_test_data_path = 'https://s3.wasabisys.com/iguazio/data/function-marketplace-data/concept_drift/predicted_abrupto_test.csv'
original_data = pd.read_csv(data_path)
original_data.head()

Unnamed: 0,X1,X2,X3,X4,class
0,0.0,1.0,0.460101,0.592744,1.0
1,1.0,1.0,0.588788,0.574984,0.0
2,0.0,0.0,0.401641,0.679325,1.0
3,1.0,1.0,0.306076,0.182108,0.0
4,0.0,0.0,0.962847,0.579245,1.0


In [2]:
predicted_test = pd.read_csv(predicted_test_data_path)
predicted_test.tail()

Unnamed: 0,X1,X2,X3,X4,class,predicted_col
34995,0.0,0.0,0.010106,0.647269,0.0,1.0
34996,1.0,1.0,0.293651,0.737291,1.0,0.0
34997,0.0,0.0,0.848546,0.552337,0.0,1.0
34998,1.0,1.0,0.614754,0.859896,1.0,0.0
34999,1.0,0.0,0.265306,0.843716,0.0,1.0


### **Setup function parameters**

### **Importing the function**

In [3]:
# Importing the function
import mlrun
mlrun.set_environment(project='function-marketplace')


fn = mlrun.import_function("hub://concept_drift")
# fn = mlrun.import_function("function.yaml")
fn.apply(mlrun.auto_mount())

> 2021-10-24 09:18:04,857 [info] loaded project function-marketplace from MLRun DB


<mlrun.runtimes.kubejob.KubejobRuntime at 0x7ff4d5fb3550>

In [4]:
fn.set_envs({'project' : 'function-marketplace'})

<mlrun.runtimes.kubejob.KubejobRuntime at 0x7ff4d5fb3550>

In [5]:
predicted_train_path = 'https://s3.wasabisys.com/iguazio/data/function-marketplace-data/concept_drift/predicted_abrupto_train.csv'

### **Making the input stream**

In [26]:
import v3io.dataplane
import os 

container = os.path.join('/',os.environ['V3IO_HOME'].split('/')[0])
# container='/bigdata'
user = os.environ["V3IO_USERNAME"]
rel_path = os.getcwd()[6:] + '/artifacts'
# rel_path = 'streaming'
cwd = os.path.join(container,user,rel_path)
base_input_stream = os.path.join(user,rel_path) + "/inputs_stream"
input_stream = os.path.join(container,base_input_stream)
base_output_stream = os.path.join(user,rel_path) + "/output_stream"
output_stream = os.path.join(container,user,rel_path) + "/output_stream"
tsdb_path = os.path.join(container,user,rel_path) + "/output_tsdb"
stream_consumer_group = 'cg45'
webapi_url = 'http://v3io-webapi:8081'
http_input_stream = f'http://{os.environ["V3IO_API"]}{input_stream}@{stream_consumer_group}'


client = v3io.dataplane.Client()
response = client.stream.create(container = container,
                                stream_path=base_input_stream,
                                shard_count=3,
                                raise_for_status = v3io.dataplane.RaiseForStatus.never)
response.raise_for_status([409, 204])

In [27]:
http_input_stream

'http://v3io-webapi.default-tenant.svc:8081/users/dani/test/functions/concept_drift/artifacts/inputs_stream@cg45'

In [30]:
base_input_stream

'dani/test/functions/concept_drift/artifacts/inputs_stream'

### **Running the function**

In [29]:
drift_run = fn.run(name='concept_drift',
                   params={'input_stream'    : http_input_stream,
                           'output_stream'   : output_stream,
                           'output_tsdb'     : tsdb_path,
                           'tsdb_batch_size' : 1,
                           'models'          : ['ddm', 'eddm', 'pagehinkley'], # defaults
                           'label_col'       : 'class',
                           'prediction_col'  : 'predicted_col',
                           'hub_url'         : '/User/test/functions/{name}/function.yaml'},
                   inputs={'base_dataset'    : predicted_train_path},
                   artifact_path = os.path.join(os.getcwd(), 'artifacts'),
                   local=True)

> 2021-10-24 09:22:59,044 [info] starting run concept_drift uid=82ea81c819d44bbabefe2398476c1bbc DB=http://mlrun-api:8080
> 2021-10-24 09:22:59,178 [info] Loading base dataset
> 2021-10-24 09:23:00,982 [info] Creating models
> 2021-10-24 09:23:00,982 [info] Streaming data to models
> 2021-10-24 09:23:01,099 [info] Logging ready models
> 2021-10-24 09:23:01,271 [info] Deploying Concept Drift Streaming function
> 2021-10-24 09:23:01,272 [info] Starting remote function deploy
2021-10-24 09:23:01  (info) Deploying function
2021-10-24 09:23:01  (info) Building
2021-10-24 09:23:01  (info) Staging files and preparing base images
2021-10-24 09:23:01  (info) Building processor image
2021-10-24 09:23:03  (info) Build complete
2021-10-24 09:23:09  (info) Function deploy complete
> 2021-10-24 09:23:09,551 [info] successfully deployed function: {'internal_invocation_urls': ['nuclio-function-marketplace-concept-drift-streaming.default-tenant.svc.cluster.local:8080'], 'external_invocation_urls': ['de

project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
function-marketplace,...476c1bbc,0,Oct 24 09:22:59,completed,concept_drift,v3io_user=danikind=owner=danihost=jupyter-dani-6bfbd76d96-zxx6f,base_dataset,"input_stream=http://v3io-webapi.default-tenant.svc:8081/users/dani/test/functions/concept_drift/artifacts/inputs_stream@cg45output_stream=/users/dani/test/functions/concept_drift/artifacts/output_streamoutput_tsdb=/users/dani/test/functions/concept_drift/artifacts/output_tsdbtsdb_batch_size=1models=['ddm', 'eddm', 'pagehinkley']label_col=classprediction_col=predicted_colhub_url=/User/test/functions/{name}/function.yaml",,eddm_concept_driftpagehinkley_concept_driftddm_concept_drift





> 2021-10-24 09:23:09,660 [info] run executed, status=completed


In [10]:
sssssss

NameError: name 'sssssss' is not defined

In [None]:
import json
event_data = {"class": 1.0,
              "request": {"instances": [{"X1": 0.0, "X2": 1.0, "X3": 0.0, "X4": 0.0}]},
              "resp": [0],
              "when": ["2021-10-21 05:45:56.358580"],
              "model": ["sababa"]}

In [None]:
import requests
print(requests.put('http://default-tenant.app.dev39.lab.iguazeng.com:30550',json=json.dumps(event_data)).text)

In [11]:
import json

def restructure_stream_event(context, event):
    instances = [dict()]
    for key in predicted_test.keys():
        if key not in ['when', 'class', 'model', 'worker', 'hostname', 'predicted_col']:
            instances[0].update({key: event.pop(key)})
    event['request'] = {'instances': instances}
    event['resp'] = [int(event.pop('predicted_col'))]
    event['when'] = "2021-10-18 09:50:32.324000"
    event['model'] = 'real_records'
    return event
    
    
records = json.loads(predicted_test.to_json(orient='records'))
records = [{'data': json.dumps(restructure_stream_event(context, record))} for record in records]

In [12]:
# batch

v3io_client = v3io.dataplane.Client()
# step = 1
# for idx in range(0, len(records), step):
#     response = v3io_client.put_records(container=container,
#                                        path=base_input_stream, 
#                                        records=records[idx:idx+step])

In [38]:
response = v3io_client.put_records(container=container,
                                       path=base_input_stream, 
                                       records=records[10000:11000])

In [None]:
input_stream

In [None]:
records[0]

In [None]:
from nuclio.triggers import V3IOStreamTrigger
V3IOStreamTrigger()