In [1]:
import time
import hkube_notebook
from hkube_notebook import (
    AlgorithmBuilder,
    PipelineBuilder,
    PipelineExecutor,
    TrackerType,
)
# Show which hkube_notebook version this notebook is running against.
print(hkube_notebook.__version__)
# Base URL of the api-server used by every cell below; change it to match your deployment,
# e.g. 'https://10.32.10.11/hkube/api-server/api/v1' for a remote server.
api_server = 'http://localhost:3000/api/v1'

0.2


# Build Pipeline

In [4]:
zazaBuilder = PipelineBuilder(name='zaza', api_server_base_url=api_server)
# One (node name, algorithm name, input reference) entry per pipeline node, in the order they are added.
zaza_nodes = [
    ('green', 'green-alg', "@flowInput.tata"),
    ('yellow', 'yellow-alg', "@green"),
    ('black', 'black-alg', "@yellow"),
    # Adding a node with an unknown algorithm results in an error, e.g.:
    # ('gold', 'gold-alg', "@black"),
]
for node_name, alg_name, node_input in zaza_nodes:
    zazaBuilder.add_node(node_name=node_name, alg_name=alg_name, input=[node_input])
# Last expression of the cell: display the raw pipeline description that was built.
zazaBuilder.get_raw()

{'name': 'zaza',
 'nodes': [{'nodeName': 'green',
   'algorithmName': 'green-alg',
   'input': ['@flowInput.tata']},
  {'nodeName': 'yellow', 'algorithmName': 'yellow-alg', 'input': ['@green']},
  {'nodeName': 'black', 'algorithmName': 'black-alg', 'input': ['@yellow']}],
 'options': {'progressVerbosityLevel': 'debug'},
 'flowInput': {}}

# Execute raw pipeline from builder (webhook tracker)

In [5]:
# Execute the pipeline straight from its raw description (it does not have to be stored first).
zaza_raw = zazaBuilder.get_raw()
zazaRawExec = PipelineExecutor(
    raw=zaza_raw,
    api_server_base_url=api_server,
)
# Run with an empty 'tata' flow input and keep whatever exec() returns.
results = zazaRawExec.exec(input={'tata': {}})

HBox(children=(IntProgress(value=0), HTML(value='')))

>>>>> running flask ubuntu-amiryi-ww:57140
OK - pipeline is running, jobId: raw-zaza-b36ae9e467:bc0df3f4-b3fb-4bfe-b025-872b24852f10.raw-zaza-b36ae9e467
shutdown flask server...
flask server ended
ListenerTracker thread finished. jobId: raw-zaza-b36ae9e467:bc0df3f4-b3fb-4bfe-b025-872b24852f10.raw-zaza-b36ae9e467
getting results...
pipeline "raw-zaza-b36ae9e467" status: completed
timeTook: 206.924 seconds
RESULT (1 of 1 items):
RESULT ITEM 1:
{
    "output": 42
}
<<<<< finished


# Store pipeline, async execute 3 times (webhook listener)

In [7]:
# Store the 'zaza' pipeline so that later cells can execute it by name.
zazaBuilder.store()

OK: pipeline "zaza" was stored successfully!


True

In [8]:
# Executor for the stored 'zaza' pipeline, referenced by name; no tracker type is passed here.
zazaStoredListenerExec = PipelineExecutor(name='zaza', api_server_base_url=api_server)
# Start three asynchronous runs and keep each job id in its own variable.
jobId1 = zazaStoredListenerExec.exec_async(input={})
jobId2 = zazaStoredListenerExec.exec_async(input={})
jobId3 = zazaStoredListenerExec.exec_async(input={})

HBox(children=(IntProgress(value=0), HTML(value='')))

>>>>> running flask ubuntu-amiryi-ww:50408
OK - pipeline is running, jobId: zaza:38669c23-aa90-4204-b687-0b54f641e720.zaza


HBox(children=(IntProgress(value=0), HTML(value='')))

>>>>> running flask ubuntu-amiryi-ww:50409
OK - pipeline is running, jobId: zaza:9baa6e03-badc-4fb3-a7e3-cfb930ddfcad.zaza


HBox(children=(IntProgress(value=0), HTML(value='')))

>>>>> running flask ubuntu-amiryi-ww:50410
OK - pipeline is running, jobId: zaza:44e02551-bca7-446b-ba7e-da940c8ecc85.zaza
shutdown flask server...
flask server ended
ListenerTracker thread finished. jobId: zaza:38669c23-aa90-4204-b687-0b54f641e720.zaza
shutdown flask server...
flask server ended
ListenerTracker thread finished. jobId: zaza:9baa6e03-badc-4fb3-a7e3-cfb930ddfcad.zaza
shutdown flask server...
flask server ended
ListenerTracker thread finished. jobId: zaza:44e02551-bca7-446b-ba7e-da940c8ecc85.zaza


In [9]:
# Run this while the jobs started above are still running to see their status;
# once they have all finished, the executor reports that it has no active jobs.
status_list = zazaStoredListenerExec.get_all_status()

executor has no active jobs


# Async execute stored pipeline 3 times (status polling), stop one

In [None]:
# Executor for the stored 'zaza' pipeline that uses the POLLING tracker type.
zazaStoredPollingExec = PipelineExecutor(
    name='zaza',
    api_server_base_url=api_server,
    tracker=TrackerType.POLLING,
)
# Start three asynchronous runs back to back, one job id per run.
jobId1, jobId2, jobId3 = [zazaStoredPollingExec.exec_async(input={}) for _ in range(3)]
# Give the jobs a few seconds, then stop the second one.
time.sleep(4)
zazaStoredPollingExec.stop(jobId=jobId2)

In [None]:
# Get the results of the first job (jobId2 is the one that was stopped above).
# max_display=10 presumably caps how many result items are printed.
results1 = zazaStoredPollingExec.get_results(jobId=jobId1, max_display=10)

# Execute pipeline with (too small) timeout

In [None]:
# The 2-second timeout is deliberately too short for this pipeline to complete.
results = zazaStoredPollingExec.exec(input={ 'tata': {} }, timeout_sec=2)

# Delete stored pipeline

In [None]:
# Remove the stored 'zaza' pipeline (the one saved earlier with zazaBuilder.store()).
zazaBuilder.delete()

# Build & exec eval node pipeline, control displayed size

In [None]:
# Source of the function the eval node runs, one string per line of code.
split_code_lines = [
    "function split(input) {",
    "return input[0].split(' ');",
    "}",
]
eval_split = {"code": split_code_lines}
splitBuilder = PipelineBuilder(name='multi_result', api_server_base_url=api_server)
# Single node running 'eval-alg'; the function source is handed over as extra data.
splitBuilder.add_node(
    node_name='split',
    alg_name='eval-alg',
    input=["#@flowInput.text"],
    extra_data=eval_split,
)
# Last expression of the cell: display the raw pipeline description.
splitBuilder.get_raw()

In [None]:
# Text fragments passed to the pipeline under the 'text' key of the flow input.
dag_text_fragments = [
    "In mathematics and computer science, a directed acyclic graph ",
    "(DAG (About this sound listen)), is a finite directed graph with",
    " no directed cycles. That is, it consists of finitely many vertices and",
    "edges, with each edge directed from one vertex to another, ",
    "such that there is no way to start at any vertex v and follow",
    "a consistently-directed sequence of edges that eventually ",
    "loops back to v again. Equivalently, a DAG is a directed graph",
    "that has a topological ordering, a sequence of the vertices ",
    "such that every edge is directed from earlier to later in the sequence.",
    "The corresponding concept for undirected graphs is a forest, an undirected graph without ",
    "cycles. Choosing an orientation for a forest produces a special kind of directed acyclic graph",
    "called a polytree. However there are many other kinds of directed acyclic graph that are not",
]
# NOTE: 'input' shadows the Python builtin; the name is kept because a later cell reads it.
input = {"text": dag_text_fragments}
splitExec = PipelineExecutor(
    raw=splitBuilder.get_raw(),
    api_server_base_url=api_server,
)
# Execute and limit the displayed results to 3.
results = splitExec.exec(input=input, max_displayed_results=3)

In [None]:
# Execute again without displaying any results (max_displayed_results=0);
# the return value is still kept in 'results' for the next cell.
results = splitExec.exec(input=input, max_displayed_results=0)

# Iterate over results

In [None]:
# Print how many results came back, then each one with its zero-based position.
print("TOTAL: {} results".format(len(results)))
# enumerate() supplies the index, so no manual counter or re-indexing of 'results' is needed.
for i, item in enumerate(results):
    print('ITEM {}: {}'.format(i, item))

# Get pipelines

In [11]:
# Get all stored pipelines (the call also prints how many there are and their names).
stored = PipelineExecutor.get_all_stored(api_server)
# 'stored' can now be iterated to inspect individual pipelines.

# Get all pipeline jobs that are currently running (the call also prints a summary).
running = PipelineExecutor.get_running_jobs(api_server)

Got 12 stored pipelines: ['trigger-4', 'trigger-3', 'trigger-1', 'batch_trigger', 'simple', 'big-batch', 'simple-wait-batch', 'batch-on-node', 'trigger-2', 'big-wait-batch', 'batch', 'zaza']
Got 0 running jobs:


# Get Algorithms

In [10]:
# Algorithm operations go through an AlgorithmBuilder bound to the api-server URL.
alg_mgr = AlgorithmBuilder(api_server_base_url=api_server)
# Fetch all algorithms; the call also prints how many there are and their names.
algs = alg_mgr.get_all()

Got 5 algorithms: ['black-alg', 'eval-alg', 'cpu-load', 'green-alg', 'yellow-alg']


# Create Algorithm

In [None]:
# NOTE(review): machine-specific absolute path; point it at your local algorithm folder before running.
folder = '/home/amiryi/dev/hkube/hkube_notebook/hkube_notebook/test/test_algorithm'
# Presumably packs the folder into an archive file and returns its file name (confirm against AlgorithmBuilder).
tarfilename = alg_mgr.create_algfile_by_folder(folder)
# Configuration for an algorithm named 'test-alg'; 'main.py' is presumably its entry point (confirm).
config = alg_mgr.create_config('test-alg', 'main.py')
alg_mgr.apply(compressed_alg_file=tarfilename, config=config)
# List the algorithms again to check the result of apply().
algs = alg_mgr.get_all()
