# RADICAL-Cybertools: RADICAL-EnTK Tutorial

One has to handle RADICAL-EnTK applications with some care when running them in a Jupyter notebook.  In particular, one should avoid running cells out of order.  It is usually best to cleanly terminate the kernel before rerunning any or all cells.  This notebook thus puts the exercise code into a *single* cell which you can edit freely and then execute.

## Exercise 2: Add a fourth stage which computes the square root of the sum
  - The kernel could be something like:  `echo "sqrt($(cat sum.txt))" | bc`
  - Output staging should move from the previous last stage (`s3`) to the new stage (`s4`)


In [None]:

import os

# MongoDB connection settings.  Each value is read from the process
# environment and falls back to the given default when the variable is unset.
mdb_host = os.environ.get('MDB_HOST', 'mongodb')
mdb_port = os.environ.get('MDB_PORT', '27017')
# NOTE: the user name is read from `MDB_USER`, not from `MDB_NAME`
mdb_name = os.environ.get('MDB_USER', 'guest')
mdb_pswd = os.environ.get('MDB_PSWD', 'guest')
mdb_dtbs = os.environ.get('MDB_DTBS', 'default')

# Export the database URL via the IPython `%env` magic.  The `$mdb_*`
# references are replaced with the values of the Python variables above,
# so those variable names must not be changed independently of this line.
# NOTE(review): the resulting URL (including the password) is echoed into
# the cell output.
%env RADICAL_PILOT_DBURL=mongodb://$mdb_name:$mdb_pswd@$mdb_host:$mdb_port/$mdb_dtbs

# presumably these silence RADICAL logging and disable animated progress
# reporting to keep the notebook output compact - TODO confirm
%env RADICAL_LOG_LVL=OFF
%env RADICAL_REPORT_ANIME=FALSE

import radical.entk as re


def get_stage_1(sandbox):
    '''
    Build the first stage: a single task which produces a random seed number.

    The task runs `od` on `/dev/random` (reading at most one byte, `-N1`) and
    captures its stdout as `random.txt`.  The task is assigned to the given
    `sandbox`.  Returns the stage holding that task.
    '''

    stage = re.Stage()

    seed_task = re.Task()
    seed_task.sandbox    = sandbox
    seed_task.stdout     = 'random.txt'
    seed_task.executable = '/bin/sh'
    seed_task.arguments  = ['-c', 'od -An -N1 -i /dev/random']

    stage.add_tasks(seed_task)

    return stage


def get_stage_2(sandbox, n_simulations=10):
    '''
    second stage: create `n_simulations` tasks (default: 10) where task `i`
    computes the i'th power of the random seed

    sandbox      : sandbox assigned to every task of this stage
    n_simulations: number of tasks to create; task `i` (counting from 0)
                   writes its result to `power.<i>.txt` (zero-padded to three
                   digits)

    Returns the stage holding all created tasks.
    '''

    s2 = re.Stage()

    for i in range(n_simulations):
        t2 = re.Task()
        t2.executable = '/bin/sh'
        # NOTE(review): in a plain `sh -c` invocation the single quotes would
        # suppress the `$(cat random.txt)` substitution.  The recorded results
        # indicate that the seed is expanded when run through EnTK, so the
        # quoting is kept exactly as is - confirm before changing it.
        t2.arguments  = ['-c', "echo '$(cat random.txt) ^ %d' | bc" % i]
        t2.stdout     = 'power.%03d.txt' % i
        t2.sandbox    = sandbox
        s2.add_tasks(t2)

    return s2


def get_stage_3(sandbox):
    '''
    Build the third stage: a single task which sums up all computed powers.

    The task concatenates all `power.*.txt` files, joins the lines with `+`
    and evaluates the expression with `bc`; its stdout is captured as
    `sum.txt`.  The task is assigned to the given `sandbox`.  Returns the
    stage holding that task.
    '''

    stage = re.Stage()

    sum_task = re.Task()
    sum_task.sandbox    = sandbox
    sum_task.stdout     = 'sum.txt'
    sum_task.executable = '/bin/sh'
    sum_task.arguments  = ['-c', 'cat power.*.txt | paste -sd+ | bc']

    # fetch the result, prefixing the file name with the sandbox name so
    # that the downloads of different pipelines do not overwrite each other
    sum_task.download_output_data = ['sum.txt > %s.sum.txt' % sandbox]

    stage.add_tasks(sum_task)

    return stage


def generate_pipeline(uid):
    '''
    Create one simulation pipeline, i.e., one new ensemble member.

    The pipeline consists of the three stages built by `get_stage_1`,
    `get_stage_2` and `get_stage_3`, in that order.  The given `uid` is used
    as the sandbox name, so all tasks of the pipeline share one sandbox.
    '''

    sandbox = uid

    # build the stages in execution order, all bound to the same sandbox
    stage_builders = (get_stage_1, get_stage_2, get_stage_3)
    stages = [build(sandbox) for build in stage_builders]

    pipeline = re.Pipeline()
    pipeline.add_stages(stages)

    return pipeline

# the application manager executes the workflow on the described resource
appman = re.AppManager()

# resource to run on
# NOTE(review): `walltime` is presumably given in minutes - confirm against
# the EnTK documentation
appman.resource_desc = {
    'resource': 'local.localhost_test',
    'walltime': 10,
    'cpus'    : 2
}

n_pipelines = 2

# one pipeline per ensemble member; the uid is also used as the sandbox name
# and as the prefix of the downloaded result file
ensemble = {generate_pipeline(uid='pipe.%03d' % cnt)
            for cnt in range(n_pipelines)}

appman.workflow = ensemble
appman.run()

# the third stage of each pipeline downloads its sum as `<uid>.sum.txt`
for cnt in range(n_pipelines):
    # read via a context manager so that the file handle is closed again
    with open('pipe.%03d.sum.txt' % cnt) as fin:
        data = fin.read()
    print('%3d -- %25d' % (cnt, int(data)))

[94mEnTK session: re.session.thinkie.merzky.019614.0029
[39m[0m[94mCreating AppManager
[39m[0m[94mSetting up ZMQ queues[39m[0m[92m                                                         ok
[39m[0m

env: RADICAL_PILOT_DBURL=mongodb://guest:guest@mongodb:27017/default
env: RADICAL_PILOT_DBURL=mongodb://am:****@95.217.193.116:27017/am
env: RADICAL_LOG_LVL=OFF
env: RADICAL_REPORT_ANIME=FALSE


[94mAppManager initialized[39m[0m[92m                                                        ok
[39m[0m[94mValidating and assigning resource manager[39m[0m[92m                                     ok
[39m[0m[94mSetting up ZMQ queues[39m[0m[92m                                                        n/a
[39m[0m[94mnew session: [39m[0m[re.session.thinkie.merzky.019614.0029][39m[0m[94m                           \
database   : [39m[0m[mongodb://am:****@95.217.193.116:27017/am][39m[0m[92m                      ok
[39m[0m[94mcreate pilot manager[39m[0m[92m                                                          ok
[39m[0m[94msubmit 1 pilot(s)[39m[0m
        pilot.0000   local.localhost_test      2 cores       0 gpus[39m[0m[92m           ok
[39m[0m[92m[92mUpdate: [39m[0mAll components created
[39m[0m[94mpipeline.0001 state: SCHEDULING
[39m[0m[92mUpdate: [39m[0m[94mpipeline.0001.stage.0003 state: SCHEDULING
[39m[0m[92mUpdate: [39m[0m

  0 --            36319351833633
  1 --           335566482753810
