# RADICAL-Pilot Tutorial


## 1. Installation

    pip install radical.pilot
    
## 2. A Simple Workload consisting of a Bag-of-Tasks


In [2]:
import os, sys
import radical.pilot as rp

# Point RADICAL-Pilot at the tutorial's MongoDB instance, unless
# RADICAL_PILOT_DBURL was already exported (e.g. to use a private database).
# setdefault() keeps a pre-existing value and makes re-running this cell
# idempotent.
os.environ.setdefault(
    "RADICAL_PILOT_DBURL",
    "mongodb://ec2-54-221-194-147.compute-1.amazonaws.com:24242/sc15tut")

# Alternative database endpoint (uncomment to force it):
#os.environ["RADICAL_PILOT_DBURL"]="mongodb://129.114.108.124:27017/sctut15"
# Uncomment for debug-level logging:
#os.environ["RADICAL_PILOT_VERBOSE"]="DEBUG"




## 2.a. Submit Pilot

In [3]:
# Create a new session. No need to try/except this: if session creation
# fails, there is not much we can do anyways...
session = rp.Session()
print("session id: %s" % session.uid)

# All other pilot code is wrapped in try/except/finally.  If an exception is
# caught, we can rely on the session object to exist and be valid, and we can
# thus tear the whole RP stack down via a 'session.close()' call in the
# 'finally' clause.
try:
    # ----- CHANGE THIS -- CHANGE THIS -- CHANGE THIS -- CHANGE THIS ------
    #
    # Change the user name below if you are using a remote resource
    # and your username on that resource is different from the username
    # on your local machine.
    #
    #c = rp.Context('ssh')
    #c.user_id = "tg824689"
    #session.add_context(c)

    # Add a Pilot Manager. Pilot managers manage one or more ComputePilots.
    print("Initializing Pilot Manager ...")
    pmgr = rp.PilotManager(session=session)

    # ----- CHANGE THIS -- CHANGE THIS -- CHANGE THIS -- CHANGE THIS ------
    #
    # If you want to run this example on your local machine, you don't have
    # to change anything here.
    #
    # Change the resource below if you want to run on a remote resource.
    # You also might have to set the 'project' to your allocation ID if
    # your remote resource does compute time accounting.
    #
    # A list of preconfigured resources can be found at:
    # http://radicalpilot.readthedocs.org/en/latest/machconf.html#preconfigured-resources
    #
    pdesc = rp.ComputePilotDescription()
    pdesc.resource = "local.localhost_anaconda"  # NOTE: This is a "label", not a hostname
    pdesc.runtime  = 10 # minutes
    pdesc.cores    = 16
    pdesc.cleanup  = False
    #pdesc.project  = 'TG-MCB090174'
    #pdesc.queue    = 'development'

    # Submit the pilot.
    print("Submitting Compute Pilot to Pilot Manager ...")
    pilot = pmgr.submit_pilots(pdesc)

    # Combine the ComputePilot, the ComputeUnits and a scheduler via
    # a UnitManager object.
    print("Initializing Unit Manager ...")
    umgr = rp.UnitManager(session=session,
                          scheduler=rp.SCHED_DIRECT_SUBMISSION)

    # Add the created ComputePilot to the UnitManager.
    print("Registering Compute Pilot with Unit Manager ...")
    umgr.add_pilots(pilot)

    NUMBER_JOBS = 64  # the total number of CUs to run

    # Build one ComputeUnit description per job.
    cudesc_list = []
    for i in range(NUMBER_JOBS):
        # -------- BEGIN USER DEFINED CU DESCRIPTION --------- #
        cudesc = rp.ComputeUnitDescription()
        # Environment variables are strings, so pass the index as text.
        cudesc.environment = {'CU_NO': str(i)}
        cudesc.executable  = "/bin/echo"
        cudesc.arguments   = ['I am CU number $CU_NO']
        cudesc.cores       = 1
        # -------- END USER DEFINED CU DESCRIPTION --------- #
        cudesc_list.append(cudesc)

    # Submit the previously created ComputeUnit descriptions to the
    # UnitManager. This will trigger the selected scheduler to start
    # assigning ComputeUnits to the ComputePilots.
    print("Submit Compute Units to Unit Manager ...")
    cu_set = umgr.submit_units(cudesc_list)

    print("Waiting for CUs to complete ...")
    umgr.wait_units()

    # Only report success if every unit actually ended in the DONE state;
    # otherwise say how many did not, instead of claiming success blindly.
    unfinished = [unit for unit in cu_set if unit.state != rp.DONE]
    if unfinished:
        print("%d of %d CUs did not complete successfully!"
              % (len(unfinished), len(cu_set)))
    else:
        print("All CUs completed successfully!")

    # Per-unit report: id, final state, exit code and captured stdout.
    for unit in cu_set:
        print("* CU %s, state %s, exit code: %s, stdout: %s"
              % (unit.uid, unit.state, unit.exit_code, unit.stdout))

except Exception as e:
    # Something unexpected happened in the pilot code above: report it and
    # re-raise so the failure stays visible (the 'finally' clause still runs).
    print("caught Exception: %s" % e)
    raise

except (KeyboardInterrupt, SystemExit) as e:
    # Ctrl-C, or a sys.exit() issued somewhere in the code above.  Neither
    # derives from Exception, so both need their own handler.  No callback is
    # registered in this cell; the message is printed and the exception is
    # not re-raised.
    print("need to exit now: %s" % e)

finally:
    # Always clean up the session, no matter if we caught an exception or
    # not.
    print("closing session")
    session.close()

    # the above is equivalent to
    #
    #   session.close (cleanup=True, terminate=True)
    #
    # it will thus both clean out the session's database record, and kill
    # all remaining pilots (here: the one pilot submitted above, if it is
    # still active).

session id: rp.session.ip-10-99-194-113.ec2.internal.radical.016747.0022
Initializing Pilot Manager ...
Submitting Compute Pilot to Pilot Manager ...
Initializing Unit Manager ...
Registering Compute Pilot with Unit Manager ...
Submit Compute Units to Unit Manager ...
Waiting for CUs to complete ...
All CUs completed successfully!
* CU unit.000000, state Done, exit code: 0, stdout: I am CU number 0

* CU unit.000001, state Done, exit code: 0, stdout: I am CU number 1

* CU unit.000002, state Done, exit code: 0, stdout: I am CU number 2

* CU unit.000003, state Done, exit code: 0, stdout: I am CU number 3

* CU unit.000004, state Done, exit code: 0, stdout: I am CU number 4

* CU unit.000005, state Done, exit code: 0, stdout: I am CU number 5

* CU unit.000006, state Done, exit code: 0, stdout: I am CU number 6

* CU unit.000007, state Done, exit code: 0, stdout: I am CU number 7

* CU unit.000008, state Done, exit code: 0, stdout: I am CU number 8

* CU unit.000009, state Done, exit co