Skip to content

Commit

Permalink
Project/molssi (#1648)
Browse files Browse the repository at this point in the history
* iteration

* add config stub

* cleanup;
  • Loading branch information
andre-merzky committed Jul 2, 2018
1 parent 71566f2 commit 6fa9375
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 17 deletions.
34 changes: 23 additions & 11 deletions examples/01_unit_details.py
Expand Up @@ -79,18 +79,22 @@
# Create a workload of ComputeUnits.
# Each compute unit runs '/bin/date'.

n = 128 # number of units to run
report.info('create %d unit description(s)\n\t' % n)
report.info('create %d unit description(s)\n\t' % 2)

cuds = list()
for i in range(0, n):

# create a new CU description, and fill it.
# Here we don't use dict initialization.
cud = rp.ComputeUnitDescription()
cud.executable = '/bin/date'
cuds.append(cud)
report.progress()

cud = rp.ComputeUnitDescription()
cud.executable = '/usr/bin/mongo'
cud.arguments = 'mongodb://$RP_APP_TUNNEL/ --eval db.stats()'.split()
cuds.append(cud)
report.progress()

cud = rp.ComputeUnitDescription()
cud.executable = 'echo'
cud.arguments = '$RP_APP_TUNNEL'
cuds.append(cud)
report.progress()

report.ok('>>ok\n')

# Submit the previously created ComputeUnit descriptions to the
Expand All @@ -106,13 +110,21 @@
for unit in units:
report.plain(' * %s: %s, exit: %3s, out: %s\n' \
% (unit.uid, unit.state[:4],
unit.exit_code, unit.stdout.strip()[:35]))
unit.exit_code, unit.stderr[:35]))

# get some more details for one unit:
unit_dict = units[0].as_dict()
report.plain("unit workdir : %s\n" % unit_dict['unit_sandbox'])
report.plain("pilot id : %s\n" % unit_dict['pilot'])
report.plain("exit code : %s\n" % unit_dict['exit_code'])
report.plain("exit stdout : %s\n" % unit_dict['stdout'])

# get some more details for one unit:
unit_dict = units[1].as_dict()
report.plain("unit workdir : %s\n" % unit_dict['unit_sandbox'])
report.plain("pilot id : %s\n" % unit_dict['pilot'])
report.plain("exit code : %s\n" % unit_dict['exit_code'])
report.plain("exit stdout : %s\n" % unit_dict['stdout'])


except Exception as e:
Expand Down
56 changes: 50 additions & 6 deletions src/radical/pilot/agent/bootstrap_0.sh
Expand Up @@ -1546,17 +1546,35 @@ RUNTIME=$((RUNTIME * 60))
# down on its own
RUNTIME=$((RUNTIME + 60))

# ------------------------------------------------------------------------------
# If the host that will run the agent is not capable of communicating
# with the outside world directly, we will set up a tunnel.
if [[ $FORWARD_TUNNEL_ENDPOINT ]]; then
get_tunnel(){

addr=$1

profile_event 'tunnel_setup_start'

echo "# -------------------------------------------------------------------"
echo "# Setting up forward tunnel for MongoDB to $FORWARD_TUNNEL_ENDPOINT."
echo "# Setting up forward tunnel to $addr."

# Bind to localhost
BIND_ADDRESS=`/sbin/ifconfig $TUNNEL_BIND_DEVICE|grep "inet addr"|cut -f2 -d:|cut -f1 -d" "`
BIND_ADDRESS=$(/sbin/ifconfig $TUNNEL_BIND_DEVICE|grep "inet addr"|cut -f2 -d:|cut -f1 -d" ")

if test -z "$BIND_ADDRESS"
then
BIND_ADDRESS=$(/sbin/ifconfig lo | grep 'inet' | xargs echo | cut -f 2 -d ' ')
fi

if test -z "$BIND_ADDRESS"
then
BIND_ADDRESS=$(ip addr
| grep 'state UP' -A2
| grep 'inet'
| awk '{print $2}'
| cut -f1 -d'/')
# BIND_ADDRESS="127.0.0.1"
fi

# Look for an available port to bind to.
# This might be necessary if multiple agents run on one host.
Expand All @@ -1573,25 +1591,51 @@ if [[ $FORWARD_TUNNEL_ENDPOINT ]]; then
# Set up tunnel
# TODO: Extract port and host
FORWARD_TUNNEL_ENDPOINT_PORT=22
if test "$FORWARD_TUNNEL_ENDPOINT" = "BIND_ADDRESS"; then

if test -z "$FORWARD_TUNNEL_ENDPOINT"
then
FORWARD_TUNNEL_ENDPOINT_HOST=$BIND_ADDRESS

elif test "$FORWARD_TUNNEL_ENDPOINT" = "BIND_ADDRESS"; then
# On some systems, e.g. Hopper, sshd on the mom node is not bound to 127.0.0.1
# In those situations, and if configured, bind to the just obtained bind address.
FORWARD_TUNNEL_ENDPOINT_HOST=$BIND_ADDRESS

else
# FIXME: ensure FORWARD_TUNNEL_ENDPOINT is set
FORWARD_TUNNEL_ENDPOINT_HOST=$FORWARD_TUNNEL_ENDPOINT
fi
ssh -o StrictHostKeyChecking=no -x -a -4 -T -N -L $BIND_ADDRESS:$DBPORT:$HOSTPORT -p $FORWARD_TUNNEL_ENDPOINT_PORT $FORWARD_TUNNEL_ENDPOINT_HOST &

# FIXME: check if tunnel stays up
echo ssh -o StrictHostKeyChecking=no -x -a -4 -T -N -L $BIND_ADDRESS:$DBPORT:$addr -p $FORWARD_TUNNEL_ENDPOINT_PORT $FORWARD_TUNNEL_ENDPOINT_HOST
ssh -o StrictHostKeyChecking=no -x -a -4 -T -N -L $BIND_ADDRESS:$DBPORT:$addr -p $FORWARD_TUNNEL_ENDPOINT_PORT $FORWARD_TUNNEL_ENDPOINT_HOST &

# Kill ssh process when bootstrap_0 dies, to prevent lingering ssh's
trap 'jobs -p | grep ssh | xargs kill' EXIT

# and export to agent
export RADICAL_PILOT_DB_HOSTPORT=$BIND_ADDRESS:$DBPORT
export RP_BS_TUNNEL="$BIND_ADDRESS:$DBPORT"

profile_event 'tunnel_setup_stop'
}

if ! test -z "$FORWARD_TUNNEL_ENDPOINT"
then
get_tunnel "$HOSTPORT"
export RADICAL_PILOT_DB_HOSTPORT="$RP_BS_TUNNEL"
fi

# We also set up a tunnel for the application to use, if a corresponding
# endpoint is requested via the environment.
if ! test -z "$RP_APP_TUNNEL_ADDR"
then
echo "app tunnel addr : $RP_APP_TUNNEL_ADDR"
get_tunnel "$RP_APP_TUNNEL_ADDR"
export RP_APP_TUNNEL="$RP_BS_TUNNEL"
echo "app tunnel setup: $RP_APP_TUNNEL"
fi


rehash "$PYTHON"

# ready to setup the virtenv
Expand Down
3 changes: 3 additions & 0 deletions src/radical/pilot/agent/executing/popen.py
Expand Up @@ -257,6 +257,9 @@ def spawn(self, launcher, cu):
else:
env_string += 'unset RP_PROF\n'

if 'RP_APP_TUNNEL' in os.environ:
env_string += 'export RP_APP_TUNNEL="%s"\n' % os.environ['RP_APP_TUNNEL']

env_string += '''
prof(){
if test -z "$RP_PROF"
Expand Down
4 changes: 4 additions & 0 deletions src/radical/pilot/configs/resource_local.json
Expand Up @@ -14,6 +14,10 @@
"job_manager_endpoint" : "fork://localhost/",
"filesystem_endpoint" : "file://localhost/"
},
"pre_bootstrap_1" : [
"export RP_APP_TUNNEL_ADDR=144.76.72.175:27017",
"echo $RP_APP_TUNNEL_ADDR"
],
"default_remote_workdir" : "$HOME",
"lrms" : "FORK",
"agent_scheduler" : "CONTINUOUS",
Expand Down
50 changes: 50 additions & 0 deletions src/radical/pilot/configs/resource_vtarc_dt.json
@@ -0,0 +1,50 @@

{
"stampede_ssh": {
"description" : "The XSEDE 'Stampede' cluster at TACC (https://www.tacc.utexas.edu/stampede/).",
"notes" : "Always set the ``project`` attribute in the ComputePilotDescription or the pilot will fail.",
"schemas" : ["gsissh", "ssh", "go"],
"mandatory_args" : ["project"],
"gsissh" :
{
"job_manager_endpoint" : "slurm+gsissh://stampede.tacc.utexas.edu:2222/",
"filesystem_endpoint" : "gsisftp://stampede.tacc.utexas.edu:2222/"
},
"ssh" :
{
"job_manager_endpoint" : "slurm+ssh://stampede.tacc.utexas.edu/",
"filesystem_endpoint" : "sftp://stampede.tacc.utexas.edu/"
},
"go":
{
"job_manager_endpoint" : "slurm+ssh://stampede.tacc.utexas.edu/",
"filesystem_endpoint" : "go://xsede#stampede/"
},
"default_queue" : "normal",
"lrms" : "SLURM",
"agent_scheduler" : "CONTINUOUS",
"agent_spawner" : "POPEN",
"agent_launch_method" : "SSH",
"task_launch_method" : "SSH",
"mpi_launch_method" : "MPIRUN_RSH",
"pre_bootstrap_1" : ["module purge",
"module load TACC",
"module load intel/15.0.2",
"module load python/2.7.12",
"module unload xalt",
# Load ICC license so we can build during bootstrap
"source ~train00/ssi_sourceme",
"export TACC_DELETE_FILES=TRUE"
],
"default_remote_workdir" : "$WORK",
"valid_roots" : ["/scratch", "$SCRATCH", "/work", "$WORK"],
"rp_version" : "local",
"virtenv_mode" : "create",
"python_dist" : "default",
"export_to_cu" : ["LMOD_CMD",
"LMOD_SYSTEM_DEFAULT_MODULES",
"LD_LIBRARY_PATH"],
"cu_pre_exec" : ["module restore"]
}
}

0 comments on commit 6fa9375

Please sign in to comment.