Skip to content

Commit

Permalink
Enable simpler yaml when no output collection.
Browse files Browse the repository at this point in the history
  • Loading branch information
MichelleGower committed Mar 16, 2023
1 parent 5f1f637 commit fe2d0b7
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 184 deletions.
114 changes: 37 additions & 77 deletions config/bps_idf.yaml
Original file line number Diff line number Diff line change
@@ -1,72 +1,35 @@
# PANDA plugin specific settings:
# iddsServer: "https://aipanda015.cern.ch:443/idds"
placeholderParams: ['qgraphNodeId', 'qgraphId']
defaultPreCmdOpts: "--long-log --log-level=VERBOSE --log-file payload-log.json"
includeConfigs:
- ${CTRL_BPS_PANDA_DIR}/config/bps_panda.yaml

# lsst Docker image location in GAR (Google Artifact Registry)
# Must end with slash. Override with empty string to use images from the Docker hub instead.
dockerImageLocation: "us-central1-docker.pkg.dev/panda-dev-1a74/"

# Limit the number of jobs in a single PanDA task
maxJobsPerTask: 30000

# IDF PanDA specific settings:
# One cloud can have multiple sites. One site can have multiple queues.
# For LSST cloud, there is only one site LSST.
# If there are multiple sites in a cloud, computeSite can be used.
computeCloud: "LSST"
computeSite: "LSST"
project: dev
campaign: quick
computeCloud: LSST
computeSite: LSST
pipelineYaml: "${OBS_LSST_DIR}/pipelines/imsim/DRP.yaml#step1"

payload:
s3EndpointUrl: "https://storage.googleapis.com"
payloadFolder: payload
fileDistributionEndPoint: "s3://butler-us-central1-panda-dev/dc2/{payloadFolder}/{uniqProcName}/"
butlerConfig: s3://butler-us-central1-panda-dev/dc2/butler-external.yaml

# SLAC PanDA specific settings:
# computingCloud: US
# computeSite: DOMA_LSST_SLAC_TEST

executionButler:
queue: "DOMA_LSST_GOOGLE_MERGE"

pipetask:
pipetaskInit:
# This is different from the ctrl_bps default only in the addition of {fileDistributionEndPoint}
runQuantumCommand: >
${CTRL_MPEXEC_DIR}/bin/pipetask {initPreCmdOpts} run
-b {butlerConfig}
-i {inCollection}
-o {output}
--output-run {outputRun}
--qgraph {fileDistributionEndPoint}/{qgraphFile}
--qgraph-id {qgraphId} --qgraph-node-id {qgraphNodeId}
--clobber-outputs
--init-only
--extend-run {extraInitOptions}
forcedPhotCoadd:
queue: "DOMA_LSST_GOOGLE_TEST_HIMEM_NON_PREEMPT"

# This is different from the ctrl_bps default only in the addition of {fileDistributionEndPoint}
runQuantumCommand: >
${CTRL_MPEXEC_DIR}/bin/pipetask {runPreCmdOpts} run
-b {butlerConfig}
-i {inCollection}
-o {output}
--output-run {outputRun}
--qgraph {fileDistributionEndPoint}/{qgraphFile}
--qgraph-id {qgraphId}
--qgraph-node-id {qgraphNodeId}
--clobber-outputs
--skip-init-writes
--extend-run {extraRunQuantumOptions}
# Job environment setup
custom_lsst_setup: ""
setupLSSTEnv: >
source /opt/lsst/software/stack/loadLSST.bash;
setup lsst_distrib;
{custom_lsst_setup}
# This is a series of setup commands preceding the actual core SW execution with running prmon (memory monitor)
runnerCommand: >
logDir=/tmp/panda/${PANDAID};
mkdir ${logDir};
# lsst Docker image location in GAR (Google Artifact Registry)
# Must end with slash. Override with empty string to use images from the Docker hub instead.
dockerImageLocation: "us-central1-docker.pkg.dev/panda-dev-1a74/"
payloadFolder: payload
fileDistributionEndPoint: "s3://butler-us-central1-panda-dev/dc2/{payloadFolder}/{uniqProcName}/"
s3EndpointUrl: "https://storage.googleapis.com"
jobLogDir: "/tmp/panda/${PANDAID}"
jobInitDir: "/tmp"
jobContainer: >
logFile=${logDir}/${REALTIME_LOGFILES};
touch ${logFile};
chmod ugo+w ${logFile} ${logDir}; ln -s ${logFile} ./;
chmod ugo+w ${logFile}; ln -s ${logFile} ./;
docker run
--rm
-v ${logDir}:${logDir}
Expand All @@ -77,19 +40,16 @@ runnerCommand: >
--env AWS_SECRET_ACCESS_KEY=$(</credentials/AWS_SECRET_ACCESS_KEY)
--env PGPASSWORD=$(</credentials/PGPASSWORD)
--env S3_ENDPOINT_URL=${S3_ENDPOINT_URL} {sw_image}
/bin/bash -c "source /opt/lsst/software/stack/loadLSST.bash;
cd /tmp;
ls -al;
setup lsst_distrib;
pwd;
prmon
-i 5
-f ${logDir}/prmon.txt
-j ${logDir}/prmon.json
--
python3 \${CTRL_BPS_PANDA_DIR}/python/lsst/ctrl/bps/panda/edgenode/cmd_line_decoder.py _cmd_line_;" >&2;
retStat=$?;
ln -fs ${logDir}/prmon.txt ./memory_monitor_output.txt;
ln -fs ${logDir}/prmon.json ./memory_monitor_summary.json;
exit $retStat
wmsServiceClass: lsst.ctrl.bps.panda.PanDAService
/bin/bash -c "{payloadCommand}" >&2;
jobCleanup: ""

# IDF is configured with many PanDA queues
# non-preemption is not currently a feature
# matched when selecting destination queue automatically.
executionButler:
queue: "DOMA_LSST_GOOGLE_MERGE"

pipetask:
forcedPhotCoadd:
queue: "DOMA_LSST_GOOGLE_TEST_HIMEM_NON_PREEMPT"
30 changes: 0 additions & 30 deletions config/bps_idf_new.yaml

This file was deleted.

77 changes: 24 additions & 53 deletions config/bps_panda.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# PANDA plugin specific settings:
wmsServiceClass: lsst.ctrl.bps.panda.PanDAService
placeholderParams: ['qgraphNodeId', 'qgraphId']
defaultPreCmdOpts: "--long-log --log-level=VERBOSE --log-file payload-log.json"
qgraphPreCmdOpts: "--long-log --log-level=VERBOSE" # don't need payload-log.json on submit side.

# Limit the number of jobs in a single PanDA task
maxJobsPerTask: 30000
Expand All @@ -14,58 +16,18 @@ priority: 500
# Default number of retries
numberOfRetries: 3

pipetask:
pipetaskInit:
# This is different from the ctrl_bps default only in the addition of {fileDistributionEndPoint}
runQuantumCommand: >
${CTRL_MPEXEC_DIR}/bin/pipetask {initPreCmdOpts} run
-b {butlerConfig}
-i {inCollection}
-o {output}
--output-run {outputRun}
--qgraph {fileDistributionEndPoint}/{qgraphFile}
--qgraph-id {qgraphId} --qgraph-node-id {qgraphNodeId}
--clobber-outputs
--init-only
--extend-run {extraInitOptions}
# This is different from the ctrl_bps default only in the addition of {fileDistributionEndPoint}
runQuantumCommand: >
${CTRL_MPEXEC_DIR}/bin/pipetask {runPreCmdOpts} run
-b {butlerConfig}
-i {inCollection}
-o {output}
--output-run {outputRun}
--qgraph {fileDistributionEndPoint}/{qgraphFile}
--qgraph-id {qgraphId}
--qgraph-node-id {qgraphNodeId}
--clobber-outputs
--skip-init-writes
--extend-run {extraRunQuantumOptions}
# Job values which sites need to define for runnerCommand:
# custom_lsst_setup - user defined special environments
# setupLSSTEnv - how to get lsst_distrib set up
# jobLogDir - where prmon and other non-payload logs go
# jobInitDir - directory were should be when job starts
# jobContainer - any commands needed to set up container
# jobCleanup - any cleanup commands to run after job

# This is a series of setup commands preceding the actual core SW execution with running prmon (memory monitor)
runnerCommand: >
logDir=/tmp/panda/${PANDAID};
mkdir ${logDir};
logFile=${logDir}/${REALTIME_LOGFILES};
touch ${logFile};
chmod ugo+w ${logFile} ${logDir}; ln -s ${logFile} ./;
file=${logDir}/memory_monitor_output.txt; touch ${file}; chmod ugo+w ${file}; ln -s ${file} ./;
file=${logDir}/memory_monitor_summary.json; touch ${file}; chmod ugo+w ${file}; ln -s ${file} ./;
docker run
--rm
-v ${logDir}:${logDir}
-v ${logFile}:/tmp/${REALTIME_LOGFILES}
--network host
--privileged
--env AWS_ACCESS_KEY_ID=$(</credentials/AWS_ACCESS_KEY_ID)
--env AWS_SECRET_ACCESS_KEY=$(</credentials/AWS_SECRET_ACCESS_KEY)
--env PGPASSWORD=$(</credentials/PGPASSWORD)
--env S3_ENDPOINT_URL=${S3_ENDPOINT_URL} {sw_image}
/bin/bash -c "source /opt/lsst/software/stack/loadLSST.bash;
cd /tmp;
payloadCommand: >
cd {jobInitDir};
ls -al;
setup lsst_distrib;
{setupLSSTEnv}
pwd;
python3 \${CTRL_BPS_PANDA_DIR}/python/lsst/ctrl/bps/panda/edgenode/cmd_line_decoder.py _cmd_line_ & pJob=\$!;
prmon -i 5
Expand All @@ -75,8 +37,17 @@ runnerCommand: >
wait \$pJob;
ret=\$?;
wait \$mJob;
exit \$ret;" >&2;
{jobCleanup}
exit \$ret;
runnerCommand: >
logDir={jobLogDir};
mkdir -p ${logDir};
chmod ugo+w ${logDir};
file=${logDir}/memory_monitor_output.txt; touch ${file}; chmod ugo+w ${file};
if [ ! -e memory_monitor_output.txt ]; then ln -s ${file} ./; fi;
file=${logDir}/memory_monitor_summary.json; touch ${file}; chmod ugo+w ${file};
if [ ! -e memory_monitor_summary.json ]; then ln -s ${file} ./; fi;
{jobContainer}
retStat=$?;
exit $retStat
wmsServiceClass: lsst.ctrl.bps.panda.PanDAService
37 changes: 15 additions & 22 deletions config/bps_usdf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,39 +7,32 @@ project: dev
campaign: quick
computeCloud: US
computeSite: SLAC
requestMemory: 2048
# PanDA does the scheduling based on memory request
s3EndpointUrl: "https://storage.googleapis.com"
payloadFolder: payload
fileDistributionEndPoint: "file:///sdf/group/rubin/sandbox/{operator}/panda_cache_box/{payloadFolder}/{uniqProcName}/"

# location of main butler repo at USDF
payload:
butlerConfig: /sdf/group/rubin/repo/main

# Job environment setup
custom_lsst_setup: ""
setupLSSTEnv: >
unset PYTHONPATH;
source /cvmfs/sw.lsst.eu/linux-x86_64/lsst_distrib/{LSST_VERSION}/loadLSST.bash;
setup lsst_distrib;
{custom_lsst_setup}
# To override the IDF runnerCommand. It can be dropped when a universal runnerCommand is available in bps_panda.yaml
runnerCommand: >
unset PYTHONPATH;
{setupLSSTEnv}
pwd; ls -al;
python3 ${CTRL_BPS_PANDA_DIR}/python/lsst/ctrl/bps/panda/edgenode/cmd_line_decoder.py _cmd_line_ & pJob=$!;
prmon -i 5
-f memory_monitor_output.txt
-j memory_monitor_summary.json
-p $pJob & mJob=$!;
wait $pJob;
retStat=$?;
wait $mJob;
rm -fr EXEC_REPO-*;
exit $retStat
# Other job variables
jobInitDir: "`pwd`"
jobLogDir: "{jobInitDir}"
jobContainer: >
/bin/bash -c "{payloadCommand}" >&2;
jobCleanup: "rm -fr EXEC_REPO-*;"

payload:
s3EndpointUrl: "https://storage.googleapis.com"
butlerConfig: /sdf/group/rubin/repo/main
payloadFolder: payload
fileDistributionEndPoint: "file:///sdf/group/rubin/sandbox/{operator}/panda_cache_box/{payloadFolder}/{uniqProcName}/"

# Specify memory request for executionButler, pipetaskInit and forcedPhotCoadd, placeholder for now
requestMemory: 2048 # PanDA does the scheduling based on memory request
executionButler:
requestMemory: 7000
queue: "SLAC_Rubin_Merge"
Expand Down
3 changes: 3 additions & 0 deletions doc/changes/DM-38307.misc.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Update some default yaml values to more easily allow parts to be
modified as well as provide values to go with the updated bps
default yaml (e.g., no longer need runQuantumCommands).
5 changes: 3 additions & 2 deletions python/lsst/ctrl/bps/panda/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,8 +429,9 @@ def add_decoder_prefix(config, cmd_line, distribution_path, files):
_LOG.debug("files_plc_hldr[%s] = %s", key, files_plc_hldr[key])

cmdline_hex = convert_exec_string_to_hex(cmd_line)
_, decoder_prefix = config.search("runnerCommand", opt={"replaceEnvVars": False, "expandEnvVars": False})
decoder_prefix = decoder_prefix.replace(
_, runner_command = config.search("runnerCommand", opt={"replaceEnvVars": False, "expandEnvVars": False})
runner_command = runner_command.replace("\n", " ")
decoder_prefix = runner_command.replace(
"_cmd_line_",
str(cmdline_hex)
+ " ${IN/L} "
Expand Down

0 comments on commit fe2d0b7

Please sign in to comment.