Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-18352: update allocateNodes.py with ability to start HTCondor with partitionable slots #12

Merged
merged 6 commits into from
Mar 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 8 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Travis CI configuration: lint the code base with flake8.
sudo: false
language: python
matrix:
include:
# Lint under Python 3.6 only.
- python: '3.6'
install:
# flake8 is the only tool this build installs.
- pip install flake8
# The build's single script step is the flake8 run.
script: flake8
6 changes: 2 additions & 4 deletions bin.src/allocateNodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,11 @@
from __future__ import print_function
import sys
import os
import shutil
import lsst.utils
from lsst.ctrl.execute.namedClassFactory import NamedClassFactory
from lsst.ctrl.execute.allocator import Allocator
from lsst.ctrl.execute.allocatorParser import AllocatorParser
from lsst.ctrl.execute.condorConfig import CondorConfig
from lsst.ctrl.execute import envString
from string import Template


def main():
Expand All @@ -53,13 +50,14 @@ def main():

# create the plugin class
schedulerName = configuration.platform.scheduler
schedulerClass = NamedClassFactory.createClass("lsst.ctrl.execute." + schedulerName +"Plugin")
schedulerClass = NamedClassFactory.createClass("lsst.ctrl.execute." + schedulerName + "Plugin")

# create the plugin
scheduler = schedulerClass(platform, p.getArgs(), configuration, "$HOME/.lsst/condor-info.py")

# submit the request
scheduler.submit(platform, platformPkgDir)


if __name__ == "__main__":
main()
5 changes: 3 additions & 2 deletions bin.src/runOrca.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ def main():

platformPkgDir = lsst.utils.getPackageDir("ctrl_platform_"+creator.platform)
if args.platformConfig is None:
configFileName = os.path.join(platformPkgDir,
"etc", "config", "execConfig.py")
configFileName = os.path.join(platformPkgDir,
"etc", "config", "execConfig.py")
else:
configFileName = args.platformConfig

Expand Down Expand Up @@ -68,5 +68,6 @@ def main():
os.execvp(cmd_split[0], cmd_split)
os.wait()[0]


if __name__ == "__main__":
main()
4 changes: 3 additions & 1 deletion etc/scripts/generateDag.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ def writeDagFile(pipeline, templateFile, infile, workerdir, prescriptFile, runid
#
# Searching for a space detects
# extended input like : visit=887136081 raft=2,2 sensor=0,1
# If there is no space, the dataid is something simple like a skytile id
# If there is no space, the dataid is something simple like
# a skytile id
newData = myData
visit = str(count // 100)

Expand Down Expand Up @@ -214,5 +215,6 @@ def main():

sys.exit(0)


if __name__ == '__main__':
main()
3 changes: 2 additions & 1 deletion python/lsst/ctrl/execute/allocationConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ class AllocatedPlatformConfig(pexConfig.Config):

class AllocationConfig(pexConfig.Config):
"""A pex_config file describing the platform specific information required
to fill out a scheduler file which will be used to submit a scheduler request.
to fill out a scheduler file which will be used to submit a scheduler
request.
"""
# this is done on two levels instead of one for future expansion of this
# config class, which may require local attributes to be specified.
Expand Down
41 changes: 25 additions & 16 deletions python/lsst/ctrl/execute/allocator.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,22 @@
from __future__ import print_function
from builtins import str
from builtins import object
import os, sys
import os
import sys
import pwd
from datetime import datetime
from string import Template
from lsst.ctrl.execute import envString
from lsst.ctrl.execute.allocationConfig import AllocationConfig
from lsst.ctrl.execute.condorConfig import CondorConfig
from lsst.ctrl.execute.condorInfoConfig import CondorInfoConfig
from lsst.ctrl.execute.templateWriter import TemplateWriter
from lsst.ctrl.execute.seqFile import SeqFile


class Allocator(object):
"""A class which consolidates allocation pex_config information with override
information (obtained from the command line) and produces a PBS file using
these values.
"""A class which consolidates allocation pex_config information with
override information (obtained from the command line) and produces a
PBS file using these values.

Parameters
----------
Expand Down Expand Up @@ -98,7 +98,7 @@ def __init__(self, platform, opts, configuration, condorInfoFileName):
self.commandLineDefaults = {}

self.commandLineDefaults["NODE_COUNT"] = self.opts.nodeCount
self.commandLineDefaults["SLOTS"] = self.opts.slots
self.commandLineDefaults["CPUS"] = self.opts.cpus
self.commandLineDefaults["WALL_CLOCK"] = self.opts.maximumWallClock

self.commandLineDefaults["QUEUE"] = self.opts.queue
Expand Down Expand Up @@ -151,8 +151,8 @@ def load(self):
self.defaults["SCHEDULER"] = self.configuration.platform.scheduler

def loadAllocationConfig(self, name, suffix):
"""Loads all values from allocationConfig and command line overrides into
data structures suitable for use by the TemplateWriter object.
"""Loads all values from allocationConfig and command line overrides
into data structures suitable for use by the TemplateWriter object.
"""
resolvedName = envString.resolve(name)
allocationConfig = AllocationConfig()
Expand Down Expand Up @@ -208,7 +208,7 @@ def loadAllocationConfig(self, name, suffix):
self.defaults["GENERATED_CONFIG"] = os.path.basename(self.condorConfigFileName)
self.defaults["CONFIGURATION_ID"] = self.uniqueIdentifier
return allocationConfig

def createSubmitFile(self, inputFile):
"""Creates a PBS file using the file "input" as a Template

Expand Down Expand Up @@ -313,11 +313,11 @@ def getNodes(self):
"""
return self.getParameter("NODE_COUNT")

def getSlots(self):
"""Accessor for SLOTS
@return the value of SLOTS
def getCPUs(self):
"""Accessor for CPUS
@return the value of CPUS
"""
return self.getParameter("SLOTS")
return self.getParameter("CPUS")

def getWallClock(self):
"""Accessor for WALL_CLOCK
Expand Down Expand Up @@ -350,14 +350,23 @@ def getParameter(self, value):

def printNodeSetInfo(self):
nodes = self.getNodes()
slots = self.getSlots()
cpus = self.getCPUs()
wallClock = self.getWallClock()
nodeString = ""

if int(nodes) > 1:
nodeString = "s"
print("%s node%s will be allocated on %s with %s slots per node and maximum time limit of %s" %
(nodes, nodeString, self.platform, slots, wallClock))
if self.opts.dynamic is None:
print("%s node%s will be allocated on %s with %s cpus per node and maximum time limit of %s" %
(nodes, nodeString, self.platform, cpus, wallClock))
elif self.opts.dynamic == '__default__':
print("%s node%s will be allocated on %s using default dynamic slots configuration \
with %s cpus per node and maximum time limit of %s" %
(nodes, nodeString, self.platform, cpus, wallClock))
else:
print("%s node%s will be allocated on %s using dynamic slot block specified in \
'%s' with %s cpus per node and maximum time limit of %s" %
(nodes, nodeString, self.platform, self.opts.dynamic, cpus, wallClock))
print("Node set name:")
print(self.getNodeSetName())

Expand Down
10 changes: 6 additions & 4 deletions python/lsst/ctrl/execute/allocatorParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,8 @@ def parseArgs(self, basename):
parser.add_argument("platform", help="node allocation platform")
parser.add_argument("-n", "--node-count", action="store", default=None,
dest="nodeCount", help="number of nodes to use", type=int, required=True)
parser.add_argument("-s", "--slots", action="store", default=None, dest="slots",
help="slots per node", type=int, required=True)

parser.add_argument("-c", "--cpus", action="store", default=None, dest="cpus",
help="cpus per node (WAS '-s' (--slots) option)", type=int, required=True)
parser.add_argument("-m", "--maximum-wall-clock", action="store", dest="maximumWallClock",
default=None, help="maximum wall clock time", type=str, required=True)
parser.add_argument("-N", "--node-set", action="store",
Expand All @@ -82,7 +81,10 @@ def parseArgs(self, basename):
parser.add_argument("-g", "--glidein-shutdown", action="store", dest="glideinShutdown",
type=int, default=None, help="glide-in inactivity shutdown time in seconds")
parser.add_argument("-v", "--verbose", action="store_true", dest="verbose", help="verbose")
parser.add_argument("-r", "--reservation", action="store", dest="reservation", default=None, help="run id")
parser.add_argument("-r", "--reservation", action="store", dest="reservation",
default=None, help="run id")
parser.add_argument("-d", "--dynamic", const='__default__', nargs='?', action="store",
dest="dynamic", type=str, default=None, help="configure to use dynamic slots")

self.args = parser.parse_args()

Expand Down
3 changes: 2 additions & 1 deletion python/lsst/ctrl/execute/condorConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ class PlatformConfig(pexConfig.Config):
scheduler = pexConfig.Field(doc="scheduler type", dtype=str, default=None)
manager = pexConfig.Field(doc="workflow manager", dtype=str, default=None)
setup_using = pexConfig.Field(doc="environment setup type", dtype=str, default=None)
manager_software_home = pexConfig.Field(doc="location of workflow manager software", dtype=str, default=None)
manager_software_home = pexConfig.Field(doc="location of workflow manager software",
dtype=str, default=None)


class CondorConfig(pexConfig.Config):
Expand Down
1 change: 1 addition & 0 deletions python/lsst/ctrl/execute/condorInfoConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class CondorInfoConfig(pexConfig.Config):
"""
platform = pexConfig.ConfigChoiceField("platform info", FakeTypeMap(UserConfig))


if __name__ == "__main__":
config = CondorInfoConfig()
filename = "$HOME/.lsst/condor-info.py"
Expand Down
26 changes: 15 additions & 11 deletions python/lsst/ctrl/execute/configurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,11 @@ def __init__(self, opts, configFileName):

self.platform = self.opts.platform

# Look up the user's name and home directory in the $HOME/.lsst/condor-info.py file
# If the platform is lsst, and the user_name or user_home is not in there, then default to
# user running this command and the value of $HOME, respectively.
# Look up the user's name and home directory in the
# $HOME/.lsst/condor-info.py file. If the platform
# is lsst, and the user_name or user_home is not in
# there, then default to user running this command
# and the value of $HOME, respectively.
user_name = None
user_home = None
for name in list(condorInfoConfig.platform.keys()):
Expand All @@ -75,7 +77,8 @@ def __init__(self, opts, configFileName):
# If we're on the lsst platform and the condorInfoConfig didn't
# have an entry for lsst user name and home, set to reasonable values
# These really do need to be set for all the other platforms, since
# while the user name may be the same, it's unlikely the home directory will be.
# while the user name may be the same, it's unlikely the home
# directory will be.
if self.platform == "lsst":
if user_name is None:
user_name = pwd.getpwuid(os.geteuid()).pw_name
Expand Down Expand Up @@ -142,12 +145,12 @@ def getGenericConfigFileName(self):
executePkgDir = lsst.utils.getPackageDir('ctrl_execute')

name = "config_with_%s.py.template" % self.setup_using
genericConfigName = os.path.join(executePkgDir,
"etc", "templates", self.manager, name)
genericConfigName = os.path.join(executePkgDir,
"etc", "templates", self.manager, name)
if os.path.exists(genericConfigName):
return genericConfigName
raise RuntimeError("File %s not found; check etc/templates." %
genericConfigName)
raise RuntimeError("File %s not found; check etc/templates." %
genericConfigName)

def createRunId(self):
"""create a unique runid
Expand Down Expand Up @@ -215,7 +218,8 @@ def load(self, name):
USER_NAME=self.commandLineDefaults["USER_NAME"])

tempLocalScratch = Template(configuration.platform.localScratch)
self.defaults["LOCAL_SCRATCH"] = tempLocalScratch.substitute(USER_NAME=self.commandLineDefaults["USER_NAME"])
self.defaults["LOCAL_SCRATCH"] = \
tempLocalScratch.substitute(USER_NAME=self.commandLineDefaults["USER_NAME"])
self.defaults["IDS_PER_JOB"] = configuration.platform.idsPerJob
self.defaults["DATA_DIRECTORY"] = envString.resolve(configuration.platform.dataDirectory)
self.defaults["FILE_SYSTEM_DOMAIN"] = configuration.platform.fileSystemDomain
Expand Down Expand Up @@ -261,8 +265,8 @@ def isVerbose(self):

def getParameter(self, value):
"""Accessor for generic value
@return None if value is not set. Otherwise, use the command line override
(if set), or the default Config value
@return None if value is not set. Otherwise, use the command line
override (if set), or the default Config value
"""
if value in self.commandLineDefaults:
return self.commandLineDefaults[value]
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/ctrl/execute/envString.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@


def resolve(strVal):
p = re.compile('\$[a-zA-Z0-9_]+')
p = re.compile(r'\$[a-zA-Z0-9_]+')
retVal = strVal
exprs = p.findall(retVal)
for i in exprs:
Expand Down
12 changes: 7 additions & 5 deletions python/lsst/ctrl/execute/namedClassFactory.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,29 @@
# the GNU General Public License along with this program. If not,
# see <http://www.lsstcorp.org/LegalNotices/>.
#

from builtins import object



class NamedClassFactory(object):
"""Create a new "name" class object

Parameters
----------
name : `str`
the fully qualified name of an object

Returns
-------
classobj : `object`
an object of the specified name
"""

def createClass(name):
dot = name.rindex('.')
pack = name[0:dot]
modname = name[dot+1:]
modname = modname[0].capitalize()+modname[1:]
# -1 is no longer accepted in python 3
# module = __import__(name, globals(), locals(), [modname], -1)
module = __import__(name, globals(), locals(), [modname], 0)
Expand Down