Add a script to generate predictions by submitting to an SGE grid
kboone committed May 20, 2019
1 parent c35d744 commit afe22d2
Showing 1 changed file with 109 additions and 0 deletions.
scripts/avocado_predict_submit (109 additions, 0 deletions)
@@ -0,0 +1,109 @@
#!/usr/bin/env python
"""Submit jobs to an SGE queue to generate predictions for a dataset using
avocado
This requires that avocado be installed and that the avocado_predict script
is on the PATH.
"""

import argparse
import os
import subprocess

import avocado

sge_template = """#!/bin/bash
#$ -V
#$ -S /bin/bash
#$ -N {job_name}
#$ -o {jobs_directory}/{job_name}.out
#$ -e {jobs_directory}/{job_name}.err

# Use a single core for each job. This parallelizes better than trying to use
# multiple cores per job.
export MKL_NUM_THREADS=1
export NUMEXPR_NUM_THREADS=1
export OMP_NUM_THREADS=1

cd {working_directory}

avocado_predict \\
    {dataset} \\
    {classifier} \\
    --chunk {job} \\
    --num_chunks {num_jobs}
"""

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        'dataset',
        help='Name of the dataset to generate predictions for.'
    )
    parser.add_argument(
        'classifier',
        help='Name of the classifier to use.'
    )
    parser.add_argument(
        '--num_jobs',
        type=int,
        default=100,
        help='Number of jobs to submit to process the dataset. '
             '(default: %(default)s)',
    )
    parser.add_argument(
        '--working_directory',
        default=None,
        help='Working directory. Default is the current directory.'
    )
    parser.add_argument(
        '--jobs_directory',
        default=None,
        help='Jobs directory for qsub scripts and output. Default is '
             '"[working_directory]/jobs/predict_[dataset]_[classifier]/"'
    )
    parser.add_argument(
        '--qsub_arguments',
        default='',
        help='Additional arguments to pass to qsub.'
    )
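    # Example (site-dependent, shown only as an illustration): extra resources
    # or a specific queue can be requested with something like
    # --qsub_arguments="-l h_vmem=4G -q long.q".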

    raw_args = parser.parse_args()

    # Build a dictionary with the arguments that will be used to format the
    # submit script.
    args = vars(raw_args).copy()

    # Update the working directory if it wasn't set.
    if args['working_directory'] is None:
        args['working_directory'] = os.getcwd()

    # Update the jobs directory if it wasn't set, and make sure that it
    # exists.
    if args['jobs_directory'] is None:
        args['jobs_directory'] = os.path.join(
            args['working_directory'], 'jobs',
            'predict_%s_%s' % (args['dataset'], args['classifier'])
        )
    os.makedirs(args['jobs_directory'], exist_ok=True)


    # Create and submit the jobs one by one.
    for job_id in range(args['num_jobs']):
        job_args = args.copy()

        job_args['job'] = job_id

        job_name = 'predict_%04d_%s_%s' % (job_id, args['dataset'],
                                           args['classifier'])
        job_args['job_name'] = job_name

        job_path = '{jobs_directory}/{job_name}.sh'.format(**job_args)

        job_script = sge_template.format(**job_args)

        # Write the job script.
        with open(job_path, 'w') as job_file:
            job_file.write(job_script)

        # Submit the job.
        subprocess.call(["qsub"] + args['qsub_arguments'].split() + [job_path])
