Add featurize scripts
kboone committed May 17, 2019
1 parent 0434f03 commit ce913fa
Showing 4 changed files with 183 additions and 4 deletions.
4 changes: 2 additions & 2 deletions avocado/astronomical_object.py
@@ -228,8 +228,8 @@ def grad_neg_ln_like(p):
             # Fit failed. Print out a warning, and use the initial guesses for
             # fit parameters. This only really seems to happen for objects
             # where the lightcurve is almost entirely noise.
-            logger.warn("GP fit failed for %s! Using guessed GP parameters." %
-                        self)
+            logger.warn("GP fit failed for %s! Using guessed GP parameters. "
+                        "This is usually OK." % self)
             gp.set_parameter_vector(guess_parameters)
 
         if verbose:
9 changes: 7 additions & 2 deletions scripts/avocado_augment_submit
@@ -2,7 +2,7 @@
 """Submit jobs to an SGE queue to augment a dataset using avocado
 This requires that avocado be installed and that the avocado_augment script is
-on the PATH of the submitted job.
+on the PATH.
 """
 
 import argparse
 
@@ -69,6 +69,11 @@ if __name__ == "__main__":
         help='Jobs directory for qsub scripts and output. Default is '
              '"[working_directory]/jobs/[augmented_dataset]/"'
     )
+    parser.add_argument(
+        '--qsub_arguments',
+        default='',
+        help='Additional arguments to pass to qsub'
+    )
 
     raw_args = parser.parse_args()
 
@@ -106,4 +111,4 @@ if __name__ == "__main__":
             job_file.write(job_template)
 
         # Submit the job
-        subprocess.call(["qsub", job_path])
+        subprocess.call(["qsub"] + args['qsub_arguments'].split() + [job_path])
59 changes: 59 additions & 0 deletions scripts/avocado_featurize
@@ -0,0 +1,59 @@
#!/usr/bin/env python
"""Featurize a dataset using avocado"""

import argparse

import avocado


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        'dataset',
        help='Name of the dataset to featurize.'
    )
    parser.add_argument(
        '--chunk',
        type=int,
        default=None,
        help='If set, process the dataset in chunks. This sets the chunk '
             'number of the dataset to use.',
    )
    parser.add_argument(
        '--num_chunks',
        type=int,
        default=100,
        help='If chunk is set, this is the total number of chunks to use for '
             'processing the dataset. (default: %(default)s)',
    )
    parser.add_argument(
        '--tag',
        default=avocado.settings['features_tag'],
        help='The tag to use for these features. The default is set in '
             'avocado_settings.json. (default: %(default)s)',
    )

    args = parser.parse_args()

    # Load the reference dataset
    print("Loading dataset...")
    dataset = avocado.load(
        args.dataset,
        chunk=args.chunk,
        num_chunks=args.num_chunks,
    )

    # Load the featurizer. For now, we only have the PLAsTiCC featurizer,
    # although this could be an option in the future.
    print("Loading featurizer...")
    featurizer = avocado.plasticc.PlasticcFeaturizer()

    # Featurize the dataset
    print("Featurizing the dataset...")
    dataset.extract_raw_features(featurizer)

    # Save the features.
    print("Saving the features...")
    dataset.write_raw_features(tag=args.tag)

    print("Done!")
115 changes: 115 additions & 0 deletions scripts/avocado_featurize_submit
@@ -0,0 +1,115 @@
#!/usr/bin/env python
"""Submit jobs to an SGE queue to featurize a dataset using avocado
This requires that avocado be installed and that the avocado_featurize script
is on the PATH.
"""

import argparse
import os
import subprocess

import avocado

sge_template = """
#!/bin/bash
#$ -V
#$ -S /bin/bash
#$ -N {job_name}
#$ -o {jobs_directory}/{job_name}.out
#$ -e {jobs_directory}/{job_name}.err
# Use a single core for each job. This parallelizes better than trying to use
# multiple cores per job.
export MKL_NUM_THREADS=1
export NUMEXPR_NUM_THREADS=1
export OMP_NUM_THREADS=1
cd {working_directory}
avocado_featurize \\
    {dataset} \\
    --chunk {job} \\
    --num_chunks {num_jobs} \\
    --tag {tag}
"""

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        'dataset',
        help='Name of the dataset to featurize.'
    )
    parser.add_argument(
        '--chunk',
        type=int,
        default=None,
        help='If set, process the dataset in chunks. This sets the chunk '
             'number of the dataset to use.',
    )
    parser.add_argument(
        '--num_jobs',
        type=int,
        default=100,
        help='Number of jobs to submit to process the dataset. '
             '(default: %(default)s)',
    )
    parser.add_argument(
        '--tag',
        default=avocado.settings['features_tag'],
        help='The tag to use for these features. The default is set in '
             'avocado_settings.json. (default: %(default)s)',
    )
    parser.add_argument(
        '--working_directory',
        default=None,
        help='Working directory. Default is the current directory.'
    )
    parser.add_argument(
        '--jobs_directory',
        default=None,
        help='Jobs directory for qsub scripts and output. Default is '
             '"[working_directory]/jobs/featurize_[dataset]/"'
    )
    parser.add_argument(
        '--qsub_arguments',
        default='',
        help='Additional arguments to pass to qsub'
    )

    raw_args = parser.parse_args()

    # Build a dictionary with the arguments that will be used to format the
    # submit script.
    args = vars(raw_args).copy()

    # Update the working directory if it wasn't set.
    if args['working_directory'] is None:
        args['working_directory'] = os.getcwd()

    # Update the jobs directory if it wasn't set and make sure that it exists.
    if args['jobs_directory'] is None:
        args['jobs_directory'] = os.path.join(
            args['working_directory'], 'jobs', 'featurize_%s' % args['dataset']
        )
    os.makedirs(args['jobs_directory'], exist_ok=True)

    # Create and submit the jobs one by one
    for job_id in range(args['num_jobs']):
        job_args = args.copy()

        job_args['job'] = job_id

        job_name = 'featurize_%04d_%s' % (job_id, args['dataset'])
        job_args['job_name'] = job_name

        job_path = '{jobs_directory}/{job_name}.sh'.format(**job_args)

        job_template = sge_template.format(**job_args)

        # Write the job file
        with open(job_path, 'w') as job_file:
            job_file.write(job_template)

        # Submit the job
        subprocess.call(["qsub"] + args['qsub_arguments'].split() + [job_path])
