-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #99 from roocs/cli
Added features for using `daops` with EOEPCA: - command-line interface and tests: `daops/cli.py` and `tests/test_cli.py` - Docker file: `Dockerfile` - CWL file: `app-package.cwl`
- Loading branch information
Showing
17 changed files
with
1,029 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
##=================================================================================
##
## EXAMPLE USAGE
##
##  $ docker build -t daops .
##  $ mkdir ~/container-outputs
##  $ docker run -it \
##       --mount type=bind,source=$HOME/container-outputs,target=/outputs \
##       daops
##
##  # id=cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga
##  # path=/root/.mini-esgf-data/test_data/badc/cmip5/data/$(echo $id | tr . /)
##  # ncdump -h $path/*.nc | grep UNLIMITED
##          time = UNLIMITED ; // (1140 currently)
##  # rm /outputs/*.nc
##  # daops subset --output-dir /outputs --time=2010-1-1/2015-1-1 $id
##  # ncdump -h /outputs/*.nc | grep UNLIMITED
##          time = UNLIMITED ; // (60 currently)
##  # exit
##
##  $ ls ~/container-outputs/
##  zostoga_mon_inmcm4_rcp45_r1i1p1_20100116-20141216.nc
##
##=================================================================================

FROM ubuntu:20.04

# pipefail: without it a failed download in "wget ... | tar ..." below would be
# masked by tar's exit status and the build would carry on.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

ENV BASH_ENV=~/.bashrc \
    MAMBA_ROOT_PREFIX=/srv/conda \
    PATH=$PATH:/srv/conda/envs/daops/bin


# ==== Install apt-packages and micromamba ====

# NOTE(review): the micromamba download is unpinned ("latest") and is not
# checksum-verified; consider pinning a version for reproducible builds.
# The apt lists are removed in this same layer, because deleting them in a
# later RUN would not make the image any smaller.
# ./micromamba is copied (not moved) to /usr/bin, as in the original; the
# "shell init" step ran it from its download location.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        bash \
        bzip2 \
        ca-certificates \
        file \
        git \
        ttf-dejavu \
        wget && \
    wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba --strip-components=1 && \
    ./micromamba shell init -s bash -p ~/micromamba && \
    apt-get autoremove --yes && \
    apt-get clean && \
    rm -fr /var/lib/apt/lists/* && \
    cp ./micromamba /usr/bin && \
    rm -fr /srv/conda/pkgs


# ==== Set up conda environment from yml file ====

ARG tmp_env=/tmp/environment.yml
COPY environment.yml $tmp_env
RUN micromamba create -f $tmp_env && \
    rm -fr $tmp_env /srv/conda/pkgs


# ==== Clone the data repo ====

ARG data_dir=/root/.mini-esgf-data
ARG data_repo_url=https://github.com/roocs/mini-esgf-data
ARG data_repo_branch=master
RUN git clone $data_repo_url $data_dir && \
    cd $data_dir && \
    git checkout $data_repo_branch && \
    rm -fr .git


# ==== Set up the roocs.ini file with paths pointing to the data repo ====
# ==== and ensure that ROOCS_CONFIG environment variable points to it ====

ARG config_file=/root/roocs.ini
ARG config_tmpl=/tmp/roocs.ini.tmpl
COPY roocs.ini.tmpl $config_tmpl
RUN sed "s,DATA_DIR,$data_dir,g" $config_tmpl > $config_file && \
    rm $config_tmpl
# Set via ENV rather than an export in /root/.bashrc, so that the variable is
# also defined when the container runs daops directly without a bash login
# (e.g. "docker run daops daops subset ...").
ENV ROOCS_CONFIG=$config_file


# ==== Install the daops app ====

# COPY creates the destination directory, so no separate mkdir is needed.
ARG tmp_install_dir=/tmp/daops-install
COPY . $tmp_install_dir
RUN cd $tmp_install_dir && \
    /srv/conda/envs/daops/bin/python setup.py install && \
    rm -fr $tmp_install_dir
# As for ROOCS_CONFIG above: ENV instead of a .bashrc export.
ENV USE_PYGEOS=0

# ==== Create a directory that we can bind-mount ====
RUN mkdir /outputs


# ==== Some tidying up (NB further apt-install not possible after this) ====

# This hides the files in the final filesystem but cannot shrink the earlier
# layers that created them.
RUN rm -fr /var/lib/{apt,dpkg,cache,log}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
# CWL application package for daops.
#
# The Workflow "daops" scatters over every input (flat cross product) and runs
# the CommandLineTool "clt" once per combination.  "clt" invokes the
# "daops subset" command-line interface inside the Docker image.
$graph:

- class: Workflow
  doc: Runs daops subsetting process
  id: daops
  requirements:
  - class: ScatterFeatureRequirement
  inputs:
    area:
      doc: Area
      label: Area
      type: string[]
    time:
      doc: Time
      label: Time
      type: string[]
    time_components:
      doc: Time Components
      label: Time Components
      type: string[]
    level:
      doc: Level
      label: Level
      type: string[]
    output_format:
      doc: Output Format
      label: Output Format
      type: string[]
    file_namer:
      doc: File Namer
      label: File Namer
      type: string[]
    output_dir:
      doc: Output dir
      label: Output dir
      type: string[]
    collection:
      doc: Collection
      label: Collection
      type: string[]
  label: data-aware operations (daops)
  outputs:
  - id: wf_outputs
    outputSource:
    - step_1/results
    type:
      Directory[]

  steps:
    step_1:
      in:
        area: area
        time: time
        time_components: time_components
        level: level
        output_format: output_format
        file_namer: file_namer
        output_dir: output_dir
        collection: collection
      out:
      - results
      run: '#clt'
      scatter: [area, time, time_components, level, output_format, file_namer, output_dir, collection]
      scatterMethod: flat_crossproduct

# The CLI requires the "subset" sub-command, so it is part of the base command.
- baseCommand: [daops, subset]
  class: CommandLineTool

  id: clt

  # Values use $(...) parameter references.  The ${...} form is a JavaScript
  # function body and evaluates to null unless it contains a "return".
  arguments:
  - --area
  - valueFrom: $(inputs.area)
  - --time
  - valueFrom: $(inputs.time)
  - --time-components
  - valueFrom: $(inputs.time_components)
  - --levels
  # the CLI option is --levels, but the CWL input is named "level"
  - valueFrom: $(inputs.level)
  - --output-format
  - valueFrom: $(inputs.output_format)
  - --file-namer
  - valueFrom: $(inputs.file_namer)
  - --output-dir
  - valueFrom: $(inputs.output_dir)
  # collection is a positional argument of "daops subset" (no option flag)
  - valueFrom: $(inputs.collection)

  inputs:
    area:
      type: string
    time:
      type: string
    time_components:
      type: string
    level:
      type: string
    output_format:
      type: string
    file_namer:
      type: string
    output_dir:
      type: string
    collection:
      type: string

  outputs:
    results:
      outputBinding:
        glob: .
      type: Directory
  requirements:
    EnvVarRequirement:
      envDef:
        # /srv/conda/envs/daops/bin is where the accompanying Dockerfile
        # installs the daops executable.
        PATH: /bin:/srv/conda/envs/daops/bin:/srv/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
        # NOTE(review): assumes the image was built from the accompanying
        # Dockerfile with its default config_file path - confirm for
        # iwi/daops:0.0.1.
        ROOCS_CONFIG: /root/roocs.ini
    ResourceRequirement: {}
    InlineJavascriptRequirement: {}
    DockerRequirement:
      dockerPull: iwi/daops:0.0.1
  #stderr: std.err
  #stdout: std.out

cwlVersion: v1.0

$namespaces:
  s: https://schema.org/
s:softwareVersion: 0.3.0
schemas:
- http://schema.org/version/9.0/schemaorg-current-http.rdf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
"""Console script for daops.""" | ||
|
||
__author__ = """Alan Iwi""" | ||
__contact__ = 'alan.iwi@stfc.ac.uk' | ||
__copyright__ = "Copyright 2023 United Kingdom Research and Innovation" | ||
__license__ = "BSD - see LICENSE file in top-level package directory" | ||
|
||
import os | ||
import sys | ||
import argparse | ||
import dateutil.parser | ||
import configparser | ||
|
||
from daops.ops.subset import subset | ||
from roocs_utils.utils.file_utils import FileMapper | ||
|
||
def parse_args(argv=None):
    """Parse the daops command line.

    Only one sub-command, ``subset``, is defined, and it is mandatory.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse (without the program name).  When omitted or
        ``None``, argparse falls back to ``sys.argv[1:]``.

    Returns
    -------
    argparse.Namespace
        Attributes: ``command``, ``area``, ``time``, ``time_components``,
        ``levels``, ``output_format``, ``file_namer``, ``output_dir`` and
        ``collection`` (a non-empty list of strings).

    Raises
    ------
    SystemExit
        Raised by argparse on invalid arguments or a missing sub-command
        (exit status 2), and after printing help (exit status 0).
    """
    parser = argparse.ArgumentParser()

    # A dest is given so that argparse can name the missing argument when no
    # sub-command is supplied; required subparsers without a dest do not
    # produce a clean usage error on some Python versions.
    sub_parsers = parser.add_subparsers(dest='command')
    sub_parsers.required = True

    parser_subset = sub_parsers.add_parser('subset', help='subset data')
    parser_subset.add_argument('--area', '-a', type=str,
                               help=('area in format w,s,e,n. Hint: if w is negative, include an "=" sign '
                                     'e.g. --area=-10,...'))
    parser_subset.add_argument('--time', '-t', type=str, metavar='time_window',
                               help='time window e.g. 1999-01-01T00:00:00/2100-12-30T00:00:00')
    parser_subset.add_argument('--time-components', '-c', type=str,
                               help="time components e.g. month:dec,jan,feb or 'year:1970,1980|month:01,02,03'")
    parser_subset.add_argument('--levels', '-l', type=str,
                               help=('comma-separated list of levels (e.g. 500,1000,2000) '
                                     'or slash-separated range (e.g. 50/2000 for 50 to 2000)'))
    parser_subset.add_argument('--output-format', '-f', type=str, metavar='format',
                               choices=('netcdf', 'nc', 'zarr'), default='netcdf')
    parser_subset.add_argument('--file-namer', '-F', type=str,
                               choices=('simple', 'standard'), default='standard')
    parser_subset.add_argument('--output-dir', '-d', type=str, metavar='output_directory', required=True)
    # nargs='+' makes this positional mandatory, so no default is needed.
    parser_subset.add_argument('collection', type=str, nargs='+')

    return parser.parse_args(argv)
|
||
|
||
def get_params(args):
    """Translate parsed command-line arguments into keyword arguments for ``subset``.

    A single collection argument is passed through as the one-element list
    produced by argparse; two or more are wrapped in a ``FileMapper``.
    ``apply_fixes`` is always set to ``False``.

    Parameters
    ----------
    args : argparse.Namespace
        Result of ``parse_args()``.

    Returns
    -------
    dict
        Keyword arguments for ``subset``.
    """
    names = args.collection
    if len(names) == 1:
        collection = names
    else:
        collection = FileMapper(names)

    params = dict(collection=collection)
    params['time'] = args.time
    params['time_components'] = args.time_components
    params['area'] = args.area
    params['level'] = args.levels          # CLI option is plural, keyword is singular
    params['output_type'] = args.output_format
    params['output_dir'] = args.output_dir
    params['file_namer'] = args.file_namer
    params['apply_fixes'] = False
    return params
|
||
|
||
def check_env():
    """Verify that the ROOCS_CONFIG environment variable names a readable ini file.

    If the variable is unset, the file cannot be read, or it fails to parse as
    ini, a message is printed and the process exits with status 1.

    (For certain types of invalid file, main is in fact never called, so the
    exit might not always be graceful in those cases.)

    Call this after get_params() so that 'help' still works even if the
    variable is not set.
    """
    config_env_var = 'ROOCS_CONFIG'
    files_read = None

    if config_env_var in os.environ:
        config = configparser.ConfigParser()
        try:
            # read() returns the list of files successfully parsed
            files_read = config.read(os.environ[config_env_var])
        except configparser.Error:
            files_read = None

    if files_read:
        return

    print(f'Environment variable {config_env_var} must contain the path name of a config file in ini format')
    sys.exit(1)
|
||
|
||
def main():
    """Entry point: parse the command line, run the subset, print the output URIs.

    The environment check runs only after the arguments have been parsed and
    converted, so that argparse help and usage errors work without a config.
    Each URI in the result's ``file_uris`` is printed on its own line.
    """
    subset_kwargs = get_params(parse_args())
    check_env()
    result = subset(**subset_kwargs)
    for file_uri in result.file_uris:
        print(file_uri)


if __name__ == "__main__":
    sys.exit(main())  # pragma: no cover
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Template for the roocs configuration file.
# DATA_DIR is a placeholder: the Dockerfile replaces every occurrence with the
# directory of the cloned data repository (sed "s,DATA_DIR,$data_dir,g").
# Each [project:NAME] section sets that project's base_dir.

[project:cmip5]
base_dir = DATA_DIR/test_data/badc/cmip5/data/cmip5

[project:cmip6]
base_dir = DATA_DIR/test_data/badc/cmip6/data/CMIP6

[project:cordex]
base_dir = DATA_DIR/test_data/badc/cordex/data/cordex

[project:c3s-cmip5]
base_dir = DATA_DIR/test_data/gws/nopw/j04/cp4cds1_vol1/data/c3s-cmip5

# NOTE(review): same directory as [project:cmip6] above - presumably intentional; confirm.
[project:c3s-cmip6]
base_dir = DATA_DIR/test_data/badc/cmip6/data/CMIP6

[project:c3s-cordex]
base_dir = DATA_DIR/test_data/gws/nopw/j04/cp4cds1_vol1/data/c3s-cordex
Oops, something went wrong.