In [1]:
# salloc --partition=shared --time=00:30:00 -A mh0731
# cdo gendis,griddes_tropics.txt -setgrid,icon_grid_0015_R02B09_G.nc pr_20200120.nc weight_file_tropics.nc
# cdo remap,griddes_tropics.txt,weight_file_tropics.nc -setgrid,icon_grid_0015_R02B09_G.nc pr_20200120.nc pr_20200120_reggrid.nc

In [2]:
from getpass import getuser # Look up the login name of the current user
from pathlib import Path    # Object-oriented library to deal with paths
import os
from tempfile import NamedTemporaryFile, TemporaryDirectory # Creating temporary files/dirs
from subprocess import run, PIPE
import sys
 
import dask # Distributed data library
from dask_jobqueue import SLURMCluster # Setting up distributed memory via Slurm
from distributed import Client, progress, wait # Library to orchestrate distributed resources
import xarray as xr # Library to work with labeled n-dimensional data and dask

# NOTE: this silences every warning for the rest of the session
import warnings
warnings.filterwarnings(action='ignore')

In [3]:
# Set some user specific variables
user_name = getuser()
# The scratch dir is /scratch/<first letter of login>/<login>
scratch_dir = Path('/scratch') / user_name[0] / user_name

# Create a temp directory that receives the working files and logs of the
# distributed cluster. The directory is removed again when this
# TemporaryDirectory object is cleaned up (e.g. when the kernel shuts down).
dask_tmp_dir = TemporaryDirectory(dir=scratch_dir, prefix='reggrid_')

# Extra directives for the Slurm batch job that hosts the dask workers.
# Only --output is given (once): stdout and stderr both end up in that log file.
slurm_directives = [
    '-J rggrd',
    f'-D {dask_tmp_dir.name}',
    '--begin=now',
    f'--output={dask_tmp_dir.name}/LOG_cluster.%j.o',
]

cluster = SLURMCluster(memory='512GiB',# '256GiB',
                       cores=72,# 48,
                       project='mh0731',
                       walltime='0:45:00',
                       queue='gpu',
                       name='reggrid',
                       scheduler_options={'dashboard_address': ':12435'},
                       local_directory=dask_tmp_dir.name,
                       job_extra=slurm_directives,
                       interface='ib0')

# Submit a single Slurm job and connect a client to the cluster
cluster.scale(jobs=1)
dask_client = Client(cluster)
# Block until the workers are up. NOTE(review): 9 presumably matches the number
# of worker processes one 72-core job starts (6 for the 48-core variant) -- confirm.
dask_client.wait_for_workers(9)#(6)


In [4]:
@dask.delayed
def run_cmd(cmd, path_extra=Path(sys.exec_prefix)/'bin'):
    '''Run an external command and return its standard output.

    Because of the ``dask.delayed`` decorator, calling this function only
    creates a lazy task; the command runs when that task is computed.

    Parameters
    ----------
    cmd : sequence of str or path-like
        Program name followed by its arguments, passed to ``subprocess.run``
        without a shell.
    path_extra : path-like
        Directory that is put in front of ``PATH`` for the child process.
        Defaults to the ``bin`` directory of the running Python environment.

    Returns
    -------
    str
        Decoded standard output of the command.

    Raises
    ------
    RuntimeError
        If the command exits with a non-zero return code; the message holds
        the command line and its standard error output.
    '''
    env_extra = os.environ.copy()
    # Prepend path_extra so its executables are found first; cope with an
    # environment that has no PATH at all instead of failing with KeyError.
    inherited_path = env_extra.get('PATH')
    if inherited_path:
        env_extra['PATH'] = f'{path_extra}{os.pathsep}{inherited_path}'
    else:
        env_extra['PATH'] = str(path_extra)

    status = run(cmd, check=False, stderr=PIPE, stdout=PIPE, env=env_extra)

    if status.returncode != 0:
        # Build the message defensively: cmd may hold Path objects and stderr
        # may not be valid UTF-8; neither must hide the actual failure.
        cmd_text = cmd if isinstance(cmd, str) else ' '.join(str(part) for part in cmd)
        stderr_text = status.stderr.decode('utf-8', errors='replace')
        raise RuntimeError(f'{cmd_text}: {stderr_text}')
    return status.stdout.decode('utf-8', errors='replace')

In [10]:
# Project work directory and the directory holding the regridding inputs/outputs
work_dir = Path('/work/mh0731/m300414/')
cartesian_grid_dir = work_dir / 'DyWinter_b10' / 'Cartesian_Grid'

# Input files: every NetCDF file below data_path whose name contains glob_pattern.
# rglob searches recursively, so files in sub-directories match as well.
# NOTE(review): the original remark says a sub-directory holds further matching
# files that are not wanted -- presumably the later slice on data_files is what
# excludes them; confirm.
data_path = work_dir / 'DyWinter_b10' / 'Tropics_fromGrib'
glob_pattern = 'omega500_'
data_files = sorted(map(str, data_path.rglob('*' + glob_pattern + '*.nc')))

# Grid file of the source data plus the target grid description and the
# remapping weights used by cdo
gridfile = Path('/pool/data/ICON/grids/public/mpim/0017/icon_grid_0017_R02B10_G.nc')
grid_description = cartesian_grid_dir / 'griddes_tropics.txt'
weight_file = cartesian_grid_dir / 'weight_file_tropics.nc'

In [16]:
# Queue one delayed cdo remap call per input file. run_cmd is lazy, so this
# cell only builds the task list; nothing is executed here.
regrid_out_dir = work_dir / 'DyWinter_b10' / 'Cartesian_Grid'
remap_operator = f'remap,{grid_description},{weight_file}'
setgrid_operator = f'-setgrid,{gridfile}'

run_futures = []
for infile in data_files[28:]: # 7 increment?
    # The time stamp is cut from a fixed position counted from the end of the path
    date = infile[-24:-11]
    print(date)

    outfile = regrid_out_dir / f'omega500_{date}_reggrid.nc'
    command = ('cdo', '-P', '8', remap_operator, setgrid_operator, infile, str(outfile))
    run_futures.append(run_cmd(command))

20200217T0000
20200218T0000
20200219T0000
20200220T0000
20200221T0000
20200222T0000
20200223T0000
20200224T0000
20200225T0000
20200226T0000
20200227T0000
20200228T0000
20200229T0000
20200301T0000


In [17]:
# Start all queued tasks on the cluster and show a text progress bar for them
run_jobs = dask.persist(run_futures)
progress(run_jobs, notebook=False)

# Fetch the results of the finished tasks. Without this step a task that
# failed (run_cmd raises RuntimeError on a non-zero exit code) would go
# unnoticed; computing the persisted tasks re-raises the error here.
run_outputs = dask.compute(run_jobs)

[########################################] | 100% Completed |  4min 40.5s

In [18]:
# Visual marker that the notebook ran through to the last cell
print('done.')

done.
