In [1]:
from getpass import getuser # Libaray to copy things
from pathlib import Path # Object oriented libary to deal with paths
import os
from tempfile import NamedTemporaryFile, TemporaryDirectory # Creating temporary Files/Dirs
from subprocess import run, PIPE
import sys
 
import dask # Distributed data libary
from dask_jobqueue import SLURMCluster # Setting up distributed memories via slurm
from distributed import Client, progress, wait # Libaray to orchestrate distributed resources

import xarray as xr # Libary to work with labeled n-dimensional data and dask
import numpy as np
import skimage.util as sutil
import matplotlib.pyplot as plt

# sys.path.insert(0, os.path.abspath('/home/mpim/m300414/phd/'))
from org_metrics import Pairs, gen_regionprops_objects_all, gen_shapely_objects_all, gen_tuplelist
from org_metrics import radar_organisation_metric, avg_area, lower_rom_limit

In [2]:
import warnings
warnings.filterwarnings(action='ignore')

In [3]:
# Per-user scratch directory: /scratch/<first letter of login>/<login>
scratch_dir = Path('/scratch') / getuser()[0] / getuser()

# Temporary directory (prefix 'rome_') inside the scratch dir that receives the SLURM log
# files of the cluster jobs. The TemporaryDirectory object removes the directory and its
# contents when it is cleaned up or garbage-collected.
dask_tmp_dir = TemporaryDirectory(dir=scratch_dir, prefix='rome_')

# Number of SLURM jobs to request and the number of dask workers expected per job.
n_jobs = 2
workers_per_job = 9  # gpu-partition has 9 workers per node

# NOTE(review): newer dask_jobqueue releases rename `project` -> `account` and
# `job_extra` -> `job_extra_directives`; confirm against the installed version.
cluster = SLURMCluster(memory='500GiB',
                       cores=72,
                       project='mh0731',
                       walltime='01:20:00',
                       queue='gpu',
                       name='rome',
                       scheduler_options={'dashboard_address': ':12435'},
                       local_directory='/home/mpim/m300414/phd/Notebooks/',
                       # Extra sbatch directives. The log-file directive was listed twice
                       # with identical content; a single entry is sufficient.
                       job_extra=['-J rome',
                                  '-D /home/mpim/m300414/phd/Notebooks/',
                                  '--begin=now',
                                  f'--output={dask_tmp_dir.name}/LOG_cluster.%j.o',
                                 ],
                       interface='ib0')

cluster.scale(jobs=n_jobs) # requests whole nodes
dask_client = Client(cluster)
# Block until all expected workers (jobs x workers per job = 18) have connected.
dask_client.wait_for_workers(n_jobs * workers_per_job)

In [12]:
# Root directory of the input data and the file-name pattern to match below it.
data_path = Path('/work/mh0731/m300414/DyWinter_b10/Cartesian_Grid/')
glob_pattern_2d = 'rh500_*.nc'

# Recursively collect all matching files as strings, order them by name and drop the
# first 11 entries.
# NOTE(review): the reason for skipping 11 files is not visible here - confirm.
file_names = sorted(map(str, data_path.rglob(glob_pattern_2d)))[11:]
file_names

['/work/mh0731/m300414/DyWinter_b10/Cartesian_Grid/rh500_20200131T0000_reggrid.nc',
 '/work/mh0731/m300414/DyWinter_b10/Cartesian_Grid/rh500_20200201T0000_reggrid.nc',
 '/work/mh0731/m300414/DyWinter_b10/Cartesian_Grid/rh500_20200202T0000_reggrid.nc',
 '/work/mh0731/m300414/DyWinter_b10/Cartesian_Grid/rh500_20200203T0000_reggrid.nc',
 '/work/mh0731/m300414/DyWinter_b10/Cartesian_Grid/rh500_20200204T0000_reggrid.nc',
 '/work/mh0731/m300414/DyWinter_b10/Cartesian_Grid/rh500_20200205T0000_reggrid.nc',
 '/work/mh0731/m300414/DyWinter_b10/Cartesian_Grid/rh500_20200206T0000_reggrid.nc',
 '/work/mh0731/m300414/DyWinter_b10/Cartesian_Grid/rh500_20200207T0000_reggrid.nc',
 '/work/mh0731/m300414/DyWinter_b10/Cartesian_Grid/rh500_20200208T0000_reggrid.nc',
 '/work/mh0731/m300414/DyWinter_b10/Cartesian_Grid/rh500_20200209T0000_reggrid.nc',
 '/work/mh0731/m300414/DyWinter_b10/Cartesian_Grid/rh500_20200210T0000_reggrid.nc',
 '/work/mh0731/m300414/DyWinter_b10/Cartesian_Grid/rh500_20200211T0000_reggr

In [13]:
@dask.delayed
def slide_domain_over_tropics(var_large_field, domain_size=(117, 117)):
    """Average a 2-D field over overlapping square windows, periodic along the second axis.

    The field is extended along its second axis with a halo copied from the opposite
    edge: ``half + 1`` columns are prepended and ``half`` columns are appended, where
    ``half = domain_size[0] // 2``. Windows of shape ``domain_size`` are then taken from
    the extended field every ``half + 1`` grid points along both axes, and the mean of
    every window is stored. No halo is added along the first axis.

    Because of the ``dask.delayed`` decorator, calling this function returns a lazy
    ``Delayed`` object; the description below refers to the computed result.

    Parameters
    ----------
    var_large_field : xarray.DataArray
        2-D field with ``lat`` and ``lon`` coordinates. The first axis is treated as
        ``lat`` and the second as ``lon``.
    domain_size : tuple of int, optional
        Window shape in grid points; must be square with an odd edge length.
        Defaults to ``(117, 117)``.

    Returns
    -------
    xarray.DataArray
        Window means (float64) with dims ``('lat', 'lon')``, labelled with every
        ``half + 1``-th coordinate value of the input.

    Raises
    ------
    ValueError
        If ``domain_size`` is not square or not odd, if the field is not 2-D, or if
        its second axis is too short to provide the halo.
    """
    if domain_size[0] != domain_size[1]:
        raise ValueError('domain_size must be quadratic')
    if domain_size[0] % 2 != 1:
        raise ValueError('domain_size must have an odd number of pixels')

    half_size = domain_size[0] // 2
    stride_between_domains = half_size + 1

    # Work on a plain float64 array so the halo construction does not depend on how
    # xarray aligns differently named dimensions during item assignment.
    field_values = np.asarray(var_large_field, dtype=np.float64)
    if field_values.ndim != 2:
        raise ValueError('var_large_field must be two-dimensional')
    if field_values.shape[1] < half_size + 1:
        raise ValueError('second axis of var_large_field is shorter than the halo')

    # Halo swap: half the domain on both sides, plus one extra column on the left side.
    halo_values = np.concatenate(
        (
            field_values[:, -(half_size + 1):],  # left halo  <- right edge of the field
            field_values,                        # inner part
            field_values[:, :half_size],         # right halo <- left edge of the field
        ),
        axis=1,
    )

    # 4-D view: (window row, window column, row in window, column in window).
    radar_domains = sutil.view_as_windows(
        halo_values,
        window_shape=tuple(domain_size),
        step=stride_between_domains,
    )

    map_shape = radar_domains.shape[:2]
    latitude = var_large_field['lat'][half_size::stride_between_domains][:map_shape[0]]
    # NOTE(review): with the (half + 1)-wide left halo, window j is centred on input
    # column j * stride - 1, while the label below is lon[j * stride]. Kept as is to
    # leave the output coordinates unchanged - confirm whether the offset is intended.
    longitude = var_large_field['lon'][0::stride_between_domains][:map_shape[1]]

    # Mean over the two in-window axes gives one value per window.
    return xr.DataArray(
        radar_domains.mean(axis=(2, 3)),
        coords={'lat': latitude, 'lon': longitude},
        dims=('lat', 'lon'),
    )

In [14]:
# Open all files as a single dataset, pick variable 'r' and drop length-1 dimensions.
var_to_process = (
    xr.open_mfdataset(paths=file_names)['r']
    .squeeze()
)

In [15]:
# Parallelisation on time level: one delayed task per time step.
# Time steps are picked by position instead of formatting each timestamp as a string
# and looking it up again by label.
map_singletime = [
    slide_domain_over_tropics(var_to_process.isel(time=time_index))
    for time_index in range(var_to_process.sizes['time'])
]

In [16]:
# Hand the delayed per-time-step maps to dask.persist and keep the returned handles in `jobs`.
jobs = dask.persist(map_singletime)
# Show a text-mode (notebook=False) progress bar for the persisted tasks.
progress(jobs, notebook=False)

[########################################] | 100% Completed | 35min 39.3s

In [17]:
# Gather the computed per-time-step maps and stack them along the original time coordinate.
result = xr.concat(
    objs=dask.compute(*map_singletime),
    dim=var_to_process.time,
)

In [18]:
# Name the variable and attach its descriptive metadata.
result.name = 'rh_avg'
result.attrs.update({
    'units': '%',
    'long_name': 'Avg. RH at 500hPa across (117*2.5)x(117*2.5) km domain.',
})

In [19]:
# Write the averaged field to a NetCDF file.
result.to_netcdf(
    path='/work/mh0731/m300414/DyWinter_b10/RadarDomain_Grid/rh500.nc'
)