In [1]:
from getpass import getuser # Libaray to copy things
from pathlib import Path # Object oriented libary to deal with paths
import os
from tempfile import NamedTemporaryFile, TemporaryDirectory # Creating temporary Files/Dirs
from subprocess import run, PIPE
import sys
from dask.utils import format_bytes
import dask # Distributed data libary
from dask_jobqueue import SLURMCluster # Setting up distributed memories via slurm
from distributed import Client, progress, wait # Libaray to orchestrate distributed resources
import xarray as xr
import numpy as np

In [2]:
import warnings
warnings.filterwarnings(action='ignore')

In [3]:
# # Set some user specific variables
# scratch_dir = Path('/scratch') / getuser()[0] / getuser() # Define the users scratch dir
# # Create a temp directory where the output of distributed cluster will be written to, after this notebook
# # is closed the temp directory will be closed
# dask_tmp_dir = TemporaryDirectory(dir=scratch_dir, prefix='threshold')
# cluster = SLURMCluster(memory='500GiB',
#                        cores=72,
#                        project='mh0731',
#                        walltime='1:00:00',
#                        queue='gpu',
#                        name='threshold',
#                        scheduler_options={'dashboard_address': ':12435'},
#                        local_directory=dask_tmp_dir.name,
#                        job_extra=[f'-J threshold', 
#                                   f'-D {dask_tmp_dir.name}',
#                                   f'--begin=now',
#                                   f'--output={dask_tmp_dir.name}/LOG_cluster.%j.o',
#                                   f'--output={dask_tmp_dir.name}/LOG_cluster.%j.o'
#                                  ],
#                        interface='ib0')
# cluster.scale(jobs=1) # requests whole nodesy
# dask_client = Client(cluster)
# # dask_client.wait_for_workers(18) # gpu-partition has 9 workers per node

In [4]:
data_path = Path('/work/mh0731/m300414/DyWinter_b9/Tropics_20to20/Daily')
glob_pattern_2d = 'pr_*[0-9].nc'
 
# Collect all file names with pathlib's rglob and list compressions 
file_names = sorted([str(f) for f in data_path.rglob(f'{glob_pattern_2d}')])[10:] #[1:]
# dset = xr.open_mfdataset(file_names)
# var_names = ['pr']
# da = dset[var_names] #.persist()
# da

Radar data is in mm/hour and model data in kg/m2s. Conversion factor to get from mm/hour to kg/m2s is 1/3600.

In [5]:
rain_threshold = 3/3600

In [15]:
# opening all consecutive data does not work well later with the numpy-arrays
# ds = xr.open_mfdataset(file_names, combine='by_coords', parallel=True) # doesnt want to work, maybe too much although in dask
# ds
conv_rain = []
conv_percentage = []
for file in file_names:
    rain    = xr.open_dataset(file)['pr']
    rain_np = np.asarray(rain)
    n_rainy_cells = (rain_np > 0.).sum()
    conv_mask     =  rain_np > rain_threshold
    n_conv_cells  = conv_mask.sum()
    conv_rain.append( rain_np[conv_mask] )
    conv_percentage.append( conv_rain[-1].sum() / n_rainy_cells )

In [22]:
rain_conv = np.concatenate(conv_rain)

In [23]:
print(f'On average {np.asarray(conv_percentage).mean()*100}% of rainy cells are of convective nature, given a rain threshold of {rain_threshold*3600} mm/hour.')

On average 0.0041698179858736% of rainy cells are of convective nature, given a rain threshold of 3.0 mm/hour.


In [27]:
xr.DataArray(rain_conv).to_netcdf('/work/mh0731/m300414/DyWinter_b9/Tropics_20to20/conv_rain_3mmhour.nc')