In [19]:
"""
Created on Tue Jan 26 2021
@author: Nicole Albern

Compute time-mean, area-mean vertical profiles of a 3D model field
(currently specific humidity, tot_qv_dia) for each cloud class and
save them as netCDF files.
"""

import matplotlib.pyplot as plt
#import matplotlib as mpl
import numpy as np
import xarray as xr

import shutil
import zarr

import sys
sys.path.append('/pf/b/b380490/jupyter_notebooks/nawdex_hackathon')
import dict_nawdexsims
simdict = dict_nawdexsims.simdictionary()
#colordict = dict_nawdexsims.colordictionary()

from nawdexutils import drop_first_day, select_analysis_days

#import dask
#from dask.distributed import Client
#client = Client()
#client


## remove first day from dataset
#def drop_first_day(ds):
#    ntime = ds.time.size                   # number of time steps
#    firstday = ds.isel(time=0).time.dt.day # first day 
#    t_list = []                            # list of timesteps that do not belong to first day
#    for i in range(ntime):
#        if ds.isel(time=i).time.dt.day != firstday:
#            t_list.append(i)
#    return ds.isel(time=t_list)


In [20]:
# Set up a dask cluster on SLURM, following Aiko's GitHub repository
from tempfile import NamedTemporaryFile, TemporaryDirectory # Creating temporary files/dirs
import dask # Distributed data library
from dask_jobqueue import SLURMCluster # Setting up distributed memory via SLURM
from distributed import Client, progress, wait # Library to orchestrate distributed resources

In [21]:
# User-specific settings for the SLURM job that hosts the dask workers
account_name = 'bb1018' # Account the job is charged to (passed as project= to SLURMCluster)
partition = 'compute' # SLURM partition (passed as queue= to SLURMCluster)
job_name = 'cloud3d' # Job name that is submitted via sbatch
memory = '64GiB' # Max memory per node that is going to be used - this depends on the partition
cores = 48 # Max number of cores per job that are reserved - also partition dependent
walltime = '01:00:00' #'12:00:00' # Walltime - also partition dependent

In [22]:
scratch_dir = '/scratch/b/b380873/' # The user's scratch dir
# Temporary directory below scratch_dir that serves as the working directory
# of the SLURM job (-D), as the dask workers' local directory and as the
# location of the job log. It is removed again when the TemporaryDirectory
# object is cleaned up, so copy the logs elsewhere if they are needed later.
dask_scratch_dir = TemporaryDirectory(dir=scratch_dir, prefix=job_name)
cluster = SLURMCluster(memory=memory,
                       cores=cores,
                       project=account_name,
                       walltime=walltime,
                       queue=partition,
                       name=job_name,
                       processes=8,  # worker processes per job; cores are split among them
                       # requested dashboard port; distributed falls back to
                       # another port (with a warning) if this one is in use
                       scheduler_options={'dashboard_address': ':12435'},
                       local_directory=dask_scratch_dir.name,
                       # extra #SBATCH lines; no --error is given, so stderr
                       # ends up in the same log file as stdout
                       job_extra=[f'-J {job_name}',
                                  f'-D {dask_scratch_dir.name}',
                                  '--begin=now',
                                  f'--output={dask_scratch_dir.name}/LOG_cluster.%j.o',
                                 ],
                       interface='ib0')

Perhaps you already have a cluster running?
Hosting the HTTP server on port 45769 instead


In [23]:
# Show the batch script that will be submitted for each worker job, to
# check the #SBATCH directives before scaling the cluster
print(cluster.job_script())

#!/usr/bin/env bash

#SBATCH -J dask-worker
#SBATCH -p compute
#SBATCH -A bb1018
#SBATCH -n 1
#SBATCH --cpus-per-task=48
#SBATCH --mem=64G
#SBATCH -t 01:00:00
#SBATCH -J cloud3d
#SBATCH -D /scratch/b/b380873/cloud3dwy_0h6ez
#SBATCH --begin=now
#SBATCH --output=/scratch/b/b380873/cloud3dwy_0h6ez/LOG_cluster.%j.o
#SBATCH --output=/scratch/b/b380873/cloud3dwy_0h6ez/LOG_cluster.%j.o

JOB_ID=${SLURM_JOB_ID%;*}

/pf/b/b380459/conda-envs/Nawdex-Hackathon/bin/python3 -m distributed.cli.dask_worker tcp://10.50.40.118:40969 --nthreads 6 --nprocs 8 --memory-limit 8.59GB --name name --nanny --death-timeout 60 --local-directory /scratch/b/b380873/cloud3dwy_0h6ez --interface ib0



In [24]:
# Request one SLURM job; with the settings above it hosts 8 worker processes
cluster.scale(jobs=1)
# Display the cluster widget
cluster

VBox(children=(HTML(value='<h2>cloud3d</h2>'), HBox(children=(HTML(value='\n<div>\n  <style scoped>\n    .data…

In [25]:
# Connect this notebook to the cluster's scheduler; dask computations
# are sent to this client from now on
dask_client = Client(cluster)
# NOTE(review): the repr can report 0 workers while the SLURM job is still
# queued - presumably workers join once the job starts; confirm on the dashboard
dask_client

0,1
Client  Scheduler: tcp://10.50.40.118:40969  Dashboard: http://10.50.40.118:45769/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


In [28]:
##############################################################################
# For every simulation: time mean and area mean (open ocean only) of the
# specific humidity field 'tot_qv_dia', computed separately for each cloud
# class. All classes of one simulation are written to a single netCDF file
# with one variable 'cclass<N>' per class.
##############################################################################
# output directory
#opath = '/scratch/b/b380490/hackathon/'
opath = '/work/bb1018/b380873/hackathon/'

# input directories
ipath_grid = '/work/bb1018/icon_4_hackathon/'
ipath_oceanmask = '/work/bb1018/nawdex-hackathon_pp/'
ipath_cloudmask = '/pf/b/b380796/scratch/hackathon/george/'
ipath_data = '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/'

# Resolutions to process and the grid-file tag of each one. The two lists
# must have the same order because gridres is indexed with the loop counter.
# Pairs used in other runs: 80km/R80000m, 40km/R40000m, 20km/R20000m,
# 5km/R5000m, 2km/R2500m and 2km-shcon/R2500m.
resolutions = ['10km']
gridres = ['R10000m']

# cloud classes; the value c+1 in the cloud-class file is treated as
# cclasses[c]
ncclass = 8 # number of cloud classes
cclasses = ['High (H)', 'Middle (M)', 'Low (L)',
            'H-M', 'M-L', 'H-L', 'H-M-L', 'clear sky']
cclass_save = ['H', 'M', 'L', 'H-M', 'M-L', 'H-L', 'H-M-L', 'clearsky']

for r, resolution in enumerate(resolutions):
    print(resolution)
    # simulations to process for this resolution
    if resolution in ['80km', '40km', '20km']:
        sims = [ '0001', '0002', '0003', '0004', '0005',
                '0006', '0007', '0008', '0009', '0010']
    elif resolution == '2km-shcon':
        sims = ['0001-shcon', '0002-shcon', '0005-shcon',
                '0006-shcon', '0007-shcon', '0008-shcon',
                '0009-shcon', '0010-shcon', '0011-shcon',
                '0012-shcon']
        # the '-shcon' runs are stored under the plain '2km' resolution name
        resolution = '2km'
    else:
        # currently restricted to a single simulation; the full set for
        # these resolutions is '0001' ... '0012'
        sims = ['0012']

    for sim in sims:
        print('  ', sim)
        expid = 'nawdexnwp-' + resolution + '-mis-' + sim

        ##########################################################################
        # read ocean mask and keep the positional indices of open-ocean cells
        da_ocean = xr.open_dataset(ipath_oceanmask + '/openoceanmask/' + expid + \
                                   '_openoceanmask.nc')['mask_openocean']
        index = np.where(da_ocean == 1)[0]
        del da_ocean

        ##########################################################################
        # read cell area
        da_cell_area = xr.open_dataset(ipath_grid + '/grids/icon-grid_nawdex_78w40e23n80n_' + \
                                       gridres[r] + '.nc')['cell_area'].rename({'cell': 'ncells'})

        # weight for area mean
        # NOTE(review): the weights are normalised with the area of ALL cells
        # and only afterwards restricted to the ocean cells, so they do not
        # sum to one over the cells that enter the mean - confirm this is
        # intended before comparing absolute values.
        weights = da_cell_area / (da_cell_area).sum(dim=['ncells'])

        # apply open ocean mask
        weights = weights.isel(ncells=index)

        del da_cell_area

        ##########################################################################
        # read specific humidity lazily, one dask chunk per time step
        # (the variable keeps its historical name 'da_ccover')
        print('    read q')
        da_ccover = xr.open_mfdataset(ipath_data + expid + '/' +
                                      expid + '_2016*_3dcloud_DOM01_ML_*.nc',
                                      combine='by_coords',parallel=True,
                                      engine='h5netcdf', chunks={'time': 1})['tot_qv_dia']

        # get days that are available for all resolutions
        da_ccover = select_analysis_days(da_ccover, expid)

        # apply open ocean mask
        da_ccover = da_ccover.isel(ncells=index)

        ##########################################################################
        # read cloud classes
        print('   read cloud class')
        # cloud class stored every 30 minutes; model field stored every hour
        # -> resample cloud class to get it every hour.
        # The tolerance is written as "5min": the former "5M" relies on 'M'
        # being read as minutes, which newer pandas versions reject as an
        # ambiguous unit.
        da_cclass = xr.open_dataset(ipath_cloudmask + 'nawdexnwp_' + resolution + \
                                    '_cloudclass_mis_' + sim + '_hq65_mq70_lq35.nc'
                                    ).rename({'clch': 'cclass'}
                                    ).resample(time="1H").nearest(tolerance="5min")['cclass']

        # get days that are available for all resolutions
        da_cclass = select_analysis_days(da_cclass, expid)

        ##########################################################################
        # calculate time mean and area mean of the field for each cloud class
        # and collect the results as variables 'cclass1' ... 'cclass8'.
        # Variables are assigned by key instead of using the return value of
        # Dataset.update(), which xarray documents as deprecated.
        ds_meanall = xr.Dataset()
        for c, clas in enumerate(cclasses):
            print('   ', clas)

            # time mean over the time steps at which a cell belongs to the class
            da_mean = da_ccover.where(da_cclass == c+1).mean('time')

            # weighted area mean
            da_mean = (da_mean*weights).sum(dim='ncells')

            # store data in dataset
            ds_meanall['cclass' + str(c+1)] = da_mean

            del da_mean
        del c, clas

        # save means as nc file
        print('      save data')
        ofile = expid + '_qv_hq65_mq70_lq35.nc'
        ds_meanall.to_netcdf(path=opath + ofile, format='NETCDF3_64BIT')

        del ds_meanall, ofile

        ##########################################################################
        # drop the per-simulation objects before the next iteration
        del index, weights
        del expid
        del da_cclass, da_ccover
    del sim

del resolution
print('Done!')
        

10km
   0012
    read q


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


   read cloud class
    High (H)
    Middle (M)
    Low (L)
    H-M
    M-L
    H-L
    H-M-L
    clear sky
      save data
Done!


In [None]:
##############################################################################
#                        !!!!! OLD VERSION !!!!!
# Stores each cloud class in an individual file. For every simulation the
# cloud cover 'clc' is averaged in time and over the open-ocean cells,
# separately for each cloud class.
##############################################################################

# output directory
opath = '/scratch/b/b380490/hackathon/'

# input directories
ipath_grid = '/work/bb1018/icon_4_hackathon/'
ipath_oceanmask = '/work/bb1018/nawdex-hackathon_pp/'
ipath_cloudmask = '/pf/b/b380796/scratch/hackathon/george/'
ipath_data = '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/'

# resolutions to process and the grid-file tag of each (same order)
resolutions = ['10km', '5km']
gridres = ['R10000m', 'R5000m']

# cloud classes: labels for the log output and tags for the file names
ncclass = 8 # number of cloud classes
cclasses = ['High (H)', 'Middle (M)', 'Low (L)',
            'H-M', 'M-L', 'H-L', 'H-M-L', 'clear sky']
cclass_save = ['H', 'M', 'L', 'H-M', 'M-L', 'H-L', 'H-M-L', 'clearsky']

for r, resolution in enumerate(resolutions):
    print(resolution)

    # ten simulations for 80km/40km/20km, two more for every other resolution
    sims = ['0001', '0002', '0003', '0004', '0005',
            '0006', '0007', '0008', '0009', '0010']
    if resolution not in ['80km', '40km', '20km']:
        sims = sims + ['0011', '0012']

    for sim in sims:
        print('  ', sim)
        expid = f'nawdexnwp-{resolution}-mis-{sim}'

        # open-ocean mask -> positional indices of the cells to keep
        da_ocean = xr.open_dataset(
            f'{ipath_oceanmask}/openoceanmask/{expid}_openoceanmask.nc'
        )['mask_openocean']
        index = np.where(da_ocean == 1)[0]
        del da_ocean

        # area weights: cell area divided by the summed area of all cells,
        # then restricted to the open-ocean cells
        da_cell_area = xr.open_dataset(
            f'{ipath_grid}/grids/icon-grid_nawdex_78w40e23n80n_{gridres[r]}.nc'
        )['cell_area'].rename({'cell': 'ncells'})
        weights = (da_cell_area / da_cell_area.sum(dim=['ncells'])).isel(ncells=index)
        del da_cell_area

        # cloud cover, opened lazily with one chunk per time step; the first
        # day is dropped and only open-ocean cells are kept
        print('   read cloud cover')
        da_ccover = xr.open_mfdataset(
            f'{ipath_data}{expid}/{expid}_2016*_3dcloud_DOM01_ML_*.nc',
            combine='by_coords', parallel=True,
            engine='h5netcdf', chunks={'time': 1},
        )['clc']
        da_ccover = drop_first_day(da_ccover).isel(ncells=index)

        # cloud classes are stored every 30 minutes, the model field every
        # hour -> keep every second entry along the leading dimension
        print('   read cloud class')
        da_cclass = xr.open_dataset(
            f'{ipath_cloudmask}nawdexnwp_{resolution}_cloudclass_mis_{sim}.nc'
        ).rename({'clch': 'cclass'})['cclass'][::2]

        # one output file per cloud class
        for c, clas in enumerate(cclasses):
            print('   ', clas)

            # time mean over the steps flagged with class number c+1,
            # followed by the weighted area mean
            in_class = da_cclass == c + 1
            da_mean = (da_ccover.where(in_class).mean('time') * weights).sum(dim='ncells')

            print('      save data')
            ofile = f'{expid}_{cclass_save[c]}_cloudcover_timemean_areamean_oceanmask_applied.nc'
            da_mean.to_netcdf(path=opath + ofile, format='NETCDF3_64BIT')

            del in_class, da_mean, ofile
        del c, clas

        # drop the per-simulation objects before the next iteration
        del index, weights, expid, da_cclass, da_ccover
    del sim

del resolution
print('Done!')