## Domain mean of different datasets for NAWDEX simulations

In [None]:
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt

import warnings
# Suppress all Python warnings for the rest of the session.
# NOTE(review): this also hides deprecation/runtime warnings raised by the
# libraries used below; disable this line while debugging.
warnings.filterwarnings("ignore")

In [None]:
# Create a dask.distributed client with default arguments; the dask-backed
# computations further down are expected to run through it.
from dask.distributed import Client
client = Client()
# Bare expression: shows the client summary as the notebook cell output.
client

In [None]:
# importing utility dictionaries
# The shared directory is appended to sys.path before the import below,
# presumably because dict_nawdexsims lives there (verify on the target system).
import sys
sys.path.append('/pf/b/b381185/behrooz/Hackathon_b/nawdex-hackathon/shared')

# simulations dictionary
# simdict is used below as a mapping: its keys are the simulation names that
# are looped over, and simdict[sim]['res'] is the resolution string
# ('80km', '40km', ...) used to pick the matching grid file.
import dict_nawdexsims
simdict = dict_nawdexsims.simdictionary()

## Cloud radiative heating rates

In [None]:
# Radiative Heating rates + cloud. Function for Loading, masking, domain_mean, and saving to nc file 

def load_iconnwp_hr_data():
    """Compute open-ocean domain means of the radiative heating rates.

    For every simulation in the module-level ``simdict`` this function

    1. opens the simulation's ``ddttemp_rad-from-fluxes`` zarr store,
    2. keeps only the cells whose ``mask_openocean`` value equals 1,
    3. forms the cell-area-weighted sum over ``ncells`` (weights are the
       cell areas of the retained cells normalised to sum to one),
    4. writes the result to one NetCDF file per simulation.

    Besides ``simdict`` the function reads the module-level sequence
    ``grids``, indexed 0..5 for the resolutions 80km..2km.
    NOTE(review): ``grids`` is not defined in the visible part of this
    notebook; it must exist before this function is called.

    Returns
    -------
    list of xarray.Dataset
        The domain-mean dataset of every processed simulation, in the
        iteration order of ``simdict``.

    Raises
    ------
    ValueError
        If a simulation has a resolution for which no grid file is known.
    """
    # Position of each resolution's grid name inside ``grids``.
    grid_index_by_res = {'80km': 0, '40km': 1, '20km': 2,
                         '10km': 3, '5km': 4, '2km': 5}

    path = '/work/bb1018/nawdex-hackathon_pp/ddttemp_rad-from-fluxes/'
    mask_dir = '/work/bb1018/nawdex-hackathon_pp/openoceanmask/'
    grid_prefix = '/work/bb1018/icon_4_hackathon/grids/icon-grid_nawdex_78w40e23n80n_'
    out_prefix = ('/work/bb1018/nawdex-hackathon_pp/radiative_heating_domain_mean/'
                  'atmradheating_fldmean_openocean_')

    ds_list = []

    # looping through simulations
    for sim in simdict:

        print('Working on loading data for', sim)

        # Resolve the grid file first, so an unknown resolution fails loudly
        # instead of silently reusing the grid file of the previous simulation.
        res = simdict[sim]['res']
        if res not in grid_index_by_res:
            raise ValueError(
                'No grid file known for resolution {!r} of simulation {}'.format(res, sim))
        fname1 = grid_prefix + grids[grid_index_by_res[res]] + '.nc'

        # loading radiative heating rate datasets
        ds = xr.open_zarr(path+sim+'_ddttemp_rad-from-fluxes_DOM01_ML.zarr')

        # Indices of the open-ocean cells; the mask file is closed again once
        # the indices have been extracted into a numpy array.
        with xr.open_dataset(mask_dir+sim+'_openoceanmask.nc') as mask_ds:
            index = np.where(mask_ds['mask_openocean'] == 1)[0]

        # Ocean masking
        ds = ds.isel(ncells=index)

        # Cell areas of the retained cells, loaded into memory so that the
        # grid file can be closed right away.
        with xr.open_dataset(fname1) as grid_ds:
            dg = grid_ds[['cell_area']].rename({'cell': 'ncells'})
            cell_area = dg.isel(ncells=index)['cell_area'].load()

        # Domain mean: area weights normalised over the retained cells.
        # The result is computed once here so that it can be both written
        # to disk and returned without re-reading the zarr store.
        weights = cell_area / cell_area.sum(dim=['ncells'])
        ds = (ds*weights).sum(dim=['ncells']).compute()

        # Saving to nc files
        ds.attrs['simulation'] = sim
        ds.to_netcdf(out_prefix+sim+'.nc')

        # Collect the result; the original never filled this list and
        # therefore always returned an empty list.
        ds_list.append(ds)

    return ds_list

#---------------------------------------
# Run the heating-rate processing for all simulations in simdict; one NetCDF
# file per simulation is written as a side effect of this call.
ds_icon_list_hr = load_iconnwp_hr_data()

## Other datasets

In [None]:
# Variables. Loading, masking, domain_mean (looping through time steps), and saving to nc file 

# Settings for running this function: Compute 64GB, 24 cores, 1 node, with dask configuration
# Multiple files, time step looping

def load_iconnwp_data():
    """Compute open-ocean domain means of 2D ICON output, one time step at a time.

    For every simulation in the module-level ``simdict`` this function

    1. collects the time-step tokens from the names of the simulation's
       ``*_2d_30min_DOM01_ML_*.nc`` files,
    2. for each time step opens the matching files, keeps the selected
       variables, restricts them to the cells whose ``mask_openocean`` value
       equals 1 and forms the cell-area-weighted sum over ``ncells``,
    3. concatenates the per-time-step results along ``time`` and writes them
       to one NetCDF file per simulation.

    Besides ``simdict`` the function reads the module-level sequence
    ``grids``, indexed 0..5 for the resolutions 80km..2km.
    NOTE(review): ``grids`` is not defined in the visible part of this
    notebook; it must exist before this function is called.

    Returns
    -------
    list of xarray.Dataset
        The concatenated domain-mean dataset of every processed simulation,
        in the iteration order of ``simdict``.

    Raises
    ------
    ValueError
        If a simulation has a resolution for which no grid file is known.
    FileNotFoundError
        If no time-step files are found for a simulation.
    """
    # Local import: ``Path`` is used below but not imported at file level.
    from pathlib import Path

    # Position of each resolution's grid name inside ``grids``.
    grid_index_by_res = {'80km': 0, '40km': 1, '20km': 2,
                         '10km': 3, '5km': 4, '2km': 5}

    path = '/work/bb1018/b380459/NAWDEX/ICON_OUTPUT_NWP/'
    grid_prefix = '/work/bb1018/icon_4_hackathon/grids/icon-grid_nawdex_78w40e23n80n_'

    ds_list = []

    # looping through simulations
    for sim in simdict:

        print('Working on loading data for', sim)

        # Resolve the grid file first, so an unknown resolution fails loudly
        # instead of silently reusing the grid file of the previous simulation.
        res = simdict[sim]['res']
        if res not in grid_index_by_res:
            raise ValueError(
                'No grid file known for resolution {!r} of simulation {}'.format(res, sim))
        fname1 = grid_prefix + grids[grid_index_by_res[res]] + '.nc'

        # Time-step tokens taken from the file names (7th '_'-separated field
        # without the extension).  1 hourly -> split('_')[5]
        # A set removes duplicates, because every token is expanded again with
        # a '_2016*_' glob when the files are opened; sorting happens once.
        tstep_list = sorted(
            {f.name.split('_')[6].split('.')[0]
             for f in Path(path+sim).rglob(sim+'_2016*_2d_30min_DOM01_ML_*.nc')},
            key=int)
        if not tstep_list:
            raise FileNotFoundError(
                'No 2d_30min files found for simulation {} below {}'.format(sim, path+sim))

        # loading open_ocean masking datasets
        fname = '/work/bb1018/nawdex-hackathon_pp/openoceanmask/'+sim+'_openoceanmask.nc' #(.split('-shcon')[-2])change only for shcon sims
        # The mask file is closed again once the cell indices are extracted.
        with xr.open_dataset(fname) as mask_ds:
            index = np.where(mask_ds['mask_openocean'] == 1)[0]

        # Cell areas of the open-ocean cells, loaded into memory so that the
        # grid file can be closed right away.
        with xr.open_dataset(fname1) as grid_ds:
            dg = grid_ds[['cell_area']].rename({'cell': 'ncells'})
            cell_area = dg.isel(ncells=index)['cell_area'].load()

        # Area weights do not depend on the time step: compute them once.
        weights = cell_area / cell_area.sum(dim=['ncells'])

        # Per-time-step domain means, concatenated after the loop
        ds_t = []
        # looping through time step for loading datasets
        for tstep in tstep_list:

            # choose datasets

            #ds = xr.open_mfdataset(path+'/'+sim+'/'+sim+'_2016*_3dcloud_DOM01_ML_'+str(tstep).zfill(4)+'.nc',
            #                    combine='by_coords', parallel=True, 
            #                    engine='h5netcdf', chunks={'ncells': 1e6} )[['clc']] #'time': 1 #change datasets

            #------------------------------------------------------------------------------------
            #ds = xr.open_mfdataset(path+'/'+sim+'/'+sim+'_2016*_2drad_30min_DOM01_ML_'+str(tstep).zfill(4)+'.nc',
            #                    combine='by_coords', parallel=True, 
            #                    engine='h5netcdf', chunks={'ncells': 1e6})

            # derive net shortwave fluxes at TOA
            #ds['sob_t'] = (ds['sod_t']-ds['sou_t'])

            # derive TOA CRE fluxes
            #ds['swtoacre'] = ds['sob_t'] - ds['swtoaclr']
            #ds['lwtoacre'] = ds['thb_t'] - ds['lwtoaclr']
            #ds['nttoacre'] = ds['lwtoacre'] + ds['swtoacre']

            # derive Surface CRE fluxes
            #ds['swsfccre'] = ds['sob_s'] - ds['swsfcclr']
            #ds['lwsfccre'] = ds['thb_s'] - ds['lwsfcclr']
            #ds['ntsfccre'] = ds['swsfccre'] + ds['lwsfccre']

            # derive atmospheric CRE fluxes
            #ds['swatmcre'] = ds['swtoacre'] - ds['swsfccre']
            #ds['lwatmcre'] = ds['lwtoacre'] - ds['lwsfccre']
            #ds['ntatmcre'] = ds['nttoacre'] - ds['ntsfccre']

            #--------------------------------------------------------------------------------

            #ds = xr.open_mfdataset(path+'/'+sim+'/'+sim+'_2016*_ddttemp_DOM01_ML_'+str(tstep).zfill(4)+'.nc',
            #                    combine='by_coords', parallel=True, 
            #                    engine='h5netcdf', chunks={'ncells': 1e6}) [['ddt_temp_dyn2','ddt_temp_radlw','ddt_temp_radsw','ddt_temp_totnwpphy','ddt_temp_mphy','ddt_temp_turb']]

            #--------------------------------------------------------------------------------
            #ds = xr.open_mfdataset(path+'/'+sim+'/'+sim+'_2016*_2d_30min_DOM01_ML_'+str(tstep).zfill(4)+'.nc',
            #                    combine='by_coords', parallel=True, 
            #                    engine='h5netcdf', chunks={'ncells': 1e6} )[['tqc_dia','tqi_dia','clch','clcm','clcl','clct','tqc','tqi']]

            # Active selection: precipitation rates and total precipitation.
            # The chunk size is given as an int (same value as the former 1e6).
            ds = xr.open_mfdataset(path+sim+'/'+sim+'_2016*_2d_30min_DOM01_ML_'+str(tstep).zfill(4)+'.nc',
                                   combine='by_coords', parallel=True,
                                   engine='h5netcdf', chunks={'ncells': 1000000}
                                   )[['snow_con_rate', 'rain_con_rate', 'snow_gsp_rate', 'rain_gsp_rate',
                                      'graupel_gsp_rate', 'tot_prec']]
            #--------------------------------------------------------------------------------

            # open_ocean masking dataset
            ds = ds.isel(ncells=index)

            # Domain mean; computed immediately so that only the reduced
            # result of each time step is kept.
            ds_t.append((ds*weights).sum(dim=['ncells']).compute())

        # Concatenating the time steps into 1 dataset for each simulation
        # (done once after the loop instead of after every time step)
        ds_c = xr.concat(ds_t, dim="time")

        # Saving to nc files
        ds_c.attrs['simulation'] = sim

        # change the path based on datasets
        ds_c.to_netcdf('/work/bb1018/nawdex-hackathon_pp/2d_30min_domain_mean/rain_snow/2d_30min_'+sim+'.nc')

        # Collect the result; the original never filled this list and
        # therefore always returned an empty list.
        ds_list.append(ds_c)

    return ds_list
#-----------------------------------
# Run the 2D-variable processing for all simulations in simdict; one NetCDF
# file per simulation is written as a side effect of this call.
ds_icon_list = load_iconnwp_data()