In [1]:
from dask.distributed import Client
import multiprocessing

# Size the local dask cluster from the machine's CPU count: aim for 8 threads
# per worker, but never ask for more threads than there are CPUs. Without the
# cap, `ncpu // 8` evaluates to 0 workers on machines with fewer than 8 CPUs.
ncpu = multiprocessing.cpu_count()
threads = min(8, ncpu)
nworker = ncpu // threads  # always >= 1 because threads <= ncpu
print(
    f"Number of CPUs: {ncpu}, number of threads: {threads}, number of workers: {nworker}"
)

Number of CPUs: 48, number of threads: 8, number of workers: 6


In [2]:
# Start a local cluster made of separate worker processes and connect to it.
# NOTE(review): dask applies memory_limit per worker, while the recorded output
# reports 133.88 GB in total — confirm that "256GB" is the intended value.
client = Client(
    n_workers=nworker,
    threads_per_worker=threads,
    processes=True,
    memory_limit="256GB",
)
client

0,1
Client  Scheduler: tcp://127.0.0.1:35099  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 6  Cores: 48  Memory: 133.88 GB


In [3]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import metcalc
import scipy.stats as stat
import metpy
import pandas as pd

In [114]:
# Load the sea-mask file; its `sea` variable is multiplied onto the fields in calc_PRSST.
SST_mask = xr.open_dataset("../data/sea_mask_icon.nc")

In [115]:
def calc_PRSST(ds, lat_bound):
    """Compute the precipitation-weighted SST inside a latitude band.

    The result is ``fldmean(pr * SST) / fldmean(pr * sea)`` with
    ``SST = ts * sea``, where ``sea`` comes from the notebook-level
    ``SST_mask`` dataset and ``fldmean`` is ``metcalc.xr_fldmean``.

    Parameters
    ----------
    ds : xarray.Dataset
        Must provide the variables ``ts`` and ``pr`` and a ``lat`` coordinate.
        It is not modified; the masked SST is added to a copy.
    lat_bound : number
        Bound of the band; ``lat`` is selected with
        ``slice(lat_bound, -lat_bound)``.

    Returns
    -------
    xarray.Dataset
        Dataset holding the single variable ``PRSST``.
    """
    # assign() returns a new dataset, so the caller's `ds` does not silently
    # gain an `SST` variable as a side effect of calling this function.
    ds = ds.assign(SST=ds.ts * SST_mask.sea)

    # NOTE(review): slicing from +lat_bound to -lat_bound presumably relies on
    # `lat` being stored north-to-south — confirm before using other grids.
    ds_tropics = ds.sel(lat=slice(lat_bound, -lat_bound))

    weighted_sst = metcalc.xr_fldmean(ds_tropics.pr * ds_tropics.SST)
    mean_weight = metcalc.xr_fldmean(ds_tropics.pr * SST_mask.sea)
    PRSST = weighted_sst / mean_weight

    return PRSST.to_dataset(name='PRSST')
    

# Read files

In [42]:
# We can preprocess each file with the preproc function.
# Especially important is the assignment of a new `member` entry that
# describes which ensemble member the file belongs to.
# The member id is read from the start of the file's base name (the part before
# the first "_"), so it does not depend on where the data directory lives.

def preproc(ds):
    """Squeeze length-1 dimensions and tag the dataset with its member id.

    Parameters
    ----------
    ds : dataset opened from a single file
        ``ds.encoding['source']`` must hold the path of that file, whose base
        name is expected to look like ``<member>_<variable>_...``.

    Returns
    -------
    The squeezed dataset with ``ds['member']`` set to the member id string.
    """
    import os  # local import keeps this cell runnable on its own

    filename = ds.encoding['source']
    # Use the first "_"-separated field of the base name instead of fixed
    # character offsets into the full path, which break once the data is moved.
    member = os.path.basename(filename).split('_')[0]
    ds = ds.squeeze(drop=True)  # real boolean rather than the string 'true'
    ds['member'] = member
    return ds



# Open one file per ensemble member for each variable and stack the files along
# a new `member` dimension; preproc is applied to every file before combining.
data_ts = xr.open_mfdataset(
    '../data/icon-param-exp/ppk00??_ts_remap63_1979-2014_mm.nc',
    preprocess=preproc,
    combine='nested',
    concat_dim='member',
    parallel=True,
)

data_pr = xr.open_mfdataset(
    '../data/icon-param-exp/ppk00??_pr_remap63_1979-2014_mm.nc',
    preprocess=preproc,
    combine='nested',
    concat_dim='member',
    parallel=True,
)




In [43]:
# Display the opened precipitation data to check its shape and chunking.
data_pr

Unnamed: 0,Array,Chunk
Bytes,1.72 GB,31.92 MB
Shape,"(54, 433, 96, 192)","(1, 433, 96, 192)"
Count,216 Tasks,54 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.72 GB 31.92 MB Shape (54, 433, 96, 192) (1, 433, 96, 192) Count 216 Tasks 54 Chunks Type float32 numpy.ndarray",54  1  192  96  433,

Unnamed: 0,Array,Chunk
Bytes,1.72 GB,31.92 MB
Shape,"(54, 433, 96, 192)","(1, 433, 96, 192)"
Count,216 Tasks,54 Chunks
Type,float32,numpy.ndarray


# Process

In [44]:
def correct_time(ds):
    '''Shift the ICON time stamps back by one month.

    The time stamps in ICON are wrong, so the months need to be moved one back:
    the records selected by the time range 19790201..20150101 are relabelled
    with the month-end dates from January 1979 to December 2014.

    Parameters
    ----------
    ds : dataset with a `time` coordinate
        NOTE(review): the integer slice bounds presumably match a numeric
        YYYYMMDD-style time axis — confirm against the input files.

    Returns
    -------
    The time-subset dataset with `time` replaced by month-end timestamps.
    '''
    ds = ds.sel(time=slice(19790201, 20150101))
    # Pass the offset object rather than the string alias 'M': the alias is
    # deprecated in pandas >= 2.2 (renamed to 'ME'), whereas MonthEnd() yields
    # the same month-end stamps on every pandas version.
    ds['time'] = pd.date_range(start='1/1979', end='01/2015', freq=pd.offsets.MonthEnd())
    return ds

In [45]:
# Combine precipitation and surface temperature into one dataset and fix the
# time axis. assign() builds a new dataset, so data_pr itself stays unchanged
# and this cell gives the same result every time it is re-run.
data = correct_time(data_pr.assign(ts=data_ts.ts))

#data = data.resample(time='Y').mean()
#data['time'] = np.arange(1979.,2015.)

# Calculate PRSST

In [117]:
# PRSST for the latitude band selected with lat_bound = 20.
PRSST = calc_PRSST(data, lat_bound=20)

In [120]:
# Evaluate the result and keep it under the same name; %time reports the cost.
%time PRSST = PRSST.compute()

CPU times: user 2.03 s, sys: 53 ms, total: 2.08 s
Wall time: 3.48 s


In [122]:
# Display the computed PRSST dataset for a quick visual check.
PRSST

In [49]:
!mkdir -p ..data/icon-param-exp
PRSST.to_netcdf('../data/icon-param-exp/PRSST_20_all.nc')