In [1]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import glob
import os 

In [2]:
from dask_jobqueue import SLURMCluster

# Resources requested for EACH SLURM job. dask-jobqueue divides a job's cores
# and memory among the worker processes it starts inside that job.
slurm_job_spec = dict(
    job_name="Climt1",      # --job-name
    cores=12,               # total cores per job
    processes=6,            # worker processes per job (12 cores / 6 processes)
    memory="100GB",         # --mem, total per job
    walltime="01:15:00",    # --time
    queue="med",            # --partition
    log_directory=".",      # job logs are written to the current directory
)

cluster = SLURMCluster(**slurm_job_spec)



Perhaps you already have a cluster running?
Hosting the HTTP server on port 43149 instead


In [3]:
from dask.distributed import Client

# Connect this notebook to the cluster's scheduler, then request 10 SLURM jobs.
# Workers join the scheduler as the queued jobs start running.
client = Client(cluster)
cluster.scale(jobs=10)


In [4]:
# Display the client summary (dashboard link, worker / thread / memory totals).
# NOTE(review): the totals may read 0 right after scaling, until the queued
# SLURM jobs have actually started.
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: http://10.42.239.61:43149/status,

0,1
Dashboard: http://10.42.239.61:43149/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.42.239.61:37413,Workers: 0
Dashboard: http://10.42.239.61:43149/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [21]:
# Build an inventory of the monthly ERA5 files and keep one season.
infol = '/home1/nalex2023/Datasets/era5_manus/'
files = glob.glob(infol+'*.nc')

dft_era = pd.DataFrame(files, columns=['file'])

# The month stamp is read from the file name: take the third '_'-separated
# field and keep the text before its first 'm', then parse that as YYYYMM.
file_names = dft_era['file'].str.split(os.sep).str[-1]
stamp_field = file_names.str.split('_').str[2]
year_month = stamp_field.str.split('m').str[0]

dft_era['datetime'] = pd.to_datetime(year_month, format='%Y%m')
dft_era['month'] = dft_era['datetime'].dt.month

# Seasons are DJF, MAM, JJA, SON; this run keeps SON (months 9, 10, 11).
son_months = [9, 10, 11]
in_season = dft_era['month'].isin(son_months)
seasonal = dft_era[in_season]

seasonal


Unnamed: 0,file,datetime,month
0,/home1/nalex2023/Datasets/era5_manus/era5_plev...,2003-10-01,10
5,/home1/nalex2023/Datasets/era5_manus/era5_plev...,2011-11-01,11
7,/home1/nalex2023/Datasets/era5_manus/era5_plev...,2010-11-01,11
9,/home1/nalex2023/Datasets/era5_manus/era5_plev...,2002-10-01,10
14,/home1/nalex2023/Datasets/era5_manus/era5_plev...,2012-11-01,11
17,/home1/nalex2023/Datasets/era5_manus/era5_plev...,2001-10-01,10
21,/home1/nalex2023/Datasets/era5_manus/era5_plev...,2004-10-01,10
24,/home1/nalex2023/Datasets/era5_manus/era5_plev...,2005-10-01,10
30,/home1/nalex2023/Datasets/era5_manus/era5_plev...,2007-10-01,10
32,/home1/nalex2023/Datasets/era5_manus/era5_plev...,2006-10-01,10


In [22]:
def prepro(ds):
    """Cut one opened file down to the winds and region used in this analysis.

    Passed as the ``preprocess`` callback of ``xr.open_mfdataset``, so it is
    applied to each file before the files are combined.

    Keeps only the ``u`` and ``v`` variables and label-slices
    ``pressure_level`` from 1000 to 800, ``latitude`` from 0 to -5 and
    ``longitude`` from 141 to 154.

    NOTE(review): the pressure and latitude slices run from the larger to the
    smaller value, which presumably matches coordinates stored in descending
    order -- confirm against the files.
    """
    wanted_vars = ['u', 'v']
    region = dict(
        pressure_level=slice(1000, 800),
        latitude=slice(0, -5),
        longitude=slice(141, 154),
    )
    return ds[wanted_vars].sel(**region)


# Open every file of the selected season as a single dataset. Each file is
# trimmed by `prepro` before the files are combined by their coordinates.
dset_seasonal = xr.open_mfdataset(
    seasonal['file'],
    preprocess=prepro,
    combine='by_coords',
    chunks='auto',
    parallel=True,
)




In [23]:
# Inspect the combined dataset (array shapes and dask chunking) before reducing it.
dset_seasonal

Unnamed: 0,Array,Chunk
Bytes,420.75 kiB,11.62 kiB
Shape,"(26928,)","(744,)"
Dask graph,37 chunks in 112 graph layers,37 chunks in 112 graph layers
Data type,,
"Array Chunk Bytes 420.75 kiB 11.62 kiB Shape (26928,) (744,) Dask graph 37 chunks in 112 graph layers Data type",26928  1,

Unnamed: 0,Array,Chunk
Bytes,420.75 kiB,11.62 kiB
Shape,"(26928,)","(744,)"
Dask graph,37 chunks in 112 graph layers,37 chunks in 112 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,265.33 MiB,2.85 MiB
Shape,"(26928, 3, 21, 41)","(480, 2, 19, 41)"
Dask graph,344 chunks in 137 graph layers,344 chunks in 137 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 265.33 MiB 2.85 MiB Shape (26928, 3, 21, 41) (480, 2, 19, 41) Dask graph 344 chunks in 137 graph layers Data type float32 numpy.ndarray",26928  1  41  21  3,

Unnamed: 0,Array,Chunk
Bytes,265.33 MiB,2.85 MiB
Shape,"(26928, 3, 21, 41)","(480, 2, 19, 41)"
Dask graph,344 chunks in 137 graph layers,344 chunks in 137 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,265.33 MiB,2.85 MiB
Shape,"(26928, 3, 21, 41)","(480, 2, 19, 41)"
Dask graph,344 chunks in 137 graph layers,344 chunks in 137 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 265.33 MiB 2.85 MiB Shape (26928, 3, 21, 41) (480, 2, 19, 41) Dask graph 344 chunks in 137 graph layers Data type float32 numpy.ndarray",26928  1  41  21  3,

Unnamed: 0,Array,Chunk
Bytes,265.33 MiB,2.85 MiB
Shape,"(26928, 3, 21, 41)","(480, 2, 19, 41)"
Dask graph,344 chunks in 137 graph layers,344 chunks in 137 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [24]:
# Mean diurnal cycle: group the time steps by the hour component of
# `valid_time` and average within each group.
hourly_groups = dset_seasonal.groupby('valid_time.hour')
dset_seasonal_diurnal = hourly_groups.mean()


In [25]:
# Write the SON diurnal climatology to disk.
# The output folder sits inside the input folder; the input listing uses a
# non-recursive '*.nc' pattern, so this file is not picked up as an input.
out_dir = '/home1/nalex2023/Datasets/era5_manus/climt/'

# Create the folder if it is missing so the write does not fail on a fresh setup.
os.makedirs(out_dir, exist_ok=True)

dset_seasonal_diurnal.to_netcdf(os.path.join(out_dir, 'era5_diurnal_SON.nc'))


In [10]:
# Release resources once the result has been written.
dset_seasonal_diurnal.close()

# The NetCDF file handles are owned by the dataset returned by
# open_mfdataset, so close that dataset explicitly as well.
dset_seasonal.close()

# NOTE(review): the SLURM jobs keep running until their walltime unless
# client.close() / cluster.close() are called -- add them here if no later
# cell still needs the cluster.