In [None]:
from dask_jobqueue import SLURMCluster

# Template for a single SLURM job; Dask starts its workers inside jobs built from it.
# NOTE(review): per the dask-jobqueue documentation, `cores` and `memory` are totals
# for one job and `processes` is the number of worker processes that job is split
# into. With processes=5 and cores=2 that is less than one core per worker process
# -- confirm these numbers are intended.
cluster = SLURMCluster(
    job_name="Climt1",          # --job-name
    cores=2,                     # total cores requested for one job
    processes=5,                 # worker processes started inside one job
    memory="100GB",               # --mem (total for one job)
    walltime="00:15:00",         # --time
    queue="med",               # --partition
    log_directory=".",           # Logs will be saved to the current directory
)


In [2]:
from dask.distributed import Client

# Request 10 SLURM jobs from the cluster, then connect this notebook session to it.
# NOTE(review): scale() presumably returns before the jobs are actually running, so
# work submitted straight away may sit waiting for workers -- confirm, or block on
# client.wait_for_workers(...) if that matters.
cluster.scale(jobs=10)
client = Client(cluster)

In [None]:
# Display the client summary to check the connection to the cluster.
client

In [None]:
# test the cluster
import dask.array as da

# Smoke test: a 10000 x 10000 random array split into 1000 x 1000 chunks is added
# to its transpose and reduced to one mean. The number itself is not used anywhere;
# the point is that .compute() runs to completion.
x = da.random.random((10000, 10000), chunks=(1000, 1000))

(x + x.T).mean().compute()




In [5]:
import xarray as xr
import numpy as np
import glob
import pandas as pd
import os

In [44]:
def create_dft(in_dir,arm_data=True):
    """Build a table of the NetCDF files found directly inside ``in_dir``.

    ``*.nc`` files are preferred; ``*.cdf`` files are only looked for when no
    ``*.nc`` file exists.

    Parameters
    ----------
    in_dir : str or path-like
        Directory to search (not recursive). It may be given with or without a
        trailing path separator.
    arm_data : bool, default True
        When True, add a ``datetime`` column parsed from the third and fourth
        dot-separated fields of each file name (``<YYYYmmdd>`` then
        ``<HHMMSS>``). Every file name must therefore contain at least four
        dot-separated fields.

    Returns
    -------
    pandas.DataFrame
        One row per file, ordered by path, with the columns ``filepath`` and
        ``filename``, plus ``datetime`` when ``arm_data`` is True.

    Raises
    ------
    FileNotFoundError
        If the directory holds neither ``*.nc`` nor ``*.cdf`` files.
    """
    files = []
    for pattern in ('*.nc', '*.cdf'):
        # glob returns matches in arbitrary order; sorting keeps the row order
        # reproducible from one run to the next.
        files = sorted(glob.glob(os.path.join(in_dir, pattern)))
        if files:
            break
    if not files:
        # FileNotFoundError is still an Exception, so existing handlers keep working.
        raise FileNotFoundError('No files Found in ' + repr(in_dir))

    dft = pd.DataFrame(files,columns=['filepath'])
    dft['filename'] = dft['filepath'].map(os.path.basename)

    if arm_data:
        # Split once and reuse: field 2 is the date, field 3 the time of day.
        fields = dft['filename'].str.split('.', expand=True)
        dft['datetime'] = pd.to_datetime(
            fields.iloc[:, 2] + fields.iloc[:, 3], format='%Y%m%d%H%M%S')
    return dft


In [None]:
# Index the ERA5 files. Their names do not follow the ARM layout, so the
# timestamp is derived here rather than inside create_dft.
in_era = '/home1/nalex2023/Datasets/era5_manus/'
dft = create_dft(in_era, arm_data=False)

# The last '_'-separated piece of each file name starts with YYYYMM.
dft['datetime'] = pd.to_datetime(
    dft['filename'].str.split('_').str[-1].str[:6],
    format='%Y%m',
)

# Chronological order; this is the table the files are opened from later on.
dft_sort = dft.sort_values(by='datetime')
dft_sort

In [46]:

"""
def prepro(ds):
    ds = ds.sel(pressure_level=slice(1000,800))[['u','v']]
    return ds
"""
def prepro(ds, level=1000, variables=('u', 'v')):
    """Reduce one opened ERA5 file to the wind fields used in this notebook.

    Passed as ``preprocess`` to ``xr.open_mfdataset``, which calls it with a
    single argument on every file before the files are combined.

    Parameters
    ----------
    ds : xarray.Dataset
        One opened file; it must have a ``pressure_level`` coordinate.
    level : scalar or slice, default 1000
        Value handed to ``ds.sel(pressure_level=...)``. Pass a slice such as
        ``slice(1000, 800)`` to keep a layer instead of a single level.
    variables : sequence of str, default ('u', 'v')
        Data variables to keep.

    Returns
    -------
    xarray.Dataset
        Always a Dataset (a list is used for the variable selection), so the
        combined result keeps every requested variable. The wind-speed cell
        further down needs both ``u`` and ``v``.
    """
    return ds.sel(pressure_level=level)[list(variables)]


# Open all files lazily as one dataset. A plain list of paths is passed because
# that is the documented input type for open_mfdataset.
dset = xr.open_mfdataset(
    dft_sort['filepath'].tolist(),
    preprocess=prepro,
    combine='by_coords',
    parallel=True,
    chunks='auto',
)

# Mean diurnal cycle: group every timestamp by its hour of day and average.
# Both components are kept so that the wind speed can be derived afterwards.
hourly_summary = dset[['u', 'v']].groupby('valid_time.hour').mean()

# Alternative kept for reference: the same diurnal cycle computed per season.
#seasonal = dset[['u','v']].groupby('valid_time.season').apply(lambda x: x.groupby('valid_time.hour').mean())



In [None]:
# Inspect the hourly summary before the computation is triggered further down.
hourly_summary
#seasonal

In [10]:
# Wind speed from the hourly-mean components: sqrt(u*u + v*v).
# This is the magnitude of the mean wind vector, not the mean of the
# instantaneous speeds.
# Requires hourly_summary to be a Dataset that holds both 'u' and 'v'.
hourly_summary['speed'] = np.sqrt(
    hourly_summary['u'] * hourly_summary['u']
    + hourly_summary['v'] * hourly_summary['v']
)



In [None]:
# Run the deferred computation and keep the result in this session's memory.
final_data_loc = hourly_summary.compute()



#final_data_loc = seasonal.compute()

In [24]:
# Write the computed diurnal summary to a NetCDF file in the output folder.
out_dir = '/home1/nalex2023/Datasets/LSB_OUTS/'
final_data_loc.to_netcdf(os.path.join(out_dir, 'era5_maritime_diurnal.nc'))

In [9]:
def sst_date_token(filenames):
    """Return the date token of each SST file name.

    The token is the second-to-last dot-separated field of the name. A literal
    " (1)" inside it (presumably left by a duplicate download -- confirm) is
    removed.

    Parameters
    ----------
    filenames : pandas.Series of str
        File names without their directory part.

    Returns
    -------
    pandas.Series of str
        One token per file name, aligned with the input index.
    """
    tokens = filenames.str.split('.').str[-2]
    # regex=False: " (1)" must be matched literally. Read as a regular
    # expression, the parentheses form a group and the pattern matches " 1".
    return tokens.str.replace(' (1)', '', regex=False)


sst_fol = '/home1/nalex2023/Datasets/sst_datasets'

# Sorted so that positional lookups (iloc) pick the same file on every run.
sst_files = sorted(glob.glob(sst_fol + '/*.nc'))

sst_files_df = pd.DataFrame(sst_files,columns=['filepath'])

sst_files_df['filename'] = sst_files_df['filepath'].str.split(os.sep).str[-1]

sst_files_df['datetime'] = sst_date_token(sst_files_df['filename'])



In [None]:
def prepro_sst(ds):
    """Crop ``ds`` to latitude -5..5 and longitude 141..151.

    The selection is label based (``ds.sel`` with slices), so ``ds`` must
    expose coordinates named ``latitude`` and ``longitude``.

    NOTE(review): a label slice follows the stored coordinate order; if
    ``latitude`` is stored from north to south, ``slice(-5, 5)`` presumably
    selects nothing -- confirm against the SST files.
    """
    return ds.sel(longitude=slice(141, 151), latitude=slice(-5, 5))

# Open the first listed SST file on its own to inspect its structure.
# NOTE(review): the dataset is not closed explicitly in the visible cells.
test = xr.open_dataset(sst_files_df.iloc[0]['filepath'])

test