In [1]:
from dask_jobqueue import SLURMCluster
import matplotlib.pyplot as plt 
import glob
import os
import xarray as xr 
import pandas as pd
import numpy as np


# Describe a single SLURM job; cluster.scale() later decides how many such jobs to submit.
cluster = SLURMCluster(
    job_name="Climt2",          # --job-name
    cores=2,                     # Total cores requested per job (adjust if needed)
    processes=2,                 # Worker processes per job (two here, not one); presumably the cores and memory are shared between them - TODO confirm intended layout
    memory="20GB",               # --mem (requested per job)
    walltime="00:15:00",         # --time; NOTE(review): confirm 15 minutes is enough for the later read/write cells
    queue="short",               # --partition
    log_directory=".",           # Logs will be saved to the current directory
)


from dask.distributed import Client
# Ask for 10 SLURM jobs built from the cluster specification above.
cluster.scale(jobs=10)
# Connect this notebook session to the cluster's scheduler.
client = Client(cluster)

# Bare expression so the notebook renders the client summary. The saved output
# reports 0 workers, presumably because the jobs had not started yet.
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: http://10.42.239.61:8787/status,

0,1
Dashboard: http://10.42.239.61:8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.42.239.61:41477,Workers: 0
Dashboard: http://10.42.239.61:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [4]:
# Directory that create_dft() scans for the met files (*.nc, falling back to *.cdf).
met_arm = '/home1/nalex2023/Datasets/met_manus/'


def create_dft(in_dir,arm_data=True):
    """Build a DataFrame cataloguing the NetCDF files found in a directory.

    The directory is searched for ``*.nc`` files; only when none are found is
    it searched for ``*.cdf`` files instead.

    Parameters
    ----------
    in_dir : str
        Directory to search. A trailing path separator is harmless.
    arm_data : bool, default True
        When True, add a ``datetime`` column parsed from each filename by
        joining its third and fourth dot-separated fields and reading them as
        ``%Y%m%d%H%M%S`` (e.g. ``name.b1.19970101.000000.cdf``).

    Returns
    -------
    pandas.DataFrame
        One row per file with columns ``filepath`` and ``filename``, plus
        ``datetime`` when ``arm_data`` is True. Rows follow sorted path order.

    Raises
    ------
    FileNotFoundError
        If the directory holds neither ``*.nc`` nor ``*.cdf`` files. This is
        a subclass of ``Exception``, so existing ``except Exception`` callers
        keep working.
    """
    # os.path.join avoids doubled separators when in_dir already ends with one;
    # sorting makes the row order deterministic (glob order is arbitrary).
    files = sorted(glob.glob(os.path.join(in_dir, '*.nc')))
    if not files:
        files = sorted(glob.glob(os.path.join(in_dir, '*.cdf')))
    if not files:
        raise FileNotFoundError('No files Found')

    dft = pd.DataFrame(files, columns=['filepath'])
    dft['filename'] = dft['filepath'].map(os.path.basename)

    if arm_data:
        # Split once and reuse: field 2 is the date, field 3 the time of day.
        parts = dft['filename'].str.split('.', expand=True)
        dft['datetime'] = pd.to_datetime(
            parts.iloc[:, 2] + parts.iloc[:, 3], format='%Y%m%d%H%M%S')

    return dft


# Catalogue the met files and put them in chronological order.
dft_met = create_dft(met_arm).sort_values(by='datetime')

# Calendar years whose files are kept.
years = list(range(1997, 2001))

# Boolean-mask selection: keep rows whose timestamp year is one of `years`.
dft_sub = dft_met.loc[dft_met['datetime'].dt.year.isin(years)]




In [5]:
def prepro_met(ds):
    """Reduce one dataset to four variables averaged over 1-hour bins.

    Selects ``wdir_vec_mean``, ``wspd_vec_mean``, ``rh_mean`` and
    ``temp_mean`` from ``ds``, resamples along ``time`` at ``'1h'`` and
    returns the mean of each bin.

    NOTE(review): ``wdir_vec_mean`` looks like a wind direction; if it is in
    degrees, a plain mean does not wrap around 0/360 - confirm this is
    acceptable for the analysis.
    """
    wanted_vars = ['wdir_vec_mean', 'wspd_vec_mean', 'rh_mean', 'temp_mean']
    hourly_bins = ds[wanted_vars].resample(time='1h')
    return hourly_bins.mean()


# Open every selected file as a single dataset. prepro_met is passed as the
# `preprocess` hook so each file is cut down to hourly means of four variables,
# and parallel=True asks xarray to do the per-file work through dask.
met_dset = xr.open_mfdataset(dft_sub['filepath'].values,
                             preprocess=prepro_met,parallel=True)


In [None]:
# NOTE: despite its name, out_dir is the full path of the output *file*; the
# file name is built from the first and last entries of `years`.
out_dir=  '/home1/nalex2023/Datasets/met_manus_yr/{0}-{1}.met_manus.nc'.format(years[0],years[-1])

# Write the combined dataset to that NetCDF file.
met_dset.to_netcdf(out_dir)