# Setup

In [None]:
import xarray as xr
import numpy as np
import pandas as pd

import dask
import dask.array as da
from dask.distributed import Client,LocalCluster,progress,wait
import glob

import matplotlib.pyplot as plt

import dask_config
import kbdi_funcs as k_funcs

In [None]:
# Input NetCDF files (per their names: nClimGrid daily precipitation and
# maximum temperature, 1951-2024, "USsouth" subset).
# Relative-path alternative for when the files sit next to this notebook:
# pr_file = r'prcp_nClimGridDaily_1951-2024_USsouth.nc'
# tmax_file = r'tmax_nClimGridDaily_1951-2024_USsouth.nc'
pr_file = r'C://Users/kerrie/Documents/02_LocalData/nclimgrid_daily/prcp_nClimGridDaily_1951-2024_USsouth.nc'
tmax_file = r'C://Users/kerrie/Documents/02_LocalData/nclimgrid_daily/tmax_nClimGridDaily_1951-2024_USsouth.nc'

# Analysis period, kept as year strings because that is how they are handed
# to the configuration and loader helpers in the following cells.
year_start, year_end = '1951', '2023'

In [None]:
# Build the processing configuration without prompting (non-interactive).
# Interactive alternative:
# config = dask_config.auto_configure_processing(pr_file, tmax_file, year_start, year_end)
config = dask_config.quick_configure(pr_file, tmax_file, year_start, year_end)

# Pull out the two settings the later cells rely on: the worker count for the
# local cluster and the chunk specification passed to the data loaders.
nworkers, chunks = config['nworkers'], config['chunks']

In [None]:
# Local cluster with `nworkers` workers, each restricted to a single thread
# (with LocalCluster's default settings each worker is its own process --
# NOTE(review): confirm no dask config overrides this).
cluster = LocalCluster(n_workers=nworkers, threads_per_worker=1)

# Connect this session to the cluster.
client = Client(cluster)

# Block until every worker has joined; give up with an error after 30 s.
client.wait_for_workers(n_workers=nworkers, timeout=30)

client  # last expression of the cell: displays the client/cluster summary

# Begin Main Code

In [None]:
%%time
# Load the precipitation variable from pr_file using the configured chunking
# and year range (presumably returns a dask-backed DataArray, since later
# cells call pr.data.to_delayed() -- confirm against kbdi_funcs).
pr = k_funcs.get_pr(pr_file, chunks, year_start, year_end)

# Per-chunk info derived from pr; it is zipped one-to-one with the delayed
# chunks when the task list is built.
coord_list = k_funcs.get_chunk_info(pr)

pr  # last expression of the cell: displays the array summary

In [None]:
%%time
# Load the maximum-temperature variable from tmax_file with the same chunking
# and year range that were used for precipitation.
tmax = k_funcs.get_tmax(tmax_file, chunks, year_start, year_end)

tmax  # last expression of the cell: displays the array summary

In [None]:
%%time

# One Delayed object per dask chunk of each variable, flattened to 1-D so the
# chunks can be paired up by position.
pr_delayed = pr.data.to_delayed().ravel()
tmax_delayed = tmax.data.to_delayed().ravel()

# zip() stops at its shortest input, so a chunk-count mismatch between the two
# variables would silently drop chunks from the computation. Fail loudly.
if len(pr_delayed) != len(tmax_delayed):
    raise ValueError(
        f'pr has {len(pr_delayed)} chunks but tmax has {len(tmax_delayed)}; '
        'both variables must share the same chunk layout'
    )
zipvars = zip(pr_delayed, tmax_delayed, coord_list)

# One lazy calc_kbdi call per chunk, tagged with its running index; nothing is
# executed until the tasks are handed to dask.compute().
task_list = [
    dask.delayed(k_funcs.calc_kbdi)(chunk_id, p, t, coord)
    for chunk_id, (p, t, coord) in enumerate(zipvars)
]

# coord_list may not support len(), so it is checked indirectly: if it was
# shorter than the chunk sequences, zip() produced fewer tasks than chunks.
if len(task_list) != len(pr_delayed):
    raise ValueError(
        f'built {len(task_list)} tasks for {len(pr_delayed)} chunks; '
        'coord_list has fewer entries than there are chunks'
    )

len(task_list)  # last expression of the cell: displays the number of tasks

In [None]:
%%time
# Run every delayed calc_kbdi task and block until all have finished;
# dask.compute returns a tuple with one entry per task. The next cell reads
# 'output/kbdi_chunk_*.nc', so calc_kbdi presumably writes those files as a
# side effect -- confirm in kbdi_funcs.
result = dask.compute(*task_list)

In [None]:
# Collect the per-chunk NetCDF outputs and open them together as one dataset.
files = glob.glob('output/kbdi_chunk_*.nc')
test = xr.open_mfdataset(files)

# Quick visual sanity check: plot the kbdi variable at time index 20000.
test['kbdi'].isel(time=20000).plot()