# Binned Calculations?

## Packages

In [131]:
import numpy as np
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import dask
import os
import glob

## Functions

In [128]:
def normal_value_spatial(year, start_year, end_year, month_start, month_end):
    '''
    Average all files of one year over time for the months month_start..month_end.

    Uses glob to collect the files of `year` whose date stamp falls in each
    month from month_start to month_end (both inclusive), concatenates them
    along "time" and returns the mean over that dimension.

    The file pattern is built from the module-level globals `sm_base_path`
    and `model`, which must be defined before this function is called.

    inputs:
    year: year whose files are averaged
    start_year, end_year: not used in the body; kept so existing callers
        that pass them keep working
    month_start, month_end: first and last month number, both inclusive
    outputs:
    xarray Dataset holding the mean over "time" of the concatenated files
    (only the "time" dimension is reduced, so this is not a scalar)
    raises:
    FileNotFoundError when no file matches the requested year/months
    '''
    files_arr = []

    for month_i in range(month_start, month_end + 1):
        # zfill(2) turns e.g. 4 into "04" to match the YYYYMMDD stamp in the file name
        pattern = f"{sm_base_path}/{year}/NLDAS_{model}0125_H.A{year}{str(month_i).zfill(2)}*.nc"
        files_arr.extend(sorted(glob.glob(pattern)))

    # Fail with an explicit message instead of letting xr.concat choke on an empty list.
    if not files_arr:
        raise FileNotFoundError(
            f"no NLDAS_{model} files found under {sm_base_path}/{year} "
            f"for months {month_start}-{month_end}"
        )

    opened = []
    try:
        for file_i in files_arr:
            opened.append(xr.open_dataset(file_i))

        ds = xr.concat(opened, dim="time")

        # load() guarantees the values are in memory before the files are closed
        return ds.mean(dim='time').load()
    finally:
        # Release every file handle, including when opening or concatenating fails.
        for ds_i in opened:
            ds_i.close()

## Dask

In [None]:
from dask_jobqueue import SLURMCluster

# Describe the SLURM job used to run dask workers: each job is submitted
# under the "open" account and requests 1 core, 10 GiB of memory and a
# 3-hour walltime.
cluster = SLURMCluster(
    # presumably an alternative account, left here for quick switching — confirm
    # account="pches",
    account="open",
    cores=1,
    memory="10GiB",
    walltime="03:00:00",
)

# Ask for 10 jobs with the specification above.
cluster.scale(jobs=10) 

In [152]:
from dask.distributed import Client

# Connect this session to the SLURM-backed cluster.
# NOTE(review): the client summary printed right after creation reports
# 0 workers; presumably the SLURM jobs had not started yet — confirm workers
# are up before launching the computation.
client = Client(cluster)

In [153]:
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: http://146.186.150.11:37639/status,

0,1
Dashboard: http://146.186.150.11:37639/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://146.186.150.11:41325,Workers: 0
Dashboard: http://146.186.150.11:37639/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Calculations

In [154]:
# Input data notes: both datasets are organised by year.
#   dday file names look like  NLDAS_FORA0125_H.A19791221_dday.nc
#   sm file names look like    NLDAS_VIC0125_H.A19790328.nc
# The growing season is taken as April through September, per Haqiqi 2021.

# Model tag that appears in the sm file names.
model = "VIC"

# Root directories of the two datasets.
sm_base_path = "/storage/home/cta5244/work/pyWBM_yield_data/VIC_daily/"
dday_base_path = "/storage/home/cta5244/work/pyWBM_yield_data/NCEPNARR_NLDAS_tmax_tmin/"

# Range of years to process.
start_year = 1979
end_year = 2025

# First and last month of the growing season (April, September).
month_start, month_end = 4, 9

In [155]:
# Build one delayed call to normal_value_spatial per year; the calls are only
# recorded here and are executed later by dask.compute.
results = []
    
# np.arange excludes its stop value, so the last year queued is end_year - 1.
# NOTE(review): confirm that leaving out end_year itself is intended.
for year in np.arange(start_year, end_year):
    out = dask.delayed(normal_value_spatial)(year=year, 
                                              start_year=start_year, 
                                              end_year=end_year, 
                                              month_start=month_start,
                                              month_end=month_end)
    results.append(out)


In [None]:
# Run every queued delayed task. `results` is rebound from the list of
# delayed objects to the computed values, one per year in loop order, so
# re-running this cell without rebuilding the list will not recompute.
results = dask.compute(*results)