In [None]:
import os
from glob import glob
import numpy as np
import pandas as pd

import dask

import climate_utils as cu
import python_utils as pu

In [None]:
############
### Dask ###
############
# Start a local Dask cluster and attach a client to it. Creating the client
# here presumably makes this cluster the scheduler used by later
# dask.compute calls in the notebook — TODO confirm.
from dask.distributed import LocalCluster

# The worker count is hard-coded; adjust it to the cores/memory of the machine in use.
cluster = LocalCluster(n_workers=20)

client = cluster.get_client()
# Bare last expression so the notebook renders the client's summary.
client

## Temperature for load

In [None]:
# Zonal aggregation of TGW hourly output: one CSV per experiment.
# tgw_vars is passed to cu.tgw_to_zones; only its first entry is used to name
# the output files.
tgw_vars = ['T2C']
tgw_var_str = tgw_vars[0]

# All TGW experiments. Keep directories only (a stray file in tgw_path would
# break the year parsing below) and sort so the processing order is
# reproducible; os.listdir returns entries in arbitrary order.
tgw_experiments = sorted(
    entry for entry in os.listdir(pu.tgw_path)
    if os.path.isdir(os.path.join(pu.tgw_path, entry))
)

# Create the output directory up front so a missing directory is not first
# discovered after the expensive computation has finished.
tgw_out_dir = f"{pu.project_path}/data/climate/tgw"
os.makedirs(tgw_out_dir, exist_ok=True)

# Loop through all
for tgw_experiment in tgw_experiments:
    out_path = f"{tgw_out_dir}/zonal_{tgw_var_str}_{tgw_experiment}.csv"

    # An existing output file acts as the "done" marker for this experiment.
    if os.path.isfile(out_path):
        print(f"{tgw_experiment} already done")
        continue

    # Extract years: the 2nd and 3rd '_'-separated fields of the directory
    # name are read as the first and last year (inclusive).
    tgw_experiment_yrs_str = tgw_experiment.split('_')[1:]
    tgw_years = np.arange(int(tgw_experiment_yrs_str[0]), int(tgw_experiment_yrs_str[1]) + 1)

    # Get paths of all hourly netcdf files, year by year. glob returns matches
    # in arbitrary order, so sort within each year to keep the row order of
    # the output deterministic.
    tgw_paths = [
        path
        for year in tgw_years
        for path in sorted(glob(f"{pu.tgw_path}/{tgw_experiment}/hourly/*_{year}-*"))
    ]

    # pd.concat raises ValueError on an empty sequence; report and move on so
    # one empty experiment does not abort the remaining ones.
    if not tgw_paths:
        print(f"{tgw_experiment}: no hourly files found, skipping")
        continue

    # Parallelize with dask: one delayed task per file
    delayed = [dask.delayed(cu.tgw_to_zones)(path, tgw_vars) for path in tgw_paths]

    # Store. Presumably each task returns a DataFrame — TODO confirm against
    # cu.tgw_to_zones.
    df_out = pd.concat(dask.compute(*delayed))

    # Write to a temporary file and rename it into place: an interrupted write
    # must not leave a truncated CSV that the "already done" check would accept.
    tmp_path = f"{out_path}.tmp"
    df_out.to_csv(tmp_path)
    os.replace(tmp_path, out_path)
    print(tgw_experiment)