In [None]:
proj_dir='/path/to/main_project_folder/' # edit this line

import sys
sys.path.append(proj_dir)
from project_utils import parameters as param
from project_utils import load_region
from project_utils import prepare_inputs
import numpy as np
import pandas as pd
import xarray as xr
import dask.array as da
import glob as glob
import matplotlib.pyplot as plt
import geopandas as gpd
import regionmask
import importlib
importlib.reload(param)
importlib.reload(load_region)
importlib.reload(prepare_inputs)
    
# Names of the study regions, in processing order.  Each name is handed to
# load_region.load_region_constants() and also serves as the per-region
# sub-directory name under ../processed_data_<dataset>/.
region_list = [
    'northcentral_north_america',
    'southcentral_north_america',
    'southeastern_north_america',
    'southwestern_europe',
    'western_europe',
    'central_europe',
    'eastern_europe',
    'northeastern_europe',
    'northeastern_asia',
    'southeastern_asia',
    'northsouthern_south_america',
    'southsouthern_south_america',
    'southwestern_africa',
    'southeastern_africa',
    'southwestern_australia',
    'southeastern_australia',
]

def _select_region(ds_sorted_lon, region_lat, region_lon):
    """Return the part of ``ds_sorted_lon`` inside the region's lat/lon selectors.

    Parameters
    ----------
    ds_sorted_lon : xarray.Dataset
        Dataset that has already been sorted along ``lon``.
    region_lat
        Selector forwarded to ``.sel(lat=...)``.
    region_lon
        Either a single ``slice``, or an indexable whose items ``[0]`` and
        ``[1]`` are two separate longitude selectors.  In the second case the
        two pieces are selected independently and concatenated along ``lon``.

    Returns
    -------
    xarray.Dataset
        The regional subset.
    """
    if isinstance(region_lon, slice):
        return ds_sorted_lon.sel(lon=region_lon, lat=region_lat)

    # Two longitude ranges: select each piece separately, then stitch them.
    first_piece = ds_sorted_lon.sel(lon=region_lon[0], lat=region_lat)
    second_piece = ds_sorted_lon.sel(lon=region_lon[1], lat=region_lat)
    return xr.concat([first_piece, second_piece], dim="lon")


def _cos_lat_weights(region_ds):
    """Return 1-D cos(latitude) weights built from ``region_ds``'s own ``lat``.

    Any ``level`` coordinate riding along on the latitude coordinate is
    dropped so it cannot clash with the data being weighted.
    """
    cos_lat = np.cos(np.deg2rad(region_ds.lat))
    return cos_lat.drop_vars('level', errors='ignore')


def _weighted_region_mean(ds_sorted_lon, region_lat, region_lon):
    """Return the cos(lat)-weighted mean of the region over ``lat`` and ``lon``.

    The weights are derived from the selected subset itself, so they always
    live on the same latitude grid as the data they are applied to.
    """
    region_ds = _select_region(ds_sorted_lon, region_lat, region_lon)
    weighted = region_ds.weighted(_cos_lat_weights(region_ds))
    return weighted.mean(dim=['lat', 'lon'], skipna=True)


# For every dataset and region, write region-averaged time series of
# geopotential height anomalies, soil moisture (raw, calendar-day anomaly,
# calendar-day stdev) and land-only tmax to CSV files next to the inputs.
for dset in ['ERA5', 'NCEP']:
    for jj, region_str in enumerate(region_list):
        print(jj)
        print(region_str)

        # All inputs and outputs of one dataset/region pair share a directory.
        data_dir = "../processed_data_" + dset + "/" + region_str + "/"

        # Only region_lat and region_lon are used below; the full unpack is
        # kept so the remaining names stay defined exactly as before.
        (hem, region_input_lat_bbox, region_input_lon_bbox,
         region_box_x, region_box_y, region_lat, region_lon, region_lon_EW,
         region_t62_lats, region_t62_lons) = load_region.load_region_constants(region_str)

        ################################################
        #################### GPH #######################
        ################################################

        hgt_ds = xr.open_dataset(data_dir + "hgt_calday_anomalies.nc")
        print(hgt_ds)

        region_ds = _select_region(hgt_ds.sortby("lon"), region_lat, region_lon)

        # The weight construction for height is left as originally written
        # (cos(lat) broadcast over the non-lat, non-time dims) so that the
        # existing height CSVs are reproduced unchanged.
        area_weights = xr.broadcast(np.cos(np.deg2rad(region_ds.lat)), region_ds, exclude=['lat', 'time'])[0]
        if dset == "NCEP":
            area_weights = area_weights.drop_vars(['level'])

        region_mean = region_ds.weighted(area_weights).mean(dim=['lat', 'lon'], skipna=True)
        if dset == "NCEP":
            region_mean = region_mean.drop_vars(['level'])

        region_mean.to_dataframe().to_csv(data_dir + "region_avg_hgt.csv")

        # Release references before the next set of files is opened.
        hgt_ds, region_ds, area_weights, region_mean = None, None, None, None

        ##################################################
        #################### soilw #######################
        ##################################################

        soilw_ds = xr.open_dataset(data_dir + "soilw.nc")
        soilw_ds = soilw_ds.rename({'soilw_detrend': 'soilw'})
        soilw_anom = xr.open_dataset(data_dir + "soilw_calday_anomalies.nc")
        soilw_stdev = xr.open_dataset(data_dir + "soilw_calday_stdev.nc")
        soilw_ds['time'] = pd.to_datetime(soilw_ds['time'].values, format='%Y%m%d')
        soilw_anom['time'] = pd.to_datetime(soilw_anom['time'].values, format='%Y%m%d')

        if dset == "NCEP":
            # Diagnostic print of the selected soil-moisture region (NCEP only).
            print(_select_region(soilw_ds.sortby("lon"), region_lat, region_lon))

        # The soilw/tmax averages used to be weighted by ``area_weights.lat``,
        # which is the latitude coordinate in degrees rather than cos(lat),
        # and for ERA5 it was taken from the height grid.  Each field is now
        # weighted by cos(lat) computed from its own latitude coordinate.
        soilw_outputs = (
            (soilw_ds, "region_avg_soilw.csv"),
            (soilw_anom, "region_avg_soilw_cday_anomaly.csv"),
            (soilw_stdev, "region_avg_soilw_calday_stdev.csv"),
        )
        for soilw_field, csv_name in soilw_outputs:
            field_mean = _weighted_region_mean(soilw_field.sortby("lon"), region_lat, region_lon)
            field_mean = field_mean.to_dataframe().drop(columns=['level'], errors='ignore')
            field_mean.to_csv(data_dir + csv_name)

        soilw_outputs, soilw_field, field_mean = None, None, None
        soilw_anom, soilw_stdev = None, None

        #################################################
        #################### TMAX #######################
        #################################################

        # Grid cells whose first soil-moisture time step is not NaN are
        # treated as land.
        land_only = ~np.isnan(soilw_ds.soilw[0, :, :].values)

        tmax_ds = xr.open_dataset(data_dir + "tmax.nc")
        tmax_ds = tmax_ds.rename({'tmax_detrend': 'tmax'})

        if dset == "NCEP":
            tmax_ds = tmax_ds.squeeze('level')
            # Diagnostic print of the selected (unmasked) tmax region (NCEP only).
            print(_select_region(tmax_ds.sortby("lon").squeeze(), region_lat, region_lon))

        # Multiply by the land mask (non-land cells become 0), then turn every
        # zero into NaN so the masked cells cannot affect the mean.
        # NOTE(review): a genuine tmax value of exactly 0 on land is turned
        # into NaN as well -- confirm this is acceptable.
        tmax_masked = tmax_ds.copy()
        tmax_masked['tmax'] = tmax_masked.tmax * land_only[np.newaxis, :, :]
        tmax_masked['tmax'] = tmax_masked.tmax.where(tmax_masked.tmax != 0)
        tmax_masked['time'] = pd.to_datetime(tmax_masked['time'].values, format='%Y%m%d')

        region_mean = _weighted_region_mean(tmax_masked.sortby("lon"), region_lat, region_lon)
        region_mean = region_mean.to_dataframe().drop(columns=['level'], errors='ignore')
        region_mean.to_csv(data_dir + "region_avg_tmax.csv")

        # Release references before the next region is processed.
        soilw_ds, tmax_ds, tmax_masked, land_only, region_mean = None, None, None, None, None