In [1]:
import xarray as xr
import numpy as np
import pandas as pd
from scipy.interpolate import NearestNDInterpolator
import glob
import os

# Historical calcs using CRU

In [45]:
# Open the CRU TS 4.08 potential-evapotranspiration NetCDF
# (the file name indicates coverage 1901-2023).
pet_ds = xr.open_dataset(
    './data/cru_ts4.08.1901.2023.pet.dat.nc',
)

In [3]:
# Reduce the CRU record to a single grid: the mean over every time step
# falling inside calendar year 2021.
# (For a 1940s baseline instead, use the bounds 1940-01-01 and 1949-12-31.)
# NOTE: this rebinds `pet_ds` to the time-averaged grid, so the file must be
# re-opened before this cell can be run a second time.
year_window = slice(np.datetime64('2021-01-01'), np.datetime64('2021-12-31'))
pet_ds = pet_ds.sel(time=year_window).mean(dim='time')

In [4]:
# Load the Sentinel-derived reservoir table (2021, per the file name) and
# derive per-reservoir surface-area columns.
# `area` is multiplied by 100 to obtain m^2 -- presumably it is a count of
# 10 m x 10 m pixels; TODO confirm against the upstream export.
res_raw = pd.read_csv('../remove_bad_water/out/sentinel_2021_v6_wgs84_combined_merged.csv')

# Built as a single chain ending in .copy() so the derived columns are never
# written onto a filtered view (avoids SettingWithCopyWarning and leaves
# `res_raw` untouched for re-runs).
res_df = (
    res_raw
    .loc[res_raw['hydropoly_max'] < 100]              # keep rows with hydropoly_max below 100
    .assign(
        area_ha=lambda d: d['area']*100/10000,        # hectares
        area_km=lambda d: d['area']*100/(1000*1000),  # km^2
        area_m=lambda d: d['area']*100,               # m^2
    )
    .loc[lambda d: d['area_ha'] < 100]                # keep only water bodies under 100 ha
    .copy()
)

In [None]:
# Total surface area (km^2) summed over all retained rows of res_df.
res_df['area_km'].sum()

In [None]:
# (rows, columns) of the filtered reservoir table.
res_df.shape

In [8]:
# Build one indexer per coordinate, both laid out along a shared 'res'
# dimension, so that a later .sel() looks up one grid cell per reservoir
# (point-wise) instead of a lon x lat cross product.
res_lon = xr.DataArray(res_df['longitude'].values, dims=['res'])
res_lat = xr.DataArray(res_df['latitude'].values, dims=['res'])
xr_indexer = dict(lon=res_lon, lat=res_lat)

In [9]:
# Pick, for every reservoir, the PET grid cell nearest to its lon/lat.
# NOTE(review): unlike full_pet_calcs further down, no gap-filling (interp) is
# applied to the grid before this lookup -- confirm that is intended.
res_pets = pet_ds.sel(indexers=xr_indexer, method='nearest')

In [10]:
# Attach each reservoir's surface area (the `area_m` column of res_df) as a
# variable along the same 'res' dimension as the sampled PET values.
res_pets = res_pets.assign({'area_m': ('res', res_df['area_m'].values)})

In [11]:
# Per-reservoir evaporated volume: PET depth x surface area.
# PET is assumed to be a depth in mm, so /1000 gives metres and
# depth (m) x area (m^2) is a volume in m^3.
# NOTE(review): despite the name `res_month_evap`, the x365 scaling below
# treats this as a per-day quantity -- confirm the PET units of the CRU file.
res_month_evap = (res_pets['pet'] * res_pets['area_m']) / 1000

# Scale to a year (365 days), total over all reservoirs, then convert
# cubic metres to litres (x1000).
annual_evap_m3 = res_month_evap * 365
evap_value = annual_evap_m3.sum() * 1000

In [None]:
# Display the total annual evaporation computed in the previous cell
# (litres, under that cell's unit assumptions).
evap_value

In [None]:
# NOTE(review): unexplained scratch arithmetic -- the meaning of 11.7 and 30 is
# not stated anywhere in this notebook; document the quantities or remove.
11.7/30

# Future Calcs

Data source for the future (CMIP6) PET projections used below: https://www.nature.com/articles/s41597-023-02290-0


In [81]:
def interp(da):
    """Fill non-finite cells of a gridded array with the nearest finite value.

    "Nearest" is measured in index space (array positions), not in coordinate
    units. The array is modified in place and also returned, so both
    ``interp(x)`` and ``x = interp(x)`` work.

    Parameters
    ----------
    da : array-like supporting ``da[...] = values``
        Typically an ``xarray.DataArray``; a plain ``numpy.ndarray`` works too.

    Returns
    -------
    The same object ``da``, with every NaN/inf cell replaced.

    Raises
    ------
    ValueError
        If ``da`` has cells but none of them is finite (nothing to fill from).
    """
    values = np.asarray(da)
    finite_mask = np.isfinite(values)

    # Nothing to fill (also covers empty arrays): skip building the KD-tree.
    if finite_mask.all():
        return da
    if not finite_mask.any():
        raise ValueError('interp: array contains no finite values to fill from')

    finite_idx = np.nonzero(finite_mask)
    # Local is deliberately not called `interp`, which would shadow this function.
    nearest = NearestNDInterpolator(np.transpose(finite_idx), values[finite_idx])
    # Evaluate at every grid index; finite cells map onto themselves.
    da[...] = nearest(*np.indices(values.shape))
    return da


def calc_total_evap(res_pets):
    """Return the total annual evaporation, in litres, summed over all reservoirs.

    ``res_pets`` must provide ``'PET'`` (an evaporation depth, treated as mm)
    and ``'area_m'`` (surface area in m^2) under those keys. The x365 factor
    means PET is being treated as a per-day depth -- confirm against the
    dataset's documented units.
    """
    depth_mm = res_pets['PET']
    surface_m2 = res_pets['area_m']
    # mm -> m, so depth x area is a volume in cubic metres per reservoir.
    per_reservoir_m3 = depth_mm * surface_m2 / 1000
    yearly_m3 = per_reservoir_m3 * 365
    # Total over reservoirs, then cubic metres -> litres.
    return yearly_m3.sum() * 1000

def _period_total_evap(pet_ds, xr_indexer, area_m, period):
    """Total reservoir evaporation from the time-mean PET grid over ``period``."""
    start, end = period
    period_mean = pet_ds.sel(time=slice(start, end)).mean(dim='time')
    # Fill non-finite grid cells first so the nearest-cell lookup below never
    # returns a gap for a reservoir.
    period_mean['PET'] = interp(period_mean['PET'])
    res_pets = period_mean.sel(xr_indexer, method='nearest')
    res_pets = res_pets.assign(area_m=('res', area_m))
    return calc_total_evap(res_pets)


def full_pet_calcs(res_df, pet_ncdf_path,
                   baseline_period=('2015-01-01', '2025-01-01'),
                   final_period=('2090-01-01', '2100-01-01')):
    """Compute baseline and end-of-run total evaporation for one PET NetCDF file.

    Parameters
    ----------
    res_df : pandas.DataFrame
        Reservoir table; the 'longitude', 'latitude' and 'area_m' columns are read.
    pet_ncdf_path : str
        Path to a NetCDF file with a 'PET' variable on 'time', 'lon', 'lat'.
    baseline_period, final_period : tuple of (start, end)
        Time bounds passed to ``slice`` for each averaging window. Defaults
        reproduce the original hard-coded windows.
        NOTE(review): label slices include both endpoints, so a time step
        stamped exactly on the end date is part of the window -- confirm
        that this matches the intended ten-year spans.

    Returns
    -------
    (evap_first10, evap_last10)
        The ``calc_total_evap`` result for the baseline and final windows.
    """
    xr_indexer = {
        'lon': xr.DataArray(res_df['longitude'].values, dims=['res']),
        'lat': xr.DataArray(res_df['latitude'].values, dims=['res'])
    }
    area_m = res_df['area_m'].values
    # Context manager closes the file handle once both windows are computed;
    # previously every model file stayed open for the life of the kernel.
    with xr.open_dataset(pet_ncdf_path) as pet_ds:
        evap_first10 = _period_total_evap(pet_ds, xr_indexer, area_m, baseline_period)
        evap_last10 = _period_total_evap(pet_ds, xr_indexer, area_m, final_period)
    return evap_first10, evap_last10

In [None]:
# Run the baseline/final evaporation calculation for every CMIP6 model file
# and collect one record per file (the table is written out in the next cell).
# First mount using rclone: rclone mount --drive-shared-with-me cu_gdrive: gdrive/
# Sorted because glob returns files in arbitrary order; this keeps the row
# order of the results reproducible between runs.
all_models = sorted(glob.glob('./data/cmip6/*.nc'))
all_dicts = []
for mod_nc in all_models:
    # File name without directory or extension identifies the model/scenario.
    mod_name = os.path.splitext(os.path.basename(mod_nc))[0]
    print(mod_name)
    baseline, final = full_pet_calcs(res_df, mod_nc)
    out_dict = {
        'model': mod_name,
        # float() unwraps the 0-d results so the DataFrame columns come out as
        # numeric floats rather than object columns holding 0-d arrays.
        'baseline_evap': float(baseline),
        'final_evap': float(final)
    }
    all_dicts.append(out_dict)

In [85]:
# Tabulate one row per model file and persist the table so the slow
# per-model loop does not have to be repeated.
full_df = pd.DataFrame(data=all_dicts)
full_df.to_csv(path_or_buf='./data/sentinel_v6_2021_allmodels_cmip6.csv', index=False)
# To reuse previously saved results instead of recomputing:
# full_df = pd.read_csv('./data/sentinel_v6_2021_allmodels_cmip6.csv')

In [86]:
# Tag each row with the SSP scenario named in its model string; rows that
# match none of the scenarios keep None.
full_df['scenario'] = None
for ssp in ('ssp126', 'ssp245', 'ssp370', 'ssp585'):
    matches = full_df['model'].str.contains(ssp)
    full_df.loc[matches, 'scenario'] = ssp

# Relative change: final-window evaporation divided by baseline evaporation.
full_df['ratio'] = full_df['final_evap'] / full_df['baseline_evap']

In [None]:
# Summarise the final/baseline evaporation ratio per SSP scenario.
# 'ratio' is selected before aggregating so that non-numeric columns such as
# 'model' are never passed to mean/std (which raises on pandas >= 2.0); the
# resulting table has the same columns as before.
full_df.groupby('scenario')['ratio'].agg(['mean', 'std', 'max', 'min', 'count'])