In [13]:
import os
import sys
import pandas as pd

import numpy as np
from datetime import datetime as dt
import calendar
import matplotlib.pyplot as plt

In [14]:
# Enable IPython's autoreload extension in mode 2 for this kernel.
# Note: re-running this cell in a live kernel prints an "already loaded" notice;
# IPython suggests %reload_ext autoreload for that case.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
# Silence every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation and performance
# warnings raised by the cells below will not be shown either.
import warnings
warnings.filterwarnings('ignore')

In [16]:
# Root directory of the project data. It contains the 'model_inputs' and
# 'grid_data_extract' sub-directories read and written by the cells below.
# Set the OLNI_DATA_DIR environment variable to run the notebook on another
# machine; when it is unset, the original local path is used.
data_dir = os.environ.get('OLNI_DATA_DIR', '/Users/maoyabassiouni/Documents/DATA/Projects/OLNI')

In [17]:
def cal_vpd(air_temp, rh):
    """Return the vapour pressure deficit from air temperature and relative humidity.

    The saturation vapour pressure is evaluated as
    ``610.8 * exp(17.27 * T / (T + 237.3))`` and multiplied by the
    unsaturated fraction ``1 - rh / 100``.

    Parameters
    ----------
    air_temp : scalar, numpy array or pandas Series
        Air temperature. Presumably in degrees Celsius -- TODO confirm
        against the inputs passed by the callers.
    rh : scalar, numpy array or pandas Series
        Relative humidity expressed in percent (it is divided by 100 here).

    Returns
    -------
    Same kind of object as the inputs (element-wise result). Presumably in
    Pa given the 610.8 coefficient -- TODO confirm.
    """
    saturation = 610.8 * np.exp((17.27 * air_temp) / (air_temp + 237.3))
    unsaturated_fraction = 1 - rh/100
    return saturation * unsaturated_fraction

In [18]:
def f_temp(air_temp, pa):
    """Return the dimensionless ratio ``a / (a + g)`` used to rescale PET.

    ``a`` is ``exp(17.27 * T / (T + 237.3))`` and ``g`` is ``0.665 * pa``.

    Parameters
    ----------
    air_temp : scalar, numpy array or pandas Series
        Air temperature. Presumably in degrees Celsius -- TODO confirm.
    pa : scalar, numpy array or pandas Series
        Pressure term; the callers in this notebook pass the 'elev_pa' column.
        NOTE(review): units are not visible here -- confirm they match the
        0.665 coefficient.

    Returns
    -------
    Element-wise ratio with the same kind of container as the inputs.
    """
    exp_term = np.exp((17.27 * air_temp) / (air_temp + 237.3))
    pressure_term = 0.665 * pa
    return exp_term / (exp_term + pressure_term)

In [19]:
# Model identifiers (the `source_id` part of the column names) that the
# conversion loops below iterate over. The order is kept because it drives
# the order in which derived columns are added.
models = [
    'ACCESS-CM2',
    'CanESM5',
    'CESM2',
    'CNRM-CM6-1',
    'EC-Earth3-Veg-LR',
    'FGOALS-g3',
    'GFDL-CM4',
    'GISS-E2-1-G',
    'IITM-ESM',
    'INM-CM5-0',
    'IPSL-CM6A-LR',
    'KIOST-ESM',
    'MIROC6',
    'MPI-ESM1-2-LR',
    'MRI-ESM2-0',
    'NorESM2-MM',
    'UKESM1-0-LL',
]

In [20]:
# Build the annual "historical" model-input table:
#   1. load the long-term table and the annual CMIP historical extract,
#   2. join them on (Lat, Lon, sp),
#   3. attach the historical CO2 value of each row's year,
#   4. derive par / vpd / rf_alpha / rf_lambda / pet columns per model,
#   5. write the result to model_inputs/df_model_all_annual_CMIP_historical.csv.

# Long-term table. Coordinates are rounded to 4 decimals on both sides so the
# join keys compare equal.
df_all = pd.read_csv(os.path.join(data_dir, 'model_inputs', 'df_model_all_LT_CMIP.csv'))
df_all['Lat'] = np.round(df_all['Lat'], 4)
df_all['Lon'] = np.round(df_all['Lon'], 4)

# Yearly CO2 table (one 'CO2_historical' value per 'year').
data_CMIP_co2 = pd.read_csv(os.path.join(data_dir, 'grid_data_extract', 'CMIP_historical_co2.csv'))
data_CMIP_co2 = data_CMIP_co2.drop(['Unnamed: 0',], axis=1)

# Annual CMIP extract, averaged over duplicate (Lon, Lat, sp, year) rows.
data_CMIP = pd.read_csv(os.path.join(data_dir, 'grid_data_extract', 'CMIP_historical_models_annual.csv'))
#data_CMIP = pd.read_csv(os.path.join(data_dir, 'grid_data_extract', 'CMIP_historical_CESM2_annual.csv'))
data_CMIP['Lat'] = np.round(data_CMIP['Lat'], 4)
data_CMIP['Lon'] = np.round(data_CMIP['Lon'], 4)
data_CMIP = data_CMIP.groupby(['Lon', 'Lat', 'sp', 'year']).mean()

# Drop leftover CSV index columns ('Unnamed: ...') in one call, then turn the
# group keys back into regular columns.
unnamed_cols = [k for k in data_CMIP.keys() if k.startswith('Unnamed')]
data_CMIP = data_CMIP.drop(unnamed_cols, axis=1).reset_index()

# The outer join keeps rows that exist on only one side; such rows end up with
# a missing 'year' and are reported by the CO2 check below.
df_all = pd.merge(df_all, data_CMIP, on=['Lat', 'Lon', 'sp'], how='outer')

# Attach CO2 by year with a single vectorised lookup instead of re-filtering
# the CO2 table once per row. If a year is listed more than once, the first
# entry is used.
co2_by_year = data_CMIP_co2.drop_duplicates('year').set_index('year')['CO2_historical']
unmatched = ~df_all['year'].isin(co2_by_year.index)
if unmatched.any():
    raise ValueError('No CO2_historical value for year(s): %s'
                     % df_all.loc[unmatched, 'year'].unique().tolist())
df_all['CO2_historical'] = df_all['year'].map(co2_by_year)

#conversions....
for source_id in models:
    for tt in ['gs_LTavg', 'gs']:
        # Common tail of every column name handled in this iteration.
        suffix = '_historical_%s_%s' % (source_id, tt)
        rsds = df_all['rsds' + suffix]
        tas = df_all['tas' + suffix]

        # NOTE(review): 1e-6 * 2.04 presumably converts shortwave radiation to
        # PAR in mol-based units -- confirm units of the rsds columns.
        df_all['par' + suffix] = rsds * 10 ** (-6) * 2.04
        df_all['vpd' + suffix] = cal_vpd(tas, df_all['hurs' + suffix])

        # Rainfall parameters: lambda is taken unchanged from the ERA5
        # long-term value, alpha is the model precipitation divided by it.
        df_all['rf_alpha' + suffix] = df_all['pr' + suffix] / df_all['ERA5_rf_lambda_gs_LTavg']
        df_all['rf_lambda' + suffix] = df_all['ERA5_rf_lambda_gs_LTavg']

        # PET: the TC long-term PET rescaled by the ratio of model to TC
        # radiation and by the ratio of f_temp at the model temperature to
        # f_temp at the TC reference temperature.
        # NOTE(review): the reference temperature column is 'TC_tmmn_gs_LTavg'
        # -- confirm this is the intended baseline.
        df_all['pet' + suffix] = df_all['TC_pet_gs_LTavg'] \
            * rsds / df_all['TC_srad_gs_LTavg'] \
            * f_temp(tas, df_all['elev_pa']) \
            / f_temp(df_all['TC_tmmn_gs_LTavg'], df_all['elev_pa'])

df_all.to_csv(os.path.join(data_dir, 'model_inputs', 'df_model_all_annual_CMIP_historical.csv'))

In [21]:
# Preview the merged table; the notebook renders a truncated view of rows and columns.
df_all

Unnamed: 0,Lat,Lon,sp,n,ymin,ymax,count,yrange,ERA5_rf_alpha_LTavg,ERA5_rf_lambda_LTavg,...,rf_lambda_historical_NorESM2-MM_gs,pet_historical_NorESM2-MM_gs,rf_alpha_historical_UKESM1-0-LL_gs_LTavg,rf_lambda_historical_UKESM1-0-LL_gs_LTavg,pet_historical_UKESM1-0-LL_gs_LTavg,par_historical_UKESM1-0-LL_gs,vpd_historical_UKESM1-0-LL_gs,rf_alpha_historical_UKESM1-0-LL_gs,rf_lambda_historical_UKESM1-0-LL_gs,pet_historical_UKESM1-0-LL_gs
0,43.2562,-8.8250,129.0,10.700972,1995,2015,12,20,0.005236,0.603362,...,0.431431,0.004007,0.004482,0.431431,0.005323,0.000521,529.089684,0.005962,0.431431,0.004873
1,43.2562,-8.8250,129.0,10.700972,1995,2015,12,20,0.005236,0.603362,...,0.431431,0.004264,0.004482,0.431431,0.005323,0.000553,531.732514,0.006068,0.431431,0.005217
2,43.2562,-8.8250,129.0,10.700972,1995,2015,12,20,0.005236,0.603362,...,0.431431,0.004156,0.004482,0.431431,0.005323,0.000526,356.698556,0.006974,0.431431,0.004460
3,43.2562,-8.8250,129.0,10.700972,1995,2015,12,20,0.005236,0.603362,...,0.431431,0.004889,0.004482,0.431431,0.005323,0.000541,448.877258,0.007354,0.431431,0.004899
4,43.2562,-8.8250,129.0,10.700972,1995,2015,12,20,0.005236,0.603362,...,0.431431,0.004235,0.004482,0.431431,0.005323,0.000562,700.915922,0.003305,0.431431,0.005593
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67480,66.3025,29.5011,118.0,9.483958,1995,2009,8,14,0.002292,0.826733,...,1.008569,0.003345,0.002841,1.008569,0.002946,0.000318,319.233390,0.002778,1.008569,0.002552
67481,66.3025,29.5011,118.0,9.483958,1995,2009,8,14,0.002292,0.826733,...,1.008569,0.003214,0.002841,1.008569,0.002946,0.000379,290.256302,0.003378,1.008569,0.003070
67482,66.3025,29.5011,118.0,9.483958,1995,2009,8,14,0.002292,0.826733,...,1.008569,0.003679,0.002841,1.008569,0.002946,0.000359,306.894434,0.003482,1.008569,0.003008
67483,66.3025,29.5011,118.0,9.483958,1995,2009,8,14,0.002292,0.826733,...,1.008569,0.002865,0.002841,1.008569,0.002946,0.000300,363.748036,0.002545,1.008569,0.002855


In [22]:
# List the column names of the CMIP extract currently bound to data_CMIP.
list(data_CMIP.keys())

['Lon',
 'Lat',
 'sp',
 'year',
 'tas_historical_ACCESS-CM2',
 'hurs_historical_ACCESS-CM2',
 'pr_historical_ACCESS-CM2',
 'rsds_historical_ACCESS-CM2',
 'tas_historical_CanESM5',
 'hurs_historical_CanESM5',
 'pr_historical_CanESM5',
 'rsds_historical_CanESM5',
 'tas_historical_CESM2',
 'hurs_historical_CESM2',
 'pr_historical_CESM2',
 'rsds_historical_CESM2',
 'tas_historical_CNRM-CM6-1',
 'hurs_historical_CNRM-CM6-1',
 'pr_historical_CNRM-CM6-1',
 'rsds_historical_CNRM-CM6-1',
 'tas_historical_EC-Earth3-Veg-LR',
 'hurs_historical_EC-Earth3-Veg-LR',
 'pr_historical_EC-Earth3-Veg-LR',
 'rsds_historical_EC-Earth3-Veg-LR',
 'tas_historical_FGOALS-g3',
 'hurs_historical_FGOALS-g3',
 'pr_historical_FGOALS-g3',
 'rsds_historical_FGOALS-g3',
 'tas_historical_GFDL-CM4',
 'hurs_historical_GFDL-CM4',
 'pr_historical_GFDL-CM4',
 'rsds_historical_GFDL-CM4',
 'tas_historical_GISS-E2-1-G',
 'hurs_historical_GISS-E2-1-G',
 'pr_historical_GISS-E2-1-G',
 'rsds_historical_GISS-E2-1-G',
 'tas_historical

In [23]:
# Build the annual "projection" model-input table (scenarios ssp245, ssp585):
#   1. load the long-term table and the annual CMIP projection extract,
#   2. join them on (Lat, Lon, sp),
#   3. attach the scenario CO2 values of each row's year,
#   4. derive par / vpd / rf_alpha / rf_lambda / pet columns per model and scenario,
#   5. write the result to model_inputs/df_model_all_annual_CMIP_proj.csv.

# Long-term table. Coordinates are rounded to 4 decimals on both sides so the
# join keys compare equal.
df_all = pd.read_csv(os.path.join(data_dir, 'model_inputs', 'df_model_all_LT_CMIP.csv'))
df_all['Lat'] = np.round(df_all['Lat'], 4)
df_all['Lon'] = np.round(df_all['Lon'], 4)

# Yearly CO2 table with one column per scenario.
data_CMIP_co2 = pd.read_csv(os.path.join(data_dir, 'grid_data_extract', 'CMIP_proj_co2.csv'))
data_CMIP_co2 = data_CMIP_co2.drop(['Unnamed: 0',], axis=1)

# Annual CMIP extract, averaged over duplicate (Lon, Lat, sp, year) rows.
data_CMIP = pd.read_csv(os.path.join(data_dir, 'grid_data_extract', 'CMIP_proj_models_annual.csv'))
data_CMIP['Lat'] = np.round(data_CMIP['Lat'], 4)
data_CMIP['Lon'] = np.round(data_CMIP['Lon'], 4)
data_CMIP = data_CMIP.groupby(['Lon', 'Lat', 'sp', 'year']).mean()

# Drop leftover CSV index columns ('Unnamed: ...') in one call, then turn the
# group keys back into regular columns.
unnamed_cols = [k for k in data_CMIP.keys() if k.startswith('Unnamed')]
data_CMIP = data_CMIP.drop(unnamed_cols, axis=1).reset_index()

# The outer join keeps rows that exist on only one side; such rows end up with
# a missing 'year' and are reported by the CO2 check below.
df_all = pd.merge(df_all, data_CMIP, on=['Lat', 'Lon', 'sp'], how='outer')

# Attach CO2 by year with a vectorised lookup instead of re-filtering the CO2
# table once per row. If a year is listed more than once, the first entry is
# used.
co2_by_year = data_CMIP_co2.drop_duplicates('year').set_index('year')
unmatched = ~df_all['year'].isin(co2_by_year.index)
if unmatched.any():
    raise ValueError('No projected CO2 value for year(s): %s'
                     % df_all.loc[unmatched, 'year'].unique().tolist())
for ck in ['CO2_ssp245', 'CO2_ssp585']:
    df_all[ck] = df_all['year'].map(co2_by_year[ck])

#conversions....
for source_id in models:
    for sc in ['ssp245', 'ssp585']:
        for tt in [ 'gs',]:
            # Common tail of every column name handled in this iteration.
            suffix = '_%s_%s_%s' % (sc, source_id, tt)
            rsds = df_all['rsds' + suffix]
            tas = df_all['tas' + suffix]

            # NOTE(review): 1e-6 * 2.04 presumably converts shortwave radiation
            # to PAR in mol-based units -- confirm units of the rsds columns.
            df_all['par' + suffix] = rsds * 10 ** (-6) * 2.04
            df_all['vpd' + suffix] = cal_vpd(tas, df_all['hurs' + suffix])

            # Rainfall parameters: lambda is taken unchanged from the ERA5
            # long-term value, alpha is the model precipitation divided by it.
            df_all['rf_alpha' + suffix] = df_all['pr' + suffix] / df_all['ERA5_rf_lambda_gs_LTavg']
            df_all['rf_lambda' + suffix] = df_all['ERA5_rf_lambda_gs_LTavg']

            # PET: the TC long-term PET rescaled by the ratio of model to TC
            # radiation and by the ratio of f_temp at the model temperature to
            # f_temp at the TC reference temperature.
            # NOTE(review): the reference temperature column is
            # 'TC_tmmn_gs_LTavg' -- confirm this is the intended baseline.
            df_all['pet' + suffix] = df_all['TC_pet_gs_LTavg'] \
                * rsds / df_all['TC_srad_gs_LTavg'] \
                * f_temp(tas, df_all['elev_pa']) \
                / f_temp(df_all['TC_tmmn_gs_LTavg'], df_all['elev_pa'])

df_all.to_csv(os.path.join(data_dir, 'model_inputs', 'df_model_all_annual_CMIP_proj.csv'))

In [24]:
# Preview the merged table; the notebook renders a truncated view of rows and columns.
df_all

Unnamed: 0,Lat,Lon,sp,n,ymin,ymax,count,yrange,ERA5_rf_alpha_LTavg,ERA5_rf_lambda_LTavg,...,par_ssp245_UKESM1-0-LL_gs,vpd_ssp245_UKESM1-0-LL_gs,rf_alpha_ssp245_UKESM1-0-LL_gs,rf_lambda_ssp245_UKESM1-0-LL_gs,pet_ssp245_UKESM1-0-LL_gs,par_ssp585_UKESM1-0-LL_gs,vpd_ssp585_UKESM1-0-LL_gs,rf_alpha_ssp585_UKESM1-0-LL_gs,rf_lambda_ssp585_UKESM1-0-LL_gs,pet_ssp585_UKESM1-0-LL_gs
0,43.2562,-8.8250,129.0,10.700972,1995,2015,12,20,0.005236,0.603362,...,0.000525,736.918075,0.004833,0.431431,0.005607,0.000546,1256.758573,0.000374,0.431431,0.010409
1,43.2562,-8.8250,129.0,10.700972,1995,2015,12,20,0.005236,0.603362,...,0.000507,704.159871,0.005030,0.431431,0.005334,0.000546,1142.456603,0.000468,0.431431,0.010391
2,43.2562,-8.8250,129.0,10.700972,1995,2015,12,20,0.005236,0.603362,...,0.000523,672.290184,0.004124,0.431431,0.005349,0.000546,1224.531955,0.000512,0.431431,0.010706
3,43.2562,-8.8250,129.0,10.700972,1995,2015,12,20,0.005236,0.603362,...,0.000517,660.204245,0.005080,0.431431,0.005650,0.000551,1165.962660,0.001235,0.431431,0.010559
4,43.2562,-8.8250,129.0,10.700972,1995,2015,12,20,0.005236,0.603362,...,0.000547,704.824699,0.003112,0.431431,0.005569,0.000558,1495.877251,0.000190,0.431431,0.011462
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35169,66.3025,29.5011,118.0,9.483958,1995,2009,8,14,0.002292,0.826733,...,,480.317320,0.002130,1.008569,,0.000478,1032.447782,0.002323,1.008569,0.008812
35170,66.3025,29.5011,118.0,9.483958,1995,2009,8,14,0.002292,0.826733,...,,510.867626,0.005737,1.008569,,0.000403,906.044317,0.003708,1.008569,0.007559
35171,66.3025,29.5011,118.0,9.483958,1995,2009,8,14,0.002292,0.826733,...,,562.682593,0.003362,1.008569,,0.000418,1079.335588,0.003673,1.008569,0.008503
35172,66.3025,29.5011,118.0,9.483958,1995,2009,8,14,0.002292,0.826733,...,,491.848681,0.002669,1.008569,,0.000468,1294.224635,0.002144,1.008569,0.010362


In [25]:
# List every column name of the table currently bound to df_all.
list(df_all.keys())

['Lat',
 'Lon',
 'sp',
 'n',
 'ymin',
 'ymax',
 'count',
 'yrange',
 'ERA5_rf_alpha_LTavg',
 'ERA5_rf_lambda_LTavg',
 'ERA5_rf_alpha_gs_LTavg',
 'ERA5_rf_lambda_gs_LTavg',
 'ERA5_tmp_LTavg',
 'ERA5_tmp_gs_LTavg',
 'ERA5_tmp_gs_peak_LTavg',
 'ERA5_pre_LTavg',
 'ERA5_pre_gs_LTavg',
 'ERA5_pre_gs_peak_LTavg',
 'ERA5_pa_LTavg',
 'ERA5_pa_gs_LTavg',
 'ERA5_pa_gs_peak_LTavg',
 'ERA5_rf_alpha_LTstd',
 'ERA5_rf_lambda_LTstd',
 'ERA5_rf_alpha_gs_LTstd',
 'ERA5_rf_lambda_gs_LTstd',
 'ERA5_tmp_LTstd',
 'ERA5_tmp_gs_LTstd',
 'ERA5_tmp_gs_peak_LTstd',
 'ERA5_pre_LTstd',
 'ERA5_pre_gs_LTstd',
 'ERA5_pre_gs_peak_LTstd',
 'ERA5_pa_LTstd',
 'ERA5_pa_gs_LTstd',
 'ERA5_pa_gs_peak_LTstd',
 'TC_srad_LTavg',
 'TC_pet_LTavg',
 'TC_aet_LTavg',
 'TC_def_LTavg',
 'TC_pdsi_LTavg',
 'TC_ro_LTavg',
 'TC_soil_LTavg',
 'TC_tmmn_LTavg',
 'TC_tmmx_LTavg',
 'TC_vap_LTavg',
 'TC_vpd_LTavg',
 'TC_vs_LTavg',
 'TC_days_month_LTavg',
 'TC_tmean_LTavg',
 'TC_aet_gs_LTavg',
 'TC_def_gs_LTavg',
 'TC_pdsi_gs_LTavg',
 'TC_pet_gs