In [1]:
# Import necessary libraries
from intake_esgf import ESGFCatalog
import numpy as np
import xarray as xr
import netCDF4
import csv

In [2]:
# Create functions needed

# RMSE function
def get_RMSE(model, obs, weights=None):
    """Return the spatial root-mean-square error between two gridded fields.

    Parameters
    ----------
    model, obs : xarray.DataArray
        Fields with ``lat`` and ``lon`` dimensions. They are subtracted directly,
        so both must be on the same grid and use the same longitude convention.
    weights : xarray.DataArray, optional
        Grid-cell weights without NaNs, e.g. ``np.cos(np.deg2rad(obs.lat))`` for
        area weighting on a regular lat/lon grid. When omitted every grid cell
        counts equally, which is the original behaviour of this function.

    Returns
    -------
    float
        The RMSE as a plain Python number. ``.item()`` only works when a single
        value is left, so the inputs must not carry dimensions other than
        ``lat`` and ``lon``.
    """
    squared_error = (model - obs) ** 2

    # Mean squared error over the horizontal grid (plain or weighted).
    if weights is None:
        mse = squared_error.mean(dim=['lat', 'lon'])
    else:
        mse = squared_error.weighted(weights).mean(dim=['lat', 'lon'])

    rmse = np.sqrt(mse)
    return rmse.values.item()

# Function for shifting lon according to Bharat's code
def shift_lon(model):
    """Return a copy of ``model`` with its longitude axis re-centred by 180 degrees.

    The ``lon`` coordinate values are reduced by 180 and the two halves of the
    data along ``lon`` are swapped. Intended for fields whose longitudes run
    eastward from 0 (TODO confirm for every input grid), so that the western
    hemisphere ends up in the first half of the array.

    Parameters
    ----------
    model : xarray.DataArray
        Field with a ``lon`` dimension of even length. ``lon`` may sit on any
        axis; it is looked up by name.

    Returns
    -------
    xarray.DataArray
        New float64 array with the same shape, name and attributes. The input
        is left untouched.

    Raises
    ------
    ValueError
        If the number of longitudes is odd, because the halves cannot be
        swapped cleanly.
    """
    n_lon = model.sizes['lon']
    if n_lon % 2:
        raise ValueError(
            f"shift_lon needs an even number of longitudes to swap halves, got {n_lon}"
        )
    half = n_lon // 2
    lon_axis = model.get_axis_num('lon')

    # Rolling by half the axis length swaps the two halves in one step.
    # dtype=float keeps the float64 output the zero-filled buffer used to give.
    rolled = np.roll(np.asarray(model.values, dtype=float), half, axis=lon_axis)

    ds_tmp = model.copy(deep=True, data=rolled)
    ds_tmp['lon'] = ds_tmp['lon'] - 180
    return ds_tmp

In [3]:
# Create an empty ESGF catalog; it contains no datasets until search() is called.
cat = ESGFCatalog()
print(cat)  # the catalog is still empty, so this only prints a hint to run search()

Perform a search() to populate the catalog.


In [4]:
# Search the Intake-ESGF catalog for monthly historical GPP from the selected models

models = [
    "ACCESS-ESM1-5",
    "IPSL-CM6A-LR",
    "CESM2",
    "UKESM1-0-LL",
    "BCC-CSM2-MR",
    "MPI-ESM1-2-HR",
    "CanESM5",
    "GFDL-ESM4",
    "NorESM2-LM",
    "MIROC-ES2L",
]

# Facets passed to the search, gathered in one place so they are easy to tweak.
search_facets = {
    "experiment_id": "historical",
    "source_id": models,
    "frequency": "mon",
    "variable_id": ["gpp"],
}
cat.search(**search_facets)

# Presumably keeps a single ensemble member per model (TODO confirm against the
# intake-esgf docs). Left as the last expression so the summary is displayed.
cat.remove_ensembles()

   Searching indices:   0%|          |0/2 [       ?index/s]

Summary information for 10 results:
experiment_id                                          [historical]
mip_era                                                     [CMIP6]
institution_id    [IPSL, MOHC, NCAR, BCC, MIROC, NOAA-GFDL, CCCm...
member_id                                      [r1i1p1f1, r1i1p1f2]
variable_id                                                   [gpp]
activity_drs                                                 [CMIP]
table_id                                                     [Lmon]
grid_label                                            [gr, gn, gr1]
source_id         [IPSL-CM6A-LR, UKESM1-0-LL, CESM2, BCC-CSM2-MR...
project                                                     [CMIP6]
dtype: object

In [5]:
# Obtain the datasets found by the search and load them into a dictionary
# keyed by dataset name (putting them in the shopping cart).

dsd = cat.to_dataset_dict()

Get file information:   0%|          |0/2 [       ?index/s]

Adding cell measures:   0%|          |0/10 [     ?dataset/s]

In [6]:
# Print the dictionary keys to see how the datasets are named
# (the keys look like institution.member.grid.source_id).
print(dsd.keys())

dict_keys(['MPI-M.r1i1p1f1.gn.MPI-ESM1-2-HR', 'BCC.r1i1p1f1.gn.BCC-CSM2-MR', 'IPSL.r1i1p1f1.gr.IPSL-CM6A-LR', 'NCC.r1i1p1f1.gn.NorESM2-LM', 'MIROC.r1i1p1f2.gn.MIROC-ES2L', 'CCCma.r1i1p1f1.gn.CanESM5', 'MOHC.r1i1p1f2.gn.UKESM1-0-LL', 'NCAR.r1i1p1f1.gn.CESM2', 'NOAA-GFDL.r1i1p1f1.gr1.GFDL-ESM4', 'CSIRO.r1i1p1f1.gn.ACCESS-ESM1-5'])


In [7]:
# Load each model's GPP for the analysis period into its own variable

# Time window selected from every model.
ANALYSIS_PERIOD = slice('1980-01-01', '2013-12-01')
# Scale factors applied to the raw gpp values; presumably they convert
# kg m-2 s-1 to g m-2 day-1 (TODO confirm against the dataset units).
SECONDS_PER_DAY = 86400
GRAMS_PER_KG = 1000

# Substring identifying each model's dataset key. The order defines
# model_1 ... model_10; presumably the rows of the weights file follow the
# same order (TODO confirm).
MODEL_KEY_TAGS = [
    'ACCESS',   # model_1
    'BCC',      # model_2
    'CanESM5',  # model_3
    'CESM2',    # model_4
    'GFDL',     # model_5
    'IPSL',     # model_6
    'MIROC',    # model_7
    'MPI',      # model_8
    'NorESM2',  # model_9
    'UKESM1',   # model_10
]

# list keys of model names
list_keys = list(dsd.keys())


def find_model_index(tag, keys):
    """Return the position in ``keys`` of the one key that contains ``tag``.

    Raises KeyError when no key or more than one key matches, so a missing or
    ambiguous model is reported here instead of as a NameError in a later cell.
    """
    hits = [position for position, key in enumerate(keys) if tag in key]
    if len(hits) != 1:
        raise KeyError(
            f"expected exactly one dataset key containing {tag!r}, "
            f"found {len(hits)} in {list(keys)}"
        )
    return hits[0]


def load_model_gpp(tag):
    """Return the scaled ``gpp`` field of the model matching ``tag`` for ANALYSIS_PERIOD."""
    key = list_keys[find_model_index(tag, list_keys)]
    return dsd[key]["gpp"].sel(time=ANALYSIS_PERIOD) * SECONDS_PER_DAY * GRAMS_PER_KG


(
    model_1,
    model_2,
    model_3,
    model_4,
    model_5,
    model_6,
    model_7,
    model_8,
    model_9,
    model_10,
) = [load_model_gpp(tag) for tag in MODEL_KEY_TAGS]

# Find out which model is CanESM (lowest resolution, used as the regridding target)
model_CanESM_id = find_model_index('CanESM', list_keys)
idx = model_CanESM_id  # kept because other cells still index list_keys with `idx`
print(idx, list_keys[idx])
print(model_CanESM_id)

5 CCCma.r1i1p1f1.gn.CanESM5
5


In [8]:
# Linear interpolation/regridding to match the lowest-resolution model (CanESM5)

# Extract the lat/lon grid from CanESM5. model_CanESM_id is used instead of the
# leftover loop variable `idx`, so this cell does not depend on which loop ran last.
lat_target = dsd[list_keys[model_CanESM_id]]["gpp"].lat.values
lon_target = dsd[list_keys[model_CanESM_id]]["gpp"].lon.values


def regrid_and_time_mean(field, lat, lon):
    """Interpolate ``field`` onto ``lat``/``lon`` and return (regridded, time mean).

    NOTE(review): interp() does not wrap around in longitude, so target points
    outside a model's own lon range presumably come back as NaN - confirm this
    is acceptable at the grid edges.
    """
    regridded = field.interp(lat=lat, lon=lon)
    return regridded, regridded.mean(dim="time")


# Regrid each model and take the long-term (time) mean
gpp_model_1_Regridded, gpp_model_1_Regridded_mean = regrid_and_time_mean(model_1, lat_target, lon_target)
gpp_model_2_Regridded, gpp_model_2_Regridded_mean = regrid_and_time_mean(model_2, lat_target, lon_target)
gpp_model_3_Regridded, gpp_model_3_Regridded_mean = regrid_and_time_mean(model_3, lat_target, lon_target)
gpp_model_4_Regridded, gpp_model_4_Regridded_mean = regrid_and_time_mean(model_4, lat_target, lon_target)
gpp_model_5_Regridded, gpp_model_5_Regridded_mean = regrid_and_time_mean(model_5, lat_target, lon_target)
gpp_model_6_Regridded, gpp_model_6_Regridded_mean = regrid_and_time_mean(model_6, lat_target, lon_target)
gpp_model_7_Regridded, gpp_model_7_Regridded_mean = regrid_and_time_mean(model_7, lat_target, lon_target)
gpp_model_8_Regridded, gpp_model_8_Regridded_mean = regrid_and_time_mean(model_8, lat_target, lon_target)
gpp_model_9_Regridded, gpp_model_9_Regridded_mean = regrid_and_time_mean(model_9, lat_target, lon_target)
gpp_model_10_Regridded, gpp_model_10_Regridded_mean = regrid_and_time_mean(model_10, lat_target, lon_target)

In [9]:
# Apply shift_lon to the time-mean field of every model
(
    gpp_model_1_Regridded_mean_shifted,
    gpp_model_2_Regridded_mean_shifted,
    gpp_model_3_Regridded_mean_shifted,
    gpp_model_4_Regridded_mean_shifted,
    gpp_model_5_Regridded_mean_shifted,
    gpp_model_6_Regridded_mean_shifted,
    gpp_model_7_Regridded_mean_shifted,
    gpp_model_8_Regridded_mean_shifted,
    gpp_model_9_Regridded_mean_shifted,
    gpp_model_10_Regridded_mean_shifted,
) = [
    shift_lon(time_mean)
    for time_mean in (
        gpp_model_1_Regridded_mean,
        gpp_model_2_Regridded_mean,
        gpp_model_3_Regridded_mean,
        gpp_model_4_Regridded_mean,
        gpp_model_5_Regridded_mean,
        gpp_model_6_Regridded_mean,
        gpp_model_7_Regridded_mean,
        gpp_model_8_Regridded_mean,
        gpp_model_9_Regridded_mean,
        gpp_model_10_Regridded_mean,
    )
]

In [10]:
# Define the observation

# Open the NetCDF file as an xarray Dataset
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
ds = xr.open_dataset('/Users/6i0/Documents/Data/gpp_WECANN.nc')

# Access the GPP variable from the Dataset
gpp_data = ds['gpp']
gpp_data_mean = gpp_data.mean(dim="time")
lat = ds['lat']
lon = ds['lon']
time = ds['time']

# Target grid: the CanESM5 lat/lon with the longitudes reduced by 180, i.e. the
# same longitude values shift_lon() gives the model fields. model_CanESM_id is
# used instead of the leftover loop variable `idx`.
lat_target = dsd[list_keys[model_CanESM_id]]["gpp"].lat.values
lon_target = dsd[list_keys[model_CanESM_id]]["gpp"].lon.values - 180

# Interpolate the observations onto the target grid, then average over time
gpp_data_Regridded = gpp_data.interp(lat=lat_target, lon=lon_target)
gpp_data_Regridded_mean = gpp_data_Regridded.mean(dim="time")

# Shift data
# NOTE(review): the observation was interpolated straight onto the already
# shifted longitudes, so shift_lon() here subtracts 180 a second time. The
# unshifted gpp_data_Regridded_mean looks like the field that matches the
# shifted models - confirm before using this variable.
gpp_data_Regridded_mean_shifted = shift_lon(gpp_data_Regridded_mean)


In [11]:
# Read the model weights of every weighting scheme from the CSV file

# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
WEIGHTS_CSV = '/Users/6i0/Documents/Data/Weights.csv'


def read_weight_columns(path, columns):
    """Read the named columns of a CSV file as float arrays.

    Parameters
    ----------
    path : str
        CSV file with a header row.
    columns : list of str
        Column headers to extract.

    Returns
    -------
    dict
        Maps each header to a 1-D float numpy array of that column's
        non-empty cells, in file order.

    Raises
    ------
    KeyError
        If a requested header is missing from the file.
    ValueError
        If a non-empty cell cannot be converted to float.
    """
    collected = {name: [] for name in columns}
    with open(path, newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            for name in columns:
                cell = row[name]
                # Columns may have different lengths; skip blank cells.
                if cell:
                    collected[name].append(cell)
    # Convert the strings to numerical values
    return {name: np.array(cells, dtype=float) for name, cells in collected.items()}


weight_table = read_weight_columns(
    WEIGHTS_CSV,
    ['Ensemble Mean', 'BMA 100 sample', 'BMA 1000 sample', 'Sanderson 0.1', 'Sanderson 0.25'],
)
weights = weight_table['Ensemble Mean']
weights_BMA100 = weight_table['BMA 100 sample']
weights_BMA1000 = weight_table['BMA 1000 sample']
weights_sanderson_10 = weight_table['Sanderson 0.1']
weights_sanderson_25 = weight_table['Sanderson 0.25']

# Print the extracted weights
print(weights_BMA100)

# Normalize the BMA weights so each set sums to 1
weights_norm_BMA100 = weights_BMA100 / np.sum(weights_BMA100)
weights_norm_BMA1000 = weights_BMA1000 / np.sum(weights_BMA1000)

# Normalize the Sanderson weights so each set sums to 1
weights_norm_sanderson_10 = weights_sanderson_10 / np.sum(weights_sanderson_10)
weights_norm_sanderson_25 = weights_sanderson_25 / np.sum(weights_sanderson_25)


[4.76e-03 7.95e-02 1.97e-02 2.77e-02 8.77e-02 2.77e-01 2.56e-01 1.81e-01
 6.60e-02 1.51e-06]


In [12]:
# Create the weighted model averages, one per weighting scheme

def combine_models(model_weights, fields):
    """Return model_weights[0]*fields[0] + model_weights[1]*fields[1] + ...

    Terms are accumulated left to right, one weight per entry of ``fields``.
    """
    total = model_weights[0] * fields[0]
    for position in range(1, len(fields)):
        total = total + (model_weights[position] * fields[position])
    return total


# Shifted time-mean fields in model_1 ... model_10 order.
shifted_model_means = [
    gpp_model_1_Regridded_mean_shifted,
    gpp_model_2_Regridded_mean_shifted,
    gpp_model_3_Regridded_mean_shifted,
    gpp_model_4_Regridded_mean_shifted,
    gpp_model_5_Regridded_mean_shifted,
    gpp_model_6_Regridded_mean_shifted,
    gpp_model_7_Regridded_mean_shifted,
    gpp_model_8_Regridded_mean_shifted,
    gpp_model_9_Regridded_mean_shifted,
    gpp_model_10_Regridded_mean_shifted,
]

# The "unweighted" average uses the 'Ensemble Mean' column as read, without
# normalization (presumably equal weights - TODO confirm).
gpp_model_mean_unweighted_shifted = combine_models(weights, shifted_model_means)

gpp_model_mean_weighted_BMA100_shifted = combine_models(weights_norm_BMA100, shifted_model_means)
gpp_model_mean_weighted_BMA1000_shifted = combine_models(weights_norm_BMA1000, shifted_model_means)

gpp_model_mean_weighted_shifted_sanderson_10 = combine_models(weights_norm_sanderson_10, shifted_model_means)
gpp_model_mean_weighted_shifted_sanderson_25 = combine_models(weights_norm_sanderson_25, shifted_model_means)

In [13]:
# Estimate the RMSE of every model average and every single model against
# the regridded, time-mean observation

(
    rmse_unweighted,
    rmse_weighted_sanderson_10,
    rmse_weighted_sanderson_25,
    rmse_weighted_BMA100,
    rmse_weighted_BMA1000,
) = [
    get_RMSE(ensemble_mean, gpp_data_Regridded_mean)
    for ensemble_mean in (
        gpp_model_mean_unweighted_shifted,
        gpp_model_mean_weighted_shifted_sanderson_10,
        gpp_model_mean_weighted_shifted_sanderson_25,
        gpp_model_mean_weighted_BMA100_shifted,
        gpp_model_mean_weighted_BMA1000_shifted,
    )
]

(
    rmse_model_1,
    rmse_model_2,
    rmse_model_3,
    rmse_model_4,
    rmse_model_5,
    rmse_model_6,
    rmse_model_7,
    rmse_model_8,
    rmse_model_9,
    rmse_model_10,
) = [
    get_RMSE(single_model, gpp_data_Regridded_mean)
    for single_model in (
        gpp_model_1_Regridded_mean_shifted,
        gpp_model_2_Regridded_mean_shifted,
        gpp_model_3_Regridded_mean_shifted,
        gpp_model_4_Regridded_mean_shifted,
        gpp_model_5_Regridded_mean_shifted,
        gpp_model_6_Regridded_mean_shifted,
        gpp_model_7_Regridded_mean_shifted,
        gpp_model_8_Regridded_mean_shifted,
        gpp_model_9_Regridded_mean_shifted,
        gpp_model_10_Regridded_mean_shifted,
    )
]

In [14]:
# RMSE of the model averages: header line first, then the values in the same order
print('Unweighted , ','Sanderson 10 , ','Sanderson 25 , ','BMA 100 , ','BMA 1000')
(
    rmse_unweighted,
    rmse_weighted_sanderson_10,
    rmse_weighted_sanderson_25,
    rmse_weighted_BMA100,
    rmse_weighted_BMA1000,
)

Unweighted ,  Sanderson 10 ,  Sanderson 25 ,  BMA 100 ,  BMA 1000


(0.6141278000213121,
 0.6027639604467278,
 0.6083123603128108,
 0.5696669772856218,
 0.5536836627090607)

In [15]:
# Labels for the per-model RMSE values, in model_1 ... model_10 order.
# Spelled exactly like the source_ids used in the catalog search
# (the earlier "MIROC-ESM2L" and "MPI-ESM1.2-HR" did not match them).
model_print = [
    "ACCESS-ESM1-5",
    "BCC-CSM2-MR",
    "CanESM5",
    "CESM2",
    "GFDL-ESM4",
    "IPSL-CM6A-LR",
    "MIROC-ES2L",
    "MPI-ESM1-2-HR",
    "NorESM2-LM",
    "UKESM1-0-LL"
]

print(model_print)
# RMSE of each individual model, displayed in the same order as the labels
(
    rmse_model_1,
    rmse_model_2,
    rmse_model_3,
    rmse_model_4,
    rmse_model_5,
    rmse_model_6,
    rmse_model_7,
    rmse_model_8,
    rmse_model_9,
    rmse_model_10,
)

['ACCESS-ESM1-5', 'BCC-CSM2-MR', 'CanESM5', 'CESM2', 'GFDL-ESM4', 'IPSL-CM6A-LR', 'MIROC-ESM2L', 'MPI-ESM1.2-HR', 'NorESM2-LM', 'UKESM1-0-LL']


(1.097191344345589,
 1.0839707684124917,
 1.182371693672462,
 1.0643187041795092,
 1.0394481120899246,
 0.8097099416640622,
 1.2364457928196744,
 1.2427441103579708,
 1.0225320188046072,
 1.3966485615171664)