In [1]:
'''
DeepDA_verify verifies the DA output.

It reads the proxy, prior, and posterior data from the DA output files and configuration files.
Then it calculates the statistics (corrcoef and CE) of the DA results and saves the outputs.

By Mingsong Li
    Penn State 
    Now at Peking University
    2/17/2020
    
Updated Mar. 03, 2020
Updated Oct. 11, 2020  # plot enhanced
Updated Oct. 12, 2020  # multi jobs
Updated June 30, 2021 
Updated July 15, 2021  # ZSCORE

#df_ob_pi   = df_ob[df_eval['proxy'] == proxy_i]   : only works for single-proxy experiments

Updated Aug. 7, 2021  # works for two slice verification: pre-PETM and peak-PETM
'''

import sys
sys.path.append('../')
from DeepDA_lib import modules_nc
from DeepDA_lib import DeepDA_psm
from scipy import stats
import shutil

import h5py
#import time
import yaml
import numpy as np
import numpy.ma as ma
import pandas
import os
from netCDF4 import Dataset
from sys import platform as sys_pf
import matplotlib.pyplot as plt
if sys_pf == 'darwin':
    import matplotlib
    matplotlib.use("TkAgg")
    import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)
try:
    import bayspline
except ImportError as e1:
    print('Warning:', e1)
try:
    import bayspar
except ImportError as e2:
    print('Warning:', e2)
try:
    import bayfox
except ImportError as e3:
    print('Warning:', e3)
try:
    import baymag
except ImportError as e4:
    print('Warning:', e4)

print('>>  Import package => OKAY')
print('')

###################################################################
#####################    User defined start   #####################
###################################################################

# Root folder that holds the DA output, chosen by platform.
# (An earlier macOS location was '/volumes/DA/DeepDA/wrk/'.)
xlsxdir = '/volumes/Backup/DeepDA/' if sys_pf == 'darwin' else '/mnt/d/DeepDA/wrk/'

# How the experiments to verify are selected:
#    0 = use the explicit list given in `explist` below
#    1 = use every sub-folder found under `xlsxdir`
expstyle = 0
#expstyle = 1

# Experiment names (only used when expstyle is 0) and the proxy types they
# contain. Earlier presets are kept below for reference.
#explist = ['petmproxy3slices_v0.0.20_w_deepmip.csv_petm29_v20_20210718_all_bays_MCsd100_pHcor_omega5_frac0.98']
#explist = ['petmproxy3slices_v0.1.csv_petm29_v0.1deep_20210924_All.noAc._bays_MCsd50_pHcor_frac0.98_testR.75']
#Typelist = ['d18O','TEX86','MgCa','caco3']  # proxy type list
#explist = ['petmproxy3slices_v0.1.csv_petm29_v0.1deep_20210924_d18O.noAc._bays_MCsd50_pHcor_frac0.8_testR']  # 1 unselected proxy
#Typelist = ['d18O']
#explist = ['petmproxy3slices_v0.1.csv_petm29_v0.1deep_20210924_CaCO3._bays_MCsd50_pHcor_frac0.95_testR'] #
#Typelist = ['caco3']
#explist = ['petmproxy3slices_v0.1.csv_petm29_v0.1deep_20210924_CaCO3._bays_MCsd50_pHcor_frac0.95_testR.75'] #
#Typelist = ['caco3']
#explist = ['petmproxy3slices_v0.1.csv_petm29_v0.1deep_20210924_tex.noAc._bays_MCsd50_pHcor_frac0.9_testR']
#Typelist = ['TEX86']
#explist = ['petmproxy3slices_v0.1.csv_petm29_v0.1deep_20210924_mgca.noAc._bays_MCsd50_pHcor_frac0.8_testR']
#Typelist = ['MgCa']
explist = [
    'petmproxy3slices_v0.1.csv_petm29_v0.1deep_20211020_All.noAc._bays_MCsd50_pHcor_frac0.98_testR_Ca75',
]
Typelist = ['d18O', 'TEX86', 'MgCa', 'caco3']  # proxy type list

# Names of the reconstructed time slices, in slice order.
label_all = ('prePETM', 'peakPETM', 'postPETM', 'PETM_body')
# Slice IDs used for the PETM warming comparison.
warmcomp = [0, 1]

# Number of proxy types; follows Typelist.
pn = len(Typelist)
#pn = 4  # use the first pn data

# Grid size used when reshaping the model fields.
dum_jmax = dum_imax = 36

# True: standardize; False: use raw analysis data
AnalysisStd = True

# Output switches: summary tables, and evaluation plots.
savesummary = True
savesummary_slice = True
showplot = False

# Axis limits for the plots: one [min, max] row per entry, and a single
# [min, max] pair for the z-score plot.
axis_lim = np.array([
    [-6, 1],
    [0, 1],
    [0, 7],
    [0, 100],
])
axis_limz = np.array([-4, 4])

###################################################################
#####################    User defined end     #####################
###################################################################
# Number of named time slices.
label_all_len = len(label_all)

# expstyle 0 keeps the user-supplied explist as it is, so only the
# "all folders" mode needs any work: every directory directly under
# xlsxdir becomes an experiment name.
if expstyle == 1:
    dir1 = list(filter(lambda name: os.path.isdir(os.path.join(xlsxdir, name)),
                       os.listdir(xlsxdir)))
    explist = dir1
    #print(dir1[0])

for diri in range(len(explist)):
    
    # run the first 5 folders
    #if diri < 5:
    #    continue
    en = explist[diri]
    print(en)

    # Total number of grid cells in one 2d field.
    dum_ijmax = dum_imax * dum_jmax

    # Project-level configuration: read here only to find the working
    # directory that holds the per-experiment YAML file.
    # `with` guarantees the file is closed even if parsing raises.
    # NOTE(review): yaml.FullLoader is kept as in the original; if these
    # files can ever come from an untrusted source, switch to yaml.safe_load.
    config_name_f = "../DeepDA_config.yml"
    with open(config_name_f, 'r') as f:
        yml_dict_f = yaml.load(f, Loader=yaml.FullLoader)

    dir_data_save = yml_dict_f['core']['wrkdir']

    # Experiment-specific configuration, named after the experiment folder.
    config_name = dir_data_save + '/' + en + '.yml'
    with open(config_name, 'r') as f:
        yml_dict = yaml.load(f, Loader=yaml.FullLoader)
    print('>>  Loading configuration file => OKAY')
    print('')
    # Read parameters from configurations
    MCn = yml_dict['MonteCarlo']['number']
    #debug
    #MCn = 30
    nens = yml_dict['core']['nens']

    nexp = yml_dict['core']['nexp']
    dir_data_save = yml_dict['core']['wrkdir']
    log_level = yml_dict['log_level']
    #debug
    #log_level = 4
    
    recon_period = yml_dict['core']['recon_period']
    recon_timescale = yml_dict['core']['recon_timescale_interval']
    recon_period_full = np.arange(recon_period[0],recon_period[1]+1,recon_timescale)
    recon_period_len = recon_period_full.shape[0]
    recon_timescale = yml_dict['core']['recon_timescale_interval']
    save_ens_full = yml_dict['core']['save_ens_full']
    proxy_assim2 = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['proxy_assim2']
    proxy_psm_type    = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['proxy_psm_type']
    proxy_blacklist   = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['proxy_blacklist']
    proxy_order       = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['proxy_order']
    proxy_list = [item for item in proxy_order if item not in proxy_blacklist]
    proxy_err_eval   = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['proxy_err_eval']
    lon_label = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['lon_label']
    lat_label = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['lat_label']

    proxy_frac      = yml_dict['proxies']['proxy_frac']
    prior_source = yml_dict['prior']['prior_source'] #
    dum_lon_offset = yml_dict['prior'][prior_source]['dum_lon_offset'] # longitude offset
    limit_hard_keys = list(yml_dict['prior'][prior_source]['limit_hard'].keys())
    psm_baymag_ln =  yml_dict['psm']['bayesreg_mgca_pooled_red']['psm_baymag_ln']
    print(limit_hard_keys)

    data_period_id    = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['data_period_id']
    data_period_idstd = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['data_period_idstd']
    geologic_age = yml_dict['core']['geologic_age']

    # read preprior HDF5 file
    dir_proxy_data = dir_data_save +'/'+ yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['dbversion']
    proxy_err_eval = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['proxy_err_eval']

    # ========= dataset for plot =========
    cGENIEGrid = yml_dict['core']['proj_dir'] + '/data_misc/cGENIEGrid.csv'
    cGENIEGrid = pandas.read_csv(cGENIEGrid)
    cGENIEGridB_lat36 = cGENIEGrid['lat']
    cGENIEGridB_lon36 = cGENIEGrid['lon']
    cGENIEGrid = cGENIEGrid.to_numpy()
    #print('>>  OKAY.')

    # Read global mean and plot to show results
    ########## Prior #########
    prior_state_variable = yml_dict['prior'][prior_source]['state_variable']  # note: ['2d': xxx; '3d': xxx]
    dum_lon_offset = yml_dict['prior'][prior_source]['dum_lon_offset'] # longitude offset
    
    # ========= Monte Carlo =========
    local_rad_list = yml_dict['core']['local_rad_list'] #
    locRadn= len(local_rad_list)
    local_rad_list = np.asarray(local_rad_list)
    #print(local_rad_list)
    #print(locRadn)
    proxy_frac_list   = yml_dict['proxies']['proxy_frac']
    proxy_fracn = len(proxy_frac_list)
    proxy_frac_list = np.asarray(proxy_frac_list)
    Rscale_style = yml_dict['core']['Rscale_style']
    
    if Rscale_style == 1:
        Rscale_list = yml_dict['core']['Rscale']
        Rscalen = len(Rscale_list)
        Rscale_list = np.asarray(Rscale_list)
        
        # debug
        #Rscale_list = np.array([2.0])
        #Rscalen = len(Rscale_list)
        
    elif Rscale_style == 2:
        Rscalen = 1
        Rscale_list = [0]
    # save prior variable list
    prior_variable_dict = []  # variable list
    prior_nc_file_list = []  # nc file list
    prior_variable_dict_3d = []  # variable list
    prior_nc_file_list_3d = []  # nc file list

    for key, value in prior_state_variable.items():
        nc_keyvalue = prior_state_variable[key]['ncname']  # note: 2d dict
        print('>>  nc_keyvalue {}...'.format(nc_keyvalue))
        for key1, value1 in nc_keyvalue.items():
            print('>>  {}: {}'.format(key1,value1))
            for i in range(len(prior_state_variable[key][value1])):
                if key in ['2d']:
                    prior_variable_dict.append(prior_state_variable[key][value1][i])
                    prior_nc_file_list.append(key1+'/'+value1+'.nc')
                elif key in ['3d']:
                    prior_variable_dict_3d.append(prior_state_variable[key][value1][i])
                    prior_nc_file_list_3d.append(key1+'/'+value1+'.nc')

    # variable list
    prior_variable_len = len(prior_variable_dict)
    prior_variable3d_len = len(prior_variable_dict_3d)
    print('>>  Number of 2d prior variables is: {}. List:'.format(prior_variable_len))
    print('      {}'.format(prior_variable_dict))
    print('>>  Number of 3d prior variables is: {}. List:'.format(prior_variable3d_len))
    print('      {}'.format(prior_variable_dict_3d))

    MC_dir =  dir_data_save + '/' + en + '/'

    Xa2d_full_np   = np.full((locRadn,proxy_fracn,Rscalen,MCn*nens,prior_variable_len,recon_period_len),np.nan)  # save mean of each variable (column) of each MC run (row)
    Xa2d_mean_np   = np.full((locRadn,proxy_fracn,Rscalen,MCn,prior_variable_len,recon_period_len),np.nan)  # save mean of each variable (column) of each MC run (row)
    Xa2d_std_np    = np.full((locRadn,proxy_fracn,Rscalen,MCn,prior_variable_len,recon_period_len),np.nan)  # save mean of each variable (column) of each MC run (row)
    Xa2d_all_np    = np.full((dum_jmax, dum_imax,locRadn,proxy_fracn,Rscalen,MCn, prior_variable_len, recon_period_len),np.nan)  # save mean of each variable (column) of each MC run (row)
    Xa2d_allstd_np = np.full((dum_jmax, dum_imax, locRadn,proxy_fracn,Rscalen,MCn, prior_variable_len, recon_period_len),np.nan)  # save mean of each variable (column) of each MC run (row)
    Xa2d_mean_np2  = np.full((locRadn,proxy_fracn,Rscalen,prior_variable_len,recon_period_len),np.nan)  # save mean of each variable (column) of each MC run (row)
    Xa2d_std_np2   = np.full((locRadn,proxy_fracn,Rscalen,prior_variable_len,recon_period_len),np.nan)  # save mean of each variable (column) of each MC run (row)
    df_evaluation  = pandas.DataFrame()
    df_zscore_all  = pandas.DataFrame()
    for locRadi in range(locRadn):
        locRad = local_rad_list[locRadi]
        if locRad is None:
            locRadv = 0 # for filename only
        else:
            locRadv = locRad
        for proxy_fraci in range(proxy_fracn):
            proxy_frac = proxy_frac_list[proxy_fraci]

            for Rscalei in range(Rscalen):
                Rscale = Rscale_list[Rscalei]
                
                savefilename_add = '_loc_'+ str(locRadv)+'_proxy_frac_'+ str(proxy_frac)+'_Rscale_'+str(Rscale)
                
                for MCi in range(MCn):
                    # NetCDF file name
                    filename_short = '_loc_', str(locRadv),'_proxy_frac_', str(proxy_frac),'_Rscale_',str(Rscale),'_MC_',str(MCi) 
                    nc_filename = MC_dir + ''.join(filename_short) + '.nc'
                    hdf5name    = MC_dir + ''.join(filename_short) + '.hdf5'

                    print('>>  Read nc file: {}'.format(nc_filename))
                    
                    # Read the posterior (Xa) ensemble, mean and variance of every
                    # 2d prior variable from this Monte Carlo run and store the
                    # per-slice global statistics.
                    # The NetCDF file is opened once and closed when the block is
                    # left; the original opened it three times per variable and
                    # never closed it explicitly.
                    with Dataset(nc_filename) as nc_xa:
                        for Xa2d_vari in range(prior_variable_len):

                            Xa_full_name_vari = prior_variable_dict[Xa2d_vari] +'_Xa_full'
                            Xa_mean_name_vari = prior_variable_dict[Xa2d_vari] +'_Xa_mean'
                            Xa_variance_name_vari = prior_variable_dict[Xa2d_vari] +'_Xa_variance'
                            # [:] loads the data into memory, so the arrays stay
                            # valid after the file is closed.
                            Xa_full_vari = nc_xa.variables[Xa_full_name_vari][:]
                            Xa_mean_vari = nc_xa.variables[Xa_mean_name_vari][:]
                            Xa_variance_vari = nc_xa.variables[Xa_variance_name_vari][:]

                            if prior_variable_dict[Xa2d_vari] in limit_hard_keys:
                                # some variables have hard limitation: e.g., CaCO3 = [0, 100]
                                limit_hard_vari = yml_dict['prior'][prior_source]['limit_hard'][prior_variable_dict[Xa2d_vari]]
                                lim_min = limit_hard_vari['lim_min']
                                lim_max = limit_hard_vari['lim_max']
                                #print('limit min {} and max {}'.format(lim_min, lim_max))
                                # Compare against None rather than truthiness: a limit
                                # of 0 is a valid bound and must not be skipped.
                                # NOTE(review): assumes "no limit" is written as null
                                # in the YAML file - confirm against the config.
                                if lim_min is not None:
                                    if np.any(Xa_full_vari<lim_min):
                                        Xa_full_vari[Xa_full_vari<lim_min] = lim_min
                                        # The stored mean/variance no longer match the
                                        # clamped ensemble, so recompute them over the
                                        # ensemble axis.
                                        Xa_mean_vari = np.mean(Xa_full_vari,axis=2)
                                        Xa_variance_vari = np.var(Xa_full_vari,axis=2)
                                        print('>>    Force {} value to be >= {}'.format(prior_variable_dict[Xa2d_vari],lim_min))
                                if lim_max is not None:
                                    if np.any(Xa_full_vari>lim_max):
                                        Xa_full_vari[Xa_full_vari>lim_max] = lim_max
                                        Xa_mean_vari = np.mean(Xa_full_vari,axis=2)
                                        Xa_variance_vari = np.var(Xa_full_vari,axis=2)
                                        print('>>    Force {} value to be <= {}'.format(prior_variable_dict[Xa2d_vari], lim_max))

                            for reconi in range(recon_period_len):

                                # One column per ensemble member; averaging over the
                                # grid cells gives one global mean per member.
                                Xa_full_reconi = Xa_full_vari[:,:,:,0,reconi].reshape((dum_ijmax,nens))
                                Xa_full_reconi_mean = np.nanmean(Xa_full_reconi,axis=0)

                                Xa_mean_reconi = Xa_mean_vari[:,:,0,reconi]
                                Xa2d_all_np[:,:,locRadi,proxy_fraci,Rscalei,MCi,Xa2d_vari,reconi] = np.copy(Xa_mean_vari[:,:,0,reconi])
                                Xa_mean_reconi_mean = np.nanmean(Xa_mean_reconi)

                                # Note: Xa2d_allstd_np stores the variance; the square
                                # root is only taken for the global value below.
                                Xa_variance_reconi = Xa_variance_vari[:,:,0,reconi]
                                Xa2d_allstd_np[:,:,locRadi,proxy_fraci,Rscalei,MCi,Xa2d_vari,reconi] = Xa_variance_vari[:,:,0,reconi]
                                Xa_std_reconi_mean = np.sqrt(np.nanmean(Xa_variance_reconi))

                                #print('>>  reconi = {}, mean is {}, std is {}'.format(reconi, Xa_mean_reconi_mean, Xa_std_reconi_mean))
                                # Each MC run fills its own block of nens rows.
                                Xa2d_full_np[locRadi,proxy_fraci,Rscalei,MCi*nens:(MCi+1)*nens,Xa2d_vari,reconi] = Xa_full_reconi_mean
                                Xa2d_mean_np[locRadi,proxy_fraci,Rscalei,MCi,Xa2d_vari,reconi] = Xa_mean_reconi_mean
                                Xa2d_std_np[locRadi,proxy_fraci,Rscalei,MCi,Xa2d_vari,reconi] = Xa_std_reconi_mean
                print('First variable: all MC mean')
                print(Xa2d_mean_np[locRadi,proxy_fraci,Rscalei,:,0,0])

                Xa2d_all_np = np.ma.masked_where(Xa2d_all_np > 9.0e+36, Xa2d_all_np)
                Xa2d_allstd_np = np.ma.masked_where(Xa2d_all_np > 9.0e+36, Xa2d_allstd_np)
                for Xa2d_vari in range(prior_variable_len):
                    for reconi in range(recon_period_len):
                        Xa2d_mean_np2[locRadi,proxy_fraci,Rscalei,Xa2d_vari,reconi] = np.nanmean(Xa2d_all_np[:,:,locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,reconi])
                        Xa2d_std_np2[locRadi,proxy_fraci,Rscalei,Xa2d_vari,reconi] = np.sqrt(np.nanmean(Xa2d_allstd_np[:,:,locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,reconi]))

                np.set_printoptions(precision=6, suppress=True)
                if log_level > 1:
                    print('All variable. Mean of variables x reconi')
                    print('{}'.format(Xa2d_mean_np2[locRadi,proxy_fraci,Rscalei,:,:]))
                #print('std  of variables x reconi')
                #print('{}'.format(Xa2d_std_np2))
    
                print('')
                print('Step #1: read data - Done')
                print('')

                # Calculate mean and std of each variable for each time slice
                # plot the ensemble values

                df = pandas.DataFrame()
                print('')
                print('DA - Summary of global mean and standard deviation')
                print('')
                
                if showplot:
                    fig, (ax0, ax1, ax2, ax3) = plt.subplots(nrows=4, figsize=(3, 6))
                    if recon_period_len>1:
                        fig2, (ax10, ax11, ax12, ax13) = plt.subplots(nrows=4, figsize=(3, 6))
                    params = {'mathtext.default': 'regular' }
                    plt.rcParams.update(params)
                    #plt.rcParams.update({'figure.figsize':(5,3), 'figure.dpi':110})
                    #fig.suptitle('DA')

                # 2d variables
                # For every 2d prior variable: append summary rows (mean, std and
                # percentiles per time slice, plus peak warming/cooling when at
                # least three slices exist) to `df`, and optionally draw the
                # histograms.
                # Percentile levels and the matching column names, in table order.
                pct_levels = np.array([2.5, 5, 25, 50, 75, 95, 97.5])
                pct_columns = ['2.5%', '5%', '25%', 'median', '75%', '95%', '97.5%']

                for Xa2d_vari in range(prior_variable_len):

                    print(prior_variable_dict[Xa2d_vari])
                    # Header row: carries only the variable name.
                    datadf = {'field': prior_variable_dict[Xa2d_vari], 'mean': [np.nan], 'std': [np.nan],
                              **{col: [np.nan] for col in pct_columns},
                              'label': ''}
                    df2 = pandas.DataFrame(datadf, index=[Xa2d_vari])
                    df = pandas.concat([df,df2])

                    sst_std_mc = np.std(Xa2d_mean_np[locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,:],axis=0)
                    if log_level > 2:
                        print('  _locR '+str(locRadv)+' proxy_frac '+str(proxy_frac)+' scaled r '+str(Rscale))

                    # Global means of all ensemble members of all MC runs for this
                    # variable; rows are members, columns are time slices.
                    ens_means = Xa2d_full_np[locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,:]

                    for reconi in range(recon_period_len):

                        meani = np.nanmean(ens_means[:, reconi])
                        stdi = np.std(ens_means[:, reconi])
                        perc = np.percentile(ens_means[:, reconi], pct_levels)
                        datadf = {'field': '', 'mean': [meani], 'std': [stdi],
                                  **{col: [val] for col, val in zip(pct_columns, perc)},
                                  'label': label_all[reconi]}
                        df2 = pandas.DataFrame(data = datadf, index=[Xa2d_vari])
                        df = pandas.concat([df,df2])
                        if log_level > 2:
                            print('    {:.3f} ± {:.3f}: {}'.format(meani, stdi, label_all[reconi]))

                    # Slice-to-slice differences do not depend on reconi, so they
                    # are computed once here instead of once per slice.
                    if recon_period_len>2:
                        warmpeak = ens_means[:, 1] - ens_means[:, 0]
                        #warmbody = ens_means[:, 2] - ens_means[:, 0]
                        coolpeak = ens_means[:, 2] - ens_means[:, 1]
                        warmpeakmean = np.nanmean(warmpeak)
                        warmpeakstd  = np.std(warmpeak)
                        warmperc = np.percentile(warmpeak, pct_levels)
                        coolpeakmean = np.nanmean(coolpeak)
                        coolpeakstd  = np.std(coolpeak)
                        coolperc = np.percentile(coolpeak, pct_levels)

                        df2 = pandas.DataFrame({'field': '', 'mean': [warmpeakmean], 'std': [warmpeakstd],
                                                **{col: [val] for col, val in zip(pct_columns, warmperc)},
                                                'label': 'Peak_warming'}, index=[Xa2d_vari])
                        df3 = pandas.DataFrame({'field': '', 'mean': [coolpeakmean], 'std': [coolpeakstd],
                                                **{col: [val] for col, val in zip(pct_columns, coolperc)},
                                                'label': 'Peak_cooling'}, index=[Xa2d_vari])
                        df = pandas.concat([df,df2,df3])
                        if log_level > 2:
                            print('    {:.6f} ± {:.6f}: peak warming'.format(warmpeakmean,warmpeakstd))
                            print('    {:.6f} ± {:.6f}: peak cooling'.format(coolpeakmean,coolpeakstd))

                    if showplot:
                        kwargs = dict(alpha=0.5, bins=50)

                        # Variable index -> (axis, x label). Only indices 0, 1, 2
                        # and 4 get a panel; the salinity (3) and CaCO3 (6)
                        # panels were already disabled in the original.
                        # NOTE(review): the labels assume the prior variables are
                        # ordered SST, SAT, pCO2, <unplotted>, pH - confirm
                        # against the configuration.
                        summary_panels = {
                            0: (ax0, 'SST (\u00B0C)'),
                            1: (ax1, 'SAT (\u00B0C)'),
                            2: (ax2, r'$\it{p}$CO$_2$ (ppm)'),
                            4: (ax3, 'pH'),
                        }
                        if Xa2d_vari in summary_panels:
                            ax_sum, xlabel_sum = summary_panels[Xa2d_vari]
                            for reconi in range(recon_period_len):
                                # The fourth slice is not shown in the histograms.
                                if reconi == 3:
                                    continue
                                ax_sum.hist(ens_means[:, reconi], **kwargs, label = label_all[reconi])
                            ax_sum.set_ylabel('#')
                            ax_sum.set_xlabel(xlabel_sum)
                            if Xa2d_vari == 2:
                                ax_sum.set_xlim(0, 2800)
                                ax_sum.legend(prop={'size': 6.5})
                            ax_sum.tick_params(labelsize='small')
                            ax_sum.xaxis.set_minor_locator(AutoMinorLocator())
                        fig.tight_layout()

                        if recon_period_len>1:
                            # warmpeak/coolpeak only exist when there are at least
                            # three slices; the original plotted them whenever
                            # there were two or more, which raised NameError for
                            # two-slice runs.
                            delta_panels = {
                                0: (ax10, '\u0394SST (\u00B0C)'),
                                1: (ax11, '\u0394SAT (\u00B0C)'),
                                2: (ax12, '\u0394' + r'$\it{p}$CO$_2$ (ppm)'),
                                4: (ax13, '\u0394pH'),
                            }
                            if recon_period_len > 2 and Xa2d_vari in delta_panels:
                                ax_delta, xlabel_delta = delta_panels[Xa2d_vari]
                                # Only the first panel carries the legend entries.
                                if Xa2d_vari == 0:
                                    warm_label, cool_label = 'warming', 'cooling'
                                else:
                                    warm_label, cool_label = None, None
                                ax_delta.hist(warmpeak, **kwargs, color = "#ff7f0e", label = warm_label)
                                ax_delta.hist(coolpeak, **kwargs, color = "#2ca02c", label = cool_label)
                                ax_delta.set_ylabel('#')
                                ax_delta.set_xlabel(xlabel_delta)
                                if Xa2d_vari == 0:
                                    ax_delta.legend(prop={'size': 6.5})
                                ax_delta.tick_params(labelsize='small')
                                ax_delta.xaxis.set_minor_locator(AutoMinorLocator())
                            fig2.tight_layout()

                if showplot:
                    fig.savefig(yml_dict['core']['proj_dir']+'/wrk/'+en+'.summary.pdf')
                    if recon_period_len > 1:
                        fig2.savefig(yml_dict['core']['proj_dir']+'/wrk/'+en+'.delta.pdf')
            
                # print and save excel
                if savesummary_slice:
                    print('saved @')
                    fullname = yml_dict['core']['proj_dir']+'/wrk/'+en+savefilename_add+'.summary.csv'
                    print(fullname)
                    df.to_csv(fullname)

                print('')
                print('Step #2: summary - Done')
                print('')
    


                ### Purpose of this block
                # Prepare data for verification
                #
                ### Steps
                # 1. Prepare matrix for data saving: proxy, prior, posterior; std or not
                # 2. calculate and save each Monte Carlo runs

                #####################    User defined start   #####################
                if log_level > 1:
                    print('DA - Read proxy, prior, and posterior, standardize')
                    print('')
                #####################    User defined end     #####################

                df_eval = pandas.DataFrame()
                df_ob   = pandas.DataFrame()
                df_xb   = pandas.DataFrame()
                df_xa   = pandas.DataFrame()
                
                # Get the sites_withhold_len
                
                #locRad = local_rad_list[0]
                #if locRad is None:
                #    locRadv = 0 # for filename only
                #else:
                #    locRadv = locRad

                #proxy_frac = proxy_frac_list[0]
                #Rscale = Rscale_list[0]
                filename_short = '_loc_', str(locRadv),'_proxy_frac_', str(proxy_frac),'_Rscale_',str(Rscale),'_MC_0.hdf5'
                hdf5name = MC_dir + ''.join(filename_short)
                if log_level > 1:
                    print('Read first hdf5 file {} to get the number of withold datasets.'.format(hdf5name))
                sites_eval = pandas.read_hdf(hdf5name, 'sites_eval')
                sites_withhold_len  = len(sites_eval)
                if log_level > 1:
                    print(' Site withhold length ： {}'.format(sites_withhold_len))

                data_psm_d18o_find = 0
                data_psm_mgca_find = 0
                if 'Marine sediments_mgca_pooled_bcp' in proxy_list or 'Marine sediments_mgca_pooled_red' in proxy_list:
                    data_psm_mgca_find = 1

                if 'Marine sediments_d18o_pooled' in proxy_list:
                    data_psm_d18o_find = 1

                # Prepare empty matrix for saving the data of proxy, prior, and posterior
                ob_stat = np.full((locRadn,proxy_fracn,Rscalen,MCn, recon_period_len*2, sites_withhold_len), np.nan)
                xb_stat = np.full((locRadn,proxy_fracn,Rscalen,MCn, nens, sites_withhold_len), np.nan)   # save full prior for withhold data
                xa_stat = np.full((locRadn,proxy_fracn,Rscalen,MCn, nens, recon_period_len, sites_withhold_len), np.nan)

                df_ind = 0

                # columns name for the observation
                df_ind_recon = []

                for reconi in range(recon_period_len):
                    df_ind_recon_i = [data_period_id[reconi]] + [data_period_idstd[reconi]]
                    df_ind_recon = df_ind_recon + df_ind_recon_i

                if log_level > 2:
                    print(df_ind_recon)


                for MCi in range(MCn):
                #for MCi in range(1):
                    # NetCDF file name
                    filename_short = '_loc_', str(locRadv),'_proxy_frac_', str(proxy_frac),'_Rscale_',str(Rscale),'_MC_' + str(MCi)
                    nc_filename = MC_dir + ''.join(filename_short) + '.nc'
                    print('    {}'.format(nc_filename))
                    hdf5name    = MC_dir + ''.join(filename_short) + '.hdf5'

                    if data_psm_mgca_find == 1:
                        with h5py.File(hdf5name, 'r') as f:
                            Xb_sal = np.copy(f.get('Xb_sal'))
                            if log_level > 3:
                                print(Xb_sal.shape)
                            Xb_omega = np.copy((f.get('Xb_omega')))
                            Xb_ph = np.copy(f.get('Xb_ph'))
                        Xa_sal_full = Dataset(nc_filename).variables['ocn_sur_sal_Xa_full']
                        Xa_ph_full  = Dataset(nc_filename).variables['misc_pH_Xa_full']
                        Xa_omega_full = Dataset(nc_filename).variables['carb_sur_ohm_cal_Xa_full']

                    elif data_psm_d18o_find == 1:
                        with h5py.File(hdf5name, 'r') as f:
                            Xb_sal = np.copy(f.get('Xb_sal'))
                            if log_level > 3:
                                print(Xb_sal.shape)
                            Xb_ph = np.copy(f.get('Xb_ph'))
                        Xa_sal_full = Dataset(nc_filename).variables['ocn_sur_sal_Xa_full']
                        Xa_ph_full  = Dataset(nc_filename).variables['misc_pH_Xa_full']


                    ### Read Proxy ###
                    proxies = pandas.read_hdf(hdf5name, 'proxies')
                    #prior_variable_dict = pandas.read_hdf(hdf5name, 'prior_variable_dict')

                    if proxy_frac <= 1.0:
                        sites_eval = pandas.read_hdf(hdf5name, 'sites_eval')
                        sites_withhold_len  = len(sites_eval)
                        if log_level > 1:
                            print('Site withhold:       {}'.format(sites_eval['Site'].values))
                            print('Proxy        :       {}'.format(sites_eval['Proxy'].values))

                    proxy_psm_type_dict_df = pandas.read_hdf(hdf5name, 'proxy_psm_type_dict_df')
                    proxy_psm_type_dict_list = proxy_psm_type_dict_df[0].values.tolist()

                    for j in range(sites_withhold_len):
                        data_psm_type = sites_eval['Proxy'][j]
                        # Find the proxy group (a key of proxy_assim2) whose list contains
                        # this site's proxy type, then load the state variable required by
                        # that group's PSM: the prior as a (dum_imax*dum_jmax, nens) array
                        # and the posterior as a netCDF variable that is sliced later.
                        # If several groups match, the last match wins.
                        # NOTE(review): when no group matches, key0 and both fields keep
                        # whatever the previous site left behind (or are undefined for the
                        # first site) -- confirm every withheld proxy type is listed.
                        for key, value in proxy_assim2.items():
                            if data_psm_type not in value:
                                continue
                            key0 = key
                            psm_required_variable_key = list(yml_dict['psm'][proxy_psm_type[key]]['psm_required_variables'].keys())[0]
                            xb_key = psm_required_variable_key+'_Xb_full'
                            xa_key = psm_required_variable_key+'_Xa_full'
                            # One Dataset handle serves both variables (the original
                            # opened the same file twice per match).
                            nc_state = Dataset(nc_filename)
                            Xb_full_field0 = nc_state.variables[xb_key][:,:,:,0].reshape(dum_imax*dum_jmax, nens)
                            Xa_full_field0 = nc_state.variables[xa_key]

                        if proxy_psm_type[key0] in ['bayesreg_tex86', 'cgenie_caco3']:
                            if proxy_psm_type[key0] in ['bayesreg_tex86']:
                                proxy_i = 'tex86'
                            else:
                                proxy_i = 'caco3'
                            Ye = DeepDA_psm.cal_ye_cgenie(yml_dict,sites_eval,j,Xb_full_field0,proxy_assim2,proxy_psm_type,dum_lon_offset,dum_imax,dum_jmax)

                            xb_stat[locRadi,proxy_fraci,Rscalei,MCi,:,j] = np.copy(Ye)

                            #print('Prior Ye is {:.6f}'.format(np.mean(Ye)))

                            for reconi in range(recon_period_len):

                                Xa_reconi = np.copy(Xa_full_field0[:,:,:,0,reconi].reshape((dum_imax*dum_jmax,nens)))

                                Ye = DeepDA_psm.cal_ye_cgenie(yml_dict,sites_eval,j,Xa_reconi,proxy_assim2,proxy_psm_type,dum_lon_offset,dum_imax,dum_jmax)

                                #xa_stat[locRadi][proxy_fraci][Rscalei][MCi][2*reconi][j]   = np.mean(Ye)
                                #xa_stat[locRadi][proxy_fraci][Rscalei][MCi][2*reconi+1][j] = np.var(Ye)
                                xa_stat[locRadi,proxy_fraci,Rscalei,MCi,:,reconi,j]   = np.copy(Ye)
                                #print('Analysis Ye is {:.6f}'.format(np.mean(Ye)))
                                #ob_stat[j][reconi*2]   = sites_eval[data_period_id[reconi]][j]

                                ob_stat[locRadi][proxy_fraci][Rscalei][MCi][2*reconi][j] = sites_eval[data_period_id[reconi]][j]

                                # error
                                if ~np.isnan(sites_eval[data_period_id[reconi]][j]):

                                    if proxy_psm_type[key0] in ['bayesreg_tex86']:

                                        if proxy_err_eval in ['proxy_err_psm']:
                                            ob_stat[locRadi][proxy_fraci][Rscalei][MCi][reconi*2+1][j] = DeepDA_psm.obs_estimate_r_fixed_tex86(31) + sites_eval[data_period_idstd[reconi]][j] ** 2
                                        else:
                                            ob_stat[locRadi][proxy_fraci][Rscalei][MCi][reconi*2+1][j] = DeepDA_psm.obs_estimate_r_fixed_tex86(31)

                                    if proxy_psm_type[key0] in ['cgenie_caco3','cgenie_caco3_13c']:

                                        psm_error = yml_dict['psm'][proxy_psm_type[key0]]['psm_error']

                                        if proxy_err_eval in ['proxy_err_psm']:
                                            ob_stat[locRadi][proxy_fraci][Rscalei][MCi][reconi*2+1][j] = psm_error + sites_eval[data_period_idstd[reconi]][j] ** 2
                                        else:
                                            ob_stat[locRadi][proxy_fraci][Rscalei][MCi][reconi*2+1][j] = psm_error

                        elif proxy_psm_type[key0] in ['bayesreg_d18o_pooled']:

                            proxy_i = 'd18o'

                            Ye = DeepDA_psm.cal_ye_cgenie_d18O(yml_dict,sites_eval,j,Xb_full_field0,Xb_sal,Xb_ph,proxy_assim2,proxy_psm_type,dum_lon_offset,dum_imax,dum_jmax)

                            xb_stat[locRadi,proxy_fraci,Rscalei,MCi,:,j] = np.copy(Ye)

                            for reconi in range(recon_period_len):

                                Xa_reconi = np.copy(Xa_full_field0[:,:,:,0,reconi].reshape((dum_imax*dum_jmax,nens)))
                                Xa_sal_i  = np.copy(Xa_sal_full[:,:,:,0,reconi].reshape((dum_imax*dum_jmax,nens)))
                                Xa_ph_i   = np.copy(Xa_ph_full[:,:,:,0,reconi].reshape((dum_imax*dum_jmax,nens)))

                                Ye = DeepDA_psm.cal_ye_cgenie_d18O(yml_dict,sites_eval,j,Xa_reconi,Xa_sal_i,Xa_ph_i,proxy_assim2,proxy_psm_type,dum_lon_offset,dum_imax,dum_jmax)

                                xa_stat[locRadi,proxy_fraci,Rscalei,MCi,:,reconi,j]   = np.copy(Ye)

                                ob_stat[locRadi][proxy_fraci][Rscalei][MCi][2*reconi][j] = sites_eval[data_period_id[reconi]][j]

                                # error
                                if ~np.isnan(sites_eval[data_period_id[reconi]][j]):

                                    if proxy_err_eval in ['proxy_err_psm']:
                                        ob_stat[locRadi][proxy_fraci][Rscalei][MCi][reconi*2+1][j] = DeepDA_psm.obs_estimate_r_fixed_d18o(15) + sites_eval[data_period_idstd[reconi]][j] ** 2
                                    else:
                                        ob_stat[locRadi][proxy_fraci][Rscalei][MCi][reconi*2+1][j] = DeepDA_psm.obs_estimate_r_fixed_d18o(15)

                        elif proxy_psm_type[key0] in ['bayesreg_mgca_pooled_bcp', 'bayesreg_mgca_pooled_red']:
                            proxy_i = 'mgca'
                            spp = 'all'
                            cleaningr = np.tile(np.array([1]),nens)
                            cleaningb = np.tile(np.array([0]),nens)

                            if proxy_psm_type[key0] in ['bayesreg_mgca_pooled_red']:
                                clearning_one = cleaningr
                                proxy_explain = 'reductive'

                            elif proxy_psm_type[key0] in ['bayesreg_mgca_pooled_bcp']:
                                clearning_one = cleaningb
                                proxy_explain = 'barker'

                            Ye = DeepDA_psm.cal_ye_cgenie_mgca(yml_dict,sites_eval,j,Xb_full_field0,proxy_psm_type[key0],dum_lon_offset,dum_imax,dum_jmax,Xb_sal,Xb_ph,Xb_omega,geologic_age)

                            if psm_baymag_ln in ['yes']:
                                xb_stat[locRadi,proxy_fraci,Rscalei,MCi,:,j] = np.copy(np.exp(Ye))
                            else:
                                xb_stat[locRadi,proxy_fraci,Rscalei,MCi,:,j] = np.copy(Ye)

                            #Xa_sal_full = Dataset(nc_filename).variables['ocn_sur_sal_Xa_full']
                            #Xa_ph_full  = Dataset(nc_filename).variables['misc_pH_Xa_full']
                            #Xa_omega_full = Dataset(nc_filename).variables['carb_sur_ohm_cal_Xa_full']

                            for reconi in range(recon_period_len):

                                Xa_reconi  =   Xa_full_field0[:,:,:,0,reconi].reshape((dum_imax*dum_jmax,nens))
                                Xa_sal_i   =   np.copy(Xa_sal_full[:,:,:,0,reconi].reshape((dum_imax*dum_jmax,nens)))
                                Xa_ph_i    =   np.copy(Xa_ph_full[:,:,:,0,reconi].reshape((dum_imax*dum_jmax,nens)))
                                Xa_omega_i =   np.copy(Xa_omega_full[:,:,:,0,reconi].reshape((dum_imax*dum_jmax,nens)))

                                Ye = DeepDA_psm.cal_ye_cgenie_mgca(yml_dict,sites_eval,j,Xa_reconi,proxy_psm_type[key0],dum_lon_offset,dum_imax,dum_jmax,Xa_sal_i,Xa_ph_i,Xa_omega_i,geologic_age)

                                if psm_baymag_ln in ['yes']:
                                    xa_stat[locRadi,proxy_fraci,Rscalei,MCi,:,reconi,j]   = np.copy(np.exp(Ye))
                                else:
                                    xa_stat[locRadi,proxy_fraci,Rscalei,MCi,:,reconi,j]   = np.copy(Ye)

                                #xa_stat[locRadi][proxy_fraci][Rscalei][MCi][2*reconi][j]   = np.mean(Ye)
                                #xa_stat[locRadi][proxy_fraci][Rscalei][MCi][2*reconi+1][j] = np.var(Ye)

                                ob_stat[locRadi][proxy_fraci][Rscalei][MCi][2*reconi][j] = sites_eval[data_period_id[reconi]][j]

                                if ~np.isnan(sites_eval[data_period_id[reconi]][j]):
                                    ob_err0 = DeepDA_psm.obs_estimate_r_fixed_mgca_pooled((15, 16), clearning_one[0], np.nanmean(Xb_sal), np.nanmean(Xb_ph), np.nanmean(Xb_omega), spp, geologic_age)
                                    if proxy_err_eval in ['proxy_err_psm']:
                                        ob_stat[locRadi][proxy_fraci][Rscalei][MCi][reconi*2+1][j] = ob_err0 + sites_eval[data_period_idstd[reconi]][j] ** 2
                                    else:
                                        ob_stat[locRadi][proxy_fraci][Rscalei][MCi][reconi*2+1][j] = ob_err0

                        # save proxy, prior, and posterior data and then standardized

                        # info
                        df_i = pandas.DataFrame({'site':sites_eval['Site'][j],'proxy':proxy_i,'locRad':locRadv,'proxy_frac':proxy_frac,'Rscale':Rscale,'MC':MCi}, index=[df_ind])
                        df_eval = pandas.concat([df_eval,df_i])

                        df_ind += 1

                    # obs
                    ob_data = np.swapaxes(ob_stat[locRadi,proxy_fraci,Rscalei,MCi,:,:],0,1)
                    df_obi = pandas.DataFrame(data=ob_data,columns=df_ind_recon)
                    df_ob = pandas.concat([df_ob,df_obi])

                    #xb_std = np.copy(xb_stat[locRadi][proxy_fraci][Rscalei][MCi][:][:])
                    #ob_stat = np.full((locRadn,proxy_fracn,Rscalen,MCn, recon_period_len*2, sites_withhold_len), np.nan)
                    #xb_stat = np.full((locRadn,proxy_fracn,Rscalen,MCn, nens, sites_withhold_len), np.nan)   # save full prior for withhold data
                    #xa_stat = np.full((locRadn,proxy_fracn,Rscalen,MCn, nens, recon_period_len, sites_withhold_len), np.nan)

                    # xb
                    xb_data = np.swapaxes(xb_stat[locRadi,proxy_fraci,Rscalei,MCi,:,:],0,1)  # withhold x nens
                    df_xb_i  = pandas.DataFrame(data=xb_data)
                    df_xb = pandas.concat([df_xb,df_xb_i])

                    # xa
                    xa_data = np.swapaxes(xa_stat[locRadi,proxy_fraci,Rscalei,MCi,:,:,:],0,2)  # withhold x recon x nens
                    xa_data1 = xa_data.reshape((sites_withhold_len, recon_period_len*nens))
                    df_xa_i  = pandas.DataFrame(data=xa_data1)
                    df_xa = pandas.concat([df_xa,df_xa_i])

                df_ob = df_ob.reset_index()
                df_xb = df_xb.reset_index()
                df_xa = df_xa.reset_index()
                print('')
                print('Step #3: evaluation data preparation - Done')
                print('')
                if log_level >= 3:
                    print('df_eval')
                    print(df_eval)
                    print('df_ob')
                    print(df_ob)
                    print('df_xb')
                    print(df_xb)
                    print('df_xa')
                    print(df_xa)

                    
            ###############################################################
            # calculate RMSE, CE, R^2 for each time slice
            ###############################################################
            ###############################################################

                
                df_reconi = pandas.DataFrame()

                df_ind_i = 0
                df_zscore_mc   = pandas.DataFrame()
                
                for reconi in range(recon_period_len):
                    
                    df_zscore   = pandas.DataFrame()
                    data_period_id_i = data_period_id[reconi]
                    
                    if log_level > 1:
                        print('')
                        print(data_period_id_i)
                        print('')

                    for proxy_j in range(pn):
                        
                        df_zscore_j = pandas.DataFrame()
                        
                        proxy_i = Typelist[proxy_j].lower()
                        
                        print('proxy i {}'.format(proxy_i))
                        
                        #  df_eval_pi = df_eval_pi_all_col[df_eval_pi_all_col['proxy'] == proxy_i]
                        
                        df_eval_pi = df_eval[df_eval['proxy'] == proxy_i]
                        
                        if pn > 1:
                            df_ob_pi   = df_ob[df_eval['proxy'] == proxy_i]
                            df_xb_pi   = df_xb[df_eval['proxy'] == proxy_i]
                            df_xa_pi   = df_xa[df_eval['proxy'] == proxy_i]
                        else:
                            df_ob_pi   = df_ob 
                            df_xb_pi   = df_xb
                            df_xa_pi   = df_xa
                            
                        # drop index reset index
                        
                        # Throw away the carried-over 'index' column and renumber the
                        # rows 0..n-1 so the three tables line up row by row.
                        df_ob_pi = df_ob_pi.drop(columns='index').reset_index(drop=True)
                        df_xb_pi = df_xb_pi.drop(columns='index').reset_index(drop=True)
                        df_xa_pi = df_xa_pi.drop(columns='index').reset_index(drop=True)

                        # The posterior table holds nens columns per reconstruction slice,
                        # side by side; keep only the current slice and relabel its
                        # columns 0..nens-1.
                        first_col = reconi*nens
                        slice_cols = df_xa_pi.columns[first_col:(reconi+1)*nens]
                        df_xa_pi = df_xa_pi[slice_cols]
                        df_xa_pi.columns = range(df_xa_pi.shape[1])
                        
                        # show 1 data
                        df_ob_pi_all = df_ob_pi[data_period_id_i]
                        df_xb_pi_all = df_xb_pi.mean(axis=1)
                        df_xa_pi_all = df_xa_pi.mean(axis=1)
                        
                        #df_xb_pi_all = df_xb_pi.mask(df_xb_pi.eq(np.nan)).mean(axis=1)
                        #df_xa_pi_all = df_xa_pi.mask(df_xa_pi.eq(np.nan)).mean(axis=1)
                        if log_level > 5:
                            print('df_ob_pi')
                            print(df_ob_pi)
                            print('df_xb_pi')
                            #print(df_xb_pi)
                            print('df_xa_pi')
                            #print(df_xa_pi)
                            print('df_ob_pi_all')
                            print(df_ob_pi_all)
                            print('df_xb_pi_all')
                            print(df_xb_pi_all)
                            print('df_xa_pi_all')
                            print(df_xa_pi_all)
                            #print(len(df_xa_pi_all))
                        
                        df_ob_pi_mean = pandas.Series.mean(df_ob_pi[data_period_id_i])
                        df_ob_pi_std  = pandas.Series.std(df_ob_pi[data_period_id_i])
                        #if proxy_i == 'caco3':
                        #    df_ob_pi_mean = list(df_ob_pi_mean)
                        #    df_ob_pi_std = list(df_ob_pi_std)
                        
                        if log_level > 3:
                            print('df_ob_pi_mean')
                            print(df_ob_pi_mean)
                            print('df_ob_pi_std')
                            print(df_ob_pi_std)

                        df_ob_pi_zscore = (df_ob_pi[data_period_id_i] - df_ob_pi_mean) / df_ob_pi_std
                        df_xb_pi_zscore = (df_xb_pi_all - df_ob_pi_mean) / df_ob_pi_std
                        df_xa_pi_zscore = (df_xa_pi_all - df_ob_pi_mean) / df_ob_pi_std
                        
                        if log_level > 5:
                            print('>>>>> ')
                            print('df_ob_pi_zscore')
                            print(df_ob_pi_zscore)
                            print('df_xb_pi_zscore')
                            print(df_xb_pi_zscore)
                            print('df_xa_pi_zscore')
                            print(df_xa_pi_zscore)
                            
                        MCnn = len(df_xa_pi_all)
                        for MCii in range(MCnn): df_zscore_j.loc[MCii, ['proxy']] = proxy_i
                        for MCii in range(MCnn): df_zscore_j.loc[MCii, ['reconi']] = data_period_id_i
                        for MCii in range(MCnn): df_zscore_j.loc[MCii, ['loc']] = locRadi
                        for MCii in range(MCnn): df_zscore_j.loc[MCii, ['proxy_frac']] = proxy_fraci
                        for MCii in range(MCnn): df_zscore_j.loc[MCii, ['Rscale']] = Rscale
                        
                        df_zscore_j['xb'] = df_xb_pi_all
                        df_zscore_j['ob'] = df_ob_pi_all
                        df_zscore_j['xa'] = df_xa_pi_all

                        df_zscore_j['xb_zscore'] = df_xb_pi_zscore
                        df_zscore_j['ob_zscore'] = df_ob_pi_zscore
                        df_zscore_j['xa_zscore'] = df_xa_pi_zscore
                        
                        if log_level > 5:
                            print('>>>>> ')
                            print('df_zscore_j')
                            print(df_zscore_j)
                            
                        df_zscore = pandas.concat([df_zscore,df_zscore_j])

                        if log_level > 5:
                            print('df_eval_pi')
                            print('df_ob_pi')
                            print('df_xb_pi[:][0:6]')
                            print('df_xa_pi')

                            print(df_eval_pi)
                            print(df_ob_pi)
                            print(df_xb_pi[:][0:6])
                            print(df_xa_pi)

                    if log_level > 5:
                        print('df_zscore:')
                        print(df_zscore)


                    # ---- Prior (Xb) vs. observations, both as z-scores ----
                    rmse_xb = DeepDA_psm.rmse(df_zscore['ob_zscore'],df_zscore['xb_zscore'])

                    if log_level > 1:
                        print('RMSE of Ob vs. Xb {}'.format(rmse_xb))

                    CE_xb = DeepDA_psm.CE_NS70(df_zscore['ob_zscore'],df_zscore['xb_zscore'],1)
                    if log_level > 1:
                        print('CE of Ob vs. Xb {}'.format(CE_xb))

                    # Squared correlation, restricted to rows where both series are valid
                    # (NaN/inf entries are masked out of either side).
                    a=ma.masked_invalid(df_zscore['ob_zscore'])
                    b=ma.masked_invalid(df_zscore['xb_zscore'])
                    msk = (~a.mask & ~b.mask)
                    cor_matrix = ma.corrcoef(a[msk],b[msk])
                    r_2_xb = cor_matrix[0,1]**2

                    if log_level > 1:
                        print('r^2 of Ob vs. Xb {}'.format(r_2_xb))
                        print('')


                    # ---- Posterior (Xa) vs. observations, both as z-scores ----
                    rmse_xa = DeepDA_psm.rmse(df_zscore['ob_zscore'],df_zscore['xa_zscore'])

                    if log_level > 1:
                        print('RMSE of Ob vs. Xa {}'.format(rmse_xa))

                    CE_xa = DeepDA_psm.CE_NS70(df_zscore['ob_zscore'],df_zscore['xa_zscore'],1)

                    if log_level > 1:
                        # Fixed label: this line reports the posterior (Xa) value but was
                        # previously printed as "Ob vs. Xb".
                        print('CE of Ob vs. Xa {}'.format(CE_xa))

                    a=ma.masked_invalid(df_zscore['ob_zscore'])
                    b=ma.masked_invalid(df_zscore['xa_zscore'])
                    msk = (~a.mask & ~b.mask)
                    cor_matrix = ma.corrcoef(a[msk],b[msk])
                    r_2_xa = cor_matrix[0,1]**2

                    if log_level > 1:
                        print('r^2 of Ob vs. Xa {}'.format(r_2_xa))
                        print(' --- ')
                        print('')

                    # Change from prior to posterior, as a percentage of the prior value.
                    # A positive dRMSE means the posterior RMSE is lower than the prior's.
                    # NOTE(review): the same (Xb - Xa)/Xb form is used for CE and R^2, so
                    # a higher posterior CE/R^2 gives a negative dCE/dR^2 when the prior
                    # value is positive -- confirm that sign convention is intended.
                    drmse = 100 * (rmse_xb - rmse_xa)/rmse_xb
                    dce   = 100 * (CE_xb - CE_xa)/CE_xb
                    dr2   = 100 * (r_2_xb - r_2_xa)/r_2_xb
                    
                    df_reconi = pandas.DataFrame({'reconi':reconi,
                                                  'data_period_id_i':data_period_id_i,
                                                  'loc':locRadv,
                                                  'proxy_frac':proxy_frac,
                                                  'Rscale':Rscale,
                                                  'RMSE Xb':rmse_xb,
                                                  'RMSE Xa':rmse_xa,
                                                  'dRMSE':drmse,
                                                  'CE Xb':CE_xb,
                                                  'CE Xa': CE_xa,
                                                  'dCE':dce,
                                                  'R^2 Xb':r_2_xb,
                                                  'R^2 Xa': r_2_xa,
                                                  'dR^2':dr2}, index=[df_ind_i])

                    df_evaluation = pandas.concat([df_evaluation,df_reconi])

                    df_ind_i += 1
                    
                    df_zscore_mc = pandas.concat([df_zscore_mc,df_zscore])
                    
                # all slice zscore
                df_zscore_slice = pandas.DataFrame()
                for proxy_j in range(pn):
                    
                    df_zscore_j_slice = pandas.DataFrame()
                    
                    proxy_i = Typelist[proxy_j].lower()
                    print('proxy i {}'.format(proxy_i))
                    
                    df_proxy_j_slice   = df_zscore_mc[df_zscore_mc['proxy'] == proxy_i]
                    
                    
                    df_ob_slice_mean   = pandas.Series.mean(df_proxy_j_slice['ob'])
                    df_ob_slice_std    = pandas.Series.std(df_proxy_j_slice['ob'])
                
                    df_ob_slice_zscore = (df_proxy_j_slice['ob'] - df_ob_slice_mean) / df_ob_slice_std
                    df_xb_slice_zscore = (df_proxy_j_slice['xb'] - df_ob_slice_mean) / df_ob_slice_std
                    df_xa_slice_zscore = (df_proxy_j_slice['xa'] - df_ob_slice_mean) / df_ob_slice_std
                    
                    df_ob_slice_zscore = df_ob_slice_zscore.reset_index()
                    df_ob_slice_zscore = df_ob_slice_zscore.drop(columns='index')
                    df_xb_slice_zscore = df_xb_slice_zscore.reset_index()
                    df_xb_slice_zscore = df_xb_slice_zscore.drop(columns='index')
                    df_xa_slice_zscore = df_xa_slice_zscore.reset_index()
                    df_xa_slice_zscore = df_xa_slice_zscore.drop(columns='index')
                    
                    if log_level > 3:
                        print('df_ob_slice_mean')
                        print(df_ob_slice_mean)
                        print('df_ob_slice_std')
                        print(df_ob_slice_std)
                        print('df_ob_slice_zscore')
                        print(df_ob_slice_zscore)
                        print('df_xb_slice_zscore')
                        print(df_xb_slice_zscore)
                        print('df_xa_slice_zscore')
                        print(df_xa_slice_zscore)
                        
                    MCnn = len(df_proxy_j_slice['ob'])
                    
                    for MCii in range(MCnn): df_zscore_j_slice.loc[MCii, ['proxy']] = proxy_i
                    for MCii in range(MCnn): df_zscore_j_slice.loc[MCii, ['reconi']] = data_period_id_i
                    for MCii in range(MCnn): df_zscore_j_slice.loc[MCii, ['loc']] = locRadi
                    for MCii in range(MCnn): df_zscore_j_slice.loc[MCii, ['proxy_frac']] = proxy_fraci
                    for MCii in range(MCnn): df_zscore_j_slice.loc[MCii, ['Rscale']] = Rscale


                    df_zscore_j_slice['ob_zscore'] = df_ob_slice_zscore
                    df_zscore_j_slice['xb_zscore'] = df_xb_slice_zscore
                    df_zscore_j_slice['xa_zscore'] = df_xa_slice_zscore
                    df_zscore_slice = pandas.concat([df_zscore_slice,df_zscore_j_slice])
                    
                    
                    if log_level > 3:
                        print('df_proxy_j_slice')
                        print(df_proxy_j_slice)
                        
                    
                # Skill pooled over all time slices: RMSE and CE of the prior (Xb) and
                # posterior (Xa) z-scores against the observation z-scores.
                ob_z_pooled = df_zscore_slice['ob_zscore']
                xb_z_pooled = df_zscore_slice['xb_zscore']
                xa_z_pooled = df_zscore_slice['xa_zscore']

                rmse_slice_xb = DeepDA_psm.rmse(ob_z_pooled,xb_z_pooled)
                CE_slice_xb = DeepDA_psm.CE_NS70(ob_z_pooled,xb_z_pooled,1)
                rmse_slice_xa = DeepDA_psm.rmse(ob_z_pooled,xa_z_pooled)
                CE_slice_xa = DeepDA_psm.CE_NS70(ob_z_pooled,xa_z_pooled,1)

                # Prior-to-posterior change, as a percentage of the prior value.
                drmse_slice = 100 * (rmse_slice_xb - rmse_slice_xa)/rmse_slice_xb
                dce_slice   = 100 * (CE_slice_xb - CE_slice_xa)/CE_slice_xb

                # One summary row, tagged 'all' in place of a slice name. R^2 is not
                # computed for the pooled case, so those cells are left empty.
                # NOTE(review): `reconi` here is whatever the per-slice loop above left
                # behind (its last value) -- confirm that is the intended label.
                pooled_row = {'reconi':reconi,
                              'data_period_id_i':'all',
                              'loc':locRadv,
                              'proxy_frac':proxy_frac,
                              'Rscale':Rscale,
                              'RMSE Xb':rmse_slice_xb,
                              'RMSE Xa':rmse_slice_xa,
                              'dRMSE':drmse_slice,
                              'CE Xb':CE_slice_xb,
                              'CE Xa': CE_slice_xa,
                              'dCE':dce_slice,
                              'R^2 Xb':'',
                              'R^2 Xa': '',
                              'dR^2':''}
                df_reconi = pandas.DataFrame(pooled_row, index=[df_ind_i])

                df_evaluation = pandas.concat([df_evaluation,df_reconi])
                
                if log_level > 3:
                    print('')
                    print('df_zscore_slice')
                    print(df_zscore_slice)
                    
                if log_level > 1:
                    print('All slice: RMSE of Ob vs. Xb --> Ob vs. Xa === {} --> {}'.format(rmse_slice_xb, rmse_slice_xa))
                    print('           CE   of Ob vs. Xb --> Ob vs. Xa === {} --> {}'.format(CE_slice_xb, CE_slice_xa))
                # append this run's z-scores to the table of all z-scores (df_zscore_all)
                df_zscore_all = pandas.concat([df_zscore_all,df_zscore_mc])
                print(' This loop done ')

    if savesummary:
        # Both summary tables are written under <proj_dir>/wrk/, with the
        # experiment name as the file-name prefix.
        out_prefix = yml_dict['core']['proj_dir']+'/wrk/'+en
        df_zscore_all.to_csv(out_prefix+savefilename_add+'_df_zscore_all'+'.csv')
        df_evaluation.sort_index().to_csv(out_prefix+'_df_evaluation_log.csv')
        
    print('')
    print('Step #4: evaluation - Done')
    print('')
    print('All done!')

>>  Import package => OKAY

petmproxy3slices_v0.1.csv_petm29_v0.1deep_20211020_All.noAc._bays_MCsd50_pHcor_frac0.98_testR_Ca75
>>  Loading configuration file => OKAY

['sed_CaCO3', 'atm_pCO2', 'fburial_CaCO3']
>>  nc_keyvalue {'biogem': 'fields_biogem_2d', 'sedgem': 'fields_sedgem_2d'}...
>>  biogem: fields_biogem_2d
>>  sedgem: fields_sedgem_2d
>>  nc_keyvalue {'biogem': 'fields_biogem_3d'}...
>>  biogem: fields_biogem_3d
>>  Number of 2d prior variables is: 9. List:
      ['ocn_sur_temp', 'atm_temp', 'atm_pCO2', 'ocn_sur_sal', 'misc_pH', 'carb_sur_ohm_cal', 'ocn_ben_temp', 'ocn_sur_ALK', 'sed_CaCO3']
>>  Number of 3d prior variables is: 0. List:
      []
>>  Read nc file: /volumes/DA/DeepDA/wrk/petmproxy3slices_v0.1.csv_petm29_v0.1deep_20211020_All.noAc._bays_MCsd50_pHcor_frac0.98_testR_Ca75/_loc_0_proxy_frac_0.98_Rscale_100.0_MC_0.nc
>>  Read nc file: /volumes/DA/DeepDA/wrk/petmproxy3slices_v0.1.csv_petm29_v0.1deep_20211020_All.noAc._bays_MCsd50_pHcor_frac0.98_testR_Ca75/_loc_0_prox

In [2]:
'''
DeepDA_verify is to verify DA output

It reads proxy, prior, and posterior from DA output files and configuration files.
Then, it calculates the statistics (corrcoef and CE) of the DA results and saves the outputs.

By Mingsong Li
    Penn State 
    Now at Peking University
    2/17/2020
    
Updated Mar. 03, 2020
Updated Oct. 11, 2020  # plot enhanced
Updated Oct. 12, 2020  # multi jobs
Updated June 30, 2021 
Updated July 15, 2021  # ZSCORE

#df_ob_pi   = df_ob[df_eval['proxy'] == proxy_i]   : only work for single proxy experiment
'''

import sys
sys.path.append('../')
from DeepDA_lib import modules_nc
from DeepDA_lib import DeepDA_psm
from scipy import stats
import shutil

import h5py
#import time
import yaml
import numpy as np
import numpy.ma as ma
import pandas
import os
from netCDF4 import Dataset
from sys import platform as sys_pf
import matplotlib.pyplot as plt
if sys_pf == 'darwin':
    import matplotlib
    matplotlib.use("TkAgg")
    import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)
try:
    import bayspline
except ImportError as e1:
    print('Warning:', e1)
try:
    import bayspar
except ImportError as e2:
    print('Warning:', e2)
try:
    import bayfox
except ImportError as e3:
    print('Warning:', e3)
try:
    import baymag
except ImportError as e4:
    print('Warning:', e4)

print('>>  Import package => OKAY')
print('')

###################################################################
#####################    User defined start   #####################
###################################################################

# DA output folders
if sys_pf == 'darwin':
    xlsxdir = '/volumes/DA/DeepDA/wrk/'
    xlsxdir = '/volumes/Backup/DeepDA/'
else:
    xlsxdir = '/mnt/d/DeepDA/wrk/'

# Experiment style: 
#    0 = given list
#    1 = all folders
#    
expstyle = 0
#expstyle = 1

# needed when expstyle is 0
explist = ['petmproxy3slices_v0.0.20_w_deepmip.csv_petm29_v20_20210718_all_bays_MCsd100_pHcor_omega5_frac0.98']
Typelist = ['d18O','TEX86','MgCa','caco3']  # proxy type list
#explist = ['petmproxy3slices_v0.0.20_w_deepmip.csv_petm29_v20_20210715_d18o_bays_MCsd100_pHcor_omega5_frac0.95']  # 1 unselected proxy
#Typelist = ['d18O']
#explist = ['petmproxy3slices_v0.0.20_w_deepmip.csv_petm29_v20_20210715_caco3_bays_MCsd100_pHcor_omega5_frac0.95'] #
#Typelist = ['caco3']
#explist = ['petmproxy3slices_v0.0.20_w_deepmip.csv_petm29_v20_20210715_tex_bays_MCsd100_pHcor_omega5_frac0.95']
#Typelist = ['TEX86'] 
#explist = ['petmproxy3slices_v0.0.20_w_deepmip.csv_petm29_v20_20210715_mgca_bays_MCsd100_pHcor_omega5_frac0.95']
#Typelist = ['MgCa']

label_all = ('prePETM', 'peakPETM','postPETM', 'PETM_body')  # slice name
warmcomp = [0,1]  # ID for petm warming 


pn = len(Typelist)
dum_jmax = 36
dum_imax = 36

AnalysisStd = True   # True: standardize; False: use raw analysis data

# output
savesummary = True
savesummary_slice=  True
# for evaluation save and plot
showplot = False

#pn = 4  # use the first pn data

axis_lim = np.array([[-6,1],[0,1],[0,7],[0,100]])   # axis limit for the plot
axis_limz = np.array([-4,4])   # set axis limit for the zscore plot

###################################################################
#####################    User defined end     #####################
###################################################################
label_all_len = len(label_all)

# Resolve the list of experiments to process.
#   expstyle == 1: every sub-folder of xlsxdir becomes an experiment. The names
#                  are sorted because os.listdir returns them in arbitrary
#                  order, which would make the processing order vary per run.
#   otherwise    : keep the user-given explist unchanged.
if expstyle == 1:
    # read content
    dir1 = sorted(o for o in os.listdir(xlsxdir) if os.path.isdir(os.path.join(xlsxdir, o)))
    explist = dir1
    #print(dir1[0])

for diri in range(len(explist)):
    
    # debug: uncomment the two lines below to skip the first 5 folders
    #if diri < 5:
    #    continue
    en = explist[diri]
    print(en)

    dum_ijmax = dum_imax * dum_jmax
    # Project-level configuration: read here to find the working directory.
    # 'with' closes the file even if yaml.load raises.
    config_name_f = "../DeepDA_config.yml"
    with open(config_name_f, 'r') as f:
        yml_dict_f = yaml.load(f, Loader=yaml.FullLoader)

    dir_data_save = yml_dict_f['core']['wrkdir']

    # Experiment-specific configuration: <wrkdir>/<experiment name>.yml
    config_name = dir_data_save + '/' + en + '.yml'
    with open(config_name, 'r') as f:
        yml_dict = yaml.load(f, Loader=yaml.FullLoader)
    print('>>  Loading configuration file => OKAY')
    print('')
    # Read parameters from configurations
    MCn = yml_dict['MonteCarlo']['number']
    #debug
    #MCn = 30
    nens = yml_dict['core']['nens']

    nexp = yml_dict['core']['nexp']
    dir_data_save = yml_dict['core']['wrkdir']
    log_level = yml_dict['log_level']
    #debug
    #log_level = 4
    recon_period = yml_dict['core']['recon_period']
    recon_timescale = yml_dict['core']['recon_timescale_interval']
    recon_period_full = np.arange(recon_period[0],recon_period[1]+1,recon_timescale)
    recon_period_len = recon_period_full.shape[0]
    recon_timescale = yml_dict['core']['recon_timescale_interval']
    save_ens_full = yml_dict['core']['save_ens_full']
    proxy_assim2 = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['proxy_assim2']
    proxy_psm_type    = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['proxy_psm_type']
    proxy_blacklist   = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['proxy_blacklist']
    proxy_order       = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['proxy_order']
    proxy_list = [item for item in proxy_order if item not in proxy_blacklist]
    proxy_err_eval   = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['proxy_err_eval']
    lon_label = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['lon_label']
    lat_label = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['lat_label']

    proxy_frac      = yml_dict['proxies']['proxy_frac']
    prior_source = yml_dict['prior']['prior_source'] #
    dum_lon_offset = yml_dict['prior'][prior_source]['dum_lon_offset'] # longitude offset
    limit_hard_keys = list(yml_dict['prior'][prior_source]['limit_hard'].keys())
    psm_baymag_ln =  yml_dict['psm']['bayesreg_mgca_pooled_red']['psm_baymag_ln']
    print(limit_hard_keys)

    data_period_id    = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['data_period_id']
    data_period_idstd = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['data_period_idstd']
    geologic_age = yml_dict['core']['geologic_age']

    # read preprior HDF5 file
    dir_proxy_data = dir_data_save +'/'+ yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['dbversion']
    proxy_err_eval = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['proxy_err_eval']

    # ========= dataset for plot =========
    cGENIEGrid = yml_dict['core']['proj_dir'] + '/data_misc/cGENIEGrid.csv'
    cGENIEGrid = pandas.read_csv(cGENIEGrid)
    cGENIEGridB_lat36 = cGENIEGrid['lat']
    cGENIEGridB_lon36 = cGENIEGrid['lon']
    cGENIEGrid = cGENIEGrid.to_numpy()
    #print('>>  OKAY.')

    # Read global mean and plot to show results
    ########## Prior #########
    prior_state_variable = yml_dict['prior'][prior_source]['state_variable']  # note: ['2d': xxx; '3d': xxx]
    dum_lon_offset = yml_dict['prior'][prior_source]['dum_lon_offset'] # longitude offset
    
    # ========= Monte Carlo =========
    local_rad_list = yml_dict['core']['local_rad_list'] #
    locRadn= len(local_rad_list)
    local_rad_list = np.asarray(local_rad_list)
    #print(local_rad_list)
    #print(locRadn)
    proxy_frac_list   = yml_dict['proxies']['proxy_frac']
    proxy_fracn = len(proxy_frac_list)
    proxy_frac_list = np.asarray(proxy_frac_list)
    Rscale_style = yml_dict['core']['Rscale_style']
    
    # Rscale values to loop over; Rscalen sizes the result arrays allocated below
    # and each Rscale value becomes part of the output file names.
    if Rscale_style == 1:
        # take the list of Rscale values from the configuration
        Rscale_list = yml_dict['core']['Rscale']
        Rscalen = len(Rscale_list)
        Rscale_list = np.asarray(Rscale_list)
        
        # debug
        #Rscale_list = np.array([2.0])
        #Rscalen = len(Rscale_list)
        
    elif Rscale_style == 2:
        # a single entry with the placeholder value 0
        Rscalen = 1
        Rscale_list = [0]
    else:
        # Fail here with a clear message; otherwise Rscalen/Rscale_list would be
        # undefined (or left over from a previous experiment) further down.
        raise ValueError('Unsupported Rscale_style: {} (expected 1 or 2)'.format(Rscale_style))
    # save prior variable list
    prior_variable_dict = []  # variable list
    prior_nc_file_list = []  # nc file list
    prior_variable_dict_3d = []  # variable list
    prior_nc_file_list_3d = []  # nc file list

    for key, value in prior_state_variable.items():
        nc_keyvalue = prior_state_variable[key]['ncname']  # note: 2d dict
        print('>>  nc_keyvalue {}...'.format(nc_keyvalue))
        for key1, value1 in nc_keyvalue.items():
            print('>>  {}: {}'.format(key1,value1))
            for i in range(len(prior_state_variable[key][value1])):
                if key in ['2d']:
                    prior_variable_dict.append(prior_state_variable[key][value1][i])
                    prior_nc_file_list.append(key1+'/'+value1+'.nc')
                elif key in ['3d']:
                    prior_variable_dict_3d.append(prior_state_variable[key][value1][i])
                    prior_nc_file_list_3d.append(key1+'/'+value1+'.nc')

    # variable list
    prior_variable_len = len(prior_variable_dict)
    prior_variable3d_len = len(prior_variable_dict_3d)
    print('>>  Number of 2d prior variables is: {}. List:'.format(prior_variable_len))
    print('      {}'.format(prior_variable_dict))
    print('>>  Number of 3d prior variables is: {}. List:'.format(prior_variable3d_len))
    print('      {}'.format(prior_variable_dict_3d))

    MC_dir =  dir_data_save + '/' + en + '/'

    Xa2d_full_np   = np.full((locRadn,proxy_fracn,Rscalen,MCn*nens,prior_variable_len,recon_period_len),np.nan)  # save mean of each variable (column) of each MC run (row)
    Xa2d_mean_np   = np.full((locRadn,proxy_fracn,Rscalen,MCn,prior_variable_len,recon_period_len),np.nan)  # save mean of each variable (column) of each MC run (row)
    Xa2d_std_np    = np.full((locRadn,proxy_fracn,Rscalen,MCn,prior_variable_len,recon_period_len),np.nan)  # save mean of each variable (column) of each MC run (row)
    Xa2d_all_np    = np.full((dum_jmax, dum_imax,locRadn,proxy_fracn,Rscalen,MCn, prior_variable_len, recon_period_len),np.nan)  # save mean of each variable (column) of each MC run (row)
    Xa2d_allstd_np = np.full((dum_jmax, dum_imax, locRadn,proxy_fracn,Rscalen,MCn, prior_variable_len, recon_period_len),np.nan)  # save mean of each variable (column) of each MC run (row)
    Xa2d_mean_np2  = np.full((locRadn,proxy_fracn,Rscalen,prior_variable_len,recon_period_len),np.nan)  # save mean of each variable (column) of each MC run (row)
    Xa2d_std_np2   = np.full((locRadn,proxy_fracn,Rscalen,prior_variable_len,recon_period_len),np.nan)  # save mean of each variable (column) of each MC run (row)
    df_evaluation  = pandas.DataFrame()
    df_zscore_all  = pandas.DataFrame()
    for locRadi in range(locRadn):
        locRad = local_rad_list[locRadi]
        if locRad is None:
            locRadv = 0 # for filename only
        else:
            locRadv = locRad
        for proxy_fraci in range(proxy_fracn):
            proxy_frac = proxy_frac_list[proxy_fraci]

            for Rscalei in range(Rscalen):
                Rscale = Rscale_list[Rscalei]
                
                savefilename_add = '_loc_'+ str(locRadv)+'_proxy_frac_'+ str(proxy_frac)+'_Rscale_'+str(Rscale)
                
                for MCi in range(MCn):
                    # NetCDF file name
                    filename_short = '_loc_', str(locRadv),'_proxy_frac_', str(proxy_frac),'_Rscale_',str(Rscale),'_MC_',str(MCi) 
                    nc_filename = MC_dir + ''.join(filename_short) + '.nc'
                    hdf5name    = MC_dir + ''.join(filename_short) + '.hdf5'

                    print('>>  Read nc file: {}'.format(nc_filename))
                    
                    for Xa2d_vari in range(prior_variable_len):
                        
                        # Names of the posterior (Xa) fields stored for this prior variable
                        Xa_full_name_vari = prior_variable_dict[Xa2d_vari] +'_Xa_full'
                        Xa_mean_name_vari = prior_variable_dict[Xa2d_vari] +'_Xa_mean'
                        Xa_variance_name_vari = prior_variable_dict[Xa2d_vari] +'_Xa_variance'
                        # Open the file once (instead of once per field) and close it as
                        # soon as the three arrays are loaded; [:] reads the data into
                        # memory, so the arrays remain usable after the file is closed.
                        with Dataset(nc_filename) as nc_xa:
                            Xa_full_vari = nc_xa.variables[Xa_full_name_vari][:]
                            Xa_mean_vari = nc_xa.variables[Xa_mean_name_vari][:]
                            Xa_variance_vari = nc_xa.variables[Xa_variance_name_vari][:]

                        if prior_variable_dict[Xa2d_vari] in limit_hard_keys:
                            # Some variables have hard limits, e.g., CaCO3 = [0, 100].
                            # Values outside the limits are clipped in the full ensemble,
                            # then mean and variance are recomputed over the ensemble
                            # axis (axis 2) so they agree with the clipped values.
                            lim_min = yml_dict['prior'][prior_source]['limit_hard'][prior_variable_dict[Xa2d_vari]]['lim_min']
                            lim_max = yml_dict['prior'][prior_source]['limit_hard'][prior_variable_dict[Xa2d_vari]]['lim_max']
                            #print('limit min {} and max {}'.format(lim_min, lim_max))
                            # Compare with None instead of testing truthiness: a bound
                            # of 0 is a valid limit and must still be enforced.
                            if lim_min is not None:
                                if np.any(Xa_full_vari<lim_min):
                                    Xa_full_vari[Xa_full_vari<lim_min] = lim_min
                                    Xa_mean_vari = np.mean(Xa_full_vari,axis=2)
                                    Xa_variance_vari = np.var(Xa_full_vari,axis=2)
                                    print('>>    Force {} value to be >= {}'.format(prior_variable_dict[Xa2d_vari],lim_min))
                            if lim_max is not None:
                                if np.any(Xa_full_vari>lim_max):
                                    Xa_full_vari[Xa_full_vari>lim_max] = lim_max
                                    Xa_mean_vari = np.mean(Xa_full_vari,axis=2)
                                    Xa_variance_vari = np.var(Xa_full_vari,axis=2)
                                    print('>>    Force {} value to be <= {}'.format(prior_variable_dict[Xa2d_vari], lim_max))

                        for reconi in range(recon_period_len):

                            Xa_full_reconi = Xa_full_vari[:,:,:,0,reconi].reshape((dum_ijmax,nens))
                            Xa_full_reconi_mean = np.nanmean(Xa_full_reconi,axis=0)

                            Xa_mean_reconi = Xa_mean_vari[:,:,0,reconi]
                            Xa2d_all_np[:,:,locRadi,proxy_fraci,Rscalei,MCi,Xa2d_vari,reconi] = np.copy(Xa_mean_vari[:,:,0,reconi])
                            Xa_mean_reconi_mean = np.nanmean(Xa_mean_reconi)

                            Xa_variance_reconi = Xa_variance_vari[:,:,0,reconi]
                            Xa2d_allstd_np[:,:,locRadi,proxy_fraci,Rscalei,MCi,Xa2d_vari,reconi] = Xa_variance_vari[:,:,0,reconi]
                            Xa_std_reconi_mean = np.sqrt(np.nanmean(Xa_variance_reconi))

                            #print('>>  reconi = {}, mean is {}, std is {}'.format(reconi, Xa_mean_reconi_mean, Xa_std_reconi_mean))
                            Xa2d_full_np[locRadi,proxy_fraci,Rscalei,MCi*nens:(MCi+1)*nens,Xa2d_vari,reconi] = Xa_full_reconi_mean
                            Xa2d_mean_np[locRadi,proxy_fraci,Rscalei,MCi,Xa2d_vari,reconi] = Xa_mean_reconi_mean
                            Xa2d_std_np[locRadi,proxy_fraci,Rscalei,MCi,Xa2d_vari,reconi] = Xa_std_reconi_mean
                print('First variable: all MC mean')
                print(Xa2d_mean_np[locRadi,proxy_fraci,Rscalei,:,0,0])

                Xa2d_all_np = np.ma.masked_where(Xa2d_all_np > 9.0e+36, Xa2d_all_np)
                Xa2d_allstd_np = np.ma.masked_where(Xa2d_all_np > 9.0e+36, Xa2d_allstd_np)
                for Xa2d_vari in range(prior_variable_len):
                    for reconi in range(recon_period_len):
                        Xa2d_mean_np2[locRadi,proxy_fraci,Rscalei,Xa2d_vari,reconi] = np.nanmean(Xa2d_all_np[:,:,locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,reconi])
                        Xa2d_std_np2[locRadi,proxy_fraci,Rscalei,Xa2d_vari,reconi] = np.sqrt(np.nanmean(Xa2d_allstd_np[:,:,locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,reconi]))

                np.set_printoptions(precision=6, suppress=True)
                if log_level > 1:
                    print('All variable. Mean of variables x reconi')
                    print('{}'.format(Xa2d_mean_np2[locRadi,proxy_fraci,Rscalei,:,:]))
                #print('std  of variables x reconi')
                #print('{}'.format(Xa2d_std_np2))
    
                print('')
                print('Step #1: read data - Done')
                print('')

                # Calculate mean and std of each variable for each time slice
                # plot the ensemble values

                df = pandas.DataFrame()
                print('')
                print('DA - Summary of global mean and standard deviation')
                print('')
                
                if showplot:
                    fig, (ax0, ax1, ax2, ax3) = plt.subplots(nrows=4, figsize=(3, 6))
                    if recon_period_len>1:
                        fig2, (ax10, ax11, ax12, ax13) = plt.subplots(nrows=4, figsize=(3, 6))
                    params = {'mathtext.default': 'regular' }
                    plt.rcParams.update(params)
                    #plt.rcParams.update({'figure.figsize':(5,3), 'figure.dpi':110})
                    #fig.suptitle('DA')

                # 2d variables
                for Xa2d_vari in range(prior_variable_len):

                    print(prior_variable_dict[Xa2d_vari])
                    datadf = {'field':prior_variable_dict[Xa2d_vari],'mean':[np.nan],'std':[np.nan],
                              '2.5%':[np.nan],'5%':[np.nan],'25%':[np.nan],'median':[np.nan],'75%':[np.nan],'95%':[np.nan],'97.5%':[np.nan],'label':''}
                    df2 = pandas.DataFrame(datadf, index=[Xa2d_vari])
                    df = pandas.concat([df,df2])

                    sst_std_mc = np.std(Xa2d_mean_np[locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,:],axis=0)
                    if log_level > 2:
                        print('  _locR '+str(locRadv)+' proxy_frac '+str(proxy_frac)+' scaled r '+str(Rscale))

                    for reconi in range(recon_period_len):

                        meani = np.nanmean(Xa2d_full_np[locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,reconi])
                        stdi = np.std(Xa2d_full_np[locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,reconi])
                        perc = np.percentile(Xa2d_full_np[locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,reconi],np.array([2.5, 5, 25, 50, 75, 95, 97.5]))
                        datadf = {'field':'','mean':[meani],'std':[stdi],
                                  '2.5%':[perc[0]],'5%':[perc[1]],'25%':[perc[2]],'median':[perc[3]],'75%':[perc[4]],'95%':[perc[5]],'97.5%':[perc[6]],'label':label_all[reconi]}
                        df2 = pandas.DataFrame(data = datadf, index=[Xa2d_vari])
                        df = pandas.concat([df,df2])
                        if log_level > 2:
                            print('    {:.3f} ± {:.3f}: {}'.format(meani, stdi, label_all[reconi]))

                        if recon_period_len>2:
                            warmpeak = Xa2d_full_np[locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,1]-Xa2d_full_np[locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,0]
                            #warmbody = Xa2d_full_np[locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,2]-Xa2d_full_np[locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,0]
                            coolpeak = Xa2d_full_np[locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,2]-Xa2d_full_np[locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,1]
                            warmpeakmean = np.nanmean(warmpeak)
                            warmpeakstd  = np.std(warmpeak)
                            warmperc = np.percentile(warmpeak,np.array([2.5, 5, 25, 50, 75, 95, 97.5]))
                            coolpeakmean = np.nanmean(coolpeak)
                            coolpeakstd  = np.std(coolpeak)
                            coolperc = np.percentile(coolpeak,np.array([2.5, 5, 25, 50, 75, 95, 97.5]))

                    if recon_period_len>2:
                        df2 = pandas.DataFrame({'field':'','mean':[warmpeakmean],'std':[warmpeakstd],
                                                '2.5%':[warmperc[0]],'5%':[warmperc[1]],'25%':[warmperc[2]],'median':[warmperc[3]],'75%':[warmperc[4]],'95%':[warmperc[5]],'97.5%':[warmperc[6]],'label':'Peak_warming'}, index=[Xa2d_vari])
                        df3 = pandas.DataFrame({'field':'','mean':[coolpeakmean],'std':[coolpeakstd],
                                                '2.5%':[coolperc[0]],'5%':[coolperc[1]],'25%':[coolperc[2]],'median':[coolperc[3]],'75%':[coolperc[4]],'95%':[coolperc[5]],'97.5%':[coolperc[6]],'label':'Peak_cooling'}, index=[Xa2d_vari])
                        df = pandas.concat([df,df2,df3])
                        if log_level > 2:
                            print('    {:.6f} ± {:.6f}: peak warming'.format(warmpeakmean,warmpeakstd))
                            print('    {:.6f} ± {:.6f}: peak cooling'.format(coolpeakmean,coolpeakstd))

                    if showplot:
                        for reconi in range(recon_period_len):
                            if reconi == 3:
                                continue
                            kwargs = dict(alpha=0.5, bins=50)

                            if Xa2d_vari == 0:
                                ax0.hist(Xa2d_full_np[locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,reconi], **kwargs, label = label_all[reconi])
                                ax0.set_ylabel('#')
                                ax0.set_xlabel('SST (\u00B0C)')
                                ax0.tick_params(labelsize='small')
                                ax0.xaxis.set_minor_locator(AutoMinorLocator())
                            if Xa2d_vari == 1:
                                ax1.hist(Xa2d_full_np[locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,reconi], **kwargs, label = label_all[reconi])
                                ax1.set_ylabel('#')
                                ax1.set_xlabel('SAT (\u00B0C)')
                                ax1.tick_params(labelsize='small')
                                ax1.xaxis.set_minor_locator(AutoMinorLocator())
                            if Xa2d_vari == 2:
                                ax2.hist(Xa2d_full_np[locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,reconi], **kwargs, label = label_all[reconi])
                                ax2.set_ylabel('#')
                                ax2.set_xlabel('$\it{p}$CO$_2$ (ppm)')
                                ax2.set_xlim(0, 2800)
                                ax2.legend(prop={'size': 6.5})  
                                ax2.tick_params(labelsize='small')
                                ax2.xaxis.set_minor_locator(AutoMinorLocator())
                            #if Xa2d_vari == 3:
                            #    ax3.hist(Xa2d_full_np[locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,reconi], **kwargs, label = label_all[reconi])
                            #    ax3.set_ylabel('Number')
                            #    ax3.set_xlabel('Salinity (PSU)')
                            if Xa2d_vari == 4:
                                ax3.hist(Xa2d_full_np[locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,reconi], **kwargs, label = label_all[reconi])
                                ax3.set_ylabel('#')
                                ax3.set_xlabel('pH')     
                                ax3.tick_params(labelsize='small')
                                ax3.xaxis.set_minor_locator(AutoMinorLocator())
                            #if Xa2d_vari == 6:
                            #    ax5.hist(Xa2d_full_np[locRadi,proxy_fraci,Rscalei,:,Xa2d_vari,reconi], **kwargs, label = label_all[reconi])
                            #    ax5.set_ylabel('Number')
                            #    ax5.set_xlabel('$CaCO_3$ (%)')
                        fig.tight_layout()

                        if recon_period_len>1:
                            if Xa2d_vari == 0:                    
                                ax10.hist(warmpeak, **kwargs, color = "#ff7f0e", label = 'warming')
                                ax10.hist(coolpeak, **kwargs, color = "#2ca02c", label = 'cooling')
                                ax10.set_ylabel('#')
                                ax10.set_xlabel('\u0394SST (\u00B0C)')                        
                                ax10.legend(prop={'size': 6.5});
                                ax10.tick_params(labelsize='small')
                                ax10.xaxis.set_minor_locator(AutoMinorLocator())
                            if Xa2d_vari == 1:
                                ax11.hist(warmpeak, **kwargs, color = "#ff7f0e")
                                ax11.hist(coolpeak, **kwargs, color = "#2ca02c")
                                ax11.set_ylabel('#')
                                ax11.set_xlabel('\u0394SAT (\u00B0C)')
                                ax11.tick_params(labelsize='small')
                                ax11.xaxis.set_minor_locator(AutoMinorLocator())
                            if Xa2d_vari == 2:
                                ax12.hist(warmpeak, **kwargs, color = "#ff7f0e")
                                ax12.hist(coolpeak, **kwargs, color = "#2ca02c")
                                ax12.set_ylabel('#')
                                ax12.set_xlabel('\u0394$\it{p}$CO$_2$ (ppm)')
                                ax12.tick_params(labelsize='small')
                                ax12.xaxis.set_minor_locator(AutoMinorLocator())
                            if Xa2d_vari == 4:
                                ax13.hist(warmpeak, **kwargs, color = "#ff7f0e")
                                ax13.hist(coolpeak, **kwargs, color = "#2ca02c")
                                ax13.set_ylabel('#')
                                ax13.set_xlabel('\u0394pH')
                                ax13.tick_params(labelsize='small')
                                ax13.xaxis.set_minor_locator(AutoMinorLocator())
                            fig2.tight_layout()

                if showplot:
                    fig.savefig(yml_dict['core']['proj_dir']+'/wrk/'+en+'.summary.pdf')
                    if recon_period_len > 1:
                        fig2.savefig(yml_dict['core']['proj_dir']+'/wrk/'+en+'.delta.pdf')
            
                # print and save excel
                if savesummary_slice:
                    print('saved @')
                    fullname = yml_dict['core']['proj_dir']+'/wrk/'+en+savefilename_add+'.summary.csv'
                    print(fullname)
                    df.to_csv(fullname)

                print('')
                print('Step #2: summary - Done')
                print('')
    


                ### Purpose of this block
                # Prepare data for verification
                #
                ### Steps
                # 1. Prepare matrix for data saving: proxy, prior, posterior; std or not
                # 2. calculate and save each Monte Carlo runs

                #####################    User defined start   #####################
                if log_level > 1:
                    print('DA - Read proxy, prior, and posterior, standardize')
                    print('')
                #####################    User defined end     #####################

                df_eval = pandas.DataFrame()
                df_ob   = pandas.DataFrame()
                df_xb   = pandas.DataFrame()
                df_xa   = pandas.DataFrame()
                
                # Get the sites_withhold_len
                
                #locRad = local_rad_list[0]
                #if locRad is None:
                #    locRadv = 0 # for filename only
                #else:
                #    locRadv = locRad

                #proxy_frac = proxy_frac_list[0]
                #Rscale = Rscale_list[0]
                filename_short = '_loc_', str(locRadv),'_proxy_frac_', str(proxy_frac),'_Rscale_',str(Rscale),'_MC_0.hdf5'
                hdf5name = MC_dir + ''.join(filename_short)
                if log_level > 1:
                    print('Read first hdf5 file {} to get the number of withold datasets.'.format(hdf5name))
                sites_eval = pandas.read_hdf(hdf5name, 'sites_eval')
                sites_withhold_len  = len(sites_eval)
                if log_level > 1:
                    print(' Site withhold length ： {}'.format(sites_withhold_len))

                data_psm_d18o_find = 0
                data_psm_mgca_find = 0
                if 'Marine sediments_mgca_pooled_bcp' in proxy_list or 'Marine sediments_mgca_pooled_red' in proxy_list:
                    data_psm_mgca_find = 1

                if 'Marine sediments_d18o_pooled' in proxy_list:
                    data_psm_d18o_find = 1

                # Prepare empty matrix for saving the data of proxy, prior, and posterior
                ob_stat = np.full((locRadn,proxy_fracn,Rscalen,MCn, recon_period_len*2, sites_withhold_len), np.nan)
                xb_stat = np.full((locRadn,proxy_fracn,Rscalen,MCn, nens, sites_withhold_len), np.nan)   # save full prior for withhold data
                xa_stat = np.full((locRadn,proxy_fracn,Rscalen,MCn, nens, recon_period_len, sites_withhold_len), np.nan)

                df_ind = 0

                # columns name for the observation
                df_ind_recon = []

                for reconi in range(recon_period_len):
                    df_ind_recon_i = [data_period_id[reconi]] + [data_period_idstd[reconi]]
                    df_ind_recon = df_ind_recon + df_ind_recon_i

                if log_level > 2:
                    print(df_ind_recon)


                for MCi in range(MCn):
                #for MCi in range(1):
                    # NetCDF file name
                    filename_short = '_loc_', str(locRadv),'_proxy_frac_', str(proxy_frac),'_Rscale_',str(Rscale),'_MC_' + str(MCi)
                    nc_filename = MC_dir + ''.join(filename_short) + '.nc'
                    print('    {}'.format(nc_filename))
                    hdf5name    = MC_dir + ''.join(filename_short) + '.hdf5'

                    if data_psm_mgca_find == 1:
                        with h5py.File(hdf5name, 'r') as f:
                            Xb_sal = np.copy(f.get('Xb_sal'))
                            if log_level > 3:
                                print(Xb_sal.shape)
                            Xb_omega = np.copy((f.get('Xb_omega')))
                            Xb_ph = np.copy(f.get('Xb_ph'))
                        Xa_sal_full = Dataset(nc_filename).variables['ocn_sur_sal_Xa_full']
                        Xa_ph_full  = Dataset(nc_filename).variables['misc_pH_Xa_full']
                        Xa_omega_full = Dataset(nc_filename).variables['carb_sur_ohm_cal_Xa_full']

                    elif data_psm_d18o_find == 1:
                        with h5py.File(hdf5name, 'r') as f:
                            Xb_sal = np.copy(f.get('Xb_sal'))
                            if log_level > 3:
                                print(Xb_sal.shape)
                            Xb_ph = np.copy(f.get('Xb_ph'))
                        Xa_sal_full = Dataset(nc_filename).variables['ocn_sur_sal_Xa_full']
                        Xa_ph_full  = Dataset(nc_filename).variables['misc_pH_Xa_full']


                    ### Read Proxy ###
                    proxies = pandas.read_hdf(hdf5name, 'proxies')
                    #prior_variable_dict = pandas.read_hdf(hdf5name, 'prior_variable_dict')

                    if proxy_frac <= 1.0:
                        sites_eval = pandas.read_hdf(hdf5name, 'sites_eval')
                        sites_withhold_len  = len(sites_eval)
                        if log_level > 1:
                            print('Site withhold:       {}'.format(sites_eval['Site'].values))
                            print('Proxy        :       {}'.format(sites_eval['Proxy'].values))

                    proxy_psm_type_dict_df = pandas.read_hdf(hdf5name, 'proxy_psm_type_dict_df')
                    proxy_psm_type_dict_list = proxy_psm_type_dict_df[0].values.tolist()

                    for j in range(sites_withhold_len):
                        data_psm_type = sites_eval['Proxy'][j]
                        for key, value in proxy_assim2.items():
                            if data_psm_type in value:
                                #print(proxy_psm_type[key])
                                key0 = key
                                psm_required_variable_key = list(yml_dict['psm'][proxy_psm_type[key]]['psm_required_variables'].keys())[0]
                                xb_key = psm_required_variable_key+'_Xb_full'
                                xa_key = psm_required_variable_key+'_Xa_full'
                                #print('xa_key is {}'.format(xa_key))
                                Xb_full_field0 = Dataset(nc_filename).variables[xb_key] #
                                Xb_full_field0 = Xb_full_field0[:,:,:,0].reshape(dum_imax*dum_jmax, nens)
                                Xa_full_field0 = Dataset(nc_filename).variables[xa_key]

                        if proxy_psm_type[key0] in ['bayesreg_tex86', 'cgenie_caco3']:
                            if proxy_psm_type[key0] in ['bayesreg_tex86']:
                                proxy_i = 'tex86'
                            else:
                                proxy_i = 'caco3'
                            Ye = DeepDA_psm.cal_ye_cgenie(yml_dict,sites_eval,j,Xb_full_field0,proxy_assim2,proxy_psm_type,dum_lon_offset,dum_imax,dum_jmax)

                            xb_stat[locRadi,proxy_fraci,Rscalei,MCi,:,j] = np.copy(Ye)

                            #print('Prior Ye is {:.6f}'.format(np.mean(Ye)))

                            for reconi in range(recon_period_len):

                                Xa_reconi = np.copy(Xa_full_field0[:,:,:,0,reconi].reshape((dum_imax*dum_jmax,nens)))

                                Ye = DeepDA_psm.cal_ye_cgenie(yml_dict,sites_eval,j,Xa_reconi,proxy_assim2,proxy_psm_type,dum_lon_offset,dum_imax,dum_jmax)

                                #xa_stat[locRadi][proxy_fraci][Rscalei][MCi][2*reconi][j]   = np.mean(Ye)
                                #xa_stat[locRadi][proxy_fraci][Rscalei][MCi][2*reconi+1][j] = np.var(Ye)
                                xa_stat[locRadi,proxy_fraci,Rscalei,MCi,:,reconi,j]   = np.copy(Ye)
                                #print('Analysis Ye is {:.6f}'.format(np.mean(Ye)))
                                #ob_stat[j][reconi*2]   = sites_eval[data_period_id[reconi]][j]

                                ob_stat[locRadi][proxy_fraci][Rscalei][MCi][2*reconi][j] = sites_eval[data_period_id[reconi]][j]

                                # error
                                if ~np.isnan(sites_eval[data_period_id[reconi]][j]):

                                    if proxy_psm_type[key0] in ['bayesreg_tex86']:

                                        if proxy_err_eval in ['proxy_err_psm']:
                                            ob_stat[locRadi][proxy_fraci][Rscalei][MCi][reconi*2+1][j] = DeepDA_psm.obs_estimate_r_fixed_tex86(31) + sites_eval[data_period_idstd[reconi]][j] ** 2
                                        else:
                                            ob_stat[locRadi][proxy_fraci][Rscalei][MCi][reconi*2+1][j] = DeepDA_psm.obs_estimate_r_fixed_tex86(31)

                                    if proxy_psm_type[key0] in ['cgenie_caco3','cgenie_caco3_13c']:

                                        psm_error = yml_dict['psm'][proxy_psm_type[key0]]['psm_error']

                                        if proxy_err_eval in ['proxy_err_psm']:
                                            ob_stat[locRadi][proxy_fraci][Rscalei][MCi][reconi*2+1][j] = psm_error + sites_eval[data_period_idstd[reconi]][j] ** 2
                                        else:
                                            ob_stat[locRadi][proxy_fraci][Rscalei][MCi][reconi*2+1][j] = psm_error

                        elif proxy_psm_type[key0] in ['bayesreg_d18o_pooled']:

                            proxy_i = 'd18o'

                            Ye = DeepDA_psm.cal_ye_cgenie_d18O(yml_dict,sites_eval,j,Xb_full_field0,Xb_sal,Xb_ph,proxy_assim2,proxy_psm_type,dum_lon_offset,dum_imax,dum_jmax)

                            xb_stat[locRadi,proxy_fraci,Rscalei,MCi,:,j] = np.copy(Ye)

                            for reconi in range(recon_period_len):

                                Xa_reconi = np.copy(Xa_full_field0[:,:,:,0,reconi].reshape((dum_imax*dum_jmax,nens)))
                                Xa_sal_i  = np.copy(Xa_sal_full[:,:,:,0,reconi].reshape((dum_imax*dum_jmax,nens)))
                                Xa_ph_i   = np.copy(Xa_ph_full[:,:,:,0,reconi].reshape((dum_imax*dum_jmax,nens)))

                                Ye = DeepDA_psm.cal_ye_cgenie_d18O(yml_dict,sites_eval,j,Xa_reconi,Xa_sal_i,Xa_ph_i,proxy_assim2,proxy_psm_type,dum_lon_offset,dum_imax,dum_jmax)

                                xa_stat[locRadi,proxy_fraci,Rscalei,MCi,:,reconi,j]   = np.copy(Ye)

                                ob_stat[locRadi][proxy_fraci][Rscalei][MCi][2*reconi][j] = sites_eval[data_period_id[reconi]][j]

                                # error
                                if ~np.isnan(sites_eval[data_period_id[reconi]][j]):

                                    if proxy_err_eval in ['proxy_err_psm']:
                                        ob_stat[locRadi][proxy_fraci][Rscalei][MCi][reconi*2+1][j] = DeepDA_psm.obs_estimate_r_fixed_d18o(15) + sites_eval[data_period_idstd[reconi]][j] ** 2
                                    else:
                                        ob_stat[locRadi][proxy_fraci][Rscalei][MCi][reconi*2+1][j] = DeepDA_psm.obs_estimate_r_fixed_d18o(15)

                        elif proxy_psm_type[key0] in ['bayesreg_mgca_pooled_bcp', 'bayesreg_mgca_pooled_red']:
                            proxy_i = 'mgca'
                            spp = 'all'
                            cleaningr = np.tile(np.array([1]),nens)
                            cleaningb = np.tile(np.array([0]),nens)

                            if proxy_psm_type[key0] in ['bayesreg_mgca_pooled_red']:
                                clearning_one = cleaningr
                                proxy_explain = 'reductive'

                            elif proxy_psm_type[key0] in ['bayesreg_mgca_pooled_bcp']:
                                clearning_one = cleaningb
                                proxy_explain = 'barker'

                            Ye = DeepDA_psm.cal_ye_cgenie_mgca(yml_dict,sites_eval,j,Xb_full_field0,proxy_psm_type[key0],dum_lon_offset,dum_imax,dum_jmax,Xb_sal,Xb_ph,Xb_omega,geologic_age)

                            if psm_baymag_ln in ['yes']:
                                xb_stat[locRadi,proxy_fraci,Rscalei,MCi,:,j] = np.copy(np.exp(Ye))
                            else:
                                xb_stat[locRadi,proxy_fraci,Rscalei,MCi,:,j] = np.copy(Ye)

                            #Xa_sal_full = Dataset(nc_filename).variables['ocn_sur_sal_Xa_full']
                            #Xa_ph_full  = Dataset(nc_filename).variables['misc_pH_Xa_full']
                            #Xa_omega_full = Dataset(nc_filename).variables['carb_sur_ohm_cal_Xa_full']

                            for reconi in range(recon_period_len):

                                Xa_reconi  =   Xa_full_field0[:,:,:,0,reconi].reshape((dum_imax*dum_jmax,nens))
                                Xa_sal_i   =   np.copy(Xa_sal_full[:,:,:,0,reconi].reshape((dum_imax*dum_jmax,nens)))
                                Xa_ph_i    =   np.copy(Xa_ph_full[:,:,:,0,reconi].reshape((dum_imax*dum_jmax,nens)))
                                Xa_omega_i =   np.copy(Xa_omega_full[:,:,:,0,reconi].reshape((dum_imax*dum_jmax,nens)))

                                Ye = DeepDA_psm.cal_ye_cgenie_mgca(yml_dict,sites_eval,j,Xa_reconi,proxy_psm_type[key0],dum_lon_offset,dum_imax,dum_jmax,Xa_sal_i,Xa_ph_i,Xa_omega_i,geologic_age)

                                if psm_baymag_ln in ['yes']:
                                    xa_stat[locRadi,proxy_fraci,Rscalei,MCi,:,reconi,j]   = np.copy(np.exp(Ye))
                                else:
                                    xa_stat[locRadi,proxy_fraci,Rscalei,MCi,:,reconi,j]   = np.copy(Ye)

                                #xa_stat[locRadi][proxy_fraci][Rscalei][MCi][2*reconi][j]   = np.mean(Ye)
                                #xa_stat[locRadi][proxy_fraci][Rscalei][MCi][2*reconi+1][j] = np.var(Ye)

                                ob_stat[locRadi][proxy_fraci][Rscalei][MCi][2*reconi][j] = sites_eval[data_period_id[reconi]][j]

                                if ~np.isnan(sites_eval[data_period_id[reconi]][j]):
                                    ob_err0 = DeepDA_psm.obs_estimate_r_fixed_mgca_pooled((15, 16), clearning_one[0], np.nanmean(Xb_sal), np.nanmean(Xb_ph), np.nanmean(Xb_omega), spp, geologic_age)
                                    if proxy_err_eval in ['proxy_err_psm']:
                                        ob_stat[locRadi][proxy_fraci][Rscalei][MCi][reconi*2+1][j] = ob_err0 + sites_eval[data_period_idstd[reconi]][j] ** 2
                                    else:
                                        ob_stat[locRadi][proxy_fraci][Rscalei][MCi][reconi*2+1][j] = ob_err0

                        # save proxy, prior, and posterior data and then standardized

                        # info
                        df_i = pandas.DataFrame({'site':sites_eval['Site'][j],'proxy':proxy_i,'locRad':locRadv,'proxy_frac':proxy_frac,'Rscale':Rscale,'MC':MCi}, index=[df_ind])
                        df_eval = pandas.concat([df_eval,df_i])

                        df_ind += 1

                    # obs
                    ob_data = np.swapaxes(ob_stat[locRadi,proxy_fraci,Rscalei,MCi,:,:],0,1)
                    df_obi = pandas.DataFrame(data=ob_data,columns=df_ind_recon)
                    df_ob = pandas.concat([df_ob,df_obi])

                    #xb_std = np.copy(xb_stat[locRadi][proxy_fraci][Rscalei][MCi][:][:])
                    #ob_stat = np.full((locRadn,proxy_fracn,Rscalen,MCn, recon_period_len*2, sites_withhold_len), np.nan)
                    #xb_stat = np.full((locRadn,proxy_fracn,Rscalen,MCn, nens, sites_withhold_len), np.nan)   # save full prior for withhold data
                    #xa_stat = np.full((locRadn,proxy_fracn,Rscalen,MCn, nens, recon_period_len, sites_withhold_len), np.nan)

                    # xb
                    xb_data = np.swapaxes(xb_stat[locRadi,proxy_fraci,Rscalei,MCi,:,:],0,1)  # withhold x nens
                    df_xb_i  = pandas.DataFrame(data=xb_data)
                    df_xb = pandas.concat([df_xb,df_xb_i])

                    # xa
                    xa_data = np.swapaxes(xa_stat[locRadi,proxy_fraci,Rscalei,MCi,:,:,:],0,2)  # withhold x recon x nens
                    xa_data1 = xa_data.reshape((sites_withhold_len, recon_period_len*nens))
                    df_xa_i  = pandas.DataFrame(data=xa_data1)
                    df_xa = pandas.concat([df_xa,df_xa_i])

                df_ob = df_ob.reset_index()
                df_xb = df_xb.reset_index()
                df_xa = df_xa.reset_index()
                print('')
                print('Step #3: evaluation data preparation - Done')
                print('')
                if log_level >= 3:
                    print('df_eval')
                    print(df_eval)
                    print('df_ob')
                    print(df_ob)
                    print('df_xb')
                    print(df_xb)
                    print('df_xa')
                    print(df_xa)


>>  Import package => OKAY

petmproxy3slices_v0.0.20_w_deepmip.csv_petm29_v20_20210718_all_bays_MCsd100_pHcor_omega5_frac0.98
>>  Loading configuration file => OKAY

['sed_CaCO3', 'atm_pCO2']
>>  nc_keyvalue {'biogem': 'fields_biogem_2d'}...
>>  biogem: fields_biogem_2d
>>  nc_keyvalue {'biogem': 'fields_biogem_3d'}...
>>  biogem: fields_biogem_3d
>>  Number of 2d prior variables is: 9. List:
      ['ocn_sur_temp', 'atm_temp', 'atm_pCO2', 'ocn_sur_sal', 'misc_pH', 'carb_sur_ohm_cal', 'ocn_ben_temp', 'sed_CaCO3', 'ocn_sur_ALK']
>>  Number of 3d prior variables is: 0. List:
      []
>>  Read nc file: /volumes/DA/DeepDA/wrk/petmproxy3slices_v0.0.20_w_deepmip.csv_petm29_v20_20210718_all_bays_MCsd100_pHcor_omega5_frac0.98/_loc_0_proxy_frac_0.98_Rscale_10.0_MC_0.nc
>>  Read nc file: /volumes/DA/DeepDA/wrk/petmproxy3slices_v0.0.20_w_deepmip.csv_petm29_v20_20210718_all_bays_MCsd100_pHcor_omega5_frac0.98/_loc_0_proxy_frac_0.98_Rscale_10.0_MC_1.nc
>>  Read nc file: /volumes/DA/DeepDA/wrk/petmpro

KeyboardInterrupt: 

In [22]:
# Quick sanity check of the number of proxy types (pn) and the verbosity
# level (log_level) before running the evaluation cell; one value per line.
print(pn, log_level, sep='\n')

4
4


In [39]:
# Step #4: evaluation of the DA results.
#
# For every (localization radius, proxy fraction, Rscale) combination this
# cell:
#   1. z-scores the withheld observations (ob), the prior estimates (xb) and
#      the posterior estimates (xa) per proxy type and per reconstruction
#      slice, always using the mean/std of the *observations* so that the
#      different proxy types become comparable;
#   2. computes RMSE, CE and r^2 of ob-vs-xb and ob-vs-xa for each slice and
#      their relative change in percent;
#   3. repeats RMSE/CE with all slices pooled together (row labelled 'all');
#   4. appends the results to df_evaluation / df_zscore_all and optionally
#      writes both tables to CSV.
#
# Inputs are the globals prepared by the earlier cells (df_eval, df_ob,
# df_xb, df_xa, df_evaluation, df_zscore_all, Typelist, nens, ...).
#
# NOTE(review): `diri` is never used inside the loop body, and `Rscale` / `en`
# are not rebound in this cell; they keep whatever value the data-preparation
# cell left behind. Confirm this is intended when several experiments or
# Rscale values are evaluated.
for diri in range(len(explist)):
    for locRadi in range(locRadn):
        locRad = local_rad_list[locRadi]
        # 0 stands in for "no localization" (for filename only)
        locRadv = 0 if locRad is None else locRad

        for proxy_fraci in range(proxy_fracn):
            proxy_frac = proxy_frac_list[proxy_fraci]

            for Rscalei in range(Rscalen):

                ###############################################################
                # calculate RMSE, CE, R^2 for each time slice
                ###############################################################

                df_ind_i = 0
                # z-score rows of every slice, pooled for the all-slice statistics
                df_zscore_mc = pandas.DataFrame()

                for reconi in range(recon_period_len):

                    # z-score rows of every proxy type for this slice
                    df_zscore = pandas.DataFrame()
                    data_period_id_i = data_period_id[reconi]

                    if log_level > 1:
                        print('')
                        print(data_period_id_i)
                        print('')

                    for proxy_j in range(pn):

                        df_zscore_j = pandas.DataFrame()

                        proxy_i = Typelist[proxy_j].lower()

                        print('proxy i {}'.format(proxy_i))

                        # rows of the evaluation tables that belong to this proxy type
                        proxy_mask = df_eval['proxy'] == proxy_i
                        df_eval_pi = df_eval[proxy_mask]

                        if pn > 1:
                            df_ob_pi = df_ob[proxy_mask]
                            df_xb_pi = df_xb[proxy_mask]
                            df_xa_pi = df_xa[proxy_mask]
                        else:
                            # single-proxy experiment: every row belongs to proxy_i
                            df_ob_pi = df_ob
                            df_xb_pi = df_xb
                            df_xa_pi = df_xa

                        # Drop the stale 'index' column left by the earlier
                        # reset_index() and renumber the rows 0..n-1.
                        df_ob_pi = df_ob_pi.drop(columns='index').reset_index(drop=True)
                        df_xb_pi = df_xb_pi.drop(columns='index').reset_index(drop=True)
                        df_xa_pi = df_xa_pi.drop(columns='index').reset_index(drop=True)

                        # Keep only the ensemble columns of the current slice
                        # (posterior columns are laid out slice by slice).
                        df_xa_pi = df_xa_pi[df_xa_pi.columns[reconi*nens:(reconi+1)*nens]]
                        df_xa_pi.columns = range(df_xa_pi.shape[1])

                        # observation of this slice and ensemble means of prior/posterior
                        df_ob_pi_all = df_ob_pi[data_period_id_i]
                        df_xb_pi_all = df_xb_pi.mean(axis=1)
                        df_xa_pi_all = df_xa_pi.mean(axis=1)

                        if log_level > 5:
                            print('df_ob_pi')
                            print(df_ob_pi)
                            print('df_xb_pi')
                            print('df_xa_pi')
                            print('df_ob_pi_all')
                            print(df_ob_pi_all)
                            print('df_xb_pi_all')
                            print(df_xb_pi_all)
                            print('df_xa_pi_all')
                            print(df_xa_pi_all)

                        df_ob_pi_mean = df_ob_pi_all.mean()
                        df_ob_pi_std = df_ob_pi_all.std()

                        if log_level > 3:
                            print('df_ob_pi_mean')
                            print(df_ob_pi_mean)
                            print('df_ob_pi_std')
                            print(df_ob_pi_std)

                        # Standardize everything with the observation statistics.
                        df_ob_pi_zscore = (df_ob_pi_all - df_ob_pi_mean) / df_ob_pi_std
                        df_xb_pi_zscore = (df_xb_pi_all - df_ob_pi_mean) / df_ob_pi_std
                        df_xa_pi_zscore = (df_xa_pi_all - df_ob_pi_mean) / df_ob_pi_std

                        if log_level > 5:
                            print('>>>>> ')
                            print('df_ob_pi_zscore')
                            print(df_ob_pi_zscore)
                            print('df_xb_pi_zscore')
                            print(df_xb_pi_zscore)
                            print('df_xa_pi_zscore')
                            print(df_xa_pi_zscore)

                        # Tag every row with the experiment settings, column by
                        # column and row by row.
                        # NOTE(review): 'loc' and 'proxy_frac' store the loop
                        # indices here, whereas the summary table below stores
                        # locRadv / proxy_frac - confirm which one is wanted.
                        MCnn = len(df_xa_pi_all)
                        row_labels = (('proxy', proxy_i),
                                      ('reconi', data_period_id_i),
                                      ('loc', locRadi),
                                      ('proxy_frac', proxy_fraci),
                                      ('Rscale', Rscale))
                        for label_col, label_val in row_labels:
                            for MCii in range(MCnn):
                                df_zscore_j.loc[MCii, [label_col]] = label_val

                        df_zscore_j['xb'] = df_xb_pi_all
                        df_zscore_j['ob'] = df_ob_pi_all
                        df_zscore_j['xa'] = df_xa_pi_all

                        df_zscore_j['xb_zscore'] = df_xb_pi_zscore
                        df_zscore_j['ob_zscore'] = df_ob_pi_zscore
                        df_zscore_j['xa_zscore'] = df_xa_pi_zscore

                        if log_level > 5:
                            print('>>>>> ')
                            print('df_zscore_j')
                            print(df_zscore_j)

                        df_zscore = pandas.concat([df_zscore,df_zscore_j])

                        if log_level > 5:
                            print('df_eval_pi')
                            print('df_ob_pi')
                            print('df_xb_pi[:][0:6]')
                            print('df_xa_pi')

                            print(df_eval_pi)
                            print(df_ob_pi)
                            print(df_xb_pi[:][0:6])
                            print(df_xa_pi)

                    if log_level > 5:
                        print('df_zscore:')
                        print(df_zscore)

                    # --- prior (Xb) vs. observations ---
                    rmse_xb = DeepDA_psm.rmse(df_zscore['ob_zscore'],df_zscore['xb_zscore'])

                    if log_level > 1:
                        print('RMSE of Ob vs. Xb {}'.format(rmse_xb))

                    CE_xb = DeepDA_psm.CE_NS70(df_zscore['ob_zscore'],df_zscore['xb_zscore'],1)
                    if log_level > 1:
                        print('CE of Ob vs. Xb {}'.format(CE_xb))

                    # r^2 over the pairs where both values are valid (not NaN/inf)
                    a = ma.masked_invalid(df_zscore['ob_zscore'])
                    b = ma.masked_invalid(df_zscore['xb_zscore'])
                    msk = (~a.mask & ~b.mask)
                    cor_matrix = ma.corrcoef(a[msk],b[msk])
                    r_2_xb = cor_matrix[0,1]**2

                    if log_level > 1:
                        print('r^2 of Ob vs. Xb {}'.format(r_2_xb))
                        print('')

                    # --- posterior (Xa) vs. observations ---
                    rmse_xa = DeepDA_psm.rmse(df_zscore['ob_zscore'],df_zscore['xa_zscore'])

                    if log_level > 1:
                        print('RMSE of Ob vs. Xa {}'.format(rmse_xa))

                    CE_xa = DeepDA_psm.CE_NS70(df_zscore['ob_zscore'],df_zscore['xa_zscore'],1)

                    if log_level > 1:
                        # fixed: this line used to be labelled 'Xb' although it
                        # reports the posterior (Xa) coefficient of efficiency
                        print('CE of Ob vs. Xa {}'.format(CE_xa))

                    a = ma.masked_invalid(df_zscore['ob_zscore'])
                    b = ma.masked_invalid(df_zscore['xa_zscore'])
                    msk = (~a.mask & ~b.mask)
                    cor_matrix = ma.corrcoef(a[msk],b[msk])
                    r_2_xa = cor_matrix[0,1]**2

                    if log_level > 1:
                        print('r^2 of Ob vs. Xa {}'.format(r_2_xa))
                        print(' --- ')
                        print('')

                    # delta RMSE, CE, and R^2: change from prior to posterior in
                    # percent of the prior value.
                    # NOTE(review): no guard against a prior statistic of zero.
                    drmse = 100 * (rmse_xb - rmse_xa)/rmse_xb
                    dce   = 100 * (CE_xb - CE_xa)/CE_xb
                    dr2   = 100 * (r_2_xb - r_2_xa)/r_2_xb

                    df_reconi = pandas.DataFrame({'reconi':reconi,
                                                  'data_period_id_i':data_period_id_i,
                                                  'loc':locRadv,
                                                  'proxy_frac':proxy_frac,
                                                  'Rscale':Rscale,
                                                  'RMSE Xb':rmse_xb,
                                                  'RMSE Xa':rmse_xa,
                                                  'dRMSE':drmse,
                                                  'CE Xb':CE_xb,
                                                  'CE Xa': CE_xa,
                                                  'dCE':dce,
                                                  'R^2 Xb':r_2_xb,
                                                  'R^2 Xa': r_2_xa,
                                                  'dR^2':dr2}, index=[df_ind_i])

                    df_evaluation = pandas.concat([df_evaluation,df_reconi])

                    df_ind_i += 1

                    df_zscore_mc = pandas.concat([df_zscore_mc,df_zscore])

                ###############################################################
                # all slices pooled: z-score per proxy over every slice at once
                ###############################################################
                df_zscore_slice = pandas.DataFrame()
                for proxy_j in range(pn):

                    df_zscore_j_slice = pandas.DataFrame()

                    proxy_i = Typelist[proxy_j].lower()
                    print('proxy i {}'.format(proxy_i))

                    df_proxy_j_slice = df_zscore_mc[df_zscore_mc['proxy'] == proxy_i]

                    df_ob_slice_mean = df_proxy_j_slice['ob'].mean()
                    df_ob_slice_std  = df_proxy_j_slice['ob'].std()

                    # Standardize with the pooled observation statistics, then
                    # renumber the rows (the pooled table carries repeated index
                    # labels); each result is a one-column DataFrame.
                    df_ob_slice_zscore = ((df_proxy_j_slice['ob'] - df_ob_slice_mean) / df_ob_slice_std).reset_index().drop(columns='index')
                    df_xb_slice_zscore = ((df_proxy_j_slice['xb'] - df_ob_slice_mean) / df_ob_slice_std).reset_index().drop(columns='index')
                    df_xa_slice_zscore = ((df_proxy_j_slice['xa'] - df_ob_slice_mean) / df_ob_slice_std).reset_index().drop(columns='index')

                    if log_level > 3:
                        print('df_ob_slice_mean')
                        print(df_ob_slice_mean)
                        print('df_ob_slice_std')
                        print(df_ob_slice_std)
                        print('df_ob_slice_zscore')
                        print(df_ob_slice_zscore)
                        print('df_xb_slice_zscore')
                        print(df_xb_slice_zscore)
                        print('df_xa_slice_zscore')
                        print(df_xa_slice_zscore)

                    # Same row tagging as in the per-slice table above.
                    # NOTE(review): data_period_id_i still holds the id of the
                    # last slice here although the rows span all slices.
                    MCnn = len(df_proxy_j_slice['ob'])
                    row_labels = (('proxy', proxy_i),
                                  ('reconi', data_period_id_i),
                                  ('loc', locRadi),
                                  ('proxy_frac', proxy_fraci),
                                  ('Rscale', Rscale))
                    for label_col, label_val in row_labels:
                        for MCii in range(MCnn):
                            df_zscore_j_slice.loc[MCii, [label_col]] = label_val

                    df_zscore_j_slice['ob_zscore'] = df_ob_slice_zscore
                    df_zscore_j_slice['xb_zscore'] = df_xb_slice_zscore
                    df_zscore_j_slice['xa_zscore'] = df_xa_slice_zscore
                    df_zscore_slice = pandas.concat([df_zscore_slice,df_zscore_j_slice])

                    if log_level > 3:
                        print('df_proxy_j_slice')
                        print(df_proxy_j_slice)

                rmse_slice_xb = DeepDA_psm.rmse(df_zscore_slice['ob_zscore'],df_zscore_slice['xb_zscore'])
                CE_slice_xb = DeepDA_psm.CE_NS70(df_zscore_slice['ob_zscore'],df_zscore_slice['xb_zscore'],1)
                rmse_slice_xa = DeepDA_psm.rmse(df_zscore_slice['ob_zscore'],df_zscore_slice['xa_zscore'])
                CE_slice_xa = DeepDA_psm.CE_NS70(df_zscore_slice['ob_zscore'],df_zscore_slice['xa_zscore'],1)

                drmse_slice = 100 * (rmse_slice_xb - rmse_slice_xa)/rmse_slice_xb
                dce_slice   = 100 * (CE_slice_xb - CE_slice_xa)/CE_slice_xb

                # Summary row for the pooled slices; r^2 is not computed for it.
                # 'reconi' carries the index of the last slice.
                df_reconi = pandas.DataFrame({'reconi':reconi,
                                              'data_period_id_i':'all',
                                              'loc':locRadv,
                                              'proxy_frac':proxy_frac,
                                              'Rscale':Rscale,
                                              'RMSE Xb':rmse_slice_xb,
                                              'RMSE Xa':rmse_slice_xa,
                                              'dRMSE':drmse_slice,
                                              'CE Xb':CE_slice_xb,
                                              'CE Xa': CE_slice_xa,
                                              'dCE':dce_slice,
                                              'R^2 Xb':'',
                                              'R^2 Xa': '',
                                              'dR^2':''}, index=[df_ind_i])

                df_evaluation = pandas.concat([df_evaluation,df_reconi])

                if log_level > 3:
                    print('')
                    print('df_zscore_slice')
                    print(df_zscore_slice)

                if log_level > 1:
                    print('All slice: RMSE of Ob vs. Xb --> Ob vs. Xa === {} --> {}'.format(rmse_slice_xb, rmse_slice_xa))
                    print('           CE   of Ob vs. Xb --> Ob vs. Xa === {} --> {}'.format(CE_slice_xb, CE_slice_xa))

                # accumulate the per-slice z-score rows of this parameter combination
                df_zscore_all = pandas.concat([df_zscore_all,df_zscore_mc])
                print(' This loop done ')

    if savesummary:
        # Both CSV files go to <proj_dir>/wrk/ and are prefixed with the
        # experiment name held in `en`.
        out_prefix = yml_dict['core']['proj_dir']+'/wrk/'+en
        df_zscore_all.to_csv(out_prefix+savefilename_add+'_df_zscore_all'+'.csv')
        df_evaluation.sort_index().to_csv(out_prefix+'_df_evaluation_log.csv')

    print('')
    print('Step #4: evaluation - Done')
    print('')
    print('All done!')


prePETMmean

proxy i d18o
df_ob_pi_mean
-3.064153317909091
df_ob_pi_std
0.7959239579907676
proxy i tex86
df_ob_pi_mean
0.717976466
df_ob_pi_std
0.11215094448335024
proxy i mgca
df_ob_pi_mean
3.71463179125
df_ob_pi_std
0.2402609413908526
proxy i caco3
df_ob_pi_mean
63.93315421181819
df_ob_pi_std
28.634923280027426
RMSE of Ob vs. Xb 2.3856014505222367
CE of Ob vs. Xb -5.220498399871826
r^2 of Ob vs. Xb 0.03911499814573178

RMSE of Ob vs. Xa 2.0249564577697265
CE of Ob vs. Xb -3.4818857401296714
r^2 of Ob vs. Xa 0.08888697313614041
 --- 


peakPETM

proxy i d18o
df_ob_pi_mean
-3.5521458219374997
df_ob_pi_std
0.6552883323622742
proxy i tex86
df_ob_pi_mean
0.8031635338571429
df_ob_pi_std
0.1494239330601818
proxy i mgca
df_ob_pi_mean
5.177309416615385
df_ob_pi_std
0.690489865102188
proxy i caco3
df_ob_pi_mean
19.42665009104762
df_ob_pi_std
27.259388865600524
RMSE of Ob vs. Xb 1.7051976181949071
CE of Ob vs. Xb -2.1271478919728737
r^2 of Ob vs. Xb 0.03958876282125485

RMSE of Ob vs. Xa 1.611