In [126]:
'''
Prepare prior for DTDA

OUTPUT:
    example:
    prior2proxyunit hdf5 file saved: /mnt/c/Users/mul450/Dropbox/git/petmda/code/DTDA/mlwrk/proxy/petmproxy3slices_v0.0.10gt1.csv.hdf5

Mingsong Li
1/15/2020
'''
from DTDAlib import modules_nc
from DTDAlib import modules_find_layer #.find_layer as find_layer
from DTDAlib import modules_psm_linear
import h5py
import time
import yaml
import numpy as np
import numpy.ma as ma
import numpy.matlib as mat
import pandas
import os
import warnings
from sys import platform as sys_pf
import matplotlib
import matplotlib.pyplot as plt
from netCDF4 import Dataset
if sys_pf == 'darwin':
    matplotlib.use("TkAgg")
    from matplotlib.mlab import griddata
%matplotlib inline

from mpl_toolkits.basemap import Basemap

try:
    import bayspline
except ImportError as e1:
    print('Warning:', e1)
try:
    import bayspar
except ImportError as e2:
    print('Warning:', e2)
try:
    import bayfox
except ImportError as e3:
    print('Warning:', e3)
try:
    import baymag
except ImportError as e4:
    print('Warning:', e4)

In [127]:
# Model grid and field selection shared by the cells below.
dum_lon_offset = -180  # passed to modules_nc.cal_find_ij as the longitude offset
dum_imax = 36  # lon
dum_jmax = 36  # lat
# Number of cells in one flattened 2-D field; derived from the grid size so
# the three values cannot get out of sync.
dum_ijmax = dum_imax * dum_jmax
nc_file ='fields_biogem_2d.nc'
nc_field = 'ocn_sur_temp'
t = 12  # last time slice, cGENIE
k = 0   # first layer, SST

In [128]:
# build Ye
# If there is no field in the model, convert model unit to proxy unit
#
# Steps performed by this cell:
#   1. read DTDA-config.yml
#   2. read the selected field of every prior member into Xb (grid cell x member)
#   3. read the proxy database
#   4. for every proxy row, pick the prior values at the site's grid cell and
#      run the configured proxy system model (PSM) to obtain Ye / Yevar

# The context manager closes the config file as soon as it has been parsed.
# NOTE(review): the config is assumed to be a trusted local file;
# yaml.safe_load would be the stricter choice.
with open("DTDA-config.yml", 'r') as f:
    yml_dict = yaml.load(f, Loader=yaml.FullLoader)

dir_prior = yml_dict['core']['prior_dir']
# NOTE(review): os.listdir returns entries in arbitrary order and includes
# every entry of the directory; each one is treated as a prior member.
dir_prior_full = os.listdir(dir_prior)
prior_len = len(dir_prior_full)
print('Prior member size: {}'.format(prior_len))

# read Xb
Xb = np.full((dum_ijmax, prior_len),np.nan)
for i in range(prior_len):
    name_nc = dir_prior+'/'+dir_prior_full[i]+'/'+nc_file
    # Close every netCDF file once its slice has been read.
    with Dataset(name_nc) as nc_data:
        data_nc_field = nc_data.variables[nc_field]
        # Keep the shape: the variable cannot be queried after the file is closed.
        data_nc_field_shape = data_nc_field.shape
        x = data_nc_field[t,:,:]
    # NOTE(review): if x is a masked array the mask is dropped by this
    # assignment - confirm how masked cells should be represented in Xb.
    Xb[:,i] = x.reshape(dum_ijmax)
    if i > prior_len-2:
        # Only report on the last member to keep the output short.
        print('data_nc_field.shape {}'.format(data_nc_field_shape))
        print('{}: {}'.format(i, dir_prior_full[i]))

# read config.yml settings
# All proxy settings come from the first database listed under 'use_from'.
proxy_cfg = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]
dir_proxies = proxy_cfg['datadir_proxy'] +'/'+ proxy_cfg['dbversion']
proxy_psm_type = proxy_cfg['proxy_psm_type']
proxy_assim2 = proxy_cfg['proxy_assim2']
psm_d18osw_adjust = yml_dict['psm']['bayesreg_d18o_pooled']['psm_d18osw_adjust']
data_period_id    = proxy_cfg['data_period_id']
data_period_idstd = proxy_cfg['data_period_idstd']
recon_period = yml_dict['core']['recon_period']
recon_timescale = yml_dict['core']['recon_timescale_interval']
recon_period_full = np.arange(recon_period[0],recon_period[1]+1,recon_timescale)
recon_period_len = recon_period_full.shape[0]
print('recon_period {} - {}'.format(recon_period[0], recon_period[1]))
print(recon_period_full)

# read proxies database
# This must happen before the output arrays are allocated because their
# first dimension is the number of proxy rows.
proxies = pandas.read_csv(dir_proxies)
proxies_len = proxies.shape[0]

# for saving proxy unit data Ye
# Rows that are never filled (unsupported or unknown proxy types) stay NaN.
Ye   = np.full((proxies_len,prior_len),np.nan)
Yevar= np.full((proxies_len,prior_len),np.nan)
obvalue  = np.full((proxies_len,recon_period_len),np.nan)
ob_err   = np.full((proxies_len,recon_period_len),np.nan)

# read lon lat for each line of proxy
proi = 0  # next free row in Ye / Yevar / obvalue / ob_err
for j in range(proxies_len):
    dum_lat = proxies['Lat'][j]  # (paleo)latitude of this site
    dum_lon = proxies['Lon'][j]  # (paleo)longitude of this site
    lonlat = modules_nc.cal_find_ij(dum_lon,dum_lat,dum_lon_offset,dum_imax,dum_jmax)
    # Flat index into the reshaped field: row index times row length plus
    # column index. The row length is dum_imax, not dum_jmax.
    # NOTE(review): assumes the field is stored as (lat, lon) and that
    # lonlat is [lon index, lat index] - confirm against cal_find_ij.
    lonlati = lonlat[1] * dum_imax + lonlat[0]
    prior_1grid = Xb[lonlati,:]   # prior
    print('Data row {}, grid [lon lat] {}'.format(j,lonlat))
    print(prior_1grid.shape)

    # Read proxy type from the database
    data_psm_type = proxies['Proxy'][j]
    # Reset for every row so that an unknown proxy type can neither reuse the
    # PSM of the previous row nor hit an undefined name on the first row.
    proxy_psm_type_i = None
    # Read allowed proxy from the DTDA-config.yml:
    # collect every config group that lists this proxy type.
    matching_keys = [key for key, allowed in proxy_assim2.items()
                     if data_psm_type in allowed]
    if len(matching_keys) == 1:
        data_psm_key = matching_keys[0]
        if data_psm_key in proxy_psm_type:
            proxy_psm_type_i = proxy_psm_type[data_psm_key]
            print('PSM for {} is {}'.format(data_psm_type,proxy_psm_type_i))
        else:
            warnings.warn('Warning, no PSM type is configured for {} in DTDA-config.yml dictionary'.format(data_psm_key))
    elif len(matching_keys) == 0:
        warnings.warn('Warning, this proxy type in database is not find in DTDA-config.yml dictionary')
    else:
        warnings.warn('Warning, this proxy type in database appears more than 1 time in DTDA-config.yml dictionary')

    # Now PSM type has been found. Let's precal. Ye

    if proxy_psm_type_i in ['bayesreg_d18o_pooled']:
        # bayfox
        d18o_localsw = modules_psm_linear.d18o_localsw(abs(dum_lat))
        d18o_petm = -0.96
        # total d18osw = d18o_localsw + d18o_adj + d18o_petm
        # d18o_adj was included in the bayfox model
        prediction_d18O = bayfox.predict_d18oc(prior_1grid,d18o_localsw+d18o_petm) # pool model for bayfox
        print('prediction_d18O.ensemble shape {}'.format(prediction_d18O.ensemble.shape))
        # Collapse the second axis of the bayfox ensemble into one mean and
        # one sample variance per prior member.
        Ye[proi,:] = np.mean(prediction_d18O.ensemble, axis = 1)
        Yevar[proi,:] = np.var(prediction_d18O.ensemble, axis = 1, ddof=1)
        for reconi in range(recon_period_len):
            obvalue[proi,reconi] = proxies[data_period_id[reconi]][j]
            # The database value is squared before it is stored.
            ob_err[proi,reconi] = proxies[data_period_idstd[reconi]][j] ** 2
        proi = proi + 1  # increment
        print('proxyunit, first example {}'.format(Ye[0,:]))
        print('proxyunitvar, first example {}'.format(Yevar[0,:]))
    elif proxy_psm_type_i in ['bayesreg_tex86',
                              'bayesreg_uk37',
                              'bayesreg_mgca_pooled_red',
                              'bayesreg_mgca_pooled_bcp']:
        # TODO: forward models for these PSM types are not implemented yet;
        # such rows are skipped.
        pass
    print(np.mean(Ye,axis=1))
print('obvalue {},  ob_err {}'.format(obvalue, ob_err))

Prior member size: 150
data_nc_field.shape (13, 36, 36)
149: ML.petm008.ID.9
recon_period 0 - 2
[0 1 2]
Data row 0, grid [lon lat] [1, 19]
(150,)
PSM for d18o_morozovella is bayesreg_d18o_pooled
prediction_d18O.ensemble shape (150, 10000)
proxyunit, first example [-4.23047827 -4.35463506 -3.56001233 -4.37945786 -4.32447638 -4.21580146
 -3.9219248  -3.66355983 -3.75458497 -4.27848282 -4.47326729 -3.59785795
 -2.85811589 -3.86614639 -4.36124125 -4.52260659 -3.01327014 -4.25473419
 -3.74870828 -4.21922924 -3.22347101 -3.7777343  -3.47914186 -3.66876534
 -2.88555977 -4.34038097 -3.1276865  -3.54214677 -4.15753814 -4.33106925
 -4.28918138 -4.09955505 -3.9214062  -3.41267199 -3.59174912 -3.0413636
 -4.38548272 -3.83151089 -3.48584235 -4.03750821 -3.21567955 -4.32220578
 -3.48210728 -3.55196633 -4.35860204 -4.31338074 -3.15786278 -3.79629933
 -2.98677663 -4.26087285 -3.86187068 -3.84086108 -3.91691857 -3.3711626
 -3.29105243 -4.00421594 -4.10622602 -3.51823744 -3.67215698 -4.14778366
 -4.0628

In [129]:
# Save the prior state, the observations and the proxy-unit estimates in an
# HDF5 file named after the proxy database file.
hdf5name = ''.join([dir_proxies, '.hdf5'])
with h5py.File(hdf5name, 'w') as h5file:
    # Ye and Yevar are written transposed; the other arrays are written as they are.
    for dset_name, dset_values in (('Xb', Xb),
                                   ('obvalue', obvalue),
                                   ('Ye', np.transpose(Ye)),
                                   ('Yevar', np.transpose(Yevar)),
                                   ('ob_err', ob_err)):
        h5file.create_dataset(dset_name, data=dset_values)

    # File-level attributes describing where the data came from.
    proxy_source = yml_dict['proxies']['use_from'][0]
    metadata = {
        'Date': time.time(),
        'proxy_dbversion': yml_dict['proxies'][proxy_source]['dbversion'],
        'exp_dir': yml_dict['core']['prior_dir'],
        'nc_file': nc_file,
        'nc_field': nc_field,
        'Nens': str(prior_len),
    }
    h5file.attrs.update(metadata)
print('prior2proxyunit hdf5 file saved: {}'.format(hdf5name))

prior2proxyunit hdf5 file saved: /mnt/c/Users/mul450/Dropbox/git/petmda/code/DTDA/mlwrk/proxy/petmproxy3slices_v0.0.10gt1test.csv.hdf5


In [119]:
# Sanity check: dimensions of the prior matrix Xb
print(np.shape(Xb))

(1296, 150)


In [None]:
obvalue [[4.85 4.85 4.85]],  ob_err [[ 0.247  0.159 -0.292]]