In [17]:
'''
Prepare prior for DeepDA

OUTPUT:
    example:
    prior2proxyunit hdf5 file saved: /mnt/c/Users/mul450/Dropbox/git/deepDA/mlwrk/proxy/petmproxy3slices_v0.0.10gt1.csv.hdf5

Mingsong Li
1/15/2020
'''
import os
import time
import warnings

import h5py
import numpy as np
import pandas
import yaml
from netCDF4 import Dataset

from DeepDA_lib import modules_nc
from DeepDA_lib import modules_psm_linear

try:
    import bayspline
except ImportError as e1:
    print('Warning:', e1)
try:
    import bayspar
except ImportError as e2:
    print('Warning:', e2)
try:
    import bayfox
except ImportError as e3:
    print('Warning:', e3)
try:
    import baymag
except ImportError as e4:
    print('Warning:', e4)

In [18]:
# Grid geometry and the netCDF field used as the prior.
dum_lon_offset = -180                 # longitude offset handed to modules_nc.cal_find_ij
dum_imax, dum_jmax = 36, 36           # number of grid cells along lon, lat
dum_ijmax = dum_imax * dum_jmax       # size of one flattened 2-D field
nc_file, nc_field = 'fields_biogem_2d.nc', 'ocn_sur_temp'
t = 12  # last time slice, cGENIE
k = 0   # first layer, SST

In [19]:
# build Ye
# If there is no field in the model, convert model unit to proxy unit

# Read the run configuration. The file is opened in a `with` block so the
# handle is closed as soon as parsing is done (it used to stay open for the
# lifetime of the kernel).
with open("DeepDA_config.yml", 'r') as f:
    # NOTE(review): yaml.load with FullLoader is kept as in the original;
    # prefer yaml.safe_load if the config can ever come from an untrusted source.
    yml_dict = yaml.load(f, Loader=yaml.FullLoader)

# Every entry of the prior directory is treated as one ensemble member.
dir_prior = yml_dict['core']['prior_dir']
# NOTE(review): os.listdir returns entries in arbitrary order, so the column
# order of the ensemble follows whatever the filesystem reports. Confirm that
# downstream steps do not rely on a particular member order before sorting.
dir_prior_full = os.listdir(dir_prior)
prior_len = len(dir_prior_full)
print('Prior member size: {}'.format(prior_len))

prior_variable_dict = yml_dict['prior']['state_variables_info']
print(prior_variable_dict)

# Read the prior ensemble into Xb: one column per member, one row per cell of
# the flattened nc_field slice at time index t.
Xb = np.full((dum_ijmax, prior_len),np.nan)
for i in range(prior_len):
    name_nc = dir_prior+'/'+dir_prior_full[i]+'/'+nc_file

    # Open each file through a context manager so its handle is released right
    # after the slice is read, instead of leaking one open file per member.
    with Dataset(name_nc) as nc_data:
        x = nc_data.variables[nc_field][t,:,:]
    Xb[:,i] = x.reshape(dum_ijmax)
    # report the last member only, as a quick sanity check
    if i > prior_len-2:
        print('data_nc_field.shape {}'.format(x.shape))
        print('{}: {}'.format(i, dir_prior_full[i]))

# Pull the proxy and reconstruction settings out of the parsed config.
# Only the first database listed under proxies/use_from is used.
proxy_cfg = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]
dir_proxies = '/'.join((proxy_cfg['datadir_proxy'], proxy_cfg['dbversion']))
proxy_psm_type = proxy_cfg['proxy_psm_type']
proxy_assim2 = proxy_cfg['proxy_assim2']
psm_d18osw_adjust = yml_dict['psm']['bayesreg_d18o_pooled']['psm_d18osw_adjust']
data_period_id = proxy_cfg['data_period_id']
data_period_idstd = proxy_cfg['data_period_idstd']

# Reconstruction axis: recon_period[0] .. recon_period[1] inclusive,
# stepped by recon_timescale.
core_cfg = yml_dict['core']
recon_period = core_cfg['recon_period']
recon_timescale = core_cfg['recon_timescale_interval']
recon_period_full = np.arange(recon_period[0], recon_period[1] + 1, recon_timescale)
recon_period_len = len(recon_period_full)
print('recon_period {} - {}'.format(recon_period[0], recon_period[1]))
print(recon_period_full)

# Load the proxy database from the CSV path built from the config.
proxies = pandas.read_csv(dir_proxies)
proxies_len = len(proxies.index)

# NaN-filled containers, filled row by row while looping over the proxies:
#   Ye, Yevar       -> (proxies_len, prior_len)
#   obvalue, ob_err -> (proxies_len, recon_period_len)
ye_shape = (proxies_len, prior_len)
ob_shape = (proxies_len, recon_period_len)
Ye, Yevar = (np.full(ye_shape, np.nan) for _ in range(2))
obvalue, ob_err = (np.full(ob_shape, np.nan) for _ in range(2))


# For every proxy row: locate its grid cell, take the prior ensemble at that
# cell, find the PSM the config assigns to the row's proxy type, and store the
# prior in proxy units (Ye, Yevar) together with the observations.
d18o_petm = -0.96  # constant d18Osw term; hoisted because it never changes per row
proi = 0           # next row to fill in Ye / Yevar / obvalue / ob_err
for j in range(proxies_len):
    dum_lat = proxies['Lat'][j]  # (paleo)latitude of this site
    dum_lon = proxies['Lon'][j]  # (paleo)longitude of this site
    lonlat = modules_nc.cal_find_ij(dum_lon,dum_lat,dum_lon_offset,dum_imax,dum_jmax)
    # Xb rows are a row-major flattening of the 2-D field, so the stride of the
    # slow index (lonlat[1]) is the number of columns, i.e. the size of the axis
    # indexed by lonlat[0] (lon -> dum_imax). The original used dum_jmax, which
    # only works while dum_imax == dum_jmax.
    # NOTE(review): assumes the field is stored as (lat, lon) - confirm.
    lonlati = lonlat[1] * dum_imax + lonlat[0]
    prior_1grid = Xb[lonlati,:]   # prior
    print('Data row {}, grid [lon lat] {}'.format(j,lonlat))
    print(prior_1grid.shape)

    # Proxy type of this row, and the config groups (proxy_assim2) that list it.
    data_psm_type = proxies['Proxy'][j]
    matched_keys = [key for key, names in proxy_assim2.items() if data_psm_type in names]
    data_psm_type_find = len(matched_keys)

    # Reset for every row so an unmatched or ambiguous proxy type can neither
    # reuse the PSM of the previous row nor hit an undefined name.
    proxy_psm_type_i = None
    if data_psm_type_find == 1:
        data_psm_key = matched_keys[0]
        proxy_psm_type_i = proxy_psm_type[data_psm_key]
        print('PSM for {} is {}'.format(data_psm_type,proxy_psm_type_i))
    elif data_psm_type_find == 0:
        warnings.warn('Warning, this proxy type in database is not find in DTDA-config.yml dictionary')
    else:
        warnings.warn('Warning, this proxy type in database appears more than 1 time in DTDA-config.yml dictionary')

    # Now PSM type has been found. Let's precal. Ye
    if proxy_psm_type_i == 'bayesreg_d18o_pooled':
        # bayfox
        d18o_localsw = modules_psm_linear.d18o_localsw(abs(dum_lat))
        # total d18osw = d18o_localsw + d18o_adj + d18o_petm
        # d18o_adj was included in the bayfox model
        prediction_d18O = bayfox.predict_d18oc(prior_1grid,d18o_localsw+d18o_petm) # pool model for bayfox
        print('prediction_d18O.ensemble shape {}'.format(prediction_d18O.ensemble.shape))
        # Collapse axis 1 of the bayfox ensemble to a mean and a sample variance
        # per prior member.
        Ye[proi,:] = np.mean(prediction_d18O.ensemble, axis = 1)
        Yevar[proi,:] = np.var(prediction_d18O.ensemble, axis = 1, ddof=1)
        for reconi in range(recon_period_len):
            obvalue[proi,reconi] = proxies[data_period_id[reconi]][j]
            # the database column is squared before being stored in ob_err
            ob_err[proi,reconi] = proxies[data_period_idstd[reconi]][j] ** 2
        proi = proi + 1  # advance only when a row was actually filled
        print('Ye, first example {}'.format(Ye[0,:]))
        print('Yevar, first example {}'.format(Yevar[0,:]))
    # All other PSM types (bayesreg_tex86, bayesreg_uk37,
    # bayesreg_mgca_pooled_red, bayesreg_mgca_pooled_bcp, or no match) are not
    # implemented here: such rows are skipped and leave no entry in Ye.
    print(np.mean(Ye,axis=1))
print('obvalue {},  ob_err {}'.format(obvalue, ob_err))

Prior member size: 150
{'temperature': ['ocn_sur_temp', 'atm_temp']}
data_nc_field.shape (36, 36)
149: ML.petm008.ID.9
recon_period 0 - 2
[0 1 2]
Data row 0, grid [lon lat] [1, 19]
(150,)
PSM for d18o_morozovella is bayesreg_d18o_pooled
prediction_d18O.ensemble shape (150, 10000)
Ye, first example [-4.21491247 -4.34884739 -3.55200177 -4.3912051  -4.32633535 -4.22906824
 -3.91420736 -3.67854976 -3.74945319 -4.26554107 -4.47401398 -3.60713219
 -2.85424131 -3.86647294 -4.35363265 -4.51578242 -3.01148947 -4.26314487
 -3.74778161 -4.22142136 -3.23524572 -3.77281422 -3.47596401 -3.66569059
 -2.88688534 -4.34192644 -3.12207068 -3.54007434 -4.16236949 -4.34882206
 -4.28923961 -4.11061838 -3.92935883 -3.41526207 -3.59491905 -3.02813932
 -4.38857508 -3.82311589 -3.49437742 -4.03432769 -3.20954621 -4.33858871
 -3.48244445 -3.55595494 -4.35166332 -4.30757883 -3.1576935  -3.79807317
 -2.98032177 -4.27231158 -3.85754837 -3.83416948 -3.91776238 -3.36248808
 -3.30159807 -4.01259052 -4.10562691 -3.5239

In [20]:
# Save the prior and its proxy-unit projection to an HDF5 file whose name is
# the proxy database path plus '.hdf5'.
hdf5name = ''.join((dir_proxies, '.hdf5'))
with h5py.File(hdf5name, 'w') as f:
    # Ye and Yevar are written transposed; the other arrays are written as is.
    for dset_name, dset_values in (('Xb', Xb),
                                   ('obvalue', obvalue),
                                   ('Ye', Ye.T),
                                   ('Yevar', Yevar.T),
                                   ('ob_err', ob_err)):
        f.create_dataset(dset_name, data=dset_values)

    # File-level attributes recording where the data came from.
    proxy_db = yml_dict['proxies']['use_from'][0]
    metadata = {
        'Date': time.time(),
        'proxy_dbversion': yml_dict['proxies'][proxy_db]['dbversion'],
        'exp_dir': yml_dict['core']['prior_dir'],
        'nc_file': nc_file,
        'nc_field': nc_field,
        'Nens': str(prior_len),
    }
    f.attrs.update(metadata)

for message in ('prior2proxyunit hdf5 file saved: {}'.format(hdf5name),
                'Step 1 finished. Run Step 2: DeepDA_main.ipynb now'):
    print(message)

prior2proxyunit hdf5 file saved: /mnt/c/Users/mul450/Dropbox/git/deepDA/mlwrk/proxy/petmproxy3slices_v0.0.10gt1test.csv.hdf5
Step 1 finished. Run Step 2: DeepDA_main.ipynb now


In [21]:
# Sanity check: Xb was allocated as (dum_ijmax, prior_len), i.e. one row per
# flattened grid cell and one column per prior member.
print(Xb.shape)

(1296, 150)
