In [1]:
'''
Prepare prior for DeepDA

OUTPUT:
    example:
    prior2proxyunit hdf5 file saved: /mnt/c/Users/mul450/Dropbox/git/deepDA/mlwrk/proxy/petmproxy3slices_v0.0.10gt1.csv.hdf5

Mingsong Li
1/15/2020
'''
from DeepDA_lib import modules_nc
from DeepDA_lib import modules_psm_linear
import h5py
import time
import yaml
import numpy as np
import pandas
import os
from netCDF4 import Dataset

try:
    import bayspline
except ImportError as e1:
    print('Warning:', e1)
try:
    import bayspar
except ImportError as e2:
    print('Warning:', e2)
try:
    import bayfox
except ImportError as e3:
    print('Warning:', e3)
try:
    import baymag
except ImportError as e4:
    print('Warning:', e4)

In [2]:
# Load the DeepDA configuration. The context manager guarantees the file
# handle is released even when YAML parsing raises.
# NOTE(review): yaml.FullLoader can build more than plain data types; this is
# acceptable for a locally authored config file, but do not point it at
# untrusted input.
with open("DeepDA_config.yml", 'r') as f:
    yml_dict = yaml.load(f, Loader=yaml.FullLoader)

t = 12  # last time slice, cGENIE
k = 0   # first layer, SST

In [3]:

# Read every setting needed to precalculate Ye from the parsed configuration
# (yml_dict), load the proxy database, list the prior ensemble members and
# allocate the output arrays.

########## Proxy + PSM #########
# All proxy options are taken from the first database named in proxies.use_from.
proxy_cfg = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]
dir_proxy = yml_dict['core']['proxy_dir']
dir_proxy_data = dir_proxy +'/'+ proxy_cfg['dbversion']
dir_proxy_save = yml_dict['core']['wrkdir'] + '/'+ proxy_cfg['dbversion']
proxy_psm_type = proxy_cfg['proxy_psm_type']
proxy_assim2 = proxy_cfg['proxy_assim2']
psm_d18osw_adjust = yml_dict['psm']['bayesreg_d18o_pooled']['psm_d18osw_adjust']
# read proxies database
proxies = pandas.read_csv(dir_proxy_data)
proxies_len = proxies.shape[0]

########## Reconstruction #########
nexp = yml_dict['core']['nexp']
# Column names in the proxy database holding, per reconstruction interval,
# the observed value and its standard deviation.
data_period_id    = proxy_cfg['data_period_id']
data_period_idstd = proxy_cfg['data_period_idstd']
recon_period = yml_dict['core']['recon_period']
recon_timescale = yml_dict['core']['recon_timescale_interval']
# Inclusive list of reconstruction time steps.
recon_period_full = np.arange(recon_period[0],recon_period[1]+1,recon_timescale)
recon_period_len = recon_period_full.shape[0]
geologic_age = yml_dict['core']['geologic_age']
print('>>  recon_period {} - {}. List: '.format(recon_period[0], recon_period[1]))
print('      {}'.format(recon_period_full))

########## Prior #########
prior_source = yml_dict['prior']['prior_source'] #
prior_state_variable = yml_dict['prior'][prior_source]['state_variable']  # note: ['2d': xxx; '3d': xxx]

# Parallel lists: entry n of a *_variable_dict list is read from entry n of
# the matching *_nc_file_list list.
prior_variable_dict = []  # 2d variable list
prior_nc_file_list = []  # 2d nc file list
prior_variable_dict_3d = []  # 3d variable list
prior_nc_file_list_3d = []  # 3d nc file list

for key, value in prior_state_variable.items():
    # 'ncname' maps a sub-directory name to the nc file stem; the variables to
    # read from that file are listed under the file stem itself.
    nc_keyvalue = value['ncname']

    print('nc_keyvalue {}...'.format(nc_keyvalue))
    for key1, value1 in nc_keyvalue.items():
        print('{}: {}'.format(key1,value1))

        for var_name in value[value1]:
            if key == '2d':
                prior_variable_dict.append(var_name)
                prior_nc_file_list.append(key1+'/'+value1+'.nc')
            elif key == '3d':
                prior_variable_dict_3d.append(var_name)
                prior_nc_file_list_3d.append(key1+'/'+value1+'.nc')

dum_lon_offset = yml_dict['prior'][prior_source]['dum_lon_offset'] # longitude offset

########  Prior read   ########
dir_prior = yml_dict['core']['prior_dir']
# os.listdir returns entries in arbitrary order; sort them so the ensemble
# member (column) order of Xb / Ye is reproducible across runs and machines.
dir_prior_full = sorted(os.listdir(dir_prior))
prior_len = len(dir_prior_full)
print('>>  Prior member size: {}'.format(prior_len))

# prepare variable list for Xb
prior_variable2d_len = len(prior_variable_dict)
prior_variable3d_len = len(prior_variable_dict_3d)
print('>>  Number of 2d prior variables is: {}. List:'.format(prior_variable2d_len))
print('      {}'.format(prior_variable_dict))
print('>>  Prior nc file list {}'.format(prior_nc_file_list))
print('>>  Number of 3d prior variables is: {}. List:'.format(prior_variable3d_len))
print('      {}'.format(prior_variable_dict_3d))
print('>>  Prior nc file list {}'.format(prior_nc_file_list_3d))

######## Ye   ########
# Output arrays, NaN-filled so rows that are never written are recognisable.
# Ye / Yevar: one row per proxy record, one column per prior member.
# obvalue / ob_err: one row per proxy record, one column per reconstruction step.
Ye       = np.full((proxies_len,prior_len),np.nan)
Yevar    = np.full((proxies_len,prior_len),np.nan)
obvalue  = np.full((proxies_len,recon_period_len),np.nan)
ob_err   = np.full((proxies_len,recon_period_len),np.nan)

print('>>  OKAY.')

>>  recon_period 0 - 2. List: 
      [0 1 2]
nc_keyvalue {'biogem': 'fields_biogem_2d'}...
biogem: fields_biogem_2d
nc_keyvalue {'biogem': 'fields_biogem_3d'}...
biogem: fields_biogem_3d
>>  Prior member size: 150
>>  Number of 2d prior variables is: 3. List:
      ['ocn_sur_temp', 'atm_temp', 'ocn_ben_DIC_13C']
>>  Prior nc file list ['biogem/fields_biogem_2d.nc', 'biogem/fields_biogem_2d.nc', 'biogem/fields_biogem_2d.nc']
>>  Number of 3d prior variables is: 1. List:
      ['ocn_temp']
>>  Prior nc file list ['biogem/fields_biogem_3d.nc']
>>  OKAY.


In [13]:
# Scan the proxy database once to find out whether any record uses a Mg/Ca
# PSM. The resulting flag (data_psm_mgca_find) tells the prior-reading cell
# whether salinity, pH and omega fields must be loaded as well.

# Initialise the flag so that it is defined (and 0) when no Mg/Ca proxy exists.
data_psm_mgca_find = 0

for j in range(proxies_len):
    # Read proxy type from the database
    data_psm_type = proxies['Proxy'][j]
    # Reset per row so an unmatched proxy never inherits the previous row's PSM.
    proxy_psm_type_i = None

    # Groups in the config's proxy_assim2 dictionary that list this proxy type.
    matched_keys = [key for key, names in proxy_assim2.items() if data_psm_type in names]
    data_psm_type_find = len(matched_keys)

    if data_psm_type_find == 1:
        data_psm_key = matched_keys[0]
        proxy_psm_type_i = proxy_psm_type[data_psm_key]
        print('PSM for {} is {}'.format(data_psm_type,proxy_psm_type_i))
    elif data_psm_type_find == 0:
        print('Warning, {} in database is not find in DTDA-config.yml dictionary'.format(data_psm_type))
        continue
    else:
        print('Warning, {} in database appears more than 1 time in DTDA-config.yml dictionary'.format(data_psm_type))
        continue

    # PSM type is known for this row: flag Mg/Ca proxies.
    if proxy_psm_type_i in ['bayesreg_mgca_pooled_red','bayesreg_mgca_pooled_bcp']:
        data_psm_mgca_find = 1
        print('MgCa proxy found')

PSM for tex86 is bayesreg_tex86
PSM for d18o_m.subb is bayesreg_d18o_pooled
PSM for mgca_m.subb:barker is bayesreg_mgca_pooled_bcp
MgCa proxy found
PSM for mgca_acarinina:barker is bayesreg_mgca_pooled_bcp
MgCa proxy found
PSM for mgca_m.subb:barker is bayesreg_mgca_pooled_bcp
MgCa proxy found
PSM for d18o_morozovella is bayesreg_d18o_pooled
PSM for mgca_m.subbotinae:barker is bayesreg_mgca_pooled_bcp
MgCa proxy found
PSM for mgca_m.velascoensis:barker is bayesreg_mgca_pooled_bcp
MgCa proxy found
PSM for mgca_morozovella:barker is bayesreg_mgca_pooled_bcp
MgCa proxy found
PSM for mgca_acarinina:barker is bayesreg_mgca_pooled_bcp
MgCa proxy found
PSM for tex86 is bayesreg_tex86
PSM for tex86 is bayesreg_tex86
PSM for mgca_morozovella:reductive is bayesreg_mgca_pooled_red
MgCa proxy found
PSM for mgca_acarinina:reductive is bayesreg_mgca_pooled_red
MgCa proxy found
PSM for tex86 is bayesreg_tex86
PSM for d18o_acarinina is bayesreg_d18o_pooled
PSM for d18o_morozovella is bayesreg_d18o_poo

In [5]:
# Build the prior state matrices.
#   Xb   : stacked, flattened 2-D fields, one column per prior member.
#   Xb3d : stacked, flattened 3-D fields, one column per prior member.
#   Xb_sal / Xb_ph / Xb_omega : extra surface fields needed by the Mg/Ca PSM,
#   only read when a Mg/Ca proxy was detected (data_psm_mgca_find == 1).
print('>>  Reading prior state variables')

# Read the first variable of the first member to learn the prior grid shape.
# Preference order: first 3-D variable, then first 2-D variable, then defaults.
try:
    with Dataset(dir_prior+'/'+dir_prior_full[0]+'/'+ prior_nc_file_list_3d[0]) as nc:
        x1 = nc.variables[prior_variable_dict_3d[0]][0,:,:,:]
    dum_dmax = x1.shape[0] # depth
    dum_imax = x1.shape[1]  # lon
    dum_jmax = x1.shape[2]  # lat
except Exception:
    try:
        with Dataset(dir_prior+'/'+dir_prior_full[0]+'/'+ prior_nc_file_list[0]) as nc:
            x0 = nc.variables[prior_variable_dict[0]][0,:,:]
        # Take the horizontal grid from the 2-D field that was just read
        # (x1 does not exist on this path).
        dum_imax = x0.shape[0]  # lon
        dum_jmax = x0.shape[1]  # lat
        dum_dmax = 16
    except Exception as err:
        # Nothing could be read: fall back to the hard-coded default grid.
        print('>>  Warning: prior grid shape could not be read ({}: {}); using 36 x 36 x 16'.format(type(err).__name__, err))
        dum_dmax = 16
        dum_imax = 36
        dum_jmax = 36
# prepare 2d Xb for lon-lat state
dum_ijmax = dum_imax*dum_jmax  # lonn * latn
if prior_variable2d_len>0:
    Xb_shape = (dum_ijmax*prior_variable2d_len, prior_len)  # lonn * latn * varn
    Xb   = np.full(Xb_shape,np.nan)
# prep 3d version of Xb
if prior_variable3d_len > 0:
    Xb3d_shape = (dum_ijmax*dum_dmax*prior_variable3d_len, prior_len)  # lonn * latn * depthn * varn
    Xb3d = np.full(Xb3d_shape,np.nan)

if data_psm_mgca_find == 1:
    print('>>  Prepare Mg/Ca related state variable ...')
    # for Mg/Ca SST proxy salinity, ph, omega
    Xb_sal       = np.full(Xb_shape,np.nan)
    Xb_ph        = np.full(Xb_shape,np.nan)
    Xb_omega     = np.full(Xb_shape,np.nan)
    spp = 'all'
    # ``1`` for reductive, ``0`` for BCP (Barker).
    cleaningr = np.tile(np.array([1]),prior_len)
    cleaningb = np.tile(np.array([0]),prior_len)

# loop for each member of a prior
for i in range(prior_len):
    member_dir = dir_prior+'/'+dir_prior_full[i]
    # loop for each variable of each member
    if prior_variable2d_len>0:
        for j in range(prior_variable2d_len):
            # full directory of netcdf file
            name_nc_2d = member_dir+'/'+ prior_nc_file_list[j]
            # Row block of Xb that holds variable j.
            j0 = dum_ijmax * j
            j1 = dum_ijmax * (j+1)
            nc_field = prior_variable_dict[j]
            # Slicing loads the data into memory, so the file can be closed
            # immediately instead of leaking one handle per read.
            with Dataset(name_nc_2d) as nc:
                x = nc.variables[nc_field][t,:,:]

            Xb[j0:j1,i] = np.copy(x.reshape(dum_ijmax))

            if data_psm_mgca_find == 1:
                try:
                    name_nc_2d_mgca = member_dir+'/biogem/'+ 'fields_biogem_2d.nc'
                    with Dataset(name_nc_2d_mgca) as nc:
                        x = nc.variables['ocn_sur_sal'][t,:,:]
                    Xb_sal[j0:j1,i] = np.copy(x.reshape(dum_ijmax))
                    name_nc_3d_mgca = member_dir+'/biogem/'+ 'fields_biogem_3d.nc'
                    with Dataset(name_nc_3d_mgca) as nc:
                        # k is the layer index chosen in the config cell.
                        x = nc.variables['misc_pH'][t,k,:,:]
                        Xb_ph[j0:j1,i] = np.copy(x.reshape(dum_ijmax))
                        x = nc.variables['carb_ohm_cal'][t,k,:,:]
                        Xb_omega[j0:j1,i] = np.copy(x.reshape(dum_ijmax))
                except Exception as err:
                    if i == 0:
                        # warning one time
                        print('>>  Warning: reading state variable error. ocn_sur_sal, misc_pH, carb_ohm_cal')
                        print('      {}: {}'.format(type(err).__name__, err))

            # print the last one data
            if i > prior_len-2:
                print('>>  Last member: x.shape {}'.format(x.shape))
                print('      {}: {}: {}'.format(i, dir_prior_full[i], prior_variable_dict[j]))
    # if 3d variables are used
    if prior_variable3d_len > 0:
        # The loop index is deliberately NOT named k: k holds the layer index
        # used by the Mg/Ca reads above and must survive across members.
        for v3 in range(prior_variable3d_len):
            name_nc_3d = member_dir+'/'+ prior_nc_file_list_3d[v3]
            nc_field = prior_variable_dict_3d[v3]
            # Row block of Xb3d that holds 3-D variable v3.
            k0 = dum_ijmax*dum_dmax * v3
            k1 = dum_ijmax*dum_dmax * (v3+1)
            with Dataset(name_nc_3d) as nc:
                x = nc.variables[nc_field][t,:,:,:]  # depth-lon-lat
            x = np.swapaxes(x,0,2)  # lat-lon-depth
            x = np.swapaxes(x,0,1)  # lon-lat-depth
            Xb3d[k0:k1,i] = np.copy(x.reshape(dum_ijmax*dum_dmax))

print('>>  OKAY. Xb ready, to be saved')

>>  Reading prior state variables
>>  Last member: x.shape (36, 36)
      149: ML.petm005.ID.9: ocn_sur_temp
>>  Last member: x.shape (36, 36)
      149: ML.petm005.ID.9: atm_temp
>>  Last member: x.shape (36, 36)
      149: ML.petm005.ID.9: ocn_ben_DIC_13C
>>  OKAY. Xb ready, to be saved


In [12]:
# precal_Ye
# For every proxy record: locate its prior grid cell, pick the PSM configured
# for its proxy type, run the PSM on the prior ensemble at that cell, and store
# the predicted proxy values (Ye), their variance (Yevar), the observations
# (obvalue) and the observation error variance (ob_err).

proi = 0  # next free output row; advances only when a record is processed successfully
for j in range(proxies_len):
    # read lon lat for each line of proxy
    dum_lat = proxies['Lat'][j]  # (paleo)latitude of this site
    dum_lon = proxies['Lon'][j]  # (paleo)longitude of this site
    lonlat = modules_nc.cal_find_ij(dum_lon,dum_lat,dum_lon_offset,dum_imax,dum_jmax)
    Filei = proxies['File'][j]
    ######################## TO DO: including d13C or other proxies ##############
    # find 1d grid location
    lonlati = lonlat[1] * dum_jmax + lonlat[0]
    # Prior ensemble at this grid cell. The index addresses the first
    # dum_ijmax rows of Xb, i.e. the first 2-D prior variable.
    prior_1grid = np.copy(Xb[lonlati,:])   # prior
    ######################## TO DO: add  dum_ijmax * j etc. ##############

    # Read proxy type from the database
    data_psm_type = proxies['Proxy'][j]
    # Reset per row so an unmatched proxy never reuses the previous row's PSM.
    proxy_psm_type_i = None
    # Groups in the config's proxy_assim2 dictionary that list this proxy type.
    matched_keys = [key for key, names in proxy_assim2.items() if data_psm_type in names]
    data_psm_type_find = len(matched_keys)

    if data_psm_type_find == 1:
        data_psm_key = matched_keys[0]
        proxy_psm_type_i = proxy_psm_type[data_psm_key]
        print('')
        print('>>  Data row {}: {}, grid [lon lat] {}, grid id {}'.format(j,Filei,lonlat,lonlati))
        print('>>  PSM for {} is {}, prior mean is {}'.format(data_psm_type,proxy_psm_type_i, np.mean(prior_1grid)))
    elif data_psm_type_find == 0:
        print('Warning, this proxy type in database is not find in DTDA-config.yml dictionary')
        continue
    else:
        print('Warning, this proxy type in database appears more than 1 time in DTDA-config.yml dictionary')
        continue

    # Now PSM type has been found. Let's precal Ye
    try:
        mgca_summary = None  # extra diagnostic line, only set for Mg/Ca PSMs

        if proxy_psm_type_i == 'bayesreg_d18o_pooled':
            # bayfox
            d18o_localsw = modules_psm_linear.d18o_localsw(abs(dum_lat))
            psm_d18osw_adjust = yml_dict['psm']['bayesreg_d18o_pooled']['psm_d18osw_adjust']
            # total d18osw = d18o_localsw + d18o_adj + psm_d18osw_adjust
            # d18o_adj has been included in the bayfox model
            prediction_d18O = bayfox.predict_d18oc(prior_1grid,d18o_localsw+psm_d18osw_adjust) # pool model for bayfox
            ye_ensemble = prediction_d18O.ensemble
            ye_var = np.var(ye_ensemble, axis = 1, ddof=1)

        elif proxy_psm_type_i == 'bayesreg_tex86':
            # bayspar
            search_tol_i = yml_dict['psm']['bayesreg_tex86']['search_tol']
            nens_i = yml_dict['psm']['bayesreg_tex86']['nens']
            prediction = bayspar.predict_tex_analog(prior_1grid, temptype = 'sst', search_tol = search_tol_i, nens=nens_i)
            ye_ensemble = prediction.ensemble
            ye_var = np.var(ye_ensemble, axis = 1, ddof=1)

        elif proxy_psm_type_i == 'bayesreg_uk37':
            print('... To be done ...')
            continue

        elif proxy_psm_type_i in ('bayesreg_mgca_pooled_red', 'bayesreg_mgca_pooled_bcp'):
            # baymag; the two pooled models differ only in the cleaning flag.
            is_reductive = proxy_psm_type_i == 'bayesreg_mgca_pooled_red'
            salinity =  np.copy(Xb_sal[lonlati,:])
            ph       =  np.copy(Xb_ph[lonlati,:])
            omega    =  np.copy(Xb_omega[lonlati,:])
            cleaning = cleaningr if is_reductive else cleaningb

            prediction_mgca = baymag.predict_mgca(prior_1grid, cleaning, salinity, ph, omega, spp)
            pred_mgca_adj = baymag.sw_correction(prediction_mgca, np.array([geologic_age]))
            ye_ensemble = pred_mgca_adj.ensemble
            # NOTE(review): this is a single variance over the whole ensemble,
            # broadcast to every member, whereas the other PSMs use a
            # per-member variance (axis=1, ddof=1). Kept as-is; confirm intent.
            ye_var = pred_mgca_adj.ensemble.var()
            mgca_summary = '      {}: mean salinity {}, ph {}, omega {}'.format(
                'reductive' if is_reductive else 'barker',
                np.mean(salinity), np.mean(ph), np.mean(omega))

        else:
            # No precalculation is implemented for this PSM type.
            continue

        Ye[proi,:] = np.mean(ye_ensemble, axis = 1)
        Yevar[proi,:] = ye_var
        for reconi in range(recon_period_len):
            obvalue[proi,reconi] = proxies[data_period_id[reconi]][j]
            # The database stores a standard deviation; square it to a variance.
            ob_err[proi,reconi] = proxies[data_period_idstd[reconi]][j] ** 2

        print('>>  mean of Ye is {}, variance is {} '.format(np.mean(Ye[proi,:]), np.var(Yevar[proi,:])))
        if mgca_summary is not None:
            print(mgca_summary)
        proi = proi + 1  # increasement
    except Exception as err:
        # Best effort: report the failure and move on to the next record.
        print('>>  Warning {}'.format(proxy_psm_type_i))
        print('      {}: {}'.format(type(err).__name__, err))
        if proxy_psm_type_i == 'bayesreg_tex86':
            print('search_tol too small for {}: mean sst is {}'.format(j, np.mean(prior_1grid)))

print('>>  Ye mean {}'.format(np.mean(Ye,axis=1)))
print('>>  obvalue {},  ob_err {}'.format(obvalue, ob_err))
print('>>  OKAY.')


>>  Data row 0: sluijs2006-acex302-4a.txt, grid [lon lat] [18, 35], grid id 1278
>>  PSM for tex86 is bayesreg_tex86, prior mean is 5.352113623221715
>>  mean of Ye is 0.38386683495203056, variance is 8.653461130743147e-08 

>>  Data row 1: gutjahr2017-dsdp401d18omgca.txt, grid [lon lat] [16, 29], grid id 1060
>>  PSM for d18o_m.subb is bayesreg_d18o_pooled, prior mean is 26.80037425994873
>>  mean of Ye is -3.08747264348772, variance is 1.7327341229832285e-05 

>>  Data row 2: gutjahr2017-dsdp401d18omgca.txt, grid [lon lat] [16, 29], grid id 1060
>>  PSM for mgca_m.subb:barker is bayesreg_mgca_pooled_bcp, prior mean is 26.80037425994873
>>  mean of Ye is nan, variance is nan 
      barker: mean salinity nan, ph nan, omega nan

>>  Data row 3: tripati2005-527mgcasoldadoensis.txt, grid [lon lat] [16, 7], grid id 268
>>  PSM for mgca_acarinina:barker is bayesreg_mgca_pooled_bcp, prior mean is 30.845927200317384
>>  mean of Ye is nan, variance is nan 
      barker: mean salinity nan, ph 

In [34]:
# Write the precalculated arrays and a few provenance attributes to an HDF5
# file whose name is the proxy save path followed by the experiment name.
hdf5name = ''.join([dir_proxy_save, nexp, '_precal_ye.hdf5'])

with h5py.File(hdf5name, 'w') as h5file:
    # Collect (dataset name, array) pairs in the order they are written.
    # Xb and Xb3d exist only when the matching prior variables were configured.
    pending = []
    if prior_variable2d_len>0:
        pending.append(('Xb', Xb))
    pending.append(('obvalue', obvalue))
    # Ye and Yevar are stored transposed relative to their in-memory layout.
    pending.append(('Ye', Ye.T))
    pending.append(('Yevar', Yevar.T))
    pending.append(('ob_err', ob_err))
    if prior_variable3d_len>0:
        pending.append(('Xb3d', Xb3d))

    for dataset_name, dataset_values in pending:
        h5file.create_dataset(dataset_name, data=dataset_values)

    # File-level attributes describing how the file was produced.
    proxy_source = yml_dict['proxies']['use_from'][0]
    metadata = dict([
        ('Date', time.time()),
        ('proxy_dbversion', yml_dict['proxies'][proxy_source]['dbversion']),
        ('exp_dir', yml_dict['core']['prior_dir']),
        ('Nens', str(prior_len)),
    ])
    h5file.attrs.update(metadata)

for message in ('  prior2proxyunit hdf5 file saved: {}'.format(hdf5name),
                '  Step 1 finished. Run Step 2: DeepDA_main.ipynb now',
                '>>  Done!'):
    print(message)

  prior2proxyunit hdf5 file saved: /mnt/c/Users/mul450/Dropbox/git/deepDA/mlwrk/wrk/petmproxy3slices_v0.0.10g.csvexp_petm78_allSSTobs_20200120_02_precal_ye.hdf5
  Step 1 finished. Run Step 2: DeepDA_main.ipynb now
>>  Done!


In [13]:
# Sanity check: show the dimensions of the 2-D prior state matrix.
print(np.shape(Xb))

(3888, 150)
