In [15]:
'''
Data assimilation for deep time
Stage 1:    Prior: cGENIE only
            Proxy: petmproxy3slices format database
            PSM: bayesian proxy system model
            DA: Mingsong Li, with LMR DA Core
            
            Mingsong Li
            1/15/2020
'''
# Package
import h5py
from DeepDA_lib import LMR_DA

from netCDF4 import Dataset
import os
import numpy as np
import numpy.ma as ma
import numpy.matlib as mat
import scipy.stats as stats
import pandas
from sys import platform as sys_pf
import yaml
import matplotlib.pyplot as plt
if sys_pf == 'darwin':
    import matplotlib
    matplotlib.use("TkAgg")
    import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.basemap import Basemap, shiftgrid, cm

print('>>  OKAY.')

>>  OKAY.


In [16]:
# ---------------------------------------------------------------------------
# Read the configuration, build the prior variable lists, detect the model
# grid size and allocate the (NaN-filled) analysis buffers.
# ---------------------------------------------------------------------------
nc_file ='fields_biogem_2d.nc'  # not referenced in the visible cells; kept as-is
config_name = "DeepDA_config.yml"

# Read the YAML configuration. The context manager closes the file even when
# parsing raises.
with open(config_name, 'r') as f:
    yml_dict = yaml.load(f, Loader=yaml.FullLoader)

########## Prior #########
prior_source = yml_dict['prior']['prior_source']
# Mapping keyed by dimensionality ('2d' / '3d'). Each entry carries an
# 'ncname' mapping {sub-directory: nc file stem} and, under the file stem,
# the list of variable names taken from that file.
prior_state_variable = yml_dict['prior'][prior_source]['state_variable']

# Parallel lists: variable name and the relative nc file it is read from.
# (The "*_dict" names are historical; these are plain lists.)
prior_variable_dict = []      # 2d variable names
prior_nc_file_list = []       # 2d nc files
prior_variable_dict_3d = []   # 3d variable names
prior_nc_file_list_3d = []    # 3d nc files

for key, value in prior_state_variable.items():
    nc_keyvalue = value['ncname']

    print('nc_keyvalue {}...'.format(nc_keyvalue))
    for key1, value1 in nc_keyvalue.items():
        print('{}: {}'.format(key1,value1))

        nc_relpath = key1+'/'+value1+'.nc'
        for var_name in value[value1]:
            if key == '2d':
                prior_variable_dict.append(var_name)
                prior_nc_file_list.append(nc_relpath)
            elif key == '3d':
                prior_variable_dict_3d.append(var_name)
                prior_nc_file_list_3d.append(nc_relpath)

# variable list
prior_variable_len = len(prior_variable_dict)
prior_variable3d_len = len(prior_variable_dict_3d)
print('>>  Number of prior variables is: {}. List:'.format(prior_variable_len))
print('      {}'.format(prior_variable_dict))

dir_prior = yml_dict['core']['prior_dir']
dir_prior_full = os.listdir(dir_prior)

# Fallback grid size, used only when no prior file can be read.
dum_dmax, dum_imax, dum_jmax = 16, 36, 36

# Errors that mean "this prior file / variable is not usable". Anything else
# (e.g. a typo in this cell) is allowed to propagate instead of being hidden.
_nc_read_errors = (OSError, RuntimeError, KeyError, IndexError, ValueError)

grid_from_3d = False
grid_from_2d = False
if dir_prior_full:
    # The first directory entry is taken as a representative ensemble member.
    first_member_dir = dir_prior+'/'+dir_prior_full[0]

    if prior_variable3d_len > 0:
        try:
            with Dataset(first_member_dir+'/'+prior_nc_file_list_3d[0]) as nc3d:
                # first time slice of the first 3d variable
                x1 = nc3d.variables[prior_variable_dict_3d[0]][0,:,:,:]
                # depth axis; written to the output file by the save cell
                zt = nc3d.variables['zt'][:]
            grid_from_3d = True
        except _nc_read_errors as err:
            print('>>  Warning: cannot read 3d prior grid ({}: {})'.format(type(err).__name__, err))

    if prior_variable_len > 0:
        try:
            with Dataset(first_member_dir+'/'+prior_nc_file_list[0]) as nc2d:
                # first time slice of the first 2d variable
                x0 = nc2d.variables[prior_variable_dict[0]][0,:,:]
            grid_from_2d = True
            print('    Shape of prior 2d grid {}'.format(x0.shape))
        except _nc_read_errors as err:
            print('>>  Warning: cannot read 2d prior grid ({}: {})'.format(type(err).__name__, err))
else:
    print('>>  Warning: {} is empty; using the default grid size'.format(dir_prior))

# Prefer the 3d field (it also provides the depth size), then the 2d field,
# then the defaults set above.
# NOTE(review): the lon/lat labels follow the original comments; the actual
# axis order of the nc variables is not visible here — confirm.
if grid_from_3d:
    dum_dmax = x1.shape[0]  # depth
    dum_imax = x1.shape[1]  # lon
    dum_jmax = x1.shape[2]  # lat
elif grid_from_2d:
    dum_imax = x0.shape[0]  # lon
    dum_jmax = x0.shape[1]  # lat

# prepare 2d Xb for lon-lat state
dum_ijmax = dum_imax*dum_jmax  # lonn * latn

########

nexp = yml_dict['core']['nexp']
nens = yml_dict['core']['nens']
dir_data_save = yml_dict['core']['wrkdir']
recon_period = yml_dict['core']['recon_period']
recon_timescale = yml_dict['core']['recon_timescale_interval']
recon_period_full = np.arange(recon_period[0],recon_period[1]+1,recon_timescale)
recon_period_len = recon_period_full.shape[0]

# for saving DA product Xa: (state length, ensemble size, number of intervals)
if prior_variable_len > 0:
    Xa_output   = np.full((dum_ijmax * prior_variable_len, nens, recon_period_len),np.nan)
    Xa_output_all = Xa_output
    if prior_variable3d_len > 0:
        # the 3d block is sized by the number of 3d variables
        Xa3d_output   = np.full((dum_ijmax * dum_dmax * prior_variable3d_len, nens, recon_period_len),np.nan)
        Xa_output_all = np.concatenate((Xa_output, Xa3d_output), axis=0)
    else:
        print('>>  No 3d variable listed in {}'.format(config_name))
elif prior_variable3d_len > 0:
    Xa3d_output   = np.full((dum_ijmax * dum_dmax * prior_variable3d_len, nens, recon_period_len),np.nan)
    Xa_output_all = Xa3d_output
    print('>>  No 2d variable listed in {}'.format(config_name))
else:
    print('>>  Error! No 3d or 2d variables are listed in {}'.format(config_name))
print('>>  OKAY.')

nc_keyvalue {'biogem': 'fields_biogem_2d'}...
biogem: fields_biogem_2d
nc_keyvalue {'biogem': 'fields_biogem_3d'}...
biogem: fields_biogem_3d
>>  Number of prior variables is: 3. List:
      ['ocn_sur_temp', 'atm_temp', 'ocn_ben_DIC_13C']
    Shape of prior 2d grid (36, 36)
>>  OKAY.


In [17]:
# ========= dataset for plot =========
# `loc` is set to None here; it is not read anywhere in the visible cells.
loc = None

# Grid table shipped with the project: one 'lat' and one 'lon' column.
grid_csv_path = yml_dict['core']['proj_dir'] + '/data_misc/cGENIEGrid.csv'
cGENIEGrid = pandas.read_csv(grid_csv_path)
cGENIEGridB_lat36, cGENIEGridB_lon36 = cGENIEGrid['lat'], cGENIEGrid['lon']

In [18]:
# DA core script
#
# For every reconstruction interval, start from the prior ensemble and
# assimilate the valid observations one at a time with
# LMR_DA.enkf_update_array. The posterior ensemble of each interval is stored
# in Xa_output_all[:, :, interval].

# Floor applied to the observation error before each update.
min_ob_err = 0.5

# NetCDF file name
nc_filename = dir_data_save + '/' + nexp + '.nc'
# read preprior HDF5 file
# NOTE(review): no '/' is inserted between dir_proxy_data and nexp, so
# 'dbversion' presumably ends with a separator or acts as a file-name
# prefix — confirm against the script that writes this file.
dir_proxy_data = dir_data_save +'/'+ yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]['dbversion']
hdf5name = dir_proxy_data + nexp + '_precal_ye.hdf5'

with h5py.File(hdf5name, 'r') as f:
    Xb = f.get('Xb')      # 2d prior state, None when absent from the file
    Xb3d = f.get('Xb3d')  # 3d prior state, None when absent from the file

    print('>>  Xb3d: {}'.format(Xb3d))

    # Build the full prior state as an in-memory array (2d rows first, then 3d).
    if Xb is not None and Xb3d is not None:
        Xball = np.concatenate((Xb[...], Xb3d[...]), axis=0)
    elif Xb is not None:
        Xball = Xb[...]
    elif Xb3d is not None:
        Xball = Xb3d[...]
    else:
        # Nothing to assimilate into; stop here instead of failing later on
        # an undefined name.
        raise ValueError("Neither 'Xb' nor 'Xb3d' found in {}".format(hdf5name))

    Xb0 = np.copy(Xball)  # default Xb, restored at the start of every interval
    #Xb = ma.masked_where(Xb>9.9e+36, Xb)
    print(f.get('obvalue'))
    # Load the observation tables once; element-wise HDF5 reads inside the
    # double loop below are slow.
    obvalue_full = f['obvalue'][...]
    Ye_full = f['Ye'][...]
    ob_err_full = f['ob_err'][...]
    Yevar = f.get('Yevar')  # not used in the visible cells
    #print(Xb.shape) # (1296, 150)
    #print(Ye_full.shape) # (150, 1)

    n_state, n_members = Xb0.shape
    Xa_output_all = np.full((n_state, n_members, recon_period_len),np.nan)
    ob_len = obvalue_full.shape[0]

    # Ensemble covariance between the prior state and the estimate of
    # observation 0. It depends only on the prior, so it is computed up front
    # and stays defined even when observation 0 is missing in interval 0.
    if ob_len > 0:
        mye = np.mean(Ye_full[:, 0])
        ye = np.subtract(Ye_full[:, 0], mye)
        xbm = np.mean(Xb0,axis=1)
        Xbp = np.subtract(Xb0,xbm[:,None])  # "None" means replicate in this dimension
        kcov = np.dot(Xbp, ye) / (n_members-1)
    else:
        kcov = np.full(n_state, np.nan)

    print('>>  recon intervals: {}, obser number {}'.format(recon_period_len,ob_len))
    for reconi in range(recon_period_len):
        print('>>  recon ID: {}'.format(reconi))
        Xball = np.copy(Xb0)  # every interval starts from the prior
        for obi in range(ob_len):
            #print('recon ID: {}, obser ID {}'.format(reconi,obi))
            obvalue = obvalue_full[obi, reconi]
            ob_err = ob_err_full[obi, reconi]
            if np.isnan(obvalue) or np.isnan(ob_err):
                continue  # nothing to assimilate for this observation

            # NOTE(review): Ye is always taken from the prior estimates and is
            # not updated between serial updates — confirm this is intended.
            Ye = Ye_full[:,obi]
            ob_err = max(ob_err, min_ob_err)

            # DA: the updated ensemble becomes the background for the next
            # observation
            Xa = LMR_DA.enkf_update_array(Xball, obvalue, Ye, ob_err)
            Xball = Xa

        # Store the state after all updates. When the interval has no valid
        # observation this is the prior, not a stale Xa from an earlier
        # interval.
        Xa_output_all[:,:,reconi] = Xball

    Xball = np.copy(Xb0)  # restore Xball

    # Split the combined analysis back into its 2d and 3d parts.
    lenn1 = Xb.shape[0] if Xb is not None else 0
    lenn2 = Xb3d.shape[0] if Xb3d is not None else 0
    if Xb is not None:
        Xa_output_2d = Xa_output_all[0:lenn1,:,:]
    if Xb3d is not None:
        Xa_output_3d = Xa_output_all[lenn1:lenn1+lenn2,:,:]
print('>>  All Done')

>>  Xb3d: <HDF5 dataset "Xb3d": shape (41472, 150), type "<f8">
<HDF5 dataset "obvalue": shape (40, 3), type "<f8">
>>  recon intervals: 3, obser number 40
>>  recon ID: 0
>>  recon ID: 1
>>  recon ID: 2
>>  All Done


In [19]:
# DA save output
#
# Write the prior (Xb) and analysis (Xa) statistics to a NetCDF4 file:
# ensemble mean / variance, the full analysis ensemble, and the prior
# covariance with the estimate of observation 0, one set per state variable.
# Relies on Xa_output_2d / Xa_output_3d and kcov from the DA core cell.

# Values at or above this threshold are treated as missing (the netCDF default
# float fill value is about 9.96921e+36).
fill_threshold = 9.9692e+36

with h5py.File(hdf5name, 'r') as f:

    print('')
    print('>>   Start writing netCDF ...')

    xb2d_ds = f.get('Xb')    # None when the file has no 2d state
    xb3d_ds = f.get('Xb3d')  # None when the file has no 3d state
    # In kcov the 3d rows follow the 2d rows (the state is concatenated
    # 2d-first in the DA core cell).
    kcov_3d_offset = xb2d_ds.shape[0] if xb2d_ds is not None else 0

    # save netCDF file; the context manager closes it even when a write fails
    with Dataset(dir_data_save + '/' + nexp  + '.nc', 'w', format='NETCDF4') as nf:
        nf.description = 'DeepDA' + nc_filename
        #Specifying dimensions (each sized from its own coordinate column)
        nf.createDimension('lon', len(cGENIEGridB_lon36))
        nf.createDimension('lat', len(cGENIEGridB_lat36))
        z = np.arange(0,1,1)
        nf.createDimension('z', len(z))  # level
        nf.createDimension('nens', nens)  # number of ens
        nf.createDimension('time', recon_period_len)

        # Coordinate variables
        longitude = nf.createVariable('Longitude', 'f4', ('lon',))
        longitude[:] = cGENIEGridB_lon36.values
        longitude.units = 'degrees east'

        latitude = nf.createVariable('Latitude', 'f4', ('lat',))
        latitude[:] = cGENIEGridB_lat36.values
        latitude.units = 'degrees north'

        levels = nf.createVariable('Levels', 'i4', ('z',))
        levels[:] = z
        levels.units = 'layer'

        if xb3d_ds is not None:
            nf.createDimension('zt', len(zt))
            # Separate handle so that 'Levels' keeps its own attributes.
            # NOTE(review): no units are set on 'zt'; its unit is not visible
            # in this notebook — confirm against the prior nc file.
            zt_var = nf.createVariable('zt', 'f4', ('zt',))
            zt_var[:] = zt

        if xb2d_ds is not None:
            for nc_var_i, nc_var_name in enumerate(prior_variable_dict):
                # rows of this variable inside the stacked 2d state
                j0 = dum_ijmax * nc_var_i
                j1 = dum_ijmax * (nc_var_i+1)
                print('        id from {} to {}: {}'.format(j0, j1,nc_var_name))

                Xb0_i = xb2d_ds[j0:j1,:]              # (ij, nens)
                Xa_output_i = Xa_output_2d[j0:j1,:,:]  # (ij, nens, time)
                Xa_outputi = Xa_output_i.reshape(dum_jmax,dum_imax,nens,recon_period_len)

                xbm = np.mean(Xb0_i,axis=1)
                xbm_grid = xbm.reshape(dum_jmax,dum_imax,1)
                missing_prior = xbm_grid >= fill_threshold

                XbNC_mean = nf.createVariable(nc_var_name+'_Xb_mean', 'f4', ('lat', 'lon','z'))
                XbNC_mean[:,:,:] = xbm_grid

                XbNC_variance = nf.createVariable(nc_var_name+'_Xb_variance', 'f4', ('lat', 'lon','z'))
                Xb_temp = np.var(Xb0_i,axis=1).reshape(dum_jmax,dum_imax,1)
                XbNC_variance[:,:,:] = np.ma.MaskedArray(Xb_temp, missing_prior)

                XaNC_mean = nf.createVariable(nc_var_name+'_Xa_mean', 'f4', ('lat', 'lon','z','time'))
                Xam_temp = np.nanmean(Xa_outputi,axis=2).reshape(dum_jmax,dum_imax,1,recon_period_len)
                XaNC_mean[:,:,:,:] = Xam_temp

                XaNC_variance = nf.createVariable(nc_var_name+'_Xa_variance', 'f4', ('lat', 'lon','z','time'))
                Xa_temp = np.copy(np.ma.var(Xa_outputi,axis=2).reshape(dum_jmax,dum_imax,1,recon_period_len))
                XaNC_variance[:,:,:,:] = np.ma.MaskedArray(Xa_temp, Xam_temp >= fill_threshold)

                XaNC_full = nf.createVariable(nc_var_name+'_Xa_full', 'f4', ('lat', 'lon', 'nens', 'z','time'))
                XaNC_full[:,:,:,:,:] = Xa_outputi.reshape(dum_jmax,dum_imax,nens,1,recon_period_len)

                kcov_i = np.copy(kcov[j0:j1]).reshape(dum_jmax,dum_imax,1)
                cov_ob0 = nf.createVariable(nc_var_name+'_obs0'+'_cov', 'f4', ('lat', 'lon','z'))
                cov_ob0[:,:,:] = np.ma.MaskedArray(kcov_i, missing_prior)

                #Add local attributes to variable instances
                # NOTE(review): 'degC' is applied to every variable regardless
                # of its physical quantity — confirm.
                XbNC_mean.units = 'degC'
                XbNC_variance.units = 'degC^2'
                XaNC_full.units = 'degC'

        if xb3d_ds is not None:
            for nc_var_i, nc_var_name in enumerate(prior_variable_dict_3d):
                # rows of this variable inside the stacked 3d state
                j0 = dum_ijmax * dum_dmax * nc_var_i
                j1 = dum_ijmax * dum_dmax * (nc_var_i+1)
                print('>> Writing 3d field. ID from {} to {}: {}'.format(j0, j1,nc_var_name))

                Xb0_i = xb3d_ds[j0:j1,:]               # (ijk, nens)
                Xa_output_i = Xa_output_3d[j0:j1,:,:]   # (ijk, nens, time)
                Xa_outputi = Xa_output_i.reshape(dum_jmax,dum_imax,dum_dmax,nens,recon_period_len)

                xbm = np.mean(Xb0_i,axis=1)
                xbm_grid = xbm.reshape(dum_jmax,dum_imax,dum_dmax)

                XbNC_mean = nf.createVariable(nc_var_name+'_Xb_3d_mean', 'f4', ( 'lat', 'lon','zt'))
                XbNC_mean[:,:,:] = xbm_grid

                # Unlike the 2d case, the 3d variances are written unmasked
                # (as in the original cell).
                XbNC_variance = nf.createVariable(nc_var_name+'_Xb_3d_variance', 'f4', ('lat', 'lon','zt'))
                Xb_temp = np.var(Xb0_i,axis= 1).reshape(dum_jmax,dum_imax,dum_dmax)
                XbNC_variance[:,:,:] = Xb_temp

                XaNC_mean = nf.createVariable(nc_var_name+'_Xa_3d_mean', 'f4', ('lat', 'lon','zt','time'))
                Xam_temp = np.nanmean(Xa_outputi,axis=3).reshape(dum_jmax,dum_imax,dum_dmax,recon_period_len)
                XaNC_mean[:,:,:,:] = Xam_temp

                XaNC_variance = nf.createVariable(nc_var_name+'_Xa_3d_variance', 'f4', ('lat', 'lon','zt','time'))
                Xa_temp = np.copy(np.ma.var(Xa_outputi,axis=3).reshape(dum_jmax,dum_imax,dum_dmax,recon_period_len))
                XaNC_variance[:,:,:,:] = Xa_temp

                XaNC_full = nf.createVariable(nc_var_name+'_Xa_3d_full', 'f4', ('lat', 'lon','zt', 'nens', 'time'))
                XaNC_full[:,:,:,:,:] = Xa_outputi

                # Use this variable's own rows of kcov; a fixed slice would
                # write the first 3d variable's covariance for every variable.
                kcov_i = np.copy(kcov[kcov_3d_offset+j0:kcov_3d_offset+j1]).reshape(dum_jmax,dum_imax,dum_dmax)
                kcov_i = np.ma.MaskedArray(kcov_i, xbm_grid >= fill_threshold)
                cov_ob0 = nf.createVariable(nc_var_name+'_3d_obs0'+'_cov', 'f4', ('lat', 'lon','zt'))
                cov_ob0[:,:,:] = kcov_i

                #Add local attributes to variable instances
                # NOTE(review): 'degC' is applied to every variable regardless
                # of its physical quantity — confirm.
                XbNC_mean.units = 'degC'
                XbNC_variance.units = 'degC^2'
                XaNC_full.units = 'degC'

    # The dataset is closed at this point (end of the inner `with`).
    print('>>  End writing netCDF')
print('************  All Done  ************')


>>   Start writing netCDF ...
        id from 0 to 1296: ocn_sur_temp
        id from 1296 to 2592: atm_temp
        id from 2592 to 3888: ocn_ben_DIC_13C
>> Writing 3d field. ID from 0 to 20736: ocn_temp
>> Writing 3d field. ID from 20736 to 41472: misc_pH
>>  End writing netCDF
************  All Done  ************


In [20]:
# Safety net for an interrupted write: close the output file only if it is
# still open. Closing an already-closed netCDF4 Dataset raises RuntimeError,
# which would break a full Restart & Run All.
if nf.isopen():
    nf.close()  # close the new file