In [141]:
'''
Data assimilation for deep time
Stage 1:    Prior: cGENIE only
            Proxy: petmproxy3slices format database
            PSM: Bayesian proxy system model
            DA: Mingsong Li, with LMR DA Core
            
            Mingsong Li
            1/15/2020
'''
# Package
import h5py
from DeepDA_lib import LMR_DA

from netCDF4 import Dataset
import os
import numpy as np
import numpy.ma as ma
import numpy.matlib as mat
import scipy.stats as stats
import pandas
from sys import platform as sys_pf
import yaml
import matplotlib.pyplot as plt
if sys_pf == 'darwin':
    import matplotlib
    matplotlib.use("TkAgg")
    import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.basemap import Basemap, shiftgrid, cm

print('>>  OKAY.')

>>  OKAY.


In [142]:
# File name of the 2-D netCDF output that is opened inside each prior directory
# (joined below as <prior_dir>/<member>/<nc_file>).
nc_file ='fields_biogem_2d.nc'

In [143]:
# Read DeepDA_config.yml, work out the shape of the prior 2-D grid, and load
# the grid coordinates used later when writing/plotting the results.
#
# NOTE(review): yaml.FullLoader is kept to preserve behaviour; if the config
# only holds plain YAML types, yaml.safe_load would be the safer choice.
with open("DeepDA_config.yml", 'r') as f:
    yml_dict = yaml.load(f, Loader=yaml.FullLoader)
loc = None

dir_prior = yml_dict['core']['prior_dir']
dir_prior_full = os.listdir(dir_prior)
# list of state-variable names (also used to size the state vector)
prior_variable_dict = yml_dict['prior']['state_variables_info']

# Read the first variable, first time slice, to get the shape of the prior grid.
# Only entries that actually contain `nc_file` are considered, so stray files in
# the prior directory (e.g. .DS_Store) cannot break the lookup.
prior_nc_candidates = [
    os.path.join(dir_prior, member, nc_file) for member in dir_prior_full
]
prior_nc_candidates = [p for p in prior_nc_candidates if os.path.isfile(p)]
if not prior_nc_candidates:
    raise FileNotFoundError(
        'No {} found in any sub-directory of {}'.format(nc_file, dir_prior))

# The context manager closes the file; the slice is already loaded in memory.
with Dataset(prior_nc_candidates[0]) as prior_nc:
    x0 = prior_nc.variables[prior_variable_dict[0]][0, :, :]
print('    Shape of prior 2d grid {}'.format(x0.shape))
# NOTE(review): the original labels axis 0 as lon and axis 1 as lat; the axis
# order of the netCDF field is not visible here -- confirm against the file.
dum_imax = x0.shape[0]  # size of grid axis 0
dum_jmax = x0.shape[1]  # size of grid axis 1
dum_ijmax = dum_imax*dum_jmax  # number of grid cells per state variable


# ========= dataset for plot =========
cGENIEGrid = yml_dict['core']['data_dir'] + '/data_misc/cGENIEGrid.csv'
cGENIEGrid = pandas.read_csv(cGENIEGrid)
cGENIEGridB_lat36 = cGENIEGrid['lat']
cGENIEGridB_lon36 = cGENIEGrid['lon']

# variable list
prior_variable_len = len(prior_variable_dict)
print('>>  Number of prior variables is: {}. List:'.format(prior_variable_len))
print('      {}'.format(prior_variable_dict))
print('>>  OKAY.')

    Shape of prior 2d grid (36, 36)
>>  Number of prior variables is: 5. List:
      ['ocn_sur_temp', 'atm_temp', 'atm_pCO2', 'ocn_sur_sal', 'ocn_ben_DIC_13C']
>>  OKAY.


In [157]:
# DA: for every reconstruction interval, assimilate the observations one at a
# time with LMR_DA.enkf_update_array (each posterior becomes the prior for the
# next observation), then write prior/posterior statistics to a netCDF file.

# Values at or above this threshold are treated as missing when masking the
# variance / covariance fields (it sits just below netCDF's default float fill
# value).
FILL_THRESHOLD = 9.9692e+36

nexp = yml_dict['core']['nexp']
nens = yml_dict['core']['nens']
datadir_output = yml_dict['core']['datadir_output']
recon_period = yml_dict['core']['recon_period']
recon_timescale = yml_dict['core']['recon_timescale_interval']
recon_period_full = np.arange(recon_period[0], recon_period[1] + 1, recon_timescale)
recon_period_len = recon_period_full.shape[0]

# for saving DA product Xa: (state vector, ensemble member, interval)
Xa_output = np.full((dum_ijmax * prior_variable_len, nens, recon_period_len), np.nan)

# NetCDF file name
nc_filename = datadir_output + '/' + nexp + '.nc'

# pre-computed prior HDF5 file of the first proxy database listed in the config
proxy_cfg = yml_dict['proxies'][yml_dict['proxies']['use_from'][0]]
dir_proxies = proxy_cfg['datadir_proxy'] + '/' + proxy_cfg['dbversion']
hdf5name = dir_proxies + '.hdf5'

# Load everything into memory so the HDF5 file is closed before the update
# loop and no lazy dataset handle outlives the `with` block.
with h5py.File(hdf5name, 'r') as f:
    print(f['obvalue'])
    Xb0 = f['Xb'][()]                # prior ensemble, rows = state, cols = members
    obvalue_full = f['obvalue'][()]  # indexed [observation, interval]
    Ye_full = f['Ye'][()]            # indexed [member, observation]
    ob_err_full = f['ob_err'][()]    # indexed [observation, interval]

ob_len = obvalue_full.shape[0]
print('recon intervals: {}, obser number {}'.format(recon_period_len, ob_len))
if ob_len == 0 or recon_period_len == 0:
    # Without this guard the cell would fail later with a NameError.
    raise ValueError('Nothing to assimilate: {} observations, {} intervals'.format(
        ob_len, recon_period_len))

# Covariance between the prior state and the first observation estimate
# (written to the file as a diagnostic).
Ye_first = Ye_full[:, 0]
Xb0_anomaly = Xb0 - np.mean(Xb0, axis=1)[:, None]
kcov = np.dot(Xb0_anomaly, Ye_first - np.mean(Ye_first)) / (nens - 1)

# NOTE(review): Ye is always taken from the prior and is not updated as
# observations are assimilated -- confirm this is intended for serial updates.
Xb = np.copy(Xb0)
for reconi in range(recon_period_len):
    for obi in range(ob_len):
        obvalue = obvalue_full[obi, reconi]
        ob_err = ob_err_full[obi, reconi]
        Ye = Ye_full[:, obi]
        Xa = LMR_DA.enkf_update_array(Xb, obvalue, Ye, ob_err)
        # update Xb using Xa: the posterior is the prior of the next observation
        Xb = np.copy(Xa)

    Xa_output[:, :, reconi] = Xa
    # every interval starts again from the original prior
    Xb = np.copy(Xb0)

print('')
print('start writing netCDF')

# The context manager closes the file even if a write step fails, so a rerun
# never finds a stale open handle on the output file.
with Dataset(nc_filename, 'w', format='NETCDF4') as nf:
    nf.description = 'DeepDA' + nc_filename

    # Dimensions ('lon' is sized from the lon column, 'lat' from the lat column)
    nf.createDimension('lon', len(cGENIEGridB_lon36))
    nf.createDimension('lat', len(cGENIEGridB_lat36))
    z = np.arange(0, 1, 1)
    nf.createDimension('z', len(z))  # level
    nf.createDimension('nens', nens)  # number of ens
    nf.createDimension('time', recon_period_len)

    # Coordinate variables
    longitude = nf.createVariable('Longitude', 'f4', 'lon')
    longitude[:] = cGENIEGridB_lon36.values
    longitude.units = 'degrees east'

    latitude = nf.createVariable('Latitude', 'f4', 'lat')
    latitude[:] = cGENIEGridB_lat36.values
    latitude.units = 'degrees north'

    levels = nf.createVariable('Levels', 'i4', 'z')
    levels[:] = z
    levels.units = 'layer'

    for nc_var_i, nc_var_name in enumerate(prior_variable_dict):
        # rows of the state vector that belong to this variable
        j0 = dum_ijmax * nc_var_i
        j1 = dum_ijmax * (nc_var_i + 1)
        print('id from {} to {}: {}'.format(j0, j1, nc_var_name))

        Xb0_i = Xb0[j0:j1, :]
        Xa_outputi = Xa_output[j0:j1, :, :].reshape(
            dum_jmax, dum_imax, nens, recon_period_len)

        # prior ensemble mean; also defines the missing-value mask
        xbm = np.mean(Xb0_i, axis=1).reshape(dum_jmax, dum_imax, 1)
        prior_missing = xbm >= FILL_THRESHOLD

        XbNC_mean = nf.createVariable(nc_var_name+'_Xb_mean', 'f4', ('lat', 'lon', 'z'))
        XbNC_mean[:, :, :] = xbm

        XbNC_variance = nf.createVariable(nc_var_name+'_Xb_variance', 'f4', ('lat', 'lon', 'z'))
        Xb_temp = np.var(Xb0_i, axis=1).reshape(dum_jmax, dum_imax, 1)
        XbNC_variance[:, :, :] = np.ma.MaskedArray(Xb_temp, prior_missing)

        # posterior ensemble mean / variance per interval
        XaNC_mean = nf.createVariable(nc_var_name+'_Xa_mean', 'f4', ('lat', 'lon', 'z', 'time'))
        Xam_temp = np.nanmean(Xa_outputi, axis=2).reshape(
            dum_jmax, dum_imax, 1, recon_period_len)
        XaNC_mean[:, :, :, :] = Xam_temp

        XaNC_variance = nf.createVariable(nc_var_name+'_Xa_variance', 'f4', ('lat', 'lon', 'z', 'time'))
        Xa_temp = np.var(Xa_outputi, axis=2).reshape(
            dum_jmax, dum_imax, 1, recon_period_len)
        XaNC_variance[:, :, :, :] = np.ma.MaskedArray(Xa_temp, Xam_temp >= FILL_THRESHOLD)

        # full posterior ensemble
        XaNC_full = nf.createVariable(nc_var_name+'_Xa_full', 'f4', ('lat', 'lon', 'nens', 'z', 'time'))
        XaNC_full[:, :, :, :, :] = Xa_outputi.reshape(
            dum_jmax, dum_imax, nens, 1, recon_period_len)

        # prior covariance with the first observation, same layout as the means
        kcov_i = np.ma.MaskedArray(
            kcov[j0:j1].reshape(dum_jmax, dum_imax, 1), prior_missing)
        cov_ob0 = nf.createVariable(nc_var_name+'obs0'+'_cov', 'f4', ('lat', 'lon', 'z'))
        cov_ob0[:, :, :] = kcov_i

        # NOTE(review): the same temperature units are attached to every state
        # variable (including pCO2, salinity, d13C) -- kept as in the original,
        # but they should come from per-variable metadata.
        XbNC_mean.units = 'degC'
        XbNC_variance.units = 'degC^2'
        XaNC_full.units = 'degC'

print('end writing netCDF')
print('All Done')

<HDF5 dataset "obvalue": shape (40, 3), type "<f8">
recon intervals: 3, obser number 40

start writing netCDF
id from 0 to 1296: ocn_sur_temp
id from 1296 to 2592: atm_temp
id from 2592 to 3888: atm_pCO2
id from 3888 to 5184: ocn_sur_sal
id from 5184 to 6480: ocn_ben_DIC_13C
end writing netCDF
All Done




In [156]:
# Recovery step: close the output file only if a previous run left the handle
# open. An unconditional close fails on a fresh kernel (no `nf` yet) and on a
# handle that has already been closed.
if 'nf' in globals() and nf.isopen():
    nf.close()

# Scratch check: np.copy returns an independent array, so editing the copy
# leaves the source untouched.
w1 = np.array([1,2,3])
w2 = np.copy(w1)

w2[0] = 3

print(w2)   # [3 2 3]
print(w1)   # [1 2 3]  (unchanged)

np.may_share_memory(w2, w1)  # False

[3 2 3]
[1 2 3]


False

In [112]:
# Read-back check: re-open the written file and print, for each reconstruction
# interval, the shape and mean of the posterior-mean field of the last variable
# written (`nc_var_name` keeps its final value from the writing loop).
# The context manager closes the file even if a read fails; a separate name is
# used for the netCDF variable so the posterior array `Xa` is not overwritten.
with Dataset(nc_filename, 'r') as nf:
    Xa_mean_nc = nf.variables[nc_var_name+'_Xa_mean']
    for reconi in range(recon_period_len):
        Xam1 = Xa_mean_nc[:, :, 0, reconi]
        print(Xam1.shape)
        print(np.nanmean(Xam1))

(36, 36)
33.732727
(36, 36)
33.73631
(36, 36)
33.736412
