```
This notebook sets up and runs a set of benchmarks to compare
different numerical discretizations of the SWEs

Copyright (C) 2016  SINTEF ICT

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
```

# Stochastic Ensemble Kalman Filter with the complete model

This notebook implements an experiment of using a stochastic ensemble Kalman filter to assimilate information obtained from a Lagrangian drifter into the Eulerian ocean field.

### A particle
Each particle is an ocean model, which conceptually doesn't hold any drifters. Drifters are displayed in the simulations in order to show how each particle differs in the animations. The state vector, however, is purely based on 
$$\psi_i^n = [\eta_i^n, hu_i^n, hv_i^n]^T \in \mathbb{R}^{3 n_x n_y}$$


### The model
The model is a shallow water model solved with the CDKLM scheme. Drifter integration is used in the synthetic truth, and in order to visualize differences between particles.
A small-scale stochastic term is added to each particle for every timestep, to represent the model error.


### The truth
A synthetic truth is used in the form of an identical twin.

### The observation
The observations are based on how the Lagrangian drifters change positions between observations.
This change represents a velocity, so that $y^n = [hu_{j,k, truth}^n, hv_{j,k, truth}^n]^T$, in which $(j,k)$ represents the index of the cell where the latest observation was made.



## Set environment

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import numpy as np
import matplotlib
from matplotlib import pyplot as plt
from matplotlib import animation, rc
from scipy.special import lambertw

import pyopencl
import os
import sys

# Put the parent of the current working directory first on the module search
# path (presumably so that the SWESimulators package imported below resolves
# to the local checkout -- confirm against the repository layout).
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '../')))

#Set large figure sizes
rc('figure', figsize=(16.0, 12.0))
# Use the HTML5 video representation when an animation is displayed inline.
rc('animation', html='html5')
# Draw negative contour levels with solid lines.
matplotlib.rcParams['contour.negative_linestyle'] = 'solid'

#Import our simulator
from SWESimulators import CDKLM16, PlotHelper, Common

from SWESimulators import BathymetryAndICs as BC
from SWESimulators import OceanStateNoise
from SWESimulators import OceanNoiseEnsemble
from SWESimulators import BaseOceanStateEnsemble
from SWESimulators import DataAssimilationUtils as dautils


In [None]:
#Make sure we get compiler output from OpenCL
os.environ["PYOPENCL_COMPILER_OUTPUT"] = "1"

#Set which CL device to use, and disable kernel caching
if (str.lower(sys.platform).startswith("linux")):
    os.environ["PYOPENCL_CTX"] = "0"
else:
    os.environ["PYOPENCL_CTX"] = "1"
os.environ["CUDA_CACHE_DISABLE"] = "1"
os.environ["PYOPENCL_COMPILER_OUTPUT"] = "1"
os.environ["PYOPENCL_NO_CACHE"] = "1"

#Create OpenCL context
cl_ctx = pyopencl.create_some_context()
cl_queue = pyopencl.CommandQueue(cl_ctx)
print "Using ", cl_ctx.devices[0].name

# Ensemble

We need an ensemble where each particle
- runs an independent ocean model
- drifts a drifter
- applies a localized small-scale error
- observes the drifter position

Needs to be done:
- Initialize models (create netcdf with init, add error with amp 10*q0(?), put drifter into a small area of the 
- make useful plots to evaluate the results
    - Suggestion: 3-line [eta, hu, hv] plot, with truth, ensemble (mean field with individual drifters), mean-square diff?
    - 3x3/4x4/5x5 plot of eta from different ensemble members?
    - Standard animation of a single ensemble member.


### Utility functions

In [None]:

def showMatrices(x, y, title, z = None):
    """Show two (or three) 2D fields side by side in one small figure.

    Every panel is drawn with ``imshow`` (origin in the lower-left corner,
    no interpolation) and its x-label shows the field's (min, max).

    x, y  -- the fields for the first and second panel
    title -- text used as the figure's super-title
    z     -- optional field for a third panel
    """
    panels = [x, y]
    if z is not None:
        panels.append(z)
    num_panels = len(panels)

    plt.figure(figsize=(num_panels*2, 2))
    for position, field in enumerate(panels, 1):
        plt.subplot(1, num_panels, position)
        plt.imshow(field.copy(), origin="lower", interpolation="None")
        plt.xlabel('(%.2E, %.2E)' % (np.min(field), np.max(field)))
    plt.suptitle(title)
    
def expand_to_periodic_boundaries(interior, ghostcells):
    """Surround a 2D interior field with periodic ghost cells.

    interior   -- array of shape (ny, nx) holding the interior domain
    ghostcells -- number of ghost cells to add on every side

    Returns ``interior`` itself when ``ghostcells`` is 0.  Otherwise returns a
    new float64 array of shape (ny + 2*ghostcells, nx + 2*ghostcells) whose
    ghost rows/columns are copies of the interior cells on the opposite side.
    """
    if ghostcells == 0:
        return interior

    halo = ghostcells
    rows, cols = interior.shape

    padded = np.zeros((rows + 2*halo, cols + 2*halo))
    padded[halo:-halo, halo:-halo] = interior

    # Rows first: the lowest ghost rows mirror the last interior rows, and
    # the highest ghost rows mirror the first interior rows.
    padded[:halo, :] = padded[rows:rows + halo, :]
    padded[rows + halo:, :] = padded[halo:2*halo, :]

    # Columns second, over the full height, so the corners are filled too.
    padded[:, :halo] = padded[:, cols:cols + halo]
    padded[:, cols + halo:] = padded[:, halo:2*halo]
    return padded
    

# Stochastic Ensemble Kalman Filter

In [None]:
def cov(ensemble_data, ensemble_mean, field_a, a_x, a_y, field_b, b_x, b_y):
    """Sample covariance between two scalar entries of the ensemble state.

    ensemble_data -- one entry per member; entry[field] is a 2D array
                     indexed as [y, x]
    ensemble_mean -- per-field 2D arrays with the ensemble mean
    field_a, a_x, a_y -- field index and cell of the first entry
    field_b, b_x, b_y -- field index and cell of the second entry

    Returns the sum over members of the product of the two deviations from
    the mean, divided by (number of members - 1).
    """
    mean_a = ensemble_mean[field_a][a_y, a_x]
    mean_b = ensemble_mean[field_b][b_y, b_x]

    total = 0.0
    for member in ensemble_data:
        deviation_a = member[field_a][a_y, a_x] - mean_a
        deviation_b = member[field_b][b_y, b_x] - mean_b
        total += deviation_a*deviation_b
    return total/(len(ensemble_data) - 1.0)

def loc_factor(a_x, a_y, b_x, b_y, localization_factor):
    """Localization weight between cell (a_x, a_y) and cell (b_x, b_y).

    The Euclidean distance between the two cells is rescaled by
    5/localization_factor.  The weight is 1 while the rescaled distance is
    below 2 and decays as exp(-(d - 2)**2/5) beyond that, so that a distance
    equal to ``localization_factor`` gives exp(-9/5).
    """
    offset_x = a_x - b_x
    offset_y = a_y - b_y

    # map the distance so that it is in the scale for which this function was constructed
    scaled = np.sqrt(offset_x**2 + offset_y**2)*5.0/localization_factor

    # Flat plateau (weight 1) for rescaled distances below 2
    beyond_plateau = 0 if scaled < 2 else scaled - 2
    return np.exp(-beyond_plateau**2/5.0)



def SEnKF(ensemble, localization_factor=5, inflation_factor=1.0, stochastic=True, debug=False):
    
    nx, ny = ensemble.nx, ensemble.ny
    
    # In this algorithm, we need the data from all ensemble members, along with the 
    # mean values of eta, hu and hv.

    ensemble_data = [None]*ensemble.getNumParticles()
    ensemble_mean = [np.zeros((ny, nx)), np.zeros((ny, nx)), np.zeros((ny,nx))]

    for i in range(ensemble.getNumParticles()):
        eta, hu, hv = ensemble.downloadParticleOceanState(i)
        ensemble_data[i] = [eta, hu, hv]

        ensemble_mean[0] += eta
        ensemble_mean[1] += hu
        ensemble_mean[2] += hv

    for dim in range(3):
        ensemble_mean[dim] /= ensemble.getNumParticles()
    
    for drifter in range(ensemble.driftersPerOceanModel):
        if debug: print "--------------------------------"
        if debug: print "Starting drifter " + str(drifter)
    
        drifter_pos = ensemble.observeTrueDrifters()[drifter,:]

        if debug: print "Ensemble size: ", ensemble.getNumParticles()

        # downloadTrueOceanState and downloadParticleOceanState gives us interior domain only,
        # and no ghost cells.
        cell_id_x = int(np.floor(drifter_pos[0]/ensemble.dx))
        cell_id_y = int(np.floor(drifter_pos[1]/ensemble.dy))


        

        # Obtain the matrix S = (HPH^T + R)^-1, which is N_y \times N_y
        S_inv = ensemble.observation_cov.copy()
        if debug: print "S_inv before cov-term:"
        if debug: print S_inv
        for row in range(2):
            for col in range(2):
                S_inv[row,col] += cov(ensemble_data, ensemble_mean, row+1, cell_id_x, cell_id_y, col+1, cell_id_x, cell_id_y)

        if debug: print "S_inv before cov-term:"
        if debug: print S_inv

        S = np.linalg.inv(S_inv)
        if debug: print "S: "
        if debug: print S



        # Form K, in the form of two state vectors
        # K = [eta_hu[y,x], hu_hu[y,x], hv_hu[y,x],
        #     [eta_hv[y,x], hu_hv[y,x], hv_hv[y,x]]
        K = [np.zeros((ny, nx)), np.zeros((ny, nx)), np.zeros((ny,nx)),
             np.zeros((ny, nx)), np.zeros((ny, nx)), np.zeros((ny,nx))]

        if debug: print "cell_id_x: ", cell_id_x

        loc = localization_factor
        if localization_factor is None:
            loc = max(nx, ny)

        for j in range(cell_id_y - loc, cell_id_y + loc+1):
            # Assuming periodic boundary conditions in y:
            loc_j = j
            if j < 0:
                j = j + ny
            elif j >= ny:
                j = j - ny

            for i in range(cell_id_x - loc, cell_id_x + loc+1):
                # Assuming periodic boundary conditions in x:
                loc_i = i
                if i < 0:
                    i = i + nx
                elif i >= nx:
                    i = i - nx

                l = loc_factor(loc_i, loc_j, cell_id_x, cell_id_y, loc)
                if localization_factor is None:
                    l = 1.0

                for field in range(3):
                    sigma_u = cov(ensemble_data, ensemble_mean, field, i, j, 1, cell_id_x, cell_id_y)
                    sigma_v = cov(ensemble_data, ensemble_mean, field, i, j, 2, cell_id_x, cell_id_y)

                    K[field  ][j,i] = l*(sigma_u*S[0,0] + sigma_v*S[0,1])
                    K[field+3][j,i] = l*(sigma_u*S[1,0] + sigma_v*S[1,1])


        if debug: showMatrices(K[0], K[1], "K1", K[2])
        if debug: showMatrices(K[3], K[4], "K2", K[5])


        # Obtain innovations:
        innovations = ensemble.getInnovations()[:,drifter,:]
        if debug: print "innovations:"
        if debug: print innovations
        if stochastic:
            for p in range(ensemble.getNumParticles()):
                obs_error = np.random.multivariate_normal(np.zeros(2), ensemble.observation_cov)
                if debug: print innovations[p,:], obs_error
                innovations[p,:] -= obs_error
                if debug: print innovations[p,:]
                if debug: print " "


        # Apply SEnKF to make the analysis from the forecast
        lookAt = 13
        if debug: showMatrices(ensemble_data[lookAt][0], ensemble_data[lookAt][1],\
                               "Particle " + str(lookAt) + " (pre)", ensemble_data[lookAt][2])

        for p in range(ensemble.getNumParticles()):
            for f in range(3):
                for j in range(ny):
                    for i in range(nx):
                        ensemble_data[p][f][j,i] += K[f][j,i]*innovations[p,0] + K[f+3][j,i]*innovations[p,1]

        if debug: showMatrices(ensemble_data[lookAt][0], ensemble_data[lookAt][1],\
                               "Particle " + str(lookAt) + " (post)", ensemble_data[lookAt][2])
        if debug: showMatrices(K[0]*innovations[lookAt,0] + K[3]*innovations[lookAt,1], \
                               K[1]*innovations[lookAt,0] + K[4]*innovations[lookAt,1], \
                               "Nudge for particle " + str(lookAt), \
                               K[2]*innovations[lookAt,0] + K[5]*innovations[lookAt,1])


    if debug: print "ensemble.observation_cov: "
    if debug: print ensemble.observation_cov
        
    # Upload the analysis back to the GPU:
    for p in range(ensemble.getNumParticles()):
        eta = expand_to_periodic_boundaries(ensemble_data[p][0], 2)
        hu  = expand_to_periodic_boundaries(ensemble_data[p][1], 2)
        hv  = expand_to_periodic_boundaries(ensemble_data[p][2], 2)
    
        ensemble.particles[p].upload(eta, hu, hv)
        
        
        
# Stand-alone debug run of the filter.  Both `ensemble` and `waterDepth` are
# created by the initial-condition cell further down in this notebook, so
# that cell has to be executed before these two lines can run.
SEnKF(ensemble, localization_factor=7, debug=True)
print "waterHeight: ", waterDepth
    

## Create initial condition for ensemble:

In [None]:



# DEFINE PARAMETERS

# Grid for the Coriolis well balanced reconstruction scheme:
# number of cells and cell size in x and y
nx, ny = 40, 40
dx, dy = 4.0, 4.0

# Scalar model parameters
dt = 0.05*3
g = 9.81
r = 0.0
f = 0.05
beta = 0.0

# Ghost cells per side, listed as north, east, south, west
ghosts = np.array([2,2,2,2])
validDomain = np.array([2,2,2,2])
boundaryConditions = Common.BoundaryConditions(2,2,2,2)

# Define which cell index which has lower left corner as position (0,0)
x_zero_ref = 2
y_zero_ref = 2

# Buffer shapes including ghost cells; the Hi buffer has one extra entry in
# each direction.
dataShape = (ny + ghosts[0] + ghosts[2], nx + ghosts[1] + ghosts[3])
dataShapeHi = tuple(extent + 1 for extent in dataShape)

# Zero-initialized float32 state buffers
eta0, eta0_extra, hv0, hu0 = (np.zeros(dataShape, dtype=np.float32, order='C')
                              for _unused in range(4))
waterDepth = 1.0
Hi = waterDepth*np.ones(dataShapeHi, dtype=np.float32, order='C')

# Add disturbance:
initOption = 3
if initOption in (1, 2):
    # Option 1 (original initial conditions) and option 2 (initial conditions
    # used for the SIR filter) build exactly the same field: a sequence of
    # bumps, where eta0 is rescaled after most of them, followed by one wide
    # extra bump and a balanced velocity field.
    rel_grid_size = nx*1.0/dx
    # (x position, y position, size relative to rel_grid_size, rescaling of
    #  eta0 after the bump or None for no rescaling)
    bumps = ((0.3,  0.5,  0.05, 0.3),
             (0.7,  0.3,  0.10, -1.3),
             (0.15, 0.8,  0.03, 1.0),
             (0.6,  0.75, 0.06, None),
             (0.2,  0.2,  0.01, -0.03))
    for bump in bumps:
        BC.addBump(eta0, nx, ny, dx, dy, bump[0], bump[1], bump[2]*rel_grid_size, validDomain)
        if bump[3] is not None:
            eta0 = eta0*bump[3]
    BC.addBump(eta0_extra, nx, ny, dx, dy, 0.5, 0.5, 0.4*rel_grid_size, validDomain)
    eta0 = eta0 + 0.02*eta0_extra
    BC.initializeBalancedVelocityField(eta0, Hi, hu0, hv0, f, beta, g, nx, ny, dx ,dy, ghosts)
    eta0 = eta0*0.5
# initOption == 3: Initial conditions random - see further down!
    

# When this cell is re-run, call cleanUp() on the simulator and ensemble left
# over from the previous run before new ones are created.
if 'sim' in globals():
    sim.cleanUp()
if 'ensemble' in globals():
    ensemble.cleanUp()
    
# q0 is used below as the small-scale perturbation amplitude of both the
# simulator and the ensemble.
q0 = 0.5*dt*f/(g*waterDepth)
print "q0: ", q0
print "[f, g, H]", [f, g, waterDepth]
print "f/gH: ", f/(g*waterDepth)
print "gH/f: ", g*waterDepth/f

# Reload the project modules so that edits to them take effect without
# restarting the kernel (Python 2 builtin reload).
reload(CDKLM16)
reload(BaseOceanStateEnsemble)
reload(OceanNoiseEnsemble)
reload(PlotHelper)
reload(dautils)
# Simulator built from the initial conditions and parameters defined above.
sim = CDKLM16.CDKLM16(cl_ctx, eta0, hu0, hv0, Hi, \
                      nx, ny, dx, dy, dt, g, f, r, \
                      boundary_conditions=boundaryConditions, \
                      write_netcdf=False, \
                      small_scale_perturbation=True, \
                      small_scale_perturbation_amplitude=q0)
# With initOption 3 the buffers above are all zero; the initial state is
# obtained by perturbing the simulator state instead.
if initOption == 3:
    sim.perturbState(q0_scale=50)

# Ensemble of ocean models that takes its grid information from sim, with
# three drifters per ocean model.
ensemble_size = 20
ensemble = OceanNoiseEnsemble.OceanNoiseEnsemble(ensemble_size, cl_ctx,  
                                                 #observation_type=dautils.ObservationType.UnderlyingFlow)
                                                 observation_type=dautils.ObservationType.DirectUnderlyingFlow)
ensemble.setGridInfoFromSim(sim)
ensemble.setStochasticVariables(#observation_variance_factor=2.0,
                                observation_variance = 0.01**2,
                                small_scale_perturbation_amplitude=q0)
                                #initialization_variance_factor_ocean_field=50)
ensemble.init(driftersPerOceanModel=3)
#ensemble.plotEnsemble()


# Figure and plotter used by animate() below.
fig = plt.figure()
plotter = PlotHelper.EnsembleAnimator(fig, ensemble, trueStateOnly=False)

#T = 50
# Time passed to ensemble.step() for every animation frame.
sub_t = 2*dt
#resampling_points = [36, 72, 108]
#resampling_points = [9, 18, 27, 36, 45, 56]
#resampling_points = range(5, 100, 10)

# T is the number of animation frames.  animate() runs the filter on the
# frames listed in resampling_points; entries >= T are never reached.
T = 13*2*2
#T = 9
resampling_points = range(10, 250, 10)
#resampling_points = [T+1]

print "Will resample at iterations: ", resampling_points
# Collects the figures returned by ensemble.plotDistanceInfo in animate().
infoPlots = []

def animate(i):
    if (i>0):
        t = ensemble.step(sub_t)
    else:
        t = 0.0

    for rp in resampling_points:
        if i == rp:
            print "resampling at iteration " + str(i)
            infoFig = ensemble.plotDistanceInfo(title="it = " + str(i) + " before resampling")
            plt.close(infoFig)
            infoPlots.append(infoFig)
            
            SEnKF(ensemble, localization_factor=5)
                        
            infoFig = ensemble.plotDistanceInfo(title="it = " + str(i) + " post resampling")
            plt.close(infoFig)
            infoPlots.append(infoFig)
    
    plotter.plot(ensemble);
    
    ensemble.getEnsembleVarAndRMSEUnderDrifter(i)

    fig.suptitle("Ensemble = " + "{:04.0f}".format(t) + " s", fontsize=18)

    if (i%10 == 0):
        print "{:03.0f}".format(100*i / T) + " % => t=" + str(t) 

# Build the animation: animate(i) is called for i = 0, ..., T-1, with an
# interval of 100 ms between frames.
anim = animation.FuncAnimation(fig, animate, range(T), interval=100)
# Close the underlying figure (presumably so that the notebook does not also
# show it as a static image).
plt.close(anim._fig)
# Last expression of the cell, so the notebook displays the animation.
anim

In [None]:
# Plot the ensemble using its own plotEnsemble routine.
ensemble.plotEnsemble()

In [None]:
# Ask the ensemble for the largest time step it reports as possible, and print it.
max_dt = ensemble.findLargestPossibleTimeStep()
print "Largest possible timestep with this case: ", max_dt

In [None]:
def show_figures(figs):
    """Attach each figure in ``figs`` to the canvas of a newly created figure.

    The figures handled here were closed right after creation (see
    ``animate``); giving them the canvas manager of a fresh figure is
    presumably what lets the notebook display them again.
    """
    for figure in figs:
        # A throw-away figure provides the canvas manager to take over
        manager = plt.figure().canvas.manager
        manager.canvas.figure = figure
        figure.set_canvas(manager.canvas)
        # File name derived from the figure's super-title; only used if the
        # savefig call below is re-enabled.
        filename = figure._suptitle.get_text().replace(" ", "_").replace("=_", "") + ".png"
        #plt.savefig(filename)
# Show the distance-info figures stored during the animation, followed by
# the same kind of figure for the ensemble as it is now.
show_figures(infoPlots)
fig = ensemble.plotDistanceInfo(title="Final ensemble")

In [None]:
def _plot_under_drifter(title, eta_series, hu_series, hv_series, marker_level, y_limits):
    """Plot the eta, hu and hv series against ensemble.tArray in a new figure.

    A row of 'o' markers is drawn at height ``marker_level`` at the values in
    ``resampling_points``.  Returns the new figure.
    """
    figure = plt.figure(figsize=(10,3))
    for series, label in ((eta_series, 'eta'), (hu_series, 'hu'), (hv_series, 'hv')):
        plt.plot(ensemble.tArray, series, label=label)
    plt.plot(resampling_points, marker_level*np.ones_like(resampling_points), 'o')
    plt.title(title)
    plt.legend(loc=0)
    plt.grid()
    plt.ylim(y_limits)
    return figure

fig = _plot_under_drifter("RMSE under drifter",
                          ensemble.rmseUnderDrifter_eta,
                          ensemble.rmseUnderDrifter_hu,
                          ensemble.rmseUnderDrifter_hv,
                          0.05, [0, 0.6])

fig = _plot_under_drifter("Variance under drifter",
                          ensemble.varianceUnderDrifter_eta,
                          ensemble.varianceUnderDrifter_hu,
                          ensemble.varianceUnderDrifter_hv,
                          0.05, [0, 0.6])

fig = _plot_under_drifter("r = var/rmse under drifter",
                          ensemble.rUnderDrifter_eta,
                          ensemble.rUnderDrifter_hu,
                          ensemble.rUnderDrifter_hv,
                          1.0, [0, 5])

# Square root of the first diagonal entry of the observation covariance.
print np.sqrt(ensemble.observation_cov[0,0])

In [None]:
def r(glob_x, cutoff):
    """Localization weight as a function of distance, for plotting.

    This is the same curve as ``loc_factor``: the distances are rescaled by
    5/cutoff, the weight is 1 while the rescaled distance is below 2, and it
    decays as exp(-(d - 2)**2/5) beyond that.

    glob_x -- distance(s); a scalar or an array of any shape
    cutoff -- length scale; a distance equal to ``cutoff`` gets weight exp(-9/5)

    Returns the weights with the same shape as ``glob_x``; the input is not
    modified.

    Note: elsewhere in this notebook the module-level name ``r`` is bound to a
    scalar (``r = 0.0``) that is passed to the CDKLM16 constructor.  Running
    this cell rebinds ``r`` to this function until the parameter cell is run
    again.
    """
    # map x so that glob_x is in the range 0:7
    x = np.asarray(glob_x)*5.0/cutoff

    # Flat plateau: everything below 2 is treated as 2, i.e. gets weight 1
    x = np.maximum(x, 2)
    return np.exp(-(x-2)**2/5.0)

# Plot the localization curve r(x, cutoff) for three cutoff values, each in
# its own figure and over the range 0 <= x <= 1.5*cutoff.
for cutoff in (5, 20, 10):
    x = np.linspace(0, cutoff*1.5, 100)
    fig = plt.figure(figsize=(6,3))
    plt.plot(x, r(x, cutoff))
    plt.grid()
    plt.title("Cutoff: "+str(cutoff))

