# Demo

This notebook demonstrates how to use pydpac with a one-dimensional toy model known as Lorenz96 ([Lorenz 1995](https://www.ecmwf.int/node/10829), [Lorenz and Emanuel 1998](https://doi.org/10.1175/1520-0469%281998%29055%3C0399:OSFSWO%3E2.0.CO;2)), which is described by the following equation.
$$
\frac{\mathrm{d}X_j}{\mathrm{d}t} = (X_{j+1} - X_{j-2})X_{j-1} - X_j + F
$${#eq-l96}
where $j=1,\cdots,40$ is a grid index, $F$ is a constant forcing, and the boundary is cyclic: $X_{-1} = X_{39}$, $X_0 = X_{40}$, and $X_{41} = X_1$.

## Available DA algorithms

A data assimilation (DA) algorithm can be chosen from the following.

- Deterministic DA
    * Kalman filter ([Kalman 1960](https://doi.org/10.1115/1.3662552))
    * 3-dimensional, 4-dimensional variational method (3DVar, 4DVar; [Talagrand and Courtier 1987](https://doi.org/10.1002/qj.49711347812))
- Ensemble DA
    * Ensemble Kalman Filter ([Evensen 1994](https://doi.org/10.1029/94JC00572))
        + Ensemble transform Kalman filter (ETKF; [Bishop et al. 2001](https://doi.org/10.1175/1520-0493%282001%29129%3C0420:ASWTET%3E2.0.CO;2))
        + Perturbed observation method (PO; [Burgers et al. 1998](https://doi.org/10.1175/1520-0493%281998%29126%3C1719:ASITEK%3E2.0.CO;2), [Houtekamer et al. 2005](https://doi.org/10.1175/MWR-2864.1))
        + Serial ensemble square root filter (EnSRF; [Whitaker and Hamill 2002](https://doi.org/10.1175/1520-0493%282002%29130%3C1913:EDAWPO%3E2.0.CO;2))
        + Ensemble adjustment Kalman filter (EAKF, local least-square formulation; [Anderson 2003](https://doi.org/10.1175/1520-0493%282003%29131%3C0634:ALLSFF%3E2.0.CO;2))
        + Local ensemble transform Kalman filter (LETKF; [Hunt et al. 2007](https://doi.org/10.1016/j.physd.2006.11.008))
    * Maximum likelihood ensemble filter (MLEF; [Zupanski 2005](https://doi.org/10.1175/MWR2946.1), [Zupanski et al. 2008](https://doi.org/10.1002/qj.251))
    * Ensemble variational method (EnVar; [Liu et al. 2008](https://doi.org/10.1175/2008MWR2312.1))

## Requirements

- numpy for the model and DA
- pandas for error statistics
- matplotlib for plots


In [1]:
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from analysis.obs import Obs

In [None]:
# model parameter
# Configure the Lorenz96 model and build the forecast operator used by
# the cells below.
from model.lorenz import L96

# NOTE: at the top level of a cell `global` has no effect; it is kept only
# to mirror the names this cell exports.
global nx, F, dt

nx, F = 40, 8.0  # number of points, forcing
dt = 0.05 / 6    # time step (=1 hour)

# forecast model forward operator
step = L96(nx, dt, F)

In [3]:
# experimental parameter
# All cycling-experiment settings live in one dict; later cells add more
# keys to it.
params = {
    "t0off": 8,     # initial offset between adjacent members
    "t0c": 500,     # initial time (t0) for control
    "nt": 6,        # number of step per forecast (=6 hours)
    "na": 100,      # number of analysis
    "namax": 1460,  # max number of analysis (1 year)
}

In [4]:
# observation settings
params["nobs"] = 40  # number of observation (less than or equal to nx)
## op = observation type (must be one of the keys of `sigma`)
params["op"] = "linear"
## observation error for each observation type
sigma = {
    "linear": 1.0,
    "quadratic": 8.0e-1,
    "cubic": 7.0e-2,
    "quadratic-nodiff": 8.0e-1,
    "cubic-nodiff": 7.0e-2,
    "test": 1.0,
}
## forecast type ("ensemble" or "deterministic") for each DA type;
## params["pt"] must be one of these keys.
## "4dvar" and "4dmlef" are the 4-dimensional names the analysis-forecast
## cycle cell dispatches on (it tests pt[:2] == "4d"); without entries here
## selecting them fails with KeyError at `ftype[pt]`.
## NOTE(review): "var4d" is kept for backward compatibility -- confirm which
## spelling get_daclass expects.
ftype = {
    "mlef": "ensemble",
    "etkf": "ensemble",
    "po": "ensemble",
    "srf": "ensemble",
    "eakf": "ensemble",
    "letkf": "ensemble",
    "kf": "deterministic",
    "var": "deterministic",
    "var4d": "deterministic",
    "4dvar": "deterministic",
    "4dmlef": "ensemble",
}

In [10]:
# DA settings
# Added to `params` in one call; keys, order and values are unchanged.
params.update({
    "pt": "mlef",        # pt = DA type
    "nmem": 40,          # ensemble size
    "linf": True,        # inflation switch (True=Apply, False=Not apply)
    "infl_parm": 1.1,    # multiplicative inflation parameter
    "lloc": False,       # localization switch (True=Apply, False=Not apply)
    # iloc: None -> No localization
    #       = 0  -> R-localization
    #       = 1  -> EVD of localized Pf
    #       = 2  -> Modulated ensemble
    "iloc": 0,
    "lsig": 8.0,         # localization radius
    "ltlm": False,       # tangent linear operator switch (True=Use, False=Not use)
    "a_window": 1,       # assimilation window length (for 4-dimensional DA)
    "sigb": 0.6,         # (for var & 4dvar) background error standard deviation
    "lb": -1.0,          # (for var & 4dvar) correlation length for background error covariance
})

In [None]:
from main import get_daclass, main
from exp_func import Exp_func

# module setting
# Resolve the observation type, the DA type and the matching forecast type,
# then build the objects that drive the experiment.
global op, pt, ft
op, pt = params["op"], params["pt"]
params["ft"] = ft = ftype[pt]  # forecast type looked up from the DA type

# observation operator
obs = Obs(op, sigma[op])

# import DA class
analysis = get_daclass(params, step, obs, "l96")

# load experimental tools
func = Exp_func("l96", step, obs, params, save_data=False)

In [None]:
# Run the analysis-forecast cycle: for each assimilation window, assimilate
# the observations, store the analysis, advance the model to the next
# window, and record the RMSE against the truth and the analysis error
# standard deviation.

# get truth and make observation
xt, yobs = func.get_true_and_obs()
# initialize all variables
u, xa, xf, pa = func.initialize()
pf = analysis.calc_pf(u, cycle=0)

# analysis-forecast cycle time slots
na = params["na"]
a_window = params["a_window"]
# one analysis every a_window cycles
a_time = range(0, na, a_window)
e = np.zeros(na) # RMSE between truth and analysis
stda = np.zeros(na) # analysis error standard deviation
for i in a_time:
    # read observation
    # the last axis of yobs holds two fields: index 0 is read as yloc and
    # index 1 as y (presumably observation location and observed value)
    yloc = yobs[i:i+a_window,:,0]
    y = yobs[i:i+a_window,:,1]
    # analysis
    # NOTE(review): this branch needs a DA type starting with "4d", while
    # ftype spells its 4-dimensional entry "var4d" -- confirm the naming.
    # NOTE(review): the two calls pass the cycle index under different
    # keywords (cycle= vs icycle=) -- confirm against the DA classes.
    if pt[:2] == "4d": # assimilate observations at different time (4-dimensional)
        u, pa = analysis(u, pf, y, yloc, cycle=i)
    else: # assimilate observations at a time
        u, pa = analysis(u, pf, y[0], yloc[0], icycle=i)
    # save analysis (mean)
    if ft=="ensemble":
        if pt == "mlef" or pt == "4dmlef":
            # column 0 is stored as the analysis (presumably the control member)
            xa[i] = u[:, 0]
        else:
            # average over axis 1 (presumably the ensemble members)
            xa[i] = np.mean(u, axis=1)
    else:
        xa[i] = u
    if i < na-1:
        # forecast
        if a_window > 1: # 4-dimensional
            um, uf = func.forecast(u)
            if (i+1+a_window <= na):
                # the whole window fits: store every forecast step as both
                # analysis and forecast
                xa[i+1:i+1+a_window] = um
                xf[i+1:i+1+a_window] = um
                ii = 0
                for k in range(i+1,i+1+a_window):
                    if pt=="4dvar":
                        stda[k] = np.sqrt(np.trace(pa)/nx)
                    else:
                        patmp = analysis.calc_pf(uf[ii], pa=pa, cycle=k)
                        stda[k] = np.sqrt(np.trace(patmp)/nx)
                    ii += 1
            else:
                # the window runs past the last cycle: keep only the steps
                # that fall inside [i+1, na)
                xa[i+1:na] = um[:na-i-1]
                xf[i+1:na] = um[:na-i-1]
                ii = 0
                for k in range(i+1,na):
                    if pt=="4dvar":
                        stda[k] = np.sqrt(np.trace(pa)/nx)
                    else:
                        patmp = analysis.calc_pf(uf[ii], pa=pa, cycle=k)
                        stda[k] = np.sqrt(np.trace(patmp)/nx)
                    ii += 1
            # the last forecast step becomes the background of the next analysis
            u = uf[-1]
            um = um[-1]
            pf = analysis.calc_pf(u, pa=pa, cycle=i+1)
        else:
            um, u = func.forecast(u)
            pf = analysis.calc_pf(u, pa=pa, cycle=i+1)
        # save forecast (mean)
        # NOTE(review): when a_window > 1, um is by now the LAST step of the
        # window, so this overwrites the xf[i+1] stored by the slice
        # assignment above -- confirm this is intended.
        xf[i+1] = um
    # calculate RMSE & spread
    if a_window > 1: # 4-dimensional
        for k in range(i, min(i+a_window,na)):
            e[k] = np.sqrt(np.mean((xa[k, :] - xt[k, :])**2))
    else:
        e[i] = np.sqrt(np.mean((xa[i, :] - xt[i, :])**2))
    # square root of the mean diagonal element of pa
    stda[i] = np.sqrt(np.trace(pa)/nx)

In [None]:
# Plot the analysis RMSE and the analysis error standard deviation for each
# DA cycle, with the observation error drawn as a constant reference level.
obs_s = sigma[op]
t = np.arange(params["na"]) + 1  # 1-based DA cycle numbers
y = np.ones(t.size) * obs_s      # constant line at the observation error
fig, ax = plt.subplots()
ax.plot(t, e, label='RMSE')
ax.plot(t, stda, ls='dashed', label='STDA')
ax.plot(t, y, linestyle="dotted", color="black", label="observation error")
ax.set(xlabel="DA cycle", title=pt+" "+op)
# Major ticks every 10 cycles, minor ticks every 5. Minor ticks have to be
# finer than the major ones: a minor location that coincides with a major
# tick is not drawn, so a coarser minor spacing would show nothing.
ax.set_xticks(t[::10])
ax.set_xticks(t[::5], minor=True)
ax.legend()
plt.show()

In [None]:
from matplotlib.colors import Normalize

# Space-time panels (x axis: site, y axis: DA cycle) of the truth, the
# analysis, and the analysis error. Truth and analysis share one colour
# scale, so only the analysis panel carries its colorbar.
x = np.arange(nx)
fig, axs = plt.subplots(ncols=3, sharey=True, constrained_layout=True)
vmin = -10
vmax = 10
p0 = axs[0].pcolormesh(x, t, xt, shading='auto', cmap='coolwarm', norm=Normalize(vmin=vmin, vmax=vmax))
axs[0].set_title('truth')
p1 = axs[1].pcolormesh(x, t, xa, shading='auto', cmap='coolwarm', norm=Normalize(vmin=vmin, vmax=vmax))
axs[1].set_title('analysis')
fig.colorbar(p1, ax=axs[1])
# the error panel uses its own, narrower colour range
p2 = axs[2].pcolormesh(x, t, xa-xt, shading='auto', cmap='coolwarm', norm=Normalize(vmin=-2.0, vmax=2.0))
axs[2].set_title('error')
fig.colorbar(p2, ax=axs[2])
# Aim for about eight ticks per axis. The step is clamped to 1 because
# nx < 8 or na < 8 would otherwise give a zero slice step (ValueError).
xstep = max(1, nx // 8)
ystep = max(1, na // 8)
for ax in axs:
    ax.set_xticks(x[::xstep])
    ax.set_yticks(t[::ystep])
    ax.set_ylim(t[-1], t[0])  # reversed limits: the first cycle is at the top
    ax.set_xlabel('site')
axs[0].set_ylabel('DA cycle')
plt.show()