# Lorenz 96 problem setup for estimating spatially dependent forcing and algorithm testing
By: Rebecca Gjini 


In [None]:
#Import statements
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numba
from numba import jit, njit
from scipy.linalg import sqrtm
from scipy.optimize import least_squares
import main.EnsembleKalmanAlgorithms as EKA
import main.l96.mL96_model as ml96
from cycler import cycler

# Plot preferences (alternative palette: from palettable.colorbrewer.qualitative import Set1_9)
from palettable.colorbrewer.qualitative import Dark2_8

# Plot preferences, applied in one pass to matplotlib's global rcParams
plt.rcParams.update({
    # Figure geometry and resolution
    'figure.figsize': [11.0, 9.0],   # size (w, h)
    'figure.dpi': 80,
    'savefig.dpi': 400,

    # Text sizes
    'font.size': 18,                 # default text size
    'legend.fontsize': 'large',      # legend text
    'figure.titlesize': 'large',     # figure title
    'axes.titlesize': 18,            # axes title
    'axes.labelsize': 32,            # x and y axis labels
    'xtick.labelsize': 20,           # x tick labels
    'ytick.labelsize': 20,           # y tick labels

    # Hide the right and top axis lines
    'axes.spines.right': False,
    'axes.spines.top': False,

    "font.family": "Times New Roman",

    # Line colours cycle through the Dark2_8 palette
    'axes.prop_cycle': cycler(color=Dark2_8.mpl_colors),
})

In [None]:
# Directory prefix used by the (commented-out) np.savetxt calls in this notebook
file_path = "main/l96/mp/data/"

# Seed NumPy's global random generator so the draws below are reproducible
np.random.seed(2)

# Model variables for the synthetic data
nx = 40  # dimension of the parameter vector
# Forcing: a sinusoid about 8 with amplitude 6, two full periods over the nx indices
gamma = 8 + 6*np.sin((4*np.pi*np.arange(nx))/nx)

t = 0.01       # time step
T_long = 1000  # total time

# Beginning state: nx independent draws from U(0, 1)
int_state = np.random.uniform(low=0, high=1, size=nx)

# plt.figure()
# plt.plot(np.arange(0,40,1), gamma)
# plt.plot(np.arange(40,80,1), gamma)
# plt.show()

In [None]:
# Find the initial condition for the data: integrate the model from the random
# starting state int_state over T_long with step t, under the true forcing gamma
spin_up_array = ml96.runge_kutta_v(gamma, int_state, t, T_long)
# Initial condition used for the data: the last entry of the spin-up output
x0 = spin_up_array[-1]
# np.savetxt(file_path + 'x0.txt', x0, delimiter = ',') 
print(x0)

In [None]:
# Create the synthetic data
window = 50
T = 4 + window  # total time units, including the 4-unit cut-off
ny = 2*nx       # number of data points

# Model output at the true forcing; a zero matrix is passed as G's last argument
model_out_y = ml96.G(gamma, x0, t, T, np.zeros((nx, nx)))

# Data covariance: sample statistics over consecutive windows of one long run
T_cov = 40004
# The first int(4/t) entries of the run are dropped before windowing
RKgam = ml96.runge_kutta_v(gamma, x0, t, T_cov)[int(4/t):]
gam = np.zeros((ny, int((T_cov - 4)/window)))
for ii, i in enumerate(range(0, T_cov - 4, window)):
    # i advances in steps of `window`, so ii is the window counter i/window
    segment = RKgam[int(i/t):int((i + window)/t)]
    gam[:nx, ii] = np.mean(segment, axis=0)         # first nx rows: per-window mean
    gam[nx:, ii] = np.std(segment, axis=0, ddof=1)  # last nx rows: per-window sample std

# Covariance across windows, scaled by 2
R = 2*np.cov(gam)

R_sqrt_in = EKA.matrix_inv_sqrt(R)
R_sqrt = EKA.matrix_sqrt(R)

# Observations y: the model output itself (no noise is added here)
y = model_out_y
print(y)

pl = 2    # length scale of the prior correlation, in index units
psig = 3  # prior standard deviation (the diagonal of B is psig**2)

# Prior covariance: B[i, j] = psig**2 * exp(-|i - j| / pl).
# The pairwise index differences are built in one broadcast step instead of
# filling the matrix entry by entry in a Python double loop.
B = psig**2 * np.exp(-np.abs(np.subtract.outer(np.arange(nx), np.arange(nx)))/pl)
B_sqrt = EKA.matrix_sqrt(B)

# Prior mean: a constant forcing of 8 at every index
mu = 8*np.ones(nx)

# Solving for initial condition perturbation covariance
covT = 2000  #time to simulate to calculate a covariance matrix of the system
cov_solve = ml96.runge_kutta_v(gamma, x0, t, covT)
# Covariance of the transposed run output, scaled down by a factor of 0.1
# (assumes cov_solve has one row per time step -- TODO confirm against ml96)
ic_cov = 0.1*np.cov(cov_solve.T)
ic_cov_sqrt =  EKA.matrix_sqrt(ic_cov)

# Save problem data, prior, and covariances
# np.savetxt(file_path + 'y.txt', y, delimiter = ',') 
# np.savetxt(file_path + 'R.txt', R, delimiter = ',')
# np.savetxt(file_path + 'mu.txt', mu, delimiter = ',')
# np.savetxt(file_path + 'B.txt', B, delimiter = ',')
# np.savetxt(file_path + 'ic_cov_sqrt.txt', ic_cov_sqrt, delimiter = ',')

# Show the prior covariance matrix B as an image
plt.figure()
plt.imshow(B)
plt.show()


In [None]:
def loss(gammas, x_0s, ts, Ts, ic_cov_sqrts, ys, R_sqrt_ins, mus, B_sqrt_ins):
    """Return the weighted root-mean-square data misfit for forcing ``gammas``.

    The residual ``ys - ml96.G(gammas, x_0s, ts, Ts, ic_cov_sqrts)`` is
    left-multiplied by ``R_sqrt_ins``; the result is
    ``sqrt(||R_sqrt_ins @ residual||**2 / len(ys))``.

    Parameters
    ----------
    gammas : forcing vector handed to ``ml96.G`` as its first argument.
    x_0s, ts, Ts, ic_cov_sqrts : forwarded unchanged to ``ml96.G``.
    ys : observation vector; its length normalises the misfit.
    R_sqrt_ins : matrix applied to the residual (callers in this file pass
        the inverse square root of the data covariance).
    mus, B_sqrt_ins : prior mean and weighting matrix. They are accepted for
        signature compatibility only: the prior term
        ``0.5*||B_sqrt_ins @ (mus - gammas)||**2`` is not part of the
        returned value.

    Returns
    -------
    The scalar data misfit described above.
    """
    residual = R_sqrt_ins@(ys - ml96.G(gammas, x_0s, ts, Ts, ic_cov_sqrts))
    # Normalise by the length of the observations that were passed in rather
    # than by a notebook-level global, so the function is correct for any `ys`.
    # 2*(0.5*a) equals a exactly in floating point, so for len(ys) == len(y)
    # this returns the same value as the earlier expression.
    return np.sqrt(np.linalg.norm(residual)**2/len(ys))

# Inverse square root of the prior covariance, passed through to loss
B_sqrt_in = EKA.matrix_inv_sqrt(B)
#y = model_out_y + R_sqrt@np.random.normal(0, 1, size = (len(model_out_y)))

# Loss at the true forcing, with the initial-condition matrix zeroed out
truth = loss(gamma, x0, t, T, 0*ic_cov_sqrt, y, R_sqrt_in, mu, B_sqrt_in)

# One-dimensional slices of the loss: a single forcing component is varied
# over [-5, 5) around its true value while the others stay at the truth
indices = np.arange(0, 20, 2)
for ii in indices:
    gamma_vals = gamma[ii] + np.arange(-5, 5, 0.1)
    cost_function = np.zeros(len(gamma_vals))
    for jj, gamma_val in enumerate(gamma_vals):
        # True forcing with only component ii replaced
        gamma_j = gamma.copy()
        gamma_j[ii] = gamma_val
        cost_function[jj] = loss(gamma_j, x0, t, T, 0*ic_cov_sqrt, y,
                                 R_sqrt_in, mu, B_sqrt_in)

    plt.figure()
    plt.plot(gamma_vals, cost_function, linewidth=3)
    # Dashed vertical line at the true value of this component
    plt.axvline(x=gamma[ii], linewidth=3, linestyle='--', c='dimgrey')
    #plt.scatter(gamma[ii], truth, c = 'b', s = 100)
    #plt.title('Cost function of rho')
    plt.ylabel('Loss')
    plt.xlabel('$\\Gamma$ at index = %d' %ii)
    plt.show()


In [None]:
# Initialising the EKI ensemble
K = 60         # number of ensemble members
max_runs = 20  # maximum number of runs
N_t = nx       # we estimate the forcing

# Standard-normal draws: one row per parameter, one column per ensemble member
u = np.random.normal(loc=0, scale=1, size=(N_t, K))

In [None]:
# TEKI test: run EKA.TEKI from the ensemble u with the 'rmse' method
teki_u, teki_f, _ = EKA.TEKI(
    ml96.G, u, (x0, t, T, ic_cov_sqrt), y, R, mu, B,
    min_rmse=1, method='rmse',
    tol_x=1e-4, tol_f=1e-4, max_iter=max_runs,
)
print(teki_f)
# Forward model evaluated at the mean of teki_u over axis 1
ft = ml96.G(np.mean(teki_u, axis=1), x0, t, T, ic_cov_sqrt)
# Weighted RMSE of that output against y (bare expression: shown as the cell result)
np.sqrt((np.linalg.norm(R_sqrt_in@(y - ft))**2)/len(y))

In [None]:
# Plot the TEKI estimate: the mean of teki_u over axis 1, against its index
plt.figure()
plt.plot(np.mean(teki_u, axis = 1))
plt.show()

In [None]:
# UKI test: unlike the other EKA calls in this notebook, no ensemble u is passed in
uki_u, uki_f, _ = EKA.UKI(
    ml96.G, (x0, t, T, ic_cov_sqrt), y, R, mu, B,
    min_rmse=1.0, method='rmse',
    tol_x=1e-4, tol_f=1e-4, max_iter=max_runs,
)
print(uki_f)
# Forward model evaluated at the mean of uki_u over axis 1
fu = ml96.G(np.mean(uki_u, axis=1), x0, t, T, ic_cov_sqrt)
# Weighted RMSE of that output against y (bare expression: shown as the cell result)
np.sqrt((np.linalg.norm(R_sqrt_in@(y - fu))**2)/len(y))

In [None]:
# ETKI test: run EKA.ETKI from the ensemble u with method 'all'
# NOTE(review): `exit` shadows the interpreter's exit helper; the name is kept
# because other cells may read it.
etki_u, etki_f, exit = EKA.ETKI(
    ml96.G, u, (x0, t, T, ic_cov_sqrt), y, R, mu, B,
    min_rmse=1.0, method='all',
    tol_x=1e-4, tol_f=1e-4, max_iter=max_runs,
)
print(exit)
print(etki_f)
# Forward model evaluated at the mean of etki_u over axis 1
fe = ml96.G(np.mean(etki_u, axis=1), x0, t, T, ic_cov_sqrt)
# Weighted RMSE of that output against y (bare expression: shown as the cell result)
np.sqrt((np.linalg.norm(R_sqrt_in@(y - fe))**2)/len(y))

In [None]:
# IEKF test: run EKA.IEKF from the ensemble u with alpha = 1 and the 'rmse' method
# NOTE(review): `exit` shadows the interpreter's exit helper; the name is kept
# because other cells may read it.
iekf_u, iekf_f, exit = EKA.IEKF(
    ml96.G, u, (x0, t, T, ic_cov_sqrt), y, R, mu, B,
    alpha=1, min_rmse=1.0, method='rmse',
    tol_x=1e-4, tol_f=1e-4, max_iter=max_runs,
)
print(iekf_f)
print(exit)
# Forward model evaluated at the mean of iekf_u over axis 1
fg = ml96.G(np.mean(iekf_u, axis=1), x0, t, T, ic_cov_sqrt)
# Weighted RMSE of that output against y (bare expression: shown as the cell result)
np.sqrt((np.linalg.norm(R_sqrt_in@(y - fg))**2)/len(y))

In [None]:
# Levenberg-Marquardt least-squares fit of the residual ml96.r, started from
# the first column of the ensemble u
solution = least_squares(
    ml96.r, u[:, 0],
    args=(x0, t, T, ic_cov_sqrt, y, R_sqrt_in, mu, B_sqrt),
    method='lm', xtol=1e-8, ftol=1e-08,
)
print(solution.nfev)
# Map the optimiser's solution through B_sqrt and shift by the prior mean
lm_estimate = B_sqrt@solution.x + mu
print(lm_estimate)
print(solution.status)
# Forward model at that estimate, with the initial-condition matrix zeroed out
ft = ml96.G(lm_estimate, x0, t, T, 0*ic_cov_sqrt)
print('RMSE:', np.sqrt((np.linalg.norm(R_sqrt_in@(y - ft))**2)/ny))
# print(B_sqrt@u[:,0] + mu)

In [None]:
# Compare every estimate with the true forcing, component by component
FI = np.arange(nx)  # index of each forcing component
plt.figure()
plt.plot(FI, gamma, label='true forcing', c='black', linewidth=2)
# Estimates are drawn in this fixed order so each keeps its colour-cycle slot
for estimate, name in (
    (np.mean(teki_u, axis=1), 'teki'),
    (np.mean(uki_u, axis=1), 'uki'),
    (np.mean(etki_u, axis=1), 'etki'),
    (np.mean(iekf_u, axis=1), 'iekf'),
    (B_sqrt@solution.x + mu, 'lmfd'),
):
    plt.plot(FI, estimate, label=name)
# NOTE(review): the curves are forcing estimates against component index, so
# these axis labels look carried over from another plot -- confirm the wording.
plt.xlabel('Data point')
plt.ylabel('Model output')
plt.legend()
plt.show()