In [None]:
from pathlib import Path
import numpy as np
import xarray as xr

import matplotlib.pyplot as plt
from kalman_reconstruction.kalman import (
    Kalman_SEM,
)

In [None]:
# Directory below which every figure of this notebook is written.
PATH_FIGURES = Path("../results/figures/idealized_ocean")
# Set to False to run the notebook without writing any figure files.
SAVE_FIGURES = True


def save_fig(fig, relative_path, **kwargs):
    """Save ``fig`` below ``PATH_FIGURES`` if ``SAVE_FIGURES`` is enabled.

    Parameters
    ----------
    fig : object with a ``savefig`` method
        The figure to write (e.g. a matplotlib Figure).
    relative_path : str or path-like
        Location of the output file relative to ``PATH_FIGURES``.
    **kwargs
        Forwarded unchanged to ``fig.savefig``.

    Notes
    -----
    The parent directory of the target is created when it does not exist,
    so saving also works on a fresh checkout without a results folder.
    Nothing happens when ``SAVE_FIGURES`` is false.
    """
    if not SAVE_FIGURES:
        return
    target = PATH_FIGURES / relative_path
    # savefig does not create missing directories itself.
    target.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(target, **kwargs)

In [None]:
# Font sizes shared by the rc settings below.
MEDIUM_SIZE = 12
BIGGER_SIZE = 15

# Apply the base style first; the explicit settings below are applied on top.
plt.style.use("seaborn-v0_8-whitegrid")

# Default figure size (set once; an earlier conflicting value was removed).
plt.rcParams["figure.figsize"] = (10.0, 6.0)

plt.rc("font", size=MEDIUM_SIZE)  # controls default text sizes
plt.rc("axes", titlesize=BIGGER_SIZE)  # fontsize of the axes title
plt.rc("axes", labelsize=MEDIUM_SIZE)  # fontsize of the x and y labels
# Only draw the left and bottom axes spines.
plt.rc("axes.spines", left=True, right=False, bottom=True, top=False)
plt.rc("xtick", labelsize=MEDIUM_SIZE)  # fontsize of the tick labels
plt.rc("ytick", labelsize=MEDIUM_SIZE)  # fontsize of the tick labels
plt.rc("legend", fontsize=MEDIUM_SIZE)  # legend fontsize
plt.rc("figure", titlesize=BIGGER_SIZE)  # fontsize of the figure title
plt.rc("legend", loc="upper right")

# Use colorblind-safe colors, see https://davidmathlogic.com/colorblind
colors = [
    "#CC6677",
    "#6E9CB3",
    "#CA8727",
    "#44AA99",
    "#AA4499",
    "#D6BE49",
    "#A494F5",
]
# Make the palette the default color cycle of all axes.
plt.rcParams["axes.prop_cycle"] = plt.cycler(color=colors)


def plot_colors(colors):
    """Preview a palette as a row of large markers.

    Parameters
    ----------
    colors : iterable
        Color specifications; each one is passed as ``c`` to ``scatter``.

    Returns
    -------
    tuple
        The ``(fig, axs)`` pair returned by ``plt.subplots``.
    """
    fig, ax = plt.subplots(figsize=(5, 1))
    # One marker per color, placed at x = 0, 1, 2, ... on a common height.
    for position, swatch in enumerate(colors):
        ax.scatter(position, 1, c=swatch, s=300)

    # The vertical axis carries no information, so hide its ticks.
    ax.set_yticks([])
    return fig, ax

## Idealized ocean - reconstruct hidden component

The idea is to use two simplified ocean models:
1. "Sponge" Ocean which is only made up of a single layer which has a restoring timescale.
2. "Oscillator" Ocean, which is made up of two layers (surface and deep ocean) that have an oscillation timescale.

The idea is to use the methodology of retrieving hidden components in a dynamical system to identify whether hidden components exist.

**Experiment Set-Up**
1. Use *Surface Air Temperature* and *Sponge Sea Surface Temperature* in the Algorithm.
2. Use an additional random initialized component *$z_1$* in the Algorithm.

Different models:
1. *Sponge ocean*: Use *Surface Air Temperature* and *Sponge Sea Surface Temperature* in the Algorithm. Validate if the retrieved component *$z_1$* gives any improvements.
2. *Oscillator ocean*: Use *Surface Air Temperature* and *Oscillator Sea Surface Temperature* in the Algorithm. Do not use *Oscillator Deep Ocean Temperature*. Validate if the retrieved component *$z_1$* is connected to hidden components.



### Load data from netcdf file

In [None]:
# Open the idealized ocean model output and remove the generic
# "sea_surface_temperature" variable; only the sponge and oscillator
# variants are plotted below.
data = xr.open_dataset("..//data/idealized_ocean_model.nc")
# `Dataset.drop` is deprecated in xarray; `drop_vars` is its replacement
# for removing variables.
data = data.drop_vars("sea_surface_temperature")

# Quick look at both sea surface temperature series on one explicit axes.
fig, ax = plt.subplots()
for var in ["sponge_sea_surface_temperature", "oscillator_sea_surface_temperature"]:
    data[var].plot(x="time_years", label=var, ax=ax)
ax.legend()

In [None]:
def dataset_variables_to_array(ds, variables=("sea_surface_temperature",)):
    """Stack selected variables of ``ds`` into a single numpy array.

    Parameters
    ----------
    ds : mapping-like
        Object that supports ``name in ds.variables`` and ``ds[name]``
        (for example an ``xarray.Dataset``).
    variables : iterable of str
        Names of the variables to extract, in output order.

    Returns
    -------
    numpy.ndarray
        ``np.array([ds[name] for name in variables])`` with its first two
        axes swapped. For one-dimensional variables of length ``T`` the
        result has shape ``(T, len(variables))``.

    Raises
    ------
    ValueError
        If ``variables`` is empty, or if the selected variables do not all
        have the same length.
    KeyError
        If a requested name is not contained in ``ds.variables``.
    """
    names = list(variables)
    if not names:
        # Without this guard an empty selection surfaced as a bare StopIteration.
        raise ValueError("At least one variable name is required.")

    arrays = []
    for name in names:
        if name not in ds.variables:
            raise KeyError(f"Variable '{name}' is not a key of the provided Dataset.")
        arrays.append(ds[name])

    # All variables must share the same length to be stacked.
    first_length = len(arrays[0])
    if any(len(arr) != first_length for arr in arrays[1:]):
        raise ValueError("not all lists have same length!")

    # (variable, sample) -> (sample, variable)
    return np.array(arrays).swapaxes(0, 1)

## Time independent reconstruction

### Important parameters

In [None]:
# Seed NumPy's global random generator so that the np.random draws of this
# notebook are repeatable under "Restart Kernel -> Run All".
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# index of the unobserved component
i_unobs_comp = 0

# variance of the random white noise of z
variance_unobs_comp = 1

# variance of the observation error used in Kalman
variance_obs_comp = 0.0001

# number of SEM iterations
nb_iter_SEM = 30

## Use the Sponge ocean

#### Only using the known variables

In [None]:
# Sponge experiment: select the variables that form the state and the
# coordinate used as x-axis in the plots below.
time = data.time_years
variables = [
    "surface_air_temperature",
    "sponge_sea_surface_temperature",
]
state_array_true = dataset_variables_to_array(data, variables=variables)

In [None]:
# Observations and state: no extra component is added here, so the state
# starts as a copy of the observations. (A random `z` used to be drawn at
# this point but was never used; it has been removed.)
y = state_array_true.copy()
x = y.copy()

tab_labels = variables.copy()

# shapes: n columns of the state, p columns of the observations
n = np.shape(x)[1]
p = np.shape(y)[1]

# plot the components
fig, axs = plt.subplots(nrows=1, ncols=1)
for label, series in zip(tab_labels, state_array_true.T):
    axs.plot(time, series, label=label)
axs.set_xlabel("years")
axs.set_ylabel("Temperature in K")
axs.legend(loc=1)
axs.set_title("Sponge Ocean Observed components")
fig.tight_layout()
save_fig(fig, "sponge-observed")

In [None]:
# Kalman set-up: H is the n x n identity (every state column is observed),
# R is a diagonal matrix with the observation error variance.
H = np.identity(n)
R = np.identity(p) * variance_obs_comp

# Run the stochastic EM. Note that `x` is rebound to one of the outputs.
(
    x_s_V0,
    P_s_V0,
    M_V0,
    loglik_V0,
    x,
    x_f_V0,
    Q_V0,
) = Kalman_SEM(x, y, H, R, nb_iter_SEM)

In [None]:
# Plot every column of x_s_V0 with a band of +/- 1.96 * sqrt(P_s_V0[:, i, i]).
fig, axs = plt.subplots(1, 1)
for i in range(n):
    center = x_s_V0[:, i]
    half_width = 1.96 * np.sqrt(P_s_V0[:, i, i])
    axs.plot(time, center, color=colors[i], label=tab_labels[i])
    axs.fill_between(
        time,
        center - half_width,
        center + half_width,
        facecolor=colors[i],
        alpha=0.30,
    )
axs.set_xlabel("years")
axs.set_ylabel("Temperature in K")
axs.legend(loc=1)
axs.set_title("Sponge Ocean Kalman result")
fig.tight_layout()
save_fig(fig, "sponge-kalman")

In [None]:
# Shared line/marker styling for all panels of this figure.
kwargs = dict(
    linewidth=1,
    alpha=0.3,
    marker=".",
)
# One row per variable: left = reconstruction against truth,
# right = difference (truth - reconstruction) over time.
fig, axs = plt.subplots(ncols=2, nrows=2)
for idx, var in enumerate(variables):
    truth = data[var]
    reconstruction = x_s_V0[:, idx]
    ax_truth, ax_diff = axs[idx]

    ax_truth.plot(truth, reconstruction, **kwargs)
    ax_truth.scatter(truth, reconstruction, **kwargs)
    # The first 5 samples are left out of the difference plot.
    ax_diff.plot(time[5:], (truth - reconstruction)[5:], **kwargs)

    # set titles
    ax_truth.set_title(f"{var}")
    ax_diff.set_title(f"{var}")
    # set labels
    ax_truth.set_xlabel("truth")
    ax_truth.set_ylabel("reconstruction")
    ax_diff.set_xlabel("time")
    ax_diff.set_ylabel("difference")

fig.tight_layout()
save_fig(fig, "sponge-correlation")

### Introducing random variable $z_1$

In [None]:
# Labels: the observed variables plus the additional random component z_1.
tab_labels = list(variables)
tab_labels.append(r"$z_1 $ = Random$(\mathcal{N}(0,\sigma^2))$")

# state
y = state_array_true.copy()
# np.random.normal expects the standard deviation as `scale`, so the
# configured variance is converted with a square root.
z = np.random.normal(
    loc=0, scale=np.sqrt(variance_unobs_comp), size=np.shape(y)[0]
)
# State = the two observed columns followed by the random component.
x = np.array([y[:, 0], y[:, 1], z]).T

# shapes: n columns of the state, p columns of the observations
n = np.shape(x)[1]
p = np.shape(y)[1]

In [None]:
# Plot all columns of the state, including the random component z_1.
fig, axs = plt.subplots(1, 1)
for i in range(n):
    axs.plot(time, x[:, i], color=colors[i], label=tab_labels[i])
axs.set_xlabel("years")
axs.set_ylabel("Temperature in K")
# The legend is created once (it used to be created twice in this cell).
axs.legend(loc=1)
axs.set_title(r"Sponge Ocean include $z_1$")
fig.tight_layout()
save_fig(fig, "sponge-z1-observations")

In [None]:
# Kalman set-up: H is the n x n identity without its row of index 2, i.e.
# the third state column is not part of the observations. R is a diagonal
# matrix with the observation error variance.
identity_n = np.identity(n)
H = np.delete(identity_n, 2, axis=0)
R = np.identity(p) * variance_obs_comp

# Run the stochastic EM. Note that `x` is rebound to one of the outputs.
(
    x_s_V1,
    P_s_V1,
    M_V1,
    loglik_V1,
    x,
    x_f_V1,
    Q_V1,
) = Kalman_SEM(x, y, H, R, nb_iter_SEM)

In [None]:
# Plot every column of x_s_V1 with a band of +/- 1.96 * sqrt(P_s_V1[:, i, i]).
fig, axs = plt.subplots(1, 1)
for i in range(n):
    center = x_s_V1[:, i]
    half_width = 1.96 * np.sqrt(P_s_V1[:, i, i])
    axs.plot(time, center, color=colors[i], label=tab_labels[i])
    axs.fill_between(
        time,
        center - half_width,
        center + half_width,
        facecolor=colors[i],
        alpha=0.30,
    )

axs.set_xlabel("years")
axs.set_ylabel("Temperature in K")
# The legend is created once (it used to be created twice in this cell).
axs.legend(loc=1)
axs.set_title(r"Sponge Ocean include $z_1$ kalman results")
fig.tight_layout()
save_fig(fig, "sponge-z1-kalman")

In [None]:
# Shared line/marker styling for all panels of this figure.
kwargs = dict(
    linewidth=1,
    alpha=0.3,
    marker=".",
)
# One row per variable: left = reconstruction against truth,
# right = difference (truth - reconstruction) over time.
fig, axs = plt.subplots(ncols=2, nrows=2)
for idx, var in enumerate(variables):
    truth = data[var]
    # Use the result of the run that includes z_1 (x_s_V1). This cell used to
    # plot x_s_V0, i.e. the result of the run without z_1, by mistake.
    reconstruction = x_s_V1[:, idx]
    ax_truth, ax_diff = axs[idx]

    ax_truth.plot(truth, reconstruction, **kwargs)
    ax_truth.scatter(truth, reconstruction, **kwargs)
    ax_truth.set_title(f"{var}")
    # The first 5 samples are left out of the difference plot.
    ax_diff.plot(time[5:], (truth - reconstruction)[5:], **kwargs)
    ax_diff.set_title(f"{var}")

    # set labels
    ax_truth.set_xlabel("truth")
    ax_truth.set_ylabel("reconstruction")
    ax_diff.set_xlabel("time")
    ax_diff.set_ylabel("difference")


fig.suptitle(r"Sponge Ocean include $z_1$ correlation")
fig.tight_layout()
save_fig(fig, "sponge-z1-correlation")

## Use the Oscillator ocean

In [None]:
# Oscillator experiment: select the variables that form the state and the
# coordinate used as x-axis in the plots below.
time = data.time_years
variables = [
    "surface_air_temperature",
    "oscillator_sea_surface_temperature",
]
state_array_true = dataset_variables_to_array(data, variables=variables)

In [None]:
# Observations and state: no extra component is added here, so the state
# starts as a copy of the observations. (A random `z` used to be drawn at
# this point but was never used; it has been removed.)
y = state_array_true.copy()
x = y.copy()

tab_labels = variables.copy()

# shapes: n columns of the state, p columns of the observations
n = np.shape(x)[1]
p = np.shape(y)[1]

# plot the components
fig, axs = plt.subplots(1, 1)
for label, series in zip(tab_labels, state_array_true.T):
    axs.plot(time, series, label=label)
axs.set_xlabel("years")
axs.set_ylabel("Temperature in K")
axs.legend(loc=1)
# Title spelling corrected ("Ossilator" -> "Oscillator").
axs.set_title("Oscillator ocean observed components")


fig.tight_layout()
# File name kept as is so existing references to the figure stay valid.
save_fig(fig, "oscillator-observations-observations")

In [None]:
# Kalman set-up: H is the n x n identity (every state column is observed),
# R is a diagonal matrix with the observation error variance.
H = np.identity(n)
R = np.identity(p) * variance_obs_comp

# Run the stochastic EM. Note that `x` is rebound to one of the outputs.
(
    x_s_V0,
    P_s_V0,
    M_V0,
    loglik_V0,
    x,
    x_f_V0,
    Q_V0,
) = Kalman_SEM(x, y, H, R, nb_iter_SEM)

In [None]:
# Plot every column of x_s_V0 with a band of +/- 1.96 * sqrt(P_s_V0[:, i, i]).
fig, axs = plt.subplots(1, 1)
for i in range(n):
    center = x_s_V0[:, i]
    half_width = 1.96 * np.sqrt(P_s_V0[:, i, i])
    axs.plot(time, center, color=colors[i], label=tab_labels[i])
    axs.fill_between(
        time,
        center - half_width,
        center + half_width,
        facecolor=colors[i],
        alpha=0.30,
    )

axs.set_xlabel("years")
axs.set_ylabel("Temperature in K")
axs.legend(loc=1)
# Title spelling corrected ("Ossilator" -> "Oscillator").
axs.set_title("Oscillator ocean kalman results")


fig.tight_layout()
save_fig(fig, "oscillator-kalman")

In [None]:
# Shared line/marker styling for all panels of this figure.
kwargs = dict(
    linewidth=1,
    alpha=0.3,
    marker=".",
)
# One row per variable: left = reconstruction against truth,
# right = difference (truth - reconstruction) over time.
fig, axs = plt.subplots(ncols=2, nrows=2)
for idx, var in enumerate(variables):
    truth = data[var]
    reconstruction = x_s_V0[:, idx]
    ax_truth, ax_diff = axs[idx]

    ax_truth.plot(truth, reconstruction, **kwargs)
    ax_truth.scatter(truth, reconstruction, **kwargs)
    ax_truth.set_title(f"{var}")
    # The first 5 samples are left out of the difference plot.
    ax_diff.plot(time[5:], (truth - reconstruction)[5:], **kwargs)
    ax_diff.set_title(f"{var}")

    # set labels
    ax_truth.set_xlabel("truth")
    ax_truth.set_ylabel("reconstruction")
    ax_diff.set_xlabel("time")
    ax_diff.set_ylabel("difference")

# Title spelling corrected ("Oscilltor" -> "Oscillator").
fig.suptitle(r"Oscillator Ocean correlation")
fig.tight_layout()
save_fig(fig, "oscillator-correlation")

### Introducing random variable $z_1$

In [None]:
# Labels: the observed variables plus the additional random component z_1.
tab_labels = list(variables)
tab_labels.append(r"$z_1$ = Random $(\mathcal{N}(0,\sigma^2))$")

# state
y = state_array_true.copy()
# np.random.normal expects the standard deviation as `scale`, so the
# configured variance is converted with a square root.
# NOTE(review): the label above states a zero mean, but the draw is centred
# on y.mean() -- confirm which one is intended.
z = np.random.normal(
    loc=y.mean(), scale=np.sqrt(variance_unobs_comp), size=np.shape(y)[0]
)
# State = the two observed columns followed by the random component.
x = np.array([y[:, 0], y[:, 1], z]).T

# shapes: n columns of the state, p columns of the observations
n = np.shape(x)[1]
p = np.shape(y)[1]

In [None]:
# Plot all columns of the state, including the random component z_1.
fig, axs = plt.subplots(1, 1)
for i in range(n):
    column = x[:, i]
    axs.plot(time, column, color=colors[i], label=tab_labels[i])
axs.legend(loc=1)
axs.set_xlabel("years")
axs.set_ylabel("Temperature in K")
axs.set_title(r"Oscillator Ocean include $z_1$")
fig.tight_layout()
save_fig(fig, "oscillator-z1-observations")

In [None]:
# Kalman set-up: H is the n x n identity without its row of index 2, i.e.
# the third state column is not part of the observations. R is a diagonal
# matrix with the observation error variance.
identity_n = np.identity(n)
H = np.delete(identity_n, 2, axis=0)
R = np.identity(p) * variance_obs_comp

# Run the stochastic EM. Note that `x` is rebound to one of the outputs.
(
    x_s_V1,
    P_s_V1,
    M_V1,
    loglik_V1,
    x,
    x_f_V1,
    Q_V1,
) = Kalman_SEM(x, y, H, R, nb_iter_SEM)

In [None]:
# Plot every column of x_s_V1 with a band of +/- 1.96 * sqrt(P_s_V1[:, i, i]).
# All drawing goes through the explicit axes object instead of mixing in
# pyplot's implicit "current axes" calls.
fig, axs = plt.subplots(1, 1)
for i in range(n):
    center = x_s_V1[:, i]
    half_width = 1.96 * np.sqrt(P_s_V1[:, i, i])
    axs.plot(time, center, color=colors[i], label=tab_labels[i])
    axs.fill_between(
        time,
        center - half_width,
        center + half_width,
        facecolor=colors[i],
        alpha=0.30,
    )

axs.set_xlabel("years")
axs.set_ylabel("Temperature in K")
# The legend is created once (it used to be created twice in this cell).
axs.legend(loc=1)
axs.set_title(r"Oscillator Ocean include $z_1$ kalman results")
fig.tight_layout()
save_fig(fig, "oscillator-z1-kalman")

In [None]:
# Compare the last column of x_s_V1 (the z_1 component) with the deep ocean
# temperature of the oscillator model.
fig, axs = plt.subplots(1, 1)

i_z1 = n - 1  # index of the last state column
center = x_s_V1[:, i_z1]
half_width = 1.96 * np.sqrt(P_s_V1[:, i_z1, i_z1])
axs.plot(time, center, color=colors[i_z1], label=tab_labels[i_z1])
axs.fill_between(
    time,
    center - half_width,
    center + half_width,
    facecolor=colors[i_z1],
    alpha=0.30,
)
# The deep ocean temperature gets the next color of the palette.
axs.plot(
    time,
    data.oscillator_deep_ocean_temperature,
    label="oscillator_deep_ocean_temperature",
    color=colors[n],
)
# The legend is created once (it used to be created twice in this cell).
axs.legend(loc=1)
axs.set_xlabel("years")
axs.set_ylabel("Temperature in K")

# Title spelling corrected ("Oscilltor" -> "Oscillator").
fig.suptitle(
    r"Oscillator Ocean include $z_1$"
    + "\nCompare Deep Ocean Temperature and $z_1$ kalman results"
)
fig.tight_layout()
# File name kept as is so existing references to the figure stay valid.
save_fig(fig, "oscilltor-z1-comparison")

In [None]:
# Shared line/marker styling; linewidth=None keeps matplotlib's default.
kwargs = dict(
    linewidth=None,
    alpha=0.3,
    marker=".",
)

# The third row compares the deep ocean temperature with the third column
# of x_s_V1 (the z_1 component).
vars_compare = list(variables) + ["oscillator_deep_ocean_temperature"]

# One row per variable: left = reconstruction against truth,
# right = difference (truth - reconstruction) over time.
fig, axs = plt.subplots(ncols=2, nrows=3, figsize=(10, 8))
for idx, var in enumerate(vars_compare):
    truth = data[var]
    reconstruction = x_s_V1[:, idx]
    ax_truth, ax_diff = axs[idx]

    ax_truth.plot(truth, reconstruction, **kwargs)
    ax_truth.set_title(f"{var}")
    ax_truth.set_xlabel("truth")
    ax_truth.set_ylabel("reconstruction")

    # The first 5 samples are left out of the difference plot.
    ax_diff.plot(time[5:], (truth - reconstruction)[5:], **kwargs)
    ax_diff.set_title(f"{var}")
    ax_diff.set_xlabel("time")
    ax_diff.set_ylabel("difference")


# Title spelling corrected ("Oscilltor" -> "Oscillator").
fig.suptitle(r"Oscillator Ocean include $z_1$ correlation")
fig.tight_layout()
# File name kept as is so existing references to the figure stay valid.
save_fig(fig, "oscilltor-z1-correlation")