In [None]:
import numpy as np
import xarray as xr
from kalman_reconstruction import pipeline
from kalman_reconstruction import example_models
from kalman_reconstruction.custom_plot import (
    plot_state_with_probability,
    set_custom_rcParams,
)
import matplotlib.pyplot as plt

# Apply the custom rcParams shipped with kalman_reconstruction.custom_plot,
# then set the default figure size (width, height in inches) for this notebook.
set_custom_rcParams()
plt.rcParams["figure.figsize"] = (8, 5)

In [None]:
# Development aid: re-execute the already imported `pipeline` module so that
# edits made to pipeline.py are picked up without restarting the kernel.
from importlib import reload

reload(pipeline)

<module 'kalman_reconstruction.pipeline' from 'C:\\Users\\Niebaum\\Documents\\Repositories\\kalman-reconstruction-partially-observered-systems\\kalman_reconstruction\\pipeline.py'>

In [None]:
# --- Experiment configuration ---
seed = 39264  # base seed of the random generators created below
variance = 5  # `variance` argument used when adding random latent variables
nb_iter_SEM = 30  # `nb_iter_SEM` argument of every Kalman_SEM run
dt = 0.001
number_loops = 4

# Lorenz-63 data set used as training data.
model_result = example_models.Lorenz_63_xarray(
    time_steps=None, time_length=number_loops, dt=dt
)

# Four generators seeded with seed, seed + 1, seed + 2 and seed + 3.
rng1, rng2, rng3, rng4 = (
    np.random.default_rng(seed=seed + offset) for offset in range(4)
)

In [None]:
# Empty containers; `data` and `result` are filled per experiment name
# (e.g. "0_latent") by the cells below.
data, result, smoothed = {}, {}, {}

### Apply the Kalman SEM using latent variables 


#### 0 latent variables

In [None]:
new_name = "0_latent"

# Baseline run: the state vector contains only the observed components x2, x3.
data[new_name] = model_result.copy()
# Run Kalman_SEM
result[new_name] = pipeline.xarray_Kalman_SEM(
    ds=data[new_name],
    observation_variables=["x2", "x3"],
    state_variables=["x2", "x3"],
    nb_iter_SEM=nb_iter_SEM,
    variance_obs_comp=0.0001,
)

# Plot every estimated state together with the matching diagonal entry of the
# covariance (selected via state_name == state_name_copy).
fig, ax = plt.subplots(1, 1)
for var in result[new_name].state_name:
    plot_state_with_probability(
        ax=ax,
        x_value=result[new_name].time,
        state=result[new_name].states.sel(state_name=var),
        prob=result[new_name].covariance.sel(state_name=var, state_name_copy=var),
        line_kwargs={"label": var.values},
    )

ax.legend()
# ax.set_xlim((0, 2))
ax.set_xlabel("time")
ax.set_ylabel("Values")
# This run uses no latent variable, so the title must not claim otherwise.
ax.set_title("Using no latent variables");

100%|██████████| 30/30 [01:29<00:00,  2.98s/it]


#### 1 latent variable

In [None]:
old_name = "0_latent"
new_name = "1_latent"

# Start from the data set of the previous run.
data[new_name] = data[old_name].copy()
# Add latent variable z1, drawn with rng1
pipeline.add_random_variable(
    data[new_name], var_name="z1", random_generator=rng1, variance=variance
)
# Run Kalman_SEM with the state extended by z1
result[new_name] = pipeline.xarray_Kalman_SEM(
    ds=data[new_name],
    observation_variables=["x2", "x3"],
    state_variables=["x2", "x3", "z1"],
    nb_iter_SEM=nb_iter_SEM,
    variance_obs_comp=0.0001,
)

# Plot every estimated state together with the matching diagonal entry of the
# covariance (selected via state_name == state_name_copy).
fig, ax = plt.subplots(1, 1)
for var in result[new_name].state_name:
    plot_state_with_probability(
        ax=ax,
        x_value=result[new_name].time,
        state=result[new_name].states.sel(state_name=var),
        prob=result[new_name].covariance.sel(state_name=var, state_name_copy=var),
        line_kwargs={"label": var.values},
    )

ax.legend()
# ax.set_xlim((0, 2))
ax.set_xlabel("time")
ax.set_ylabel("Values")
# Exactly one latent variable (z1) is used in this run.
ax.set_title("Using one random latent variable")

# Show the estimated M and Q matrices of this run.
display(result[new_name].M)
display(result[new_name].Q)

100%|██████████| 30/30 [01:23<00:00,  2.77s/it]


In [None]:
# Show the estimated M and Q matrices of the 1-latent-variable run.
display(result["1_latent"].M, result["1_latent"].Q)

#### 2 latent variables

In [None]:
old_name, new_name = "1_latent", "2_latent"

# Start from the previous data set, but replace the random z1 with the z1
# states estimated by the previous Kalman_SEM run.
data[new_name] = data[old_name].copy()
data[new_name]["z1"] = result[old_name].states.sel(state_name="z1")
# Add the second latent variable z2, drawn with rng1
pipeline.add_random_variable(
    data[new_name], random_generator=rng1, variance=variance, var_name="z2"
)
# Run Kalman_SEM with the state extended by z1 and z2
result[new_name] = pipeline.xarray_Kalman_SEM(
    ds=data[new_name],
    state_variables=["x2", "x3", "z1", "z2"],
    observation_variables=["x2", "x3"],
    variance_obs_comp=0.0001,
    nb_iter_SEM=nb_iter_SEM,
)

# One curve per state, with the matching diagonal covariance entry.
sem_result = result[new_name]
fig, ax = plt.subplots(1, 1)
for var in sem_result.state_name:
    plot_state_with_probability(
        ax=ax,
        x_value=sem_result.time,
        state=sem_result.states.sel(state_name=var),
        prob=sem_result.covariance.sel(state_name=var, state_name_copy=var),
        line_kwargs=dict(label=var.values),
    )

ax.legend()
# ax.set_xlim((0, 2))
ax.set(
    xlabel="time",
    ylabel="Values",
    title="Using multiple random latent variables",
);

100%|██████████| 30/30 [01:26<00:00,  2.88s/it]


In [None]:
# Show the estimated M and Q matrices of the 2-latent-variable run side by
# side. Each panel gets its own colorbar and a title, so the two matrices can
# be told apart. The loop has no trailing expression, so no Colorbar repr
# is emitted as cell output.
fig, axs = plt.subplots(nrows=1, ncols=2)
for ax, matrix_name in zip(axs, ("M", "Q")):
    image = ax.imshow(result["2_latent"][matrix_name])
    fig.colorbar(ax=ax, mappable=image)
    ax.set_title(matrix_name)

<matplotlib.colorbar.Colorbar at 0x14e4cd754e0>

In [None]:
# Show the estimated M and Q matrices of the 2-latent-variable run.
display(result["2_latent"].M, result["2_latent"].Q)

#### 3 latent variables

In [None]:
# NOTE: this cell is intentionally disabled (everything is commented out).
# It follows the same pattern as the 2-latent cell with an additional latent
# variable z3; uncomment the whole cell to run it.

# old_name = "2_latent"
# new_name = "3_latent"

# data[new_name] = data[old_name].copy()
# data[new_name]["z2"] = result[old_name].states.sel(state_name="z2")
# # Add latent variable
# pipeline.add_random_variable(
#     data[new_name], var_name="z3", random_generator=rng1, variance=variance
# )
# # Run Kalman_SEM
# result[new_name] = pipeline.xarray_Kalman_SEM(
#     ds=data[new_name],
#     observation_variables=["x2", "x3"],
#     state_variables=["x2", "x3", "z1", "z2", "z3"],
#     nb_iter_SEM=nb_iter_SEM,
#     variance_obs_comp=0.0001,
# )

# fig, ax = plt.subplots(1, 1)
# for var in result[new_name].state_name:
#     plot_state_with_probability(
#         ax=ax,
#         x_value=result[new_name].time,
#         state=result[new_name].states.sel(state_name=var),
#         prob=result[new_name].covariance.sel(state_name=var, state_name_copy=var),
#         line_kwargs={"label": var.values},
#     )

# ax.legend()
# #ax.set_xlim((0, 2))
# ax.set_xlabel("time")
# ax.set_ylabel("Values")
# ax.set_title("Using multiple random latent variables");

## Test the forecast skill
To test the forecast skill, we will create a test dataset starting from the end of the initial training data.

In [None]:
# Values of x1, x2, x3 at the last time step of the training run; they serve
# as the initial condition of the test run.
final_step = model_result.isel(time=-1)
test_initial_condition = np.array(
    [final_step[name].values for name in ("x1", "x2", "x3")]
)
test_initial_condition

array([-0.83611822, -1.77794423, 17.937048  ])

In [None]:
# Test data: same Lorenz-63 settings as the training run, but started from
# the final state of the training run.
model_result_test = example_models.Lorenz_63_xarray(
    initial_condition=test_initial_condition,
    time_steps=None,
    time_length=number_loops,
    dt=dt,
)

In [None]:
# Empty containers for the test experiments, filled per experiment name
# (e.g. "0_latent") by the cells below.
data_test = {}
kalman_test = {}
smoother_test = {}

Perform the Kalman SEM on the test data set for 0, 1 and 2 latent variables.

In [None]:
# Kalman_SEM on the test data with 0, 1 and 2 latent variables.
# Every run after the first starts from a copy of the previous data set and
# adds one new random latent variable z<k>. From the second latent variable
# on, the previously added latent variable is first replaced by its states
# estimated in the previous run.
# NOTE(review): rng1 is the generator already used by the training cells, so
# the values drawn here depend on how often those cells were executed before.
for n_latent in range(3):
    new_name = f"{n_latent}_latent"
    latent_names = [f"z{k}" for k in range(1, n_latent + 1)]

    if n_latent == 0:
        # 0 latent: plain copy of the test model output
        data_test[new_name] = model_result_test.copy()
    else:
        old_name = f"{n_latent - 1}_latent"
        # copy dataset
        data_test[new_name] = data_test[old_name].copy()
        if n_latent > 1:
            previous_latent = latent_names[-2]
            data_test[new_name][previous_latent] = kalman_test[old_name].states.sel(
                state_name=previous_latent
            )
        # Add latent variable
        pipeline.add_random_variable(
            data_test[new_name],
            var_name=latent_names[-1],
            random_generator=rng1,
            variance=variance,
        )

    # Run Kalman_SEM
    kalman_test[new_name] = pipeline.xarray_Kalman_SEM(
        ds=data_test[new_name],
        observation_variables=["x2", "x3"],
        state_variables=["x2", "x3"] + latent_names,
        nb_iter_SEM=nb_iter_SEM,
        variance_obs_comp=0.0001,
    )

100%|██████████| 30/30 [01:50<00:00,  3.68s/it]


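Run the Kalman smoother on the test data, using the last time step (``idx = -1``) of the Kalman SEM result for the initial covariance and as the estimation index.
The ``M`` and ``Q`` matrices estimated by ``Kalman_SEM`` are reused.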

In [None]:
# Kalman smoother for each test experiment, reusing M and Q from the matching
# Kalman_SEM run. `idx` selects both the covariance used as initial
# covariance matrix and the `estimation_idx` passed to the smoother.
idx = -1
smoother_runs = (
    ("0_latent", []),
    ("1_latent", ["z1"]),
    ("2_latent", ["z1", "z2"]),
)
for run_name, run_latents in smoother_runs:
    run_states = ["x2", "x3"] + run_latents
    # n = number of state variables, p = number of observed variables
    H, R = pipeline._input_matrices_H_R_from_n_p(n=len(run_states), p=2)
    sem_run = kalman_test[run_name]
    smoother_test[run_name] = pipeline.xarray_Kalman_smoother(
        ds=data_test[run_name],
        state_variables=run_states,
        observation_variables=["x2", "x3"],
        initial_covariance_matrix=sem_run.covariance.isel(time=idx),
        M=sem_run.M.values,
        Q=sem_run.Q.values,
        H=H,
        R=R,
        estimation_idx=idx,
        dim="time",
    )

It seems the order of the variables is not fixed. **TODO:** fix it.

In [None]:
# Number of entries along the new "horizon" (forecast step) coordinate.
horizon_length = 750

# Work with the 2-latent-variable test experiment.
res = kalman_test["2_latent"]
smo = smoother_test["2_latent"]
# Forecast container: starts with the coordinates of the SEM result only and
# gets an integer "horizon" coordinate 0 .. horizon_length - 1.
hor = xr.Dataset(coords=res.coords)
hor = hor.assign_coords(horizon=np.arange(horizon_length))
# NOTE(review): presumably creates empty arrays in `hor` for the variables of
# `res`, extended by the "horizon" dimension — confirm in pipeline.
pipeline.add_empty_dataarrays(hor, res, new_dimension="horizon")
# NOTE(review): presumably aligns `res` with the coordinates of `hor` — confirm.
res = pipeline.expand_and_assign_coords(res, hor)
# Put the dimensions into a fixed order: horizon, time, state_name, then the rest.
hor = hor.transpose("horizon", "time", "state_name", ...)
# NOTE(review): appears to copy the variables of `res` into horizon=0 of
# `hor` — confirm in pipeline.
pipeline.assign_variables_by_double_selection(
    ds1=hor, ds2=res, select_dict1=dict(horizon=0), select_dict2=dict()
)

In [None]:
# Sanity check of the array shapes used by the forecast loop.
# horizon=0 is selected explicitly: `horiz` is only defined by the forecast
# loop in a later cell, so referring to it here fails on a fresh kernel.
print(np.shape(res.M.values))
np.shape(hor.states.sel(horizon=0).values)

(4, 4)


(4000, 4)

In [None]:
# Forecast along the "horizon" dimension with the linear model (M, Q)
# estimated by the Kalman SEM:
#     states[h + 1]     = M @ states[h]
#     covariance[h + 1] = M @ covariance[h] @ M^T + Q
# The recursion starts at horizon 0 from the Kalman smoother output.

# Loop-invariant matrices, extracted once.
M_matrix = res.M.values
Q_matrix = res.Q.values

n_horizon = len(hor.horizon)

if n_horizon > 0:
    # Initialise horizon 0 once, instead of testing `horiz == 0` on every
    # iteration of the loop.
    pipeline.assign_variable_by_double_selection(
        ds1=hor,
        da2=smo.state_smooth,
        var_name="states",
        select_dict1=dict(horizon=0),
        select_dict2=dict(),
    )
    pipeline.assign_variable_by_double_selection(
        ds1=hor,
        da2=smo.covariance_smooth,
        var_name="covariance",
        select_dict1=dict(horizon=0),
        select_dict2=dict(),
    )

for horiz in range(n_horizon - 1):
    # States of one horizon are laid out as (time, state_name); transpose so
    # that M acts on the state axis, then transpose back before storing.
    states_now = hor.states.sel(horizon=horiz).values
    hor["states"][dict(horizon=horiz + 1)] = (M_matrix @ states_now.T).T

    # Covariance of one horizon is a stack of matrices with time as leading
    # axis; `@` broadcasts M and M^T over that axis.
    covariance_now = hor.covariance.sel(horizon=horiz).values
    hor["covariance"][dict(horizon=horiz + 1)] = (
        M_matrix @ covariance_now @ M_matrix.T + Q_matrix
    )

In [None]:
# Forecasts of x2 and x3 launched from several start indices, drawn on top of
# the horizon-0 states. `plot_state_with_probability` is already imported in
# the first cell of the notebook, so it is not imported again here.
variables_to_plot = ["x2", "x3"]

fig, ax = plt.subplots(1, 1)
# Horizon-0 states over the whole time axis
ax.plot(
    hor.time,
    hor.states.isel(horizon=0).sel(state_name=variables_to_plot),
    label=hor.state_name.sel(state_name=variables_to_plot).values,
)
# Start a forecast every 1000 time steps, beginning at index 100; the upper
# bound follows the actual length of the time axis.
for start_time in np.arange(100, hor.sizes["time"], 1000):
    for state in variables_to_plot:
        plot_state_with_probability(
            ax=ax,
            # NOTE(review): assumes the time axis starts at 0 with step dt — confirm.
            x_value=(start_time + hor.horizon) * dt,
            state=hor.states.sel(state_name=state).isel(time=start_time),
            prob=hor.covariance.sel(state_name=state, state_name_copy=state).isel(
                time=start_time
            ),
            stds=0.64,
            line_kwargs=dict(color=[0.5, 0.5, 0.5]),
        )

ax.legend()
ax.set_ylim(-40, 40)
ax.set_ylabel("Values")
ax.set_xlabel("Time");

Text(0.5, 0, 'Time')

In [None]:
# Variance (diagonal covariance entry) of x2 and x3 over the first 20
# horizons, for the forecast started at time index `start_time`.
fig, ax = plt.subplots(1, 1)
start_time = 100
for state in ["x2", "x3"]:
    ax.plot(
        hor.horizon.isel(horizon=slice(0, 20)),
        hor.covariance.isel(horizon=slice(0, 20))
        .sel(state_name=state, state_name_copy=state)
        .isel(time=start_time),
        linestyle="-",
        marker="x",
        label=state,  # lets the legend tell the two curves apart
    )

ax.legend()
ax.set_xlabel("Horizon")
ax.set_ylabel("Variance")
ax.set_title(f"Forecast variance, start index {start_time}");

In [None]:
# Debug check: M @ P @ M^T for the covariance P stored at time index 100 and
# horizon 1 (without the additive Q term used in the forecast loop).
print(res.M.values @ hor.covariance.isel(time=100, horizon=1).values @ res.M.T.values)

[[ 1.02117966e-03  1.70488565e-05  3.52497384e-03  6.80641054e-02]
 [ 1.70488565e-05  1.06926073e-03 -7.25975583e-02  5.05067950e-03]
 [ 3.52497384e-03 -7.25975583e-02  6.41287217e+00  4.70059757e-03]
 [ 6.80641054e-02  5.05067950e-03  4.70059757e-03  6.00840327e+00]]


In [None]:
# Diagonal covariance entries of x2 and x3 along the horizon, at time index 100.
covariance_at_100 = hor.covariance.isel(time=100)
for state_label in ("x2", "x3"):
    plt.plot(
        covariance_at_100.sel(
            state_name=[state_label], state_name_copy=[state_label]
        ).squeeze(),
        label=state_label,
    )
plt.legend()

<matplotlib.legend.Legend at 0x14e4afcb8b0>

In [None]:
# Compare the states of x2 and x3 at the first (index 0) and last (index -1)
# horizon. Every curve gets its own label; two curves that share a state name
# would otherwise be indistinguishable in the legend.
for state_label in ("x2", "x3"):
    for horizon_index, horizon_label in ((0, "horizon 0"), (-1, "last horizon")):
        plt.plot(
            hor.states.isel(horizon=horizon_index)
            .sel(state_name=[state_label])
            .squeeze(),
            label=f"{state_label} ({horizon_label})",
        )
plt.legend();

<matplotlib.legend.Legend at 0x14e62da3070>

In [None]:
# Requires result["0_latent"], which is only filled by the "0 latent variables"
# cell. If the `result` dict is re-created without re-running that cell, this
# lookup raises a KeyError.
result["0_latent"].M

KeyError: '0_latent'