In [None]:
from pathlib import Path
import numpy as np
import xarray as xr
import itertools
import matplotlib.pyplot as plt

from kalman_reconstruction import pipeline
from kalman_reconstruction.custom_plot import (
    set_custom_rcParams,
    adjust_lightness,
    handler_map_alpha,
)
from kalman_reconstruction.statistics import (
    normalize,
)
from reconstruct_climate_indices.idealized_ocean import AMO_oscillatory_ocean
from reconstruct_climate_indices.track_data import track_model
from tqdm import tqdm

from mlflow import (
    end_run,
    log_artifact,
    log_params,
    set_tracking_uri,
    start_run,
)
import yaml

# Apply the custom rcParams provided by kalman_reconstruction.custom_plot, then
# override the default figure size for this notebook.
set_custom_rcParams()
plt.rcParams["figure.figsize"] = (8, 8)
# Colors of the active matplotlib property cycle.
colors = plt.rcParams["axes.prop_cycle"].by_key()["color"]

In [None]:
# NOTE(review): SUBDATA_PATH is not referenced anywhere else in the visible
# notebook — confirm whether it is still needed.
SUBDATA_PATH = "AMO_oscillator_parameter_experiments"
# Root directory below which save_fig stores all figures.
PATH_FIGURES = Path("../results/AMO_oscillator_parameter_experiments")
# Set to False to skip writing figure files in save_fig.
SAVE_FIGURES = True


def save_fig(fig, relative_path, **kwargs):
    """Save ``fig`` below ``PATH_FIGURES`` if ``SAVE_FIGURES`` is enabled.

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        Figure to store.
    relative_path : str or Path
        Target file path relative to ``PATH_FIGURES``. Missing parent
        directories are created.
    **kwargs
        Forwarded unchanged to ``fig.savefig``.
    """
    if not SAVE_FIGURES:
        # Nothing is written, so no directories are created either.
        return
    store_path = PATH_FIGURES / relative_path
    store_path.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(store_path, **kwargs)

In [None]:
def product_dict(**kwargs):
    """Yield one dict per element of the cartesian product of the keyword values.

    Every keyword maps to an iterable of candidate values. Each yielded dict
    uses the keyword names as keys and holds one combination of values; the
    combinations follow the order of ``itertools.product``.
    """
    names = list(kwargs)
    candidates = [kwargs[name] for name in names]
    yield from (
        dict(zip(names, combination))
        for combination in itertools.product(*candidates)
    )

## Settings

#### Kalman Settings

In [None]:
# Seed for the random number generator.
seed = 39266
# Variance of the randomly initialized latent variable.
random_variance = 1
# Number of iterations of the Kalman SEM.
nb_iter_SEM = 30
# Variables used as observations.
observation_variables = ["AMO", "NAO", "EAP"]
# Variables that make up the state (the observations plus the latent variable).
state_variables = ["AMO", "NAO", "EAP", "latent"]

# Settings stored in the mlflow tracking.
# NOTE(review): the key "ObservartionVariables" is misspelled; it is kept
# unchanged because it is the parameter name that gets logged.
kalman_settings = {
    "RandomNumberGeneratorSeed": seed,
    "RandomVariance": random_variance,
    "NumberKalmanIteration": nb_iter_SEM,
    "ObservartionVariables": observation_variables,
    "StateVariables": state_variables,
}
# Extra arguments for the Kalman SEM call (none are needed here).
func_args = {}
# Keyword arguments for the Kalman SEM call.
func_kwargs = {
    "observation_variables": observation_variables,
    "state_variables": state_variables,
    "nb_iter_SEM": nb_iter_SEM,
}

# Random number generator used to create the latent variable. Further
# generators could be seeded with seed + 1, seed + 2, ... if required.
rng1 = np.random.default_rng(seed)

#### Experiment settings

The model used is the ``AMO_oscillatory_ocean``. The parameters ``dNAO`` and ``dEAP`` will be changed.

In [None]:
# Baseline keyword arguments for ``AMO_oscillatory_ocean``.
default_settings = {
    "nt": 1000,  # timesteps
    "dt": 30,  # days
    "per0": 24 * 365.25,  # days
    "tau0": 10 * 365.25,  # days
    "dNAO": 0.1,
    "dEAP": 0.1,
    "cNAOvsEAP": 0,
}

# Names of the arguments that are varied between experiments.
modified_arguments = ["dNAO", "dEAP"]
# Multipliers applied to the default value of every varied argument.
factors = np.array([0.1, 0.5, 1, 5, 10])

### Run the experiments:

#### Create all datasets by running the model function ``AMO_oscillatory_ocean``.

The results will be combined into a single Dataset

In [None]:
# Candidate values per varied argument: the default value scaled by every factor.
experiment_setups = {
    key: np.round(default_settings[key] * factors, 5) for key in modified_arguments
}

# Every combination of the candidate values is one experiment.
experiment_settings = list(product_dict(**experiment_setups))

# The individual model runs are not tracked separately.
data_list = []
for s in tqdm(experiment_settings):
    # Default arguments, overridden by the current combination.
    setting = {**default_settings, **s}
    # Integrate the model and keep the resulting xr.Dataset.
    data = AMO_oscillatory_ocean(**setting)
    data_list.append(data)
# Merge the output of all runs into a single Dataset.
experiments = xr.merge(data_list)
experiments

100%|██████████| 25/25 [00:00<00:00, 53.79it/s]


#### Run the ``xarray_Kalman_SEM`` function from the ``pipeline`` library.

The ``run_function_on_multiple_subdatasets`` function runs the given processing function on every subdataset specified by ``subdataset_selections``. In this case, these selections are given by the ``experiment_settings``.

In [None]:
# Work on a copy of the model output so that the latent variable is added to
# ``input_kalman`` rather than to ``experiments``.
input_kalman = experiments.copy()
# Add the randomly initialized variable "latent".
# NOTE(review): the return value is discarded, so add_random_variable presumably
# modifies ``input_kalman`` in place — confirm against the pipeline module.
pipeline.add_random_variable(
    ds=input_kalman, var_name="latent", random_generator=rng1, variance=random_variance
)
# Run xarray_Kalman_SEM on every sub-dataset selected by experiment_settings.
experiments_kalman = pipeline.run_function_on_multiple_subdatasets(
    processing_function=pipeline.xarray_Kalman_SEM,
    parent_dataset=input_kalman,
    subdataset_selections=experiment_settings,
    func_args=func_args,
    func_kwargs=func_kwargs,
)

 33%|███▎      | 10/30 [00:04<00:07,  2.71it/s]

### Track the experiment using ``mlflow``
Using mlflow, the following information will be stored: 
- Dataset containing the results from the ``AMO_oscillatory_ocean`` for all experiment settings.
- Dataset containing the results from the ``xarray_Kalman_SEM`` for all experiment settings.
- Settings to create the different Model runs using (``AMO_oscillatory_ocean``).
- Settings used by the ``xarray_Kalman_SEM``.

Therefore, several mlflow settings need to be set by the user:
- ExperimentID : Corresponds to the ``experiment_id`` that is passed to ``mlflow``'s ``start_run``.
- SubdataPath : Name of the directory in which to store the results. This will be a child of the ``data`` directory. 
- MlflowPath : Name of the directory (a child of ``RepoPath``) that is passed to ``set_tracking_uri`` as the mlflow tracking URI.
- NOTE: 
    - Make sure that the RepoPath is correct! 
    - Make sure that ExperimentID exists! 

The folder structure will be :

**Folder structure**

    └───RepoPath
        └───data
            └───SubdataPath
                └───run_id
                    │    run_id_input.nc
                    │    run_id_kalman.nc
                    │    run_id_kalman_settings.yml
                    │    run_id_parameter_settings.yml
Where ``run_id`` is e.g. *553cbd3bc6ce44028c8daad12647c306*


In [None]:
# Experiment id handed to mlflow's start_run; the experiment has to exist already.
ExperimentID = 665803199114752138
# Directory name below <RepoPath>/data in which the results are stored.
SubdataPath = "parameter-experiments-storage"
# Directory name below RepoPath that is used as the mlflow tracking URI.
MlflowPath = "mlruns"
# Resolved working directory; its parent is taken as the repository root.
ThisPath = Path(".").resolve()
RepoPath = ThisPath.parent
print(RepoPath)

C:\Users\Niebaum\Documents\Repositories\reconstruct-climate-indices


In [None]:
# Combine the default settings with the arrays used in the experiment setups.
# Objects offering ``tolist`` (the NumPy arrays) are converted to plain lists so
# that they can be logged and dumped to YAML; all other values are kept as is.
parameter_settings = {
    key: value.tolist() if hasattr(value, "tolist") else value
    for key, value in {**default_settings, **experiment_setups}.items()
}

DataPath = RepoPath / "data"

# set the tracking_uri
set_tracking_uri(RepoPath / MlflowPath)
# mlflow documents ``experiment_id`` as a string, so the integer id is converted.
with start_run(experiment_id=str(ExperimentID)) as run:
    # retrieve the run_id
    run_id = run.info.run_id

    # Directory of this run. A dedicated name is used instead of overwriting
    # ``SubdataPath``; otherwise every re-execution of this cell would nest the
    # new run directory inside the previous one.
    RunDataPath = DataPath / SubdataPath / f"{run_id}"
    RunDataPath.mkdir(parents=True, exist_ok=True)

    # File names used to store the settings and the datasets.
    ParameterSettingsPath = RunDataPath / f"{run_id}_parameter_settings.yml"
    KalmanSettingsPath = RunDataPath / f"{run_id}_kalman_settings.yml"
    InputFile = RunDataPath / f"{run_id}_input.nc"
    KalmanFile = RunDataPath / f"{run_id}_kalman.nc"

    # Log all settings and the file locations (relative to the repository root).
    log_params(kalman_settings)
    log_params(parameter_settings)
    log_params(
        dict(
            ParameterSettingsFile=ParameterSettingsPath.relative_to(
                RepoPath
            ).as_posix(),
            KalmanSettingsFile=KalmanSettingsPath.relative_to(RepoPath).as_posix(),
            InputFile=InputFile.relative_to(RepoPath).as_posix(),
            KalmanFile=KalmanFile.relative_to(RepoPath).as_posix(),
        )
    )

    # ---- Save Files ----
    experiments.to_netcdf(InputFile)
    experiments_kalman.to_netcdf(KalmanFile)
    with open(ParameterSettingsPath, "w") as yaml_file:
        yaml.dump(parameter_settings, yaml_file, default_flow_style=False)
    with open(KalmanSettingsPath, "w") as yaml_file:
        yaml.dump(kalman_settings, yaml_file, default_flow_style=False)

    # Log the settings files as artifacts of the run.
    log_artifact(ParameterSettingsPath.as_posix())
    log_artifact(KalmanSettingsPath.as_posix())

# The ``with start_run(...)`` block ends the run on exit, so no explicit
# ``end_run()`` call is needed.

NameError: name 'default_settings' is not defined

# Reload the data from filepaths

In [None]:
# Reload the model output and the Kalman result of one stored run from disk.
# NOTE(review): the paths are hard-coded to a single run and contain two nested
# run-id directories — confirm that this is the intended location.
experiments = xr.open_dataset(
    r"../data/parameter-experiments-storage/154b763486044d4189d2742b6ba89b65/14172c48432a4e578196aba0e61ddf82/14172c48432a4e578196aba0e61ddf82_input.nc"
)
experiments_kalman = xr.open_dataset(
    r"../data/parameter-experiments-storage/154b763486044d4189d2742b6ba89b65/14172c48432a4e578196aba0e61ddf82/14172c48432a4e578196aba0e61ddf82_kalman.nc"
)
# NOTE(review): a later cell calls from_standard_dataset with an additional
# "states" argument and overwrites this result — confirm whether this call is
# meant to differ.
experiments_kalman_states = pipeline.from_standard_dataset(experiments_kalman)

In [None]:
# Time series of AMO and ZOT, one panel per (dNAO, dEAP) combination.
# ``squeeze=False`` keeps ``axs`` two-dimensional even for a single row/column.
fig, axs = plt.subplots(
    nrows=len(experiments.dNAO),
    ncols=len(experiments.dEAP),
    figsize=(15, 15),
    squeeze=False,
)
# tqdm wraps the coordinate itself (not ``enumerate``) so that it knows the total.
for i, dNAO in enumerate(tqdm(experiments.dNAO)):
    for j, dEAP in enumerate(experiments.dEAP):
        ax = axs[i, j]
        for name in ("AMO", "ZOT"):
            ax.plot(
                experiments.time_years,
                experiments[name].sel(dNAO=dNAO, dEAP=dEAP),
                label=name,
            )
        ax.set_title(f"dNAO: {dNAO:.2f}, dEAP: {dEAP:.2f}")
        ax.set_ylabel("value")
        ax.set_xlabel("years")
        ax.legend()

fig.suptitle("Deterministic variables | Variations of dNAO and dEAP")


# Forward slash instead of "\d": that is an invalid escape sequence and only
# acts as a path separator on Windows.
save_fig(fig, "svgs/deterministic-evolution.svg")
save_fig(fig, "deterministic-evolution.png", dpi=400)

5it [00:00,  6.55it/s]


In [None]:
# ZOT plotted against AMO, one panel per (dNAO, dEAP) combination.
# ``squeeze=False`` keeps ``axs`` two-dimensional even for a single row/column.
fig, axs = plt.subplots(
    nrows=len(experiments.dNAO),
    ncols=len(experiments.dEAP),
    figsize=(15, 15),
    squeeze=False,
)
# tqdm wraps the coordinate itself (not ``enumerate``) so that it knows the total.
for i, dNAO in enumerate(tqdm(experiments.dNAO)):
    for j, dEAP in enumerate(experiments.dEAP):
        ax = axs[i, j]
        ax.plot(
            experiments["AMO"].sel(dNAO=dNAO, dEAP=dEAP),
            experiments["ZOT"].sel(dNAO=dNAO, dEAP=dEAP),
            linestyle="-",
            linewidth=0.5,
            marker=".",
            alpha=0.7,
        )
        ax.set_title(f"dNAO: {dNAO:.2f}, dEAP: {dEAP:.2f}")
        ax.set_ylabel("ZOT")
        ax.set_xlabel("AMO")

fig.suptitle("ZOT and AMO relation | Variations of dNAO and dEAP")


# Forward slash instead of "\d": that is an invalid escape sequence and only
# acts as a path separator on Windows.
save_fig(fig, "svgs/deterministic-relation.svg")
save_fig(fig, "deterministic-relation.png", dpi=400)

5it [00:00, 16.77it/s]


In [None]:
# Time series of NAO and EAP, one panel per (dNAO, dEAP) combination.
# ``squeeze=False`` keeps ``axs`` two-dimensional even for a single row/column.
fig, axs = plt.subplots(
    nrows=len(experiments.dNAO),
    ncols=len(experiments.dEAP),
    figsize=(15, 15),
    squeeze=False,
)
# tqdm wraps the coordinate itself (not ``enumerate``) so that it knows the total.
for i, dNAO in enumerate(tqdm(experiments.dNAO)):
    for j, dEAP in enumerate(experiments.dEAP):
        ax = axs[i, j]
        for name in ("NAO", "EAP"):
            ax.plot(
                experiments.time_years,
                experiments[name].sel(dNAO=dNAO, dEAP=dEAP),
                label=name,
                alpha=0.7,
            )
        ax.set_title(f"dNAO: {dNAO:.2f}, dEAP: {dEAP:.2f}")
        ax.set_ylabel("value")
        ax.set_xlabel("years")
        ax.legend()

fig.suptitle("Stochstic variables | Variations of dNAO and dEAP")


# Forward slash instead of "\s": that is an invalid escape sequence and only
# acts as a path separator on Windows.
save_fig(fig, "svgs/stochastic-evolution.svg")
save_fig(fig, "stochastic-evolution.png", dpi=400)

5it [00:00,  9.14it/s]


In [None]:
# Rebuild the states dataset from the Kalman result, passing "states" as the
# second argument; this overwrites any earlier ``experiments_kalman_states``.
# NOTE(review): the meaning of the "states" argument is defined in the pipeline
# module and cannot be verified from this notebook.
experiments_kalman_states = pipeline.from_standard_dataset(experiments_kalman, "states")

In [None]:
# Log likelihood per Kalman iteration, one panel per (dNAO, dEAP) combination.
# ``squeeze=False`` keeps ``axs`` two-dimensional even for a single row/column.
fig, axs = plt.subplots(
    nrows=len(experiments_kalman_states.dNAO),
    ncols=len(experiments_kalman_states.dEAP),
    figsize=(15, 15),
    squeeze=False,
)
# tqdm wraps the coordinate itself (not ``enumerate``) so that it knows the total.
for i, dNAO in enumerate(tqdm(experiments_kalman_states.dNAO)):
    for j, dEAP in enumerate(experiments_kalman_states.dEAP):
        ax = axs[i, j]
        reconst = experiments_kalman.sel(dNAO=dNAO, dEAP=dEAP, method="nearest")
        ax.plot(reconst["kalman_itteration"], reconst["log_likelihod"])
        # Same panel title as in the other figures so that each panel can be identified.
        ax.set_title(f"dNAO: {dNAO:.2f}, dEAP: {dEAP:.2f}")
        ax.set_ylabel("log_likelihod")
        ax.set_xlabel("kalman itteration")

fig.suptitle("Log Likelihood | Variation of dNAO and dEAP")


# Forward slash instead of "\l": that is an invalid escape sequence and only
# acts as a path separator on Windows.
save_fig(fig, "svgs/loglikelihood.svg")
save_fig(fig, "loglikelihood.png", dpi=400)

5it [00:00,  6.34it/s]


In [None]:
# Truth and Kalman reconstruction of the deterministic variables together with
# the latent variable, one panel per (dNAO, dEAP) combination.
# ``squeeze=False`` keeps ``axs`` two-dimensional even for a single row/column.
fig, axs = plt.subplots(
    nrows=len(experiments_kalman_states.dNAO),
    ncols=len(experiments_kalman_states.dEAP),
    figsize=(15, 15),
    layout="constrained",
    squeeze=False,
)
handles = dict()
# The time axis does not depend on the panel, so it is looked up once.
time_years = experiments.time_years
# tqdm wraps the coordinate itself (not ``enumerate``) so that it knows the total.
for i, dNAO in enumerate(tqdm(experiments_kalman_states.dNAO)):
    for j, dEAP in enumerate(experiments_kalman_states.dEAP):
        ax = axs[i, j]
        truth = experiments.sel(dNAO=dNAO, dEAP=dEAP, method="nearest")
        reconst = experiments_kalman_states.sel(dNAO=dNAO, dEAP=dEAP, method="nearest")
        for name in ("AMO", "ZOT"):
            handles[f"{name} truth"] = ax.plot(
                time_years,
                normalize(truth[name], method="mean"),
                label=f"{name} truth",
                alpha=0.7,
            )
            # Only variables that are part of the reconstructed states can be
            # drawn. This replaces a blanket ``except Exception: pass`` that
            # also hid every other error.
            if name in reconst.data_vars:
                # same color as the corresponding truth line, adjusted in lightness
                color = adjust_lightness(handles[f"{name} truth"][0].get_color())
                handles[name] = ax.plot(
                    time_years,
                    normalize(reconst[name], method="mean"),
                    label=name,
                    color=color,
                    alpha=0.7,
                )
        # plot latent
        handles["latent"] = ax.plot(
            time_years,
            normalize(reconst["latent"], method="mean"),
            label="latent",
            alpha=0.7,
        )

        ax.set_title(f"dNAO: {dNAO:.2f}, dEAP: {dEAP:.2f}")
        ax.set_ylabel("value")
        ax.set_xlabel("years")

# create a flat list from the handles dict (each entry is a list of lines)
handles = list(itertools.chain.from_iterable(handles.values()))

fig.suptitle("Deterministic Variables KalmanSEM result | Variation of dNAO and dEAP")
fig.legend(
    handles=handles,
    loc=7,
    markerscale=3,
)

# save_fig(fig, "svgs/deterministic-evolution-kalman.svg")
# save_fig(fig, "deterministic-evolution-kalman.png", dpi=400)

5it [00:01,  4.44it/s]


<matplotlib.legend.Legend at 0x2e300518d30>

In [None]:
# Truth and Kalman reconstruction of the stochastic variables together with the
# latent variable, one panel per (dNAO, dEAP) combination.
# NOTE(review): this overwrites ``experiments_kalman_states`` with its
# normalized version, so re-running the cell normalizes again — confirm that
# ``normalize`` is idempotent or assign to a new name.
experiments_kalman_states = normalize(experiments_kalman_states)
# ``squeeze=False`` keeps ``axs`` two-dimensional even for a single row/column.
fig, axs = plt.subplots(
    nrows=len(experiments_kalman_states.dNAO),
    ncols=len(experiments_kalman_states.dEAP),
    figsize=(15, 15),
    squeeze=False,
)
handles = dict()
# Defined here so the cell does not depend on a variable leaked from another cell.
time_years = experiments.time_years
# tqdm wraps the coordinate itself (not ``enumerate``) so that it knows the total.
for i, dNAO in enumerate(tqdm(experiments_kalman_states.dNAO)):
    for j, dEAP in enumerate(experiments_kalman_states.dEAP):
        ax = axs[i, j]
        truth = experiments.sel(dNAO=dNAO, dEAP=dEAP, method="nearest")
        reconst = experiments_kalman_states.sel(dNAO=dNAO, dEAP=dEAP, method="nearest")
        for name in ("NAO", "EAP"):
            handles[f"{name} truth"] = ax.plot(
                time_years,
                normalize(truth[name], method="mean"),
                label=f"{name} truth",
                alpha=0.7,
            )
            # The color is taken from this variable's own truth line. Looking
            # up "AMO truth" here raised a KeyError that was silently swallowed,
            # so the NAO reconstruction was never drawn.
            if name in reconst.data_vars:
                color = adjust_lightness(handles[f"{name} truth"][0].get_color())
                handles[name] = ax.plot(
                    time_years,
                    normalize(reconst[name], method="mean"),
                    label=name,
                    color=color,
                    alpha=0.7,
                )
        # plot latent
        handles["latent"] = ax.plot(
            time_years,
            normalize(reconst["latent"], method="mean"),
            label="latent",
            alpha=0.7,
        )

        ax.set_title(f"dNAO: {dNAO:.2f}, dEAP: {dEAP:.2f}")
        ax.set_ylabel("value")
        ax.set_xlabel("years")

# A single legend on the last panel, addressed explicitly instead of through the
# loop indices left over after the loops.
axs[-1, -1].legend()

fig.suptitle("Stochastic Variables KalmanSEM result | Variation of dNAO and dEAP")


# save_fig(fig, "svgs/stochastic-evolution-kalman.svg")
# save_fig(fig, "stochastic-evolution-kalman.png", dpi=400)

5it [00:02,  1.77it/s]


Text(0.5, 0.98, 'Stochastic Variables KalmanSEM result | Variation of dNAO and dEAP')

In [None]:
# Scatter of the truth against the Kalman reconstruction for every observed
# variable, one panel per (dNAO, dEAP) combination.
# NOTE(review): this overwrites ``experiments_kalman_states`` with its
# normalized version, so re-running the cell normalizes again — confirm that
# ``normalize`` is idempotent or assign to a new name.
experiments_kalman_states = normalize(experiments_kalman_states)
# ``squeeze=False`` keeps ``axs`` two-dimensional even for a single row/column.
fig, axs = plt.subplots(
    nrows=len(experiments_kalman_states.dNAO),
    ncols=len(experiments_kalman_states.dEAP),
    figsize=(15, 15),
    layout="constrained",
    squeeze=False,
)
handles = dict()
# tqdm wraps the coordinate itself (not ``enumerate``) so that it knows the total.
for i, dNAO in enumerate(tqdm(experiments_kalman_states.dNAO)):
    for j, dEAP in enumerate(experiments_kalman_states.dEAP):
        ax = axs[i, j]
        reconst = normalize(
            experiments_kalman_states.sel(dNAO=dNAO, dEAP=dEAP, method="nearest")
        )
        # The truth comes from the merged ``experiments`` dataset. ``data`` only
        # holds the last single model run and does not exist when the datasets
        # were reloaded from disk.
        truth = normalize(experiments.sel(dNAO=dNAO, dEAP=dEAP, method="nearest"))
        for state in observation_variables:
            # x: truth, y: reconstruction — matching the axis labels below.
            handles[state] = ax.scatter(
                truth[state],
                reconst[state],
                marker=".",
                alpha=0.5,
                label=state,
            )
        ax.set_title(f"dNAO: {dNAO:.2f}, dEAP: {dEAP:.2f}")
        ax.set_xlabel("truth")
        ax.set_ylabel("kalman")

# one legend entry per variable, taken from the last panel that was drawn
handles = list(handles.values())

fig.suptitle("Truth against KalmanSEM result | Variation of dNAO and dEAP")
fig.legend(
    handles=handles,
    loc=7,
    markerscale=3,
)

save_fig(fig, "Truth-against-KalmanSEM-result.png", dpi=400)

0it [00:00, ?it/s]


In [None]:
# Scatter of every true variable against the reconstructed latent variable, one
# panel per (dNAO, dEAP) combination. The marker opacity is the absolute
# correlation between the two.
# NOTE(review): this overwrites ``experiments_kalman_states`` with its
# normalized version, so re-running the cell normalizes again — confirm that
# ``normalize`` is idempotent or assign to a new name.
experiments_kalman_states = normalize(experiments_kalman_states)
# ``squeeze=False`` keeps ``axs`` two-dimensional even for a single row/column.
fig, axs = plt.subplots(
    nrows=len(experiments_kalman_states.dNAO),
    ncols=len(experiments_kalman_states.dEAP),
    figsize=(15, 15),
    layout="constrained",
    squeeze=False,
)
# tqdm wraps the coordinate itself (not ``enumerate``) so that it knows the total.
for i, dNAO in enumerate(tqdm(experiments_kalman_states.dNAO)):
    for j, dEAP in enumerate(experiments_kalman_states.dEAP):
        ax = axs[i, j]
        reconst = normalize(
            experiments_kalman_states.sel(dNAO=dNAO, dEAP=dEAP, method="nearest")
        )
        truth = normalize(experiments.sel(dNAO=dNAO, dEAP=dEAP, method="nearest"))
        for state in experiments.data_vars:
            # Convert the 0-d correlation result to a plain float once; it is
            # used both as opacity and in the legend label.
            corr = float(xr.corr(truth[state], reconst["latent"]))
            ax.scatter(
                truth[state],
                reconst["latent"],
                marker=".",
                alpha=abs(corr),
                label=f"{state} : {corr:.2f}",
            )
        ax.set_title(f"dNAO: {dNAO:.2f}, dEAP: {dEAP:.2f}")
        ax.set_xlabel("truth")
        ax.set_ylabel("latent | kalman")
        ax.legend(
            markerscale=3,
            handler_map=handler_map_alpha(),
        )


fig.suptitle("Truth against Latent Variable | Variation of dNAO and dEAP")
save_fig(fig, "Truth-against-LatentVariable-result.png", dpi=400)

5it [00:09,  1.81s/it]


NameError: name 'save_fig' is not defined