# Mutual information estimates

In [17]:
# Custom code imports
from generate_time_series import (load_two_body_problem_time_series,
                                  load_belousov_zhabotinsky_time_series,
                                  load_lorenz_attractor_time_series)

from datasets import (chop_time_series_into_chunks,
                      split_chunks_into_windows_and_targets)

In [18]:
# Standard code imports
from typing import Tuple
import numpy as np
import numpy.typing
NDArray = numpy.typing.NDArray[np.floating]

In [65]:
# Helpers
def time_series_2_windows_and_targets(time_series: NDArray,
                                      window_len: int = 10,
                                      target_len: int = 1,
                                      reverse: bool = False,
                                      take_each_nth_chunk: int = 3) -> Tuple[NDArray, NDArray]:
    """Cut a time series into (window, target) pairs.

    The series is first chopped into chunks of ``window_len + target_len``
    points by ``chop_time_series_into_chunks``; every chunk is then split into
    a window and a target by ``split_chunks_into_windows_and_targets``.

    Args:
        time_series: The series to cut up.
        window_len: Window length; together with ``target_len`` it determines
            the chunk length.
        target_len: Target length, also forwarded to the splitting helper.
        reverse: Forwarded unchanged to ``chop_time_series_into_chunks``
            (presumably reverses the time direction -- confirm in ``datasets``).
        take_each_nth_chunk: Forwarded unchanged to
            ``chop_time_series_into_chunks``. Defaults to 3.

    Returns:
        The ``(windows, targets)`` pair produced by
        ``split_chunks_into_windows_and_targets``.
    """
    chunk_len = window_len + target_len
    chunks = chop_time_series_into_chunks(time_series,
                                          chunk_len=chunk_len,
                                          reverse=reverse,
                                          take_each_nth_chunk=take_each_nth_chunk)
    return split_chunks_into_windows_and_targets(chunks, target_len=target_len)


def flatten_last_dim(array: NDArray) -> NDArray:
    """Merge the last two axes of ``array`` into one.

    Despite the name, this collapses the *two* trailing axes: an input of
    shape ``(..., a, b)`` comes back with shape ``(..., a * b)``. Leading axes
    are left untouched.

    The merged length is computed explicitly instead of being passed as ``-1``
    because NumPy cannot infer ``-1`` for a zero-size array whose leading axes
    are empty (e.g. shape ``(0, 10, 2)``), and would raise ``ValueError``.

    Args:
        array: Array with at least two axes.

    Returns:
        The reshaped array with one axis fewer than the input.

    Raises:
        AssertionError: If ``array`` has fewer than two axes.
    """
    assert array.ndim >= 2
    *leading, second_last, last = array.shape
    return array.reshape((*leading, second_last * last))

## `sklearn.feature_selection.mutual_info_regression`

In [32]:
# # This function only works with discrete inputs (handy for categorization/clustering).
# # It is unusable for the float (continuous) vectors we are dealing with here.
# from sklearn.metrics import mutual_info_score

In [21]:
# See https://www.blog.trainindata.com/mutual-information-with-python/
from sklearn.feature_selection import mutual_info_regression

In [66]:
def calculate_mutual_info_for_dataset(ts: NDArray, dim: int = 0,
                                      random_state=None) -> Tuple[NDArray, NDArray]:
    """Estimate window/target mutual information with scikit-learn.

    The series is cut into (window, target) pairs twice -- once as is and once
    with ``reverse=True`` -- and ``mutual_info_regression`` is run on each set.

    NOTE: a later cell of this notebook defines another function with the same
    name (the NPEET variant), which shadows this one once it has been run.

    Args:
        ts: Time series; ``ts.shape[1]`` is the number of coordinates.
        dim: Index of the single coordinate to analyse.
        random_state: Forwarded to ``mutual_info_regression``, which adds a
            small random noise to continuous variables. Pass an int to get
            repeatable estimates; ``None`` (default) keeps the library default.

    Returns:
        ``(forward, backward)``: two arrays of mutual-information estimates,
        one value per window position.

    Raises:
        AssertionError: If ``dim`` is not a valid column index of ``ts``.
    """
    assert 0 <= dim < ts.shape[1]

    forward_windows, forward_targets = time_series_2_windows_and_targets(ts)
    backward_windows, backward_targets = time_series_2_windows_and_targets(ts, reverse=True)

    # Each window or target is two-dimensional. I extract just one dimension
    forward_windows = forward_windows[:, :, dim]
    backward_windows = backward_windows[:, :, dim]
    # ... and assume target_len=1, so take the 0-th point in target.
    forward_targets = forward_targets[:, 0, dim]
    backward_targets = backward_targets[:, 0, dim]
    # Note: `mutual_info_regression` only accepts 1-dimensional y's.
    # So I'm forced to pick only one dimension from targets, although I could
    # flatten the windows instead of extracting one dimension from it.

    forward = mutual_info_regression(forward_windows, forward_targets,
                                     random_state=random_state)
    backward = mutual_info_regression(backward_windows, backward_targets,
                                      random_state=random_state)
    return forward, backward

In [67]:
def print_mutual_info(ts: NDArray, comment: str) -> None:
    """Print the forward and backward mutual-information estimates for ``ts``.

    Each of the two output lines is prefixed with ``comment`` and the
    direction name.
    """
    fwd_estimate, bwd_estimate = calculate_mutual_info_for_dataset(ts)
    labelled = (("forward", fwd_estimate), ("backward", bwd_estimate))
    for direction, estimate in labelled:
        print(comment, direction, estimate)

In [68]:
# Report forward/backward estimates for each dataset, separated by a blank line.
for position, (load_series, label) in enumerate((
        (load_two_body_problem_time_series, "kepler"),
        (load_belousov_zhabotinsky_time_series, "belousov_zhabotinsky"),
        (load_lorenz_attractor_time_series, "lorenz"))):
    if position > 0:
        print()
    print_mutual_info(load_series(), label)

kepler forward [4.1544507  4.1679777  4.19014217 4.23306892 4.40380346 4.52204556
 4.6513439  4.83707335 5.07009635 5.38225062]
kepler backward [4.1544507  4.16829816 4.19438907 4.23299843 4.40411548 4.52240252
 4.65179145 4.83874348 5.06217402 5.38334711]

belousov_zhabotinsky forward [5.10297078 5.11591333 5.12894595 5.15271997 5.2606767  5.40564618
 5.4406048  5.48620959 5.54904934 5.63826422]
belousov_zhabotinsky backward [5.10354369 5.11591333 5.12937203 5.1472664  5.23061426 5.39968754
 5.42965822 5.4778498  5.53905855 5.6281999 ]

lorenz forward [1.10491599 1.18068127 1.28136143 1.39081549 1.52221169 1.68715116
 1.89150051 2.17183271 2.57736067 3.26623382]
lorenz backward [1.10105144 1.18418257 1.28136143 1.38803675 1.51959223 1.68800725
 1.90041139 2.17751784 2.57858673 3.25964205]


The numbers above are the same, within reasonable accuracy, for `forward` and `backward`.
This is not what we expect.

## gregversteeg/NPEET

In [16]:
# Fetch the NPEET sources from GitHub (cloned into ./NPEET and imported from there below)
!git clone https://github.com/gregversteeg/NPEET.git

Cloning into 'NPEET'...
remote: Enumerating objects: 129, done.[K
remote: Counting objects: 100% (42/42), done.[K
remote: Compressing objects: 100% (22/22), done.[K
remote: Total 129 (delta 21), reused 35 (delta 19), pack-reused 87[K
Receiving objects: 100% (129/129), 317.14 KiB | 868.00 KiB/s, done.
Resolving deltas: 100% (55/55), done.


In [53]:
# The module's suggested installation method doesn't work,
# so we just find the right source file in the directory tree.
from NPEET.npeet import entropy_estimators

In [69]:
def calculate_mutual_info_for_dataset(ts: NDArray) -> Tuple[float, float]:
    """Estimate window/target mutual information with NPEET.

    The series is cut into (window, target) pairs twice -- once as is and once
    with ``reverse=True``. Windows and targets are flattened with
    ``flatten_last_dim`` and then passed to ``entropy_estimators.mi``.

    NOTE: this reuses the name of the scikit-learn based function defined
    earlier in the notebook and therefore shadows it.

    Args:
        ts: Time series to analyse.

    Returns:
        ``(forward, backward)``: one scalar mutual-information estimate per
        direction.
    """
    forward_windows, forward_targets = time_series_2_windows_and_targets(ts)
    backward_windows, backward_targets = time_series_2_windows_and_targets(ts, reverse=True)

    # Each window or target is two-dimensional, so I flatten them.
    forward_windows = flatten_last_dim(forward_windows)
    forward_targets = flatten_last_dim(forward_targets)
    backward_windows = flatten_last_dim(backward_windows)
    backward_targets = flatten_last_dim(backward_targets)

    forward_mi = entropy_estimators.mi(forward_windows, forward_targets)
    backward_mi = entropy_estimators.mi(backward_windows, backward_targets)
    return forward_mi, backward_mi

In [70]:
# (forward, backward) NPEET mutual-information estimates for the two-body series.
calculate_mutual_info_for_dataset(load_two_body_problem_time_series())

(8.074050021439163, 8.047722644831461)

In [71]:
# (forward, backward) NPEET mutual-information estimates for the Lorenz attractor series.
calculate_mutual_info_for_dataset(load_lorenz_attractor_time_series())

(9.368706642394997, 9.357729704269742)

In [72]:
# (forward, backward) NPEET mutual-information estimates for the Belousov-Zhabotinsky series.
calculate_mutual_info_for_dataset(load_belousov_zhabotinsky_time_series())

(7.951617233018499, 8.087579162293846)