# Inference of causal graphs from data

Author: Marcell Stippinger

Date: 2025-11-07

## Contents

* Generate logistic map data
* Implement time delay embedding
* Do convergent cross-mapping

## Imports and plotting functions

In [None]:
import numpy as np
import pandas as pd
from scipy import stats, signal
from sklearn.utils import check_random_state
from scipy.spatial import cKDTree
from typing import NamedTuple, Optional, Tuple, Any
# Granger causality test
from statsmodels.tsa.stattools import grangercausalitytests
# Visualization
import matplotlib.pyplot as plt

In [None]:
def plot_ts(X, E=None, fs=1.0):
    """Draw every column of a multivariate time series in its own panel.

    Parameters
    ----------
    X : np.ndarray, shape (n_samples, n_features)
        Series to draw; one vertically stacked subplot is created per column.
    E : np.ndarray, shape (n_samples, n_features), optional
        Noise components; when given, column ``i`` is overlaid on panel ``i``
        as unconnected markers.
    fs : float, optional
        Sampling frequency; the sample index divided by ``fs`` is the time axis.
    """
    n_samples, n_features = X.shape
    time_axis = np.arange(n_samples) / fs

    # squeeze=False always returns a 2-D grid of axes, so a single feature
    # needs no special handling.
    _, grid = plt.subplots(n_features, 1, figsize=(6, 4), sharex=True, squeeze=False)
    panels = grid[:, 0]

    for col, ax in enumerate(panels):
        # Thin dashed zero line as a visual reference.
        ax.axhline(0, color='gray', linestyle='--', linewidth=0.5)
        ax.plot(time_axis, X[:, col])
        if E is not None:
            ax.plot(time_axis, E[:, col], linestyle='None', marker='o', markersize=3, alpha=0.7, label='Noise')
        ax.set_title(f'Time Series {col+1}')
        ax.set_ylabel('Value')

    # The x axis is shared, so only the bottom panel is labelled.
    panels[-1].set_xlabel('Time')
    plt.tight_layout()
    plt.show()

## Autocorrelograms

In [None]:
def plot_autocorrelograms(x: np.ndarray, lags: int = 40):
    """Plot ACF and PACF of a time series.

    Parameters
    ----------
    x : array-like, shape (n_samples,)
        Time series data.
    lags : int
        Number of lags to include in the plots (lag 0 is drawn as well, so
        each panel shows ``lags + 1`` stems).
    """
    # Imported here because the notebook's import cell only pulls
    # grangercausalitytests from this module; without this line acf/pacf
    # are undefined names.
    from statsmodels.tsa.stattools import acf, pacf

    acf_vals = acf(x, nlags=lags)
    pacf_vals = pacf(x, nlags=lags)

    fig, axes = plt.subplots(2, 1, figsize=(6, 4))

    panels = (
        (axes[0], acf_vals, 'Autocorrelation Function (ACF)', 'ACF'),
        (axes[1], pacf_vals, 'Partial Autocorrelation Function (PACF)', 'PACF'),
    )
    for ax, values, heading, ylabel in panels:
        ax.stem(range(lags + 1), values)
        ax.set_title(heading)
        ax.set_xlabel('Lags')
        ax.set_ylabel(ylabel)

    plt.tight_layout()
    plt.show()

## Granger causality test

We can follow, for example

Ding, M., Chen, Y., & Bressler, S. L. (2006). Granger Causality: Basic Theory and Application to Neuroscience. February. https://doi.org/10.1002/9783527609970.ch17

In [None]:
# Y -> X
#gr_yx = grangercausalitytests(coupled_ts, maxlag=4)

In [None]:
# X -> Y
#coupled_ts_rev = np.stack((coupled_ts[:, 1], coupled_ts[:, 0]), axis=1)
#gr_xy = grangercausalitytests(coupled_ts[:, ::-1], maxlag=4)
#gr_xy = grangercausalitytests(coupled_ts_rev, maxlag=4)

Explain the results

- which tests are significant
- for what lag

# Coupled logistic maps

A single logistic map is generated by
$$ x(t+1) = r x(t) (1- x(t)) $$

The interaction between two logistic maps may be additive
$$ x(t+1) = r x(t) (1- x(t)) - \beta y(t) $$
or multiplicative
$$ x(t+1) = r x(t) (1- x(t) - \beta y(t)). $$

The new value must stay in the unit interval, so we take it modulo $1$, which maps it into $[0, 1)$.

In [None]:
def _logmap_mul_rhs_modulo(state, r, beta, noise):
    """
    One update step of logistic maps with multiplicative coupling, taken
    modulo 1 (circular boundary conditions)

    :param state: current values [X1, X2, ..., Xn], shape (n, )
    :param r: float or array of floats, shape (n, ), growth rate(s)
    :param beta: c_{j-->i} == beta_{ij}, array of floats, shape (n, n);
        row i weights how the other maps act on map i
    :param noise: value added to the update before the modulo is taken
        (used directly in the arithmetic, so a number or an array)
    :return: next state, shape (n, )
    """
    # Coupling enters inside the bracket: r * x * (1 - x - beta @ x).
    inhibition = beta @ state
    growth = r * state * (1.0 - state - inhibition)
    return np.mod(noise + growth, 1.0)


def _logmap_add_rhs_modulo(state, r, beta, noise):
    """
    One update step of logistic maps with additive coupling, taken modulo 1
    (circular boundary conditions)

    :param state: current values [X1, X2, ..., Xn], shape (n, )
    :param r: float or array of floats, shape (n, ), growth rate(s)
    :param beta: c_{j-->i} == beta_{ij}, array of floats, shape (n, n);
        row i weights how the other maps act on map i
    :param noise: value added to the update before the modulo is taken
        (used directly in the arithmetic, so a number or an array)
    :return: next state, shape (n, )
    """
    # Uncoupled logistic update; the coupling term is subtracted afterwards.
    own_dynamics = r * state * (1.0 - state)
    drive = beta @ state
    return np.mod(noise + own_dynamics - drive, 1.0)

def generate_coupled_logmaps(
        n_samples: int,
        r: Any,
        beta: np.ndarray,
        noise_std: float = 0.0,
        coupling_type: str = 'additive',
        random_state: Optional[Any] = None
    ) -> np.ndarray:
    """
    Simulate coupled logistic maps with circular boundary conditions.

    The first row of the result is a uniformly random initial state; every
    later row is obtained by applying the selected update rule to the row
    before it, with fresh Gaussian noise added inside each update.

    :param n_samples: Number of time steps to simulate.
    :param r: Growth rate parameter(s) for the logistic maps.
    :param beta: Coupling matrix, diagonal empty, shape (n, n); its first
        dimension sets the number of maps.
    :param noise_std: Standard deviation of the Gaussian noise.
    :param coupling_type: Type of coupling ('additive' or 'multiplicative').
    :param random_state: Random state for reproducibility.
    :return: Time series data, shape (n_samples, n).
    :raises ValueError: If ``coupling_type`` is neither of the two options.
    """
    rng = check_random_state(random_state)
    n_maps = beta.shape[0]
    current = rng.rand(n_maps)
    trajectory = np.zeros((n_samples, n_maps))

    # Reject unknown coupling names before entering the simulation loop.
    if coupling_type not in ('additive', 'multiplicative'):
        raise ValueError("coupling_type must be 'additive' or 'multiplicative'")
    step = _logmap_add_rhs_modulo if coupling_type == 'additive' else _logmap_mul_rhs_modulo

    # Each `row` is a view into `trajectory`, so writing to it fills the output.
    for row in trajectory:
        kick = rng.normal(0, noise_std, size=n_maps)
        row[:] = current
        current = step(current, r, beta, kick)

    return trajectory

In [None]:
# Unidirectional coupling x0 -> x1 (that is, x -> y).
# coupling[j, i] holds the strength of j -> i; the generator documents its
# argument as beta[i, j] == c_{j-->i}, hence the transpose below.
coupling = np.zeros((2, 2))
coupling[0, 1] = 0.2

ts = generate_coupled_logmaps(
    n_samples=1000,
    r=3.8,
    beta=coupling.T,
    noise_std=0.1,
    coupling_type='additive',
    random_state=20251107,
)

plot_ts(ts)


# Time delay embedding is based on Takens' theorem.

We may use numpy's sliding_window_view to create the time delay embedding. (This is good for us, but not efficient for convolution.)

In [None]:
# Print the documentation of sliding_window_view, the NumPy helper used by
# time_delay_embedding_np further down.
help(np.lib.stride_tricks.sliding_window_view)

In [None]:
def time_delay_embedding(X: np.ndarray, m: int, tau: int) -> np.ndarray:
    """
    Perform time-delay embedding of a time series.

    Row ``i`` of the result is the concatenation
    ``[X[i], X[i + tau], ..., X[i + (m - 1) * tau]]``.

    :param X: Time series data, shape (n_samples, n_features); a 1-D series
        of shape (n_samples,) is treated as a single feature.
    :param m: Embedding dimension, at least 1.
    :param tau: Time delay in samples, non-negative.
    :return: Embedded time series as floats,
        shape (n_samples - (m - 1) * tau, n_features * m).
    :raises ValueError: If ``m < 1``, ``tau < 0``, or the series is shorter
        than the span ``(m - 1) * tau`` of one delay vector.
    """
    X = np.asarray(X)
    if X.ndim == 1:
        X = X[:, np.newaxis]
    if m < 1 or tau < 0:
        raise ValueError(f"m must be >= 1 and tau must be >= 0, got m={m}, tau={tau}")

    n_samples, n_features = X.shape
    n_embedded = n_samples - (m - 1) * tau
    if n_embedded < 0:
        raise ValueError(
            f"time series of length {n_samples} is too short for m={m}, tau={tau}"
        )

    embedded = np.zeros((n_embedded, n_features * m))
    # Fill one delay coordinate at a time: the j-th column block is the
    # series shifted by j * tau. This replaces the per-element Python loop.
    for j in range(m):
        start = j * tau
        embedded[:, j * n_features:(j + 1) * n_features] = X[start:start + n_embedded]

    return embedded

In [None]:
def time_delay_embedding_np(X: np.ndarray, m: int, tau: int) -> np.ndarray:
    """
    Perform time-delay embedding of a time series without an explicit loop.

    Row ``i`` of the result is the concatenation
    ``[X[i], X[i + tau], ..., X[i + (m - 1) * tau]]``.

    :param X: Time series data, shape (n_samples, n_features); a 1-D series
        of shape (n_samples,) is treated as a single feature.
    :param m: Embedding dimension, at least 1.
    :param tau: Time delay in samples, at least 1.
    :return: Embedded time series, shape (n_samples - (m - 1) * tau, n_features * m).
    :raises ValueError: If ``m < 1``, ``tau < 1``, or the series is too short
        to hold a single delay vector.
    """
    X = np.asarray(X)
    if X.ndim == 1:
        X = X[:, np.newaxis]
    if m < 1 or tau < 1:
        raise ValueError(f"m and tau must be >= 1, got m={m}, tau={tau}")

    n_samples, n_features = X.shape
    # One delay vector spans (m - 1) * tau + 1 consecutive samples; the window
    # must cover that whole span so that taking every tau-th sample yields
    # exactly m delay coordinates.
    span = (m - 1) * tau + 1
    n_embedded = n_samples - span + 1
    if n_embedded < 1:
        raise ValueError(
            f"time series of length {n_samples} is too short for m={m}, tau={tau}"
        )

    # Shape (n_embedded, 1, span, n_features): one window per start index.
    windows = np.lib.stride_tricks.sliding_window_view(X, window_shape=(span, n_features))
    return windows[:, 0, ::tau, :].reshape(n_embedded, n_features * m)

In [None]:
from pyparsing import line


def show_embedding_3d(X_embedded: np.ndarray, title: str = ''):
    """
    Scatter the first three embedding coordinates in a 3D plot.

    :param X_embedded: Embedded time series, shape (n_samples, 3); columns
        0, 1 and 2 are drawn on the x, y and z axes.
    :param title: Title of the plot.
    """
    # Kept from the original; the name is not used directly. Presumably the
    # import is there for its side effect of enabling the '3d' projection on
    # older Matplotlib versions -- TODO confirm it is still needed.
    from mpl_toolkits.mplot3d import Axes3D

    coords = (X_embedded[:, 0], X_embedded[:, 1], X_embedded[:, 2])

    figure = plt.figure(figsize=(8, 6))
    axis = figure.add_subplot(111, projection='3d')
    # Markers only (no connecting line), so the point cloud itself is visible.
    axis.plot(*coords, marker='o', linestyle='None', markersize=2, alpha=0.7)

    axis.set_xlabel('X(t)')
    axis.set_ylabel('X(t + τ)')
    axis.set_zlabel('X(t + 2τ)')
    axis.set_title(title)
    plt.show()

# Embed each variable separately (3 delay coordinates, lag 1) and plot it.
# The 0:1 / 1:2 slices keep the input two-dimensional, (n_samples, 1), which
# is the shape documented for time_delay_embedding.
show_embedding_3d(time_delay_embedding(ts[:, 0:1], m=3, tau=1), title='Time-Delay Embedding of X')
show_embedding_3d(time_delay_embedding(ts[:, 1:2], m=3, tau=1), title='Time-Delay Embedding of Y')

# Convergent cross-mapping

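For every point on the reconstructed manifold of $Y$ (the putative consequence), find its nearest neighbours and record their time indices, then look up the values of $x$ at those same indices.
Compare the weighted average of these values with the actual $x$.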

* naive neighbor lookup uses all pairwise distances, $O(n^2)$ complexity
* in lower dimensions KDTree algorithm is much more efficient

In [None]:
# Print the documentation of cKDTree, the nearest-neighbour index that
# cross_map builds on the source embedding.
help(cKDTree)

In [None]:
def cross_map(source: np.ndarray, target: np.ndarray, k: int):
    """Estimate ``target`` from the nearest neighbours found in ``source``.

    For every point of ``source`` the ``k`` nearest other points are looked
    up, and the ``target`` values at the neighbours' indices are averaged
    with weights ``exp(-d / d_nearest)`` normalised to sum to 1.

    Args:
        source (np.ndarray): n_samples x d_embed
        target (np.ndarray): n_samples
        k (int): number of neighbors to consider, 1 <= k < n_samples

    Returns:
        np.ndarray: n_samples weighted estimates of ``target``.

    Raises:
        ValueError: if ``k`` is outside ``[1, n_samples - 1]``.
    """
    source = np.asarray(source)
    target = np.asarray(target)
    n_points = len(source)
    # k + 1 neighbours are queried (the point itself included), so at least
    # k + 1 points are needed; otherwise the tree pads the result with
    # out-of-range indices.
    if k < 1 or k >= n_points:
        raise ValueError(
            f"k must satisfy 1 <= k < n_samples, got k={k}, n_samples={n_points}"
        )

    tree = cKDTree(source)
    # we shall exclude self-matches of points: drop the closest hit
    distances, indices = tree.query(source, k + 1)
    neighbor_dist = distances[:, 1:]
    neighbor_idx = indices[:, 1:]

    # search corresponding target values: (n_samples, k)
    estimates = target[neighbor_idx]

    # weights, summing to 1 in every row
    nearest = neighbor_dist[:, :1]
    has_scale = nearest > 0
    scaled = neighbor_dist / np.where(has_scale, nearest, 1.0)
    # A zero nearest distance (duplicate points) would give 0/0 = NaN. Use the
    # limit of the exponential weighting instead: all weight goes to the
    # neighbours at distance zero.
    weights = np.where(has_scale, np.exp(-scaled), (neighbor_dist == 0).astype(float))
    weights /= weights.sum(axis=1, keepdims=True)

    return (weights * estimates).sum(axis=1)

def cross_map_correlation(source: np.array, target: np.array, k: int):
    """Correlate cross-mapped estimates with the target for growing library sizes.

    Ten library sizes are taken on a logarithmic grid between 10 and
    ``len(source)`` and cast to integers. For each size ``n`` only the first
    ``n`` samples of ``source`` and ``target`` are used.

    Args:
        source (np.array): n_samples x d_embed
        target (np.array): n_samples
        k (int): number of neighbors passed on to ``cross_map``

    Returns:
        tuple: the array of library sizes and a list with one Pearson
        correlation per library size.
    """
    lengths = np.logspace(1, np.log10(len(source)), num=10, dtype=int)

    def _skill(n):
        # Correlation between estimate and truth on the first n samples.
        predicted = cross_map(source[:n], target[:n], k)
        return np.corrcoef(np.ravel(predicted[:n]), np.ravel(target[:n]))[0, 1]

    correlations = [_skill(n) for n in lengths]
    return lengths, correlations

def cross_map_correlation_plot(source: np.array, target: np.array, k: int, title: str = ''):
    """Plot cross-map correlation against library size on a log x axis.

    Args:
        source (np.array): n_samples x d_embed
        target (np.array): n_samples
        k (int): number of neighbors passed on to ``cross_map_correlation``
        title (str): title of the figure
    """
    library_sizes, skill = cross_map_correlation(source, target, k)

    _, axis = plt.subplots(figsize=(8, 4))
    axis.plot(library_sizes, skill, marker='o')
    axis.set(
        xscale='log',
        xlabel='Library Size',
        ylabel='Cross Map Correlation',
        title=title,
    )
    axis.grid()
    plt.show()

TODO:

* discuss why target is indexed [:, 0], multiple reasons!

In [None]:
# Delay-embed each variable on its own: 3 delay coordinates, lag 1.
x = time_delay_embedding(ts[:, 0:1], m=3, tau=1)
y = time_delay_embedding(ts[:, 1:2], m=3, tau=1)

# Each call returns (library sizes, correlations). The manifold of one
# variable is the source; the target is the first delay coordinate of the
# other variable's embedding.
print(cross_map_correlation(x, y[:, 0], k=6))
print(cross_map_correlation(y, x[:, 0], k=6))

# The same two analyses drawn as correlation versus library size.
cross_map_correlation_plot(x, y[:, 0], k=6, title = 'Y cross-mapped from X (does Y->X?)')
cross_map_correlation_plot(y, x[:, 0], k=6, title = 'X cross-mapped from Y (does X->Y?)')