## Experiments: Ergodicity detection algorithms: Scaling of ergodicity in random symbolic dynamics 

    M. Süzen   
    (c) 2025

## Versions check

In [None]:
import statsmodels
import pandas
import numpy
import matplotlib
import sys

# Recorded output of the expression below, in the same order
# (Python, statsmodels, pandas, numpy, matplotlib); presumably the versions
# the notebook was last run with -- confirm before relying on them:
# (sys.version_info(major=3, minor=11, micro=6, releaselevel='final', serial=0),
#  '0.14.5',
#  '2.3.1',
#  '2.3.1',
#  '3.10.3')
sys.version_info, statsmodels.__version__, pandas.__version__, numpy.__version__, matplotlib.__version__

## Simulation Methods

In [None]:
import statsmodels.api as sm
from collections import namedtuple
import secrets
import numpy as np
import pandas as pd
from numpy.random import Generator, PCG64
from typing import Callable
from itertools import product
import matplotlib.pyplot as plt


# Record of one simulation: its configuration (sparsity, alphabet size,
# event size) together with the measurement stored in ``ergo_measure``.
simStorage = namedtuple(
    "simStorage",
    "sparsity alphabet_size event_size ergo_measure",
)


# Result of an ergodicity measurement: the mean ergodicity curve with its
# standard error, plus the fitted coefficients ``alpha`` and ``C`` with
# their standard errors.
measureErgo = namedtuple(
    "measureErgo",
    "ergo_measure_mean ergo_measure_se alpha alpha_se C C_se",
)


def observe_sum(pattern: np.ndarray) -> float:
    """Return the sum of all symbols in *pattern* as a Python float.

    Parameters
    ----------
    pattern : np.ndarray
        A single event (sequence of symbols).

    Returns
    -------
    float
        Sum over every element of ``pattern``.
    """
    return float(np.sum(pattern))


def events_ensemble(alphabet_size: int, event_size: int) -> np.ndarray:
    """
    Enumerate every event of a given length over a given alphabet.

    The ensemble is the Cartesian product of the alphabet
    ``{0, ..., alphabet_size - 1}`` with itself ``event_size`` times:
    symbols may repeat and order matters (``[0, 1]`` and ``[1, 0]`` are
    distinct events).

    Parameters
    ----------
    alphabet_size : int
        Number of symbols to use.
    event_size : int
        Number of symbols in a single event.

    Returns
    -------
    np.ndarray
        One event per row, in the order produced by ``itertools.product``.
        For positive sizes this has ``alphabet_size**event_size`` rows and
        ``event_size`` columns.

    Examples
    --------
    >>> events_ensemble(2, 2)
    array([[0, 0],
           [0, 1],
           [1, 0],
           [1, 1]])
    """
    return np.array(list(product(range(alphabet_size), repeat=event_size)))


def compute_observable(
    patterns: np.ndarray, observe_method: Callable[[np.ndarray], float]
) -> np.ndarray:
    """Apply *observe_method* to each pattern and collect the results.

    Parameters
    ----------
    patterns : np.ndarray
        Iterable of patterns; iterated along its first axis.
    observe_method : Callable[[np.ndarray], float]
        Function mapping one pattern to a scalar observable.

    Returns
    -------
    np.ndarray
        The observable of every pattern, in input order.
    """
    return np.array([observe_method(pattern) for pattern in patterns])


def log_ols_coefficients(x, y):
    """Fit ``log10(y)`` against ``log10(x)`` by ordinary least squares.

    A constant column is added to the regressor with ``sm.add_constant``
    and the two fitted parameters are returned as ``(C, alpha)`` in the
    order given by ``result.params``.
    """
    log_x = np.log10(x)
    log_y = np.log10(y)
    fit = sm.OLS(log_y, sm.add_constant(log_x)).fit()
    intercept, slope = fit.params
    return intercept, slope


def get_a_good_seed():
    """Return a fresh 32-bit integer seed drawn from the ``secrets`` module."""
    seed_bits = 32
    seed = secrets.randbits(seed_bits)
    return seed


def measure_ergodicity_scaling(
    ensemble_set,
    time_repeat: int = 40,
    experiment_repeat: int = 70,
    time_length: int = int(120e3),
) -> measureErgo:
    """
    Measure the approach to ergodicity of the summed observable.

    For each experiment, ``time_repeat`` random trajectories of length
    ``time_length`` are drawn from ``ensemble_set``. The ergodicity measure
    at time ``t`` is the mean (over trajectories) absolute difference
    between the running time average of the observable and its ensemble
    average. A line is then fitted by OLS to ``log10(measure)`` versus
    ``log10(t)``, giving an intercept ``C`` and a slope ``alpha``.

    Parameters
    ----------
    ensemble_set : np.ndarray
        Patterns to sample from, one pattern per row.
    time_repeat : int
        Number of independent trajectories per experiment.
    experiment_repeat : int
        Number of independent experiments, each with a fresh random seed.
    time_length : int
        Number of time steps in each trajectory.

    Returns
    -------
    measureErgo
        ``ergo_measure_mean`` / ``ergo_measure_se``: mean over experiments of
        the ergodicity curve and its standard error. ``alpha`` / ``C``: mean
        fitted slope and intercept, with ``alpha_se`` / ``C_se``. Standard
        errors are ``np.std`` (default ``ddof``) divided by
        ``sqrt(experiment_repeat)``.
    """
    # Everything below is identical for every experiment, so compute it once.
    scon = np.arange(1, time_length + 1)
    design = sm.add_constant(np.log10(scon))
    observables = compute_observable(ensemble_set, observe_sum)
    ensemble_avg = np.mean(observables)

    coefs = []
    alphas = []
    ergos = []
    for _ in range(experiment_repeat):
        rng = Generator(PCG64(get_a_good_seed()))
        # Sample the per-pattern sums directly rather than whole patterns:
        # same values as choosing patterns and summing them, without
        # materialising a (time_repeat, time_length, event_size) array.
        time_sims = rng.choice(observables, (time_repeat, time_length))
        # Running sum along time; dividing by the step count gives the
        # running time average of each trajectory.
        time_sims = time_sims.cumsum(axis=1)
        ergo = np.abs(time_sims / scon - ensemble_avg).mean(axis=0)
        ergos.append(ergo)
        result = sm.OLS(np.log10(ergo), design).fit()
        C1, alpha1 = result.params
        coefs.append(float(C1))
        alphas.append(float(alpha1))

    se_denom = np.sqrt(experiment_repeat)
    ergo_curves = np.array(ergos)
    return measureErgo(
        ergo_measure_mean=ergo_curves.mean(axis=0),
        ergo_measure_se=ergo_curves.std(axis=0) / se_denom,
        alpha=np.mean(alphas),
        alpha_se=np.std(alphas) / se_denom,
        C=np.mean(coefs),
        C_se=np.std(coefs) / se_denom,
    )

In [None]:
def measure_ergodicity_scaling_sparse(
    alphabet_size: int,
    event_size: int,
    sparsity: float,
    time_repeat: int = 40,
    experiment_repeat: int = 70,
    sparsity_repeat: int = 40,
    time_length: int = int(120e3),
) -> measureErgo:
    """
    Measure ergodicity scaling on random sub-ensembles of the full ensemble.

    For each of ``sparsity_repeat`` repetitions, a sub-ensemble of
    ``int(sparsity * alphabet_size**event_size)`` patterns is drawn from the
    full ensemble. Up to 1000 draws are made, stopping at the first whose
    mean summed observable matches the full ensemble's mean to within 1e-8.
    ``measure_ergodicity_scaling`` is then run on that sub-ensemble and the
    results are aggregated over repetitions.

    Parameters
    ----------
    alphabet_size : int
        Number of symbols in the alphabet.
    event_size : int
        Number of symbols per event.
    sparsity : float
        Fraction of the full ensemble size to draw for each sub-ensemble.
    time_repeat, experiment_repeat, time_length : int
        Passed through to ``measure_ergodicity_scaling``.
    sparsity_repeat : int
        Number of sub-ensembles to draw and measure.

    Returns
    -------
    measureErgo
        Means over sub-ensembles of the ergodicity curve, ``alpha`` and ``C``,
        with standard errors computed as ``np.std`` (default ``ddof``)
        divided by ``sqrt(sparsity_repeat)``.

    Raises
    ------
    ValueError
        If the requested sparsity yields fewer than one pattern.
    """
    import warnings  # local import: only used for the non-convergence notice

    max_draws = 1000
    mean_tolerance = 1e-8

    ensemble_set = events_ensemble(alphabet_size=alphabet_size, event_size=event_size)
    number_of_events = ensemble_set.shape[0]
    npatterns = int(sparsity * alphabet_size**event_size)
    if npatterns < 1:
        # Fail early with a clear message instead of searching in vain and
        # then failing inside the sampler on an empty sub-ensemble.
        raise ValueError(
            f"sparsity={sparsity} selects {npatterns} patterns out of "
            f"{number_of_events}; at least one pattern is required"
        )

    # Invariant across all draws: compute once rather than on every draw.
    ensemble_avg = np.mean(compute_observable(ensemble_set, observe_sum))
    rng = Generator(PCG64(get_a_good_seed()))

    coefs = []
    alphas = []
    ergos = []
    for _ in range(sparsity_repeat):
        for _ in range(max_draws):
            # NOTE: rng.choice samples with replacement by default, so a
            # sub-ensemble may contain the same pattern more than once.
            ensemble_of_events_index = rng.choice(number_of_events, size=npatterns)
            subensemble = ensemble_set[ensemble_of_events_index]
            ensemble_avg_sub = np.mean(compute_observable(subensemble, observe_sum))
            if np.abs(ensemble_avg_sub - ensemble_avg) < mean_tolerance:
                break
        else:
            # Best effort is kept (the last draw is used), but no longer silently.
            warnings.warn(
                f"no sub-ensemble matching the full ensemble mean was found "
                f"in {max_draws} draws; using the last draw",
                RuntimeWarning,
                stacklevel=2,
            )
        ergo_sparse = measure_ergodicity_scaling(
            subensemble,
            time_repeat=time_repeat,
            experiment_repeat=experiment_repeat,
            time_length=time_length,
        )
        coefs.append(ergo_sparse.C)
        alphas.append(ergo_sparse.alpha)
        ergos.append(ergo_sparse.ergo_measure_mean)

    # Every sub-ensemble has exactly npatterns rows, so report that directly;
    # this also keeps the message well-defined when sparsity_repeat is 0.
    print(f"subensemble {npatterns} ensemble_set {number_of_events}")

    se_denom = np.sqrt(sparsity_repeat)
    ergo_curves = np.array(ergos)
    return measureErgo(
        ergo_measure_mean=ergo_curves.mean(axis=0),
        ergo_measure_se=ergo_curves.std(axis=0) / se_denom,
        alpha=np.mean(alphas),
        alpha_se=np.std(alphas) / se_denom,
        C=np.mean(coefs),
        C_se=np.std(coefs) / se_denom,
    )

## Case 1: Symbolic space 

Each case is identified by the two-digit code `alphabet_size*10 + event_size`.
The set of 10 cases is `[65, 58, 55, 52, 48, 43, 33, 28, 25, 22]`.

In [None]:
# One (alphabet size, event size) pair per case, matched by position.
alphabet_sizes = [6, 5, 5, 5, 4, 4, 3, 2, 2, 2]
event_sizes = [5, 8, 5, 2, 8, 3, 3, 8, 5, 2]

sim_data = []
# NOTE: ``alpha`` is the alphabet size here, not the fitted coefficient.
for alpha, event_size in zip(alphabet_sizes, event_sizes):
    print(f"Sim {alpha}-{event_size}")
    ensemble_set = events_ensemble(alphabet_size=alpha, event_size=event_size)
    ergo_measure = measure_ergodicity_scaling(ensemble_set)
    # The whole ensemble is sampled, so the sparsity is recorded as 1.0.
    sim = simStorage(1.0, alpha, event_size, ergo_measure)
    sim_data.append(sim)

In [None]:
# Line/marker format strings, consumed one per simulation in plotting order.
styles = iter(["-", "--", "-.", ":", ".", ">", "<", "*", "x", "o"])
font = {"family": "monospace", "weight": "bold", "size": 16}
plt.rc("font", **font)

# Plot every 1500th sample of each simulation's mean ergodicity curve.
for sim in sim_data:
    _label = f"{sim.alphabet_size}-{sim.event_size}"
    plt.plot(
        sim.ergo_measure.ergo_measure_mean[::1500],
        next(styles),
        markersize=6,
        label=_label,
    )
# The axis scale applies to the whole figure, so it is set once after plotting.
plt.yscale("log")
plt.legend()
plt.xlabel("Time Step (x1500)")
plt.ylabel("Approach to Ergodicity")
plt.title("Scaling of full visits ")
plt.xlim([0.0, 49.0])
plt.savefig("full_visits.eps", format="eps", bbox_inches="tight")
plt.close()

# Gather the fitted coefficients of every simulation into table columns.
alphabet_sizes = [sim.alphabet_size for sim in sim_data]
event_sizes = [sim.event_size for sim in sim_data]
C = [sim.ergo_measure.C for sim in sim_data]
Cse = [sim.ergo_measure.C_se for sim in sim_data]
alpha = [sim.ergo_measure.alpha for sim in sim_data]
alpha_se = [sim.ergo_measure.alpha_se for sim in sim_data]

df = pd.DataFrame(
    {
        "alphabet_sizes": alphabet_sizes,
        "event_sizes": event_sizes,
        "C": C,
        "Cse": Cse,
        "alpha": alpha,
        "alpha_se": alpha_se,
    }
)
df.to_latex("full_visits.latex", index=False, float_format="%.4f")

## Case 2: Demonstration of sparse visits with 10-2 baseline 

* Event-size fixed at 2  alphabet sizes: 2 to 10 : Ergodicity Omega vs. time
* Table of Coeffs. 

In [None]:
event_size = 2
base_alphabet_size = 10
# Size of the baseline (largest) ensemble; sparsity is measured against it.
ensemble_size = base_alphabet_size**event_size
alphabet_sizes = np.arange(2, base_alphabet_size + 1)
sim_data = []
# NOTE: ``alpha`` is the alphabet size here, not the fitted coefficient.
for alpha in alphabet_sizes:
    print(f"Sim {alpha}-{event_size}")
    # Fraction of the baseline ensemble covered by this smaller alphabet.
    sparse_space = alpha**event_size
    sparsity = sparse_space / ensemble_size
    ensemble_set = events_ensemble(alphabet_size=alpha, event_size=event_size)
    ergo_measure = measure_ergodicity_scaling(ensemble_set)
    sim = simStorage(sparsity, alpha, event_size, ergo_measure)
    sim_data.append(sim)

In [None]:
# Line/marker format strings, consumed one per simulation in plotting order.
styles = iter(["-", "--", "-.", ":", ".", ">", "<", "*", "x", "o"])
font = {"family": "monospace", "weight": "bold", "size": 16}
plt.rc("font", **font)

# Plot every 1500th sample of each simulation's mean ergodicity curve,
# labelled "<alphabet size>-<event size>: <sparsity>".
for sim in sim_data:
    _label = f"{sim.alphabet_size}-{sim.event_size}: {sim.sparsity:.2f}"
    plt.plot(
        sim.ergo_measure.ergo_measure_mean[::1500],
        next(styles),
        markersize=6,
        label=_label,
    )
# The axis scale applies to the whole figure, so it is set once after plotting.
plt.yscale("log")
plt.legend()
plt.xlabel("Time Step (x1500)")
plt.ylabel("Approach to Ergodicity")
plt.title("Demonstration of sparse \n visits with 10-2 baseline ")
plt.xlim([0.0, 65.0])
plt.savefig("sparse_visit_10_2_baseline.eps", format="eps", bbox_inches="tight")
plt.close()

# Gather the fitted coefficients of every simulation into table columns.
alphabet_sizes = [sim.alphabet_size for sim in sim_data]
event_sizes = [sim.event_size for sim in sim_data]
sparsity = [sim.sparsity for sim in sim_data]
C = [sim.ergo_measure.C for sim in sim_data]
Cse = [sim.ergo_measure.C_se for sim in sim_data]
alpha = [sim.ergo_measure.alpha for sim in sim_data]
alpha_se = [sim.ergo_measure.alpha_se for sim in sim_data]

df = pd.DataFrame(
    {
        "alphabet_sizes": alphabet_sizes,
        "event_sizes": event_sizes,
        "sparsity": sparsity,
        "C": C,
        "Cse": Cse,
        "alpha": alpha,
        "alpha_se": alpha_se,
    }
)
df.to_latex("sparse_visit_10_2_baseline.latex", index=False, float_format="%.4f")

## Case 3: Demonstration of sparse visits with 18-3 baseline

* Event size fixed at 3; alphabet sizes 7 to 18: Ergodicity Omega vs. time
* Table of Coeffs.

In [None]:
event_size = 3
base_alphabet_size = 18
# Size of the baseline (largest) ensemble; sparsity is measured against it.
ensemble_size = base_alphabet_size**event_size
# Upper bound is derived from the baseline so the two cannot drift apart.
alphabet_sizes = np.arange(7, base_alphabet_size + 1)
sim_data = []
# NOTE: ``alpha`` is the alphabet size here, not the fitted coefficient.
for alpha in alphabet_sizes:
    print(f"Sim {alpha}-{event_size}")
    # Fraction of the baseline ensemble covered by this smaller alphabet.
    sparse_space = alpha**event_size
    sparsity = sparse_space / ensemble_size
    ensemble_set = events_ensemble(alpha, event_size)
    ergo_measure = measure_ergodicity_scaling(ensemble_set)
    sim = simStorage(
        sparsity=sparsity,
        alphabet_size=alpha,
        event_size=event_size,
        ergo_measure=ergo_measure,
    )
    sim_data.append(sim)

In [None]:
# Line/marker format strings, consumed one per simulation in plotting order.
styles = iter(["-", "--", "-.", ":", ".", ">", "<", "*", "x", "o", "o-", "x-"])
font = {"family": "monospace", "weight": "bold", "size": 14}
plt.rc("font", **font)

# Plot every 1500th sample of each simulation's mean ergodicity curve,
# labelled "<alphabet size>-<event size>: <sparsity>".
for sim in sim_data:
    _label = f"{sim.alphabet_size}-{sim.event_size}: {sim.sparsity:.2f}"
    plt.plot(
        sim.ergo_measure.ergo_measure_mean[::1500],
        next(styles),
        markersize=6,
        label=_label,
    )
# Axis scale and limits apply to the whole figure, so they are set once here.
# (An earlier per-iteration xlim of [0, 60] was always overridden by the
# final [0, 80] below and has been dropped.)
plt.yscale("log")
plt.legend()
plt.xlabel("Time Step (x1500)")
plt.ylabel("Approach to Ergodicity")
plt.title("Demonstration of sparse \n visits with 18-3 baseline ")
plt.xlim([0.0, 80.0])
plt.savefig("sparse_visit_18_3_baseline.eps", format="eps", bbox_inches="tight")
plt.close()

# Gather the fitted coefficients of every simulation into table columns.
alphabet_sizes = [sim.alphabet_size for sim in sim_data]
event_sizes = [sim.event_size for sim in sim_data]
sparsity = [sim.sparsity for sim in sim_data]
C = [sim.ergo_measure.C for sim in sim_data]
Cse = [sim.ergo_measure.C_se for sim in sim_data]
alpha = [sim.ergo_measure.alpha for sim in sim_data]
alpha_se = [sim.ergo_measure.alpha_se for sim in sim_data]

df = pd.DataFrame(
    {
        "alphabet_sizes": alphabet_sizes,
        "event_sizes": event_sizes,
        "sparsity": sparsity,
        "C": C,
        "Cse": Cse,
        "alpha": alpha,
        "alpha_se": alpha_se,
    }
)
df.to_latex("sparse_visit_18_3_baseline.latex", index=False, float_format="%.4f")

## Visualise Ensemble Sets

Example visualisation of ensemble sets.

In [None]:
# Render the 2-symbol, length-3 ensemble as a greyscale matrix (one event
# per column after the transpose) and save it without axis ticks.
font = dict(family="monospace", weight="bold", size=38)
plt.rc("font", **font)
ensemble_set = events_ensemble(alphabet_size=2, event_size=3)
plt.matshow(ensemble_set.T, cmap="Greys")
for _axis in (plt.gca().get_xaxis(), plt.gca().get_yaxis()):
    _axis.set_visible(False)
plt.title("Ensemble Set 2,3")
plt.savefig("ensemble_set_2_3.eps", format="eps", bbox_inches="tight")
plt.close()


In [None]:
# Render the 3-symbol, length-2 ensemble as a greyscale matrix (one event
# per column after the transpose) and save it without axis ticks.
ensemble_set = events_ensemble(alphabet_size=3, event_size=2)
font = dict(family="monospace", weight="bold", size=38)
plt.rc("font", **font)
plt.matshow(ensemble_set.T, cmap="Greys")
for _axis in (plt.gca().get_xaxis(), plt.gca().get_yaxis()):
    _axis.set_visible(False)
plt.title("Ensemble Set 3,2")
plt.savefig("ensemble_set_3_2.eps", format="eps", bbox_inches="tight")
plt.close()