In [1]:
# Location of the results CSV; handed to load_or_generate_data as `data_csv`.
DATA_CSV = "../results/basic_analysis.csv"

# Handed to load_or_generate_data as `generate_data`.
# NOTE(review): presumably False means "reuse the CSV at DATA_CSV instead of
# recomputing" — confirm against sdmarkov.analysis.utils.
GENERATE_DATA = False
# Parameters for generating data
MODEL_DIRECTORY = "../models/random_nk3"  # handed over as `model_directory`
UPDATE = "asynchronous"  # handed over as `update`
# Handed over as `n_random`.
# NOTE(review): this looks like the number of random groupings per network
# rather than a number of models — confirm how load_or_generate_data uses it.
N_RANDOM = 30
DEBUG = False  # handed over as `debug`

In [2]:
import os
import csv

import pandas as pd
import numpy as np
from pyboolnet.external.bnet2primes import bnet_text2primes
from pyboolnet.prime_implicants import percolate
from pyboolnet.file_exchange import primes2bnet
from pyboolnet.trap_spaces import compute_trap_spaces
from pyboolnet.state_transition_graphs import primes2stg

from sdmarkov.grouping import sd_grouping, null_grouping, random_grouping
from sdmarkov.scc_dags import get_scc_dag, get_attractor_states
from sdmarkov.transition_matrix import get_transition_matrix
from sdmarkov.matrix_operations import compress_matrix
from sdmarkov.graph import get_markov_chain

from sdmarkov.analysis.utils import load_or_generate_data, add_classification_metrics, network_level_metrics_df


## Data generation or loading

In [3]:
def get_general_info(bnet, bnet_name=None, num_runs=100, update="asynchronous", DEBUG=False):
    """
    Summarise one Boolean network and the Markov chains obtained from its groupings.

    The network is parsed, percolated with constants removed, and its state
    transition graph and transition matrix are built once. That matrix is then
    compressed under the "sd_mc" grouping, the "null_mc" grouping, and
    `num_runs` "random_mc" groupings (seeded 0 .. num_runs - 1).

    Parameters
    ----------
    bnet : str
        Network description passed to `bnet_text2primes`.
    bnet_name : optional
        Label written to the "bnet" column of every row.
    num_runs : int
        Number of random groupings to evaluate.
    update : str
        Update scheme passed to `primes2stg`.
    DEBUG : bool
        Forwarded to the sdmarkov helpers.

    Returns
    -------
    pandas.DataFrame
        Long format, one row per (method, run), with columns
        bnet | method | run | group_count | group_size_mean | group_size_std |
        mc_edges_count | N | sources | N_perc | sources_perc | min_trap_count |
        attractor_count.
        An empty DataFrame (no columns) is returned when percolation leaves no nodes.
    """

    def _count_sources(prime_dict):
        # A node is counted when its prime implicants refer only to itself.
        return sum(
            1
            for name, implicants in prime_dict.items()
            if implicants == [[{name: 0}], [{name: 1}]]
        )

    # --- parse the network; keep nodes in sorted name order ---
    parsed = bnet_text2primes(bnet)
    primes = dict(sorted(parsed.items(), key=lambda item: item[0]))
    node_total = len(primes)
    source_total = _count_sources(primes)

    # --- percolate; nothing to analyse if every node is removed ---
    percolated_primes = percolate(primes, remove_constants=True, copy=True)
    if not percolated_primes:
        return pd.DataFrame()
    perc_node_total = len(percolated_primes)
    perc_source_total = _count_sources(percolated_primes)
    percolated_bnet = primes2bnet(percolated_primes)

    # --- minimal trap spaces and attractors of the percolated network ---
    minimal_traps = compute_trap_spaces(percolated_primes, type_="min")
    stg = primes2stg(percolated_primes, update)
    attractor_states = get_attractor_states(get_scc_dag(stg), as_indices=True, DEBUG=DEBUG)

    # --- uncompressed transition matrix, shared by every grouping ---
    transition_matrix = get_transition_matrix(stg, DEBUG=DEBUG)

    # Network-level values are identical on every row, so build them once.
    network_columns = {
        "N": node_total,
        "sources": source_total,
        "N_perc": perc_node_total,
        "sources_perc": perc_source_total,
        "min_trap_count": len(minimal_traps),
        "attractor_count": len(attractor_states),
    }

    def _make_row(method_name, indices, run=0):
        # Empty groups are ignored for the size statistics; with no non-empty
        # group the statistics are taken over a single zero.
        non_empty = [group for group in indices if group]
        sizes = [len(group) for group in non_empty] or [0]

        # The edge count stays 0 when no (compressed) matrix is available.
        edge_total = 0
        if transition_matrix is not None:
            compressed = compress_matrix(transition_matrix, indices, DEBUG=DEBUG)
            if compressed is not None:
                chain = get_markov_chain(compressed, indices, DEBUG=DEBUG)
                edge_total = chain.number_of_edges()

        return {
            "bnet": bnet_name,
            "method": method_name,
            "run": run,
            "group_count": len(non_empty),
            "group_size_mean": np.mean(sizes),
            "group_size_std": np.std(sizes),
            "mc_edges_count": edge_total,
            **network_columns,
        }

    # --- deterministic groupings ---
    sd_indices = sd_grouping(percolated_bnet, DEBUG=DEBUG)
    null_indices = null_grouping(percolated_bnet, DEBUG=DEBUG)
    rows = [
        _make_row("sd_mc", sd_indices),
        _make_row("null_mc", null_indices),
    ]

    # --- random groupings, one per seed ---
    for seed in range(num_runs):
        shuffled_indices = random_grouping(sd_indices, null_indices, seed=seed, DEBUG=DEBUG)
        rows.append(_make_row("random_mc", shuffled_indices, run=seed))

    return pd.DataFrame(rows)


In [4]:
# Obtain the results table from load_or_generate_data, giving it the
# configuration constants and get_general_info as the data function.
# NOTE(review): n_random is presumably forwarded to get_general_info as
# `num_runs` and debug as `DEBUG` — confirm in sdmarkov.analysis.utils.
df = load_or_generate_data(
    data_csv=DATA_CSV,
    generate_data=GENERATE_DATA,
    data_function=get_general_info,
    model_directory=MODEL_DIRECTORY,
    update=UPDATE,
    n_random=N_RANDOM,
    debug=DEBUG,
)

Loaded cached results from ../results/basic_analysis.csv.


In [5]:
# Quick look at the results: first rows, overall size, and the methods present.
print("Preview of results DataFrame:")
display(df.head())

print(f"\nDataFrame shape: {df.shape}")
print(f"Methods present: {df['method'].unique()}")

Preview of results DataFrame:


Unnamed: 0,bnet,method,run,group_count,group_size_mean,group_size_std,mc_edges_count,N,sources,N_perc,sources_perc,min_trap_count,attractor_count,update_scheme
0,n010_000.bnet,sd_mc,0,16,64.0,32.0,46,10,0,10,0,2,2,asynchronous
1,n010_000.bnet,null_mc,0,3,341.333333,437.463395,5,10,0,10,0,2,2,asynchronous
2,n010_000.bnet,random_mc,0,16,64.0,13.720423,226,10,0,10,0,2,2,asynchronous
3,n010_000.bnet,random_mc,1,16,64.0,13.430376,226,10,0,10,0,2,2,asynchronous
4,n010_000.bnet,random_mc,2,16,64.0,13.448978,226,10,0,10,0,2,2,asynchronous



DataFrame shape: (2912, 14)
Methods present: ['sd_mc' 'null_mc' 'random_mc']


## Data analysis

In [12]:
# Network-level metrics derived from the per-run results table.
df_metrics = network_level_metrics_df(df=df)

# Compute maximum possible number of edges in the Markov chain
# Formula: (non_attractors)^2 + non_attractors * attractors + attractors^2
# where non_attractors is the number of groups minus the number of attractors.
# NOTE(review): the attractors^2 term allows edges between different
# attractors. The preview rows (5 edges at 3 groups / 2 attractors, 226 edges
# at 16 groups / 2 attractors) equal the same formula with a linear
# `attractors` term instead — confirm which bound is intended.
non_attractors = df_metrics["group_count"] - df_metrics["attractor_count"]
attractors = df_metrics["attractor_count"]

df_metrics["max_edges"] = non_attractors**2 + non_attractors * attractors + attractors**2

# Edge count as a fraction of the bound above.
df_metrics["normalized_density"] = df_metrics["mc_edges_count"] / df_metrics["max_edges"]

print("Preview of network-level metrics DataFrame:")
display(df_metrics.head())

print("\nDataFrame shape:", df_metrics.shape)
print("Methods present:", df_metrics.method.unique())

# The former CSV-writing block and `display(summary)` were removed: they used
# OUTPUT_FILE, all_keys, rows and summary, none of which is defined in this
# notebook, so the cell failed on a fresh kernel (Restart & Run All).

Raw CSV saved to: ../results/basic_analysis.csv
