In [5]:
import pandas as pd
import os
import re

In [2]:
def analyze_bnet_file(filepath):
    """
    Analyze a .bnet file and return key statistics.
    
    Returns:
        tuple: (number_of_nodes, total_number_of_regulators)
    """
    nodes = set()
    total_regulators = 0
    
    with open(filepath, 'r') as f:
        for line in f:
            line = line.strip()
            # Skip comments and empty lines
            if line.startswith('//') or not line:
                continue
            
            # Split by comma to get target and regulation rule
            if ',' in line:
                target, rule = line.split(',', 1)
                target = target.strip()
                rule = rule.strip()
                
                # Add target node
                nodes.add(target)
                
                # Count unique regulators in this rule
                # Extract all variable names (alphanumeric sequences)
                import re
                regulators = re.findall(r'\b[A-Za-z][A-Za-z0-9]*\b', rule)
                # Remove the target itself if it appears in the rule
                regulators = [reg for reg in regulators if reg != target]
                total_regulators += len(set(regulators))
    
    return len(nodes), total_regulators

In [6]:
from collections import defaultdict

def parse_bnet_file(filepath):
    """
    Parse a .bnet file and extract nodes with their regulators.
    
    Returns:
        dict: {node: set_of_regulators}
    """
    node_regulators = {}
    
    with open(filepath, 'r') as f:
        for line in f:
            line = line.strip()
            # Skip comments and empty lines
            if line.startswith('//') or not line:
                continue
            
            if ',' in line:
                target, rule = line.split(',', 1)
                target = target.strip()
                rule = rule.strip()
                
                # Extract all variable names from the Boolean rule
                regulators = re.findall(r'\b[A-Za-z][A-Za-z0-9_]*\b', rule)
                # Remove boolean operators and the target itself
                boolean_ops = {'AND', 'OR', 'NOT', 'and', 'or', 'not'}
                regulators = [reg for reg in regulators if reg not in boolean_ops and reg != target]
                
                node_regulators[target] = set(regulators)
    
    return node_regulators

def calculate_possible_models(node_regulators):
    """
    Calculate the theoretical number of possible Boolean network models.
    
    For each node with k regulators, there are 2^(2^k) possible Boolean functions.
    Total possibilities = product of all individual node possibilities.
    
    Args:
        node_regulators (dict): {node: set_of_regulators}
    
    Returns:
        tuple: (total_log2_possibilities, breakdown_by_node)
    """
    breakdown = {}
    total_log2 = 0
    
    for node, regulators in node_regulators.items():
        k = len(regulators)
        # Number of possible Boolean functions for k inputs: 2^(2^k)
        # We'll work in log2 space to avoid overflow
        log2_possibilities = 2**k
        total_log2 += log2_possibilities
        
        breakdown[node] = {
            'num_regulators': k,
            'log2_possibilities': log2_possibilities,
            'actual_possibilities': f"2^{2**k}" if k <= 4 else f"2^{2**k} (extremely large)"
        }
    
    return total_log2, breakdown

def print_diversity_analysis(filepath):
    """
    Print a comprehensive analysis of Boolean network diversity.
    """
    print(f"Analyzing Boolean Network Diversity for: {filepath}")
    print("=" * 60)
    
    node_regulators = parse_bnet_file(filepath)
    total_log2, breakdown = calculate_possible_models(node_regulators)
    
    print(f"\nTotal nodes: {len(node_regulators)}")
    print(f"\nNode-by-node analysis:")
    print("-" * 40)
    
    for node, info in breakdown.items():
        k = info['num_regulators']
        print(f"{node}: {k} regulators → 2^{2**k} possible functions")
    
    print(f"\nTotal theoretical diversity:")
    print(f"Log2 of total possibilities: {total_log2}")
    print(f"Actual number: 2^{total_log2}")
    
    # Practical interpretation
    print(f"\nPractical interpretation:")
    if total_log2 > 100:
        print(f"This is an astronomically large number (2^{total_log2})")
        print("Even with infinite computational power, exploring all possibilities is impossible.")
    elif total_log2 > 50:
        print(f"This is an extremely large number (2^{total_log2})")
        print("Sampling approaches like Bonesis are essential for exploration.")
    else:
        print(f"This is a large but potentially manageable number (2^{total_log2})")
    
    # Compare with your Bonesis results
    print(f"\nBonesis sampling context:")
    print("Your notebook generated 1000 solutions from this vast space.")
    print(f"This represents a tiny fraction: 1000 / 2^{total_log2}")
    
    return total_log2, breakdown

In [7]:
 # Analyze the MAPK model
mapk_path = "/home/spankaew/Git/astrologics/models/kadelka_models_repo/fMRI - Regulation of the Lac Operon_25790483.bnet"

print("MAPK Cancer Cell Fate Network Analysis:")
total_log2, breakdown = print_diversity_analysis(mapk_path)

MAPK Cancer Cell Fate Network Analysis:
Analyzing Boolean Network Diversity for: /home/spankaew/Git/astrologics/models/kadelka_models_repo/fMRI - Regulation of the Lac Operon_25790483.bnet

Total nodes: 8

Node-by-node analysis:
----------------------------------------
lac_mRNA: 1 regulators → 2^2 possible functions
lactose_breakdown: 2 regulators → 2^4 possible functions
lac_repressor: 1 regulators → 2^2 possible functions
allolactose: 1 regulators → 2^2 possible functions
CAP: 1 regulators → 2^2 possible functions
lac_operon: 2 regulators → 2^4 possible functions
cAMP: 2 regulators → 2^4 possible functions
lac_enzymes: 1 regulators → 2^2 possible functions

Total theoretical diversity:
Log2 of total possibilities: 22
Actual number: 2^22

Practical interpretation:
This is a large but potentially manageable number (2^22)

Bonesis sampling context:
Your notebook generated 1000 solutions from this vast space.
This represents a tiny fraction: 1000 / 2^22


In [4]:
file_path = '/home/spankaew/Git/astrologics/models'
file_names = [f for f in os.listdir(file_path) if f.endswith('.bnet')]
for file_name in file_names:
    full_path = os.path.join(file_path, file_name)
    num_nodes, num_regulators = analyze_bnet_file(full_path)
    # Collect results in a list of dicts
    results = []
    for file_name in file_names:
        full_path = os.path.join(file_path, file_name)
        num_nodes, num_regulators = analyze_bnet_file(full_path)
        results.append({
            'File': file_name.replace('.bnet', ''),
            'Nodes': num_nodes,
            'Regulators': num_regulators
        })
df_results = pd.DataFrame(results)
df_results.sort_values(by='Nodes', ascending=True)

Unnamed: 0,File,Nodes,Regulators
1,herault_hematopoiesis,15,33
2,toy_reprogramming,16,17
4,synthetic_random_diff,30,22
0,Invasion_Master_Model,32,156
3,calzone_cellfate,32,65


In [5]:
file_path = '/home/spankaew/Git/astrologics/dev/trap-spaces-as-siphons/models'
file_names = [f for f in os.listdir(file_path) if f.endswith('.bnet')]
for file_name in file_names:
    full_path = os.path.join(file_path, file_name)
    num_nodes, num_regulators = analyze_bnet_file(full_path)
    # Collect results in a list of dicts
    results = []
    for file_name in file_names:
        full_path = os.path.join(file_path, file_name)
        num_nodes, num_regulators = analyze_bnet_file(full_path)
        results.append({
            'File': file_name.replace('.bnet', ''),
            'Nodes': num_nodes,
            'Regulators': num_regulators
        })
df_results = pd.DataFrame(results)
df_results

Unnamed: 0,File,Nodes,Regulators
0,HIV-1,140,12
1,SN5,2747,9975
2,FIBROBLASTS,141,12
3,IMMUNE-SYSTEM,165,101
4,INTERFERON-1,123,201
5,Kynurenine_pathway_stable,152,316
6,ER-STRESS,184,21
7,IL_6_Signalling,88,12
8,turei_2016,4692,15513
9,TCR-TLR5-SIGNALING-2018,131,269


In [None]:
df_results.sort_values(by='Nodes', ascending=True)

Unnamed: 0,File,Nodes,Regulators
16,InflammatoryBowelDisease,48,239
17,TLGLSurvival,63,12
23,SIGNALING-PATHWAY-FOR-BUTANOL-PRODUCTION,68,12
11,Colitis_associated_colon_cancer,72,12
20,Executable_file_for_CaSQ_derived_mast_cell_act...,75,112
7,IL_6_Signalling,88,12
21,Corral_ThIL17diff_15jan2021,94,165
29,korkut_2015a,100,423
24,Regan2020_Adhesion_CIP_Migration_CellCycle_Apo...,122,370
4,INTERFERON-1,123,201


In [9]:
file_path = '/home/spankaew/Git/astrologics/dev/GRN_dataset/models/converted_bnet'
file_names = [f for f in os.listdir(file_path) if f.endswith('.bnet')]
for file_name in file_names:
    full_path = os.path.join(file_path, file_name)
    num_nodes, num_regulators = analyze_bnet_file(full_path)
    # Collect results in a list of dicts
    results = []
    for file_name in file_names:
        full_path = os.path.join(file_path, file_name)
        num_nodes, num_regulators = analyze_bnet_file(full_path)
        results.append({
            'File': file_name.replace('.bnet', ''),
            'Nodes': num_nodes,
            'Regulators': num_regulators
        })
df_results = pd.DataFrame(results)
df_results = df_results.sort_values(by='Nodes', ascending=True)

In [11]:
df_results.to_csv('kadelka_network_properties.csv', index=False)