<a href="https://colab.research.google.com/github/nonyeezeh/Research-Project-Code/blob/main/LBN_Sparse_BIC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
pip install pgmpy

Collecting pgmpy
  Downloading pgmpy-0.1.26-py3-none-any.whl.metadata (9.1 kB)
Downloading pgmpy-0.1.26-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pgmpy
Successfully installed pgmpy-0.1.26


In [2]:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.sampling import BayesianModelSampling
from sklearn.preprocessing import LabelEncoder
from pgmpy.estimators import HillClimbSearch, BicScore, MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
from sklearn.metrics import accuracy_score
from scipy.stats import entropy
import os
from tabulate import tabulate
from sklearn.model_selection import train_test_split
from pgmpy.estimators import AICScore

# Bayesian Network Data Generation 1000, 2000, ..., 10000 Samples (sparse)

In [42]:
# Function to safely normalize arrays to avoid NaN values
def safe_normalize(arr, axis=0):
    with np.errstate(divide='ignore', invalid='ignore'):
        norm_arr = arr / arr.sum(axis=axis, keepdims=True)
        norm_arr = np.nan_to_num(norm_arr)  # Replace NaNs with 0s
    return norm_arr

# Function to replace all-zero slices with uniform distribution
def replace_zeros_with_uniform(arr, axis=0):
    sum_along_axis = arr.sum(axis=axis, keepdims=True)
    mask = (sum_along_axis == 0)  # Mask for all-zero slices

    # Create a uniform distribution where the sum is zero
    uniform_distribution = np.ones_like(arr) / arr.shape[axis]

    # Where the mask is True, replace with the uniform distribution
    arr = np.where(mask, uniform_distribution, arr)

    # Normalize the resulting array to ensure it's a valid probability distribution
    arr = safe_normalize(arr, axis=axis)
    return arr

# Function to generate sparse CPDs
def generate_sparse_cpds():
    # Generate random probabilities for IR
    ir_probs = np.random.rand(3)
    ir_probs /= ir_probs.sum()  # Normalize to make it a valid probability distribution

    # Generate sparse probabilities for EI given IR (some probabilities set to zero)
    ei_given_ir_probs = np.random.rand(3, 3)
    ei_given_ir_probs[np.random.rand(3, 3) < 0.5] = 0  # Introduce sparsity by setting 50% of values to 0
    ei_given_ir_probs = replace_zeros_with_uniform(ei_given_ir_probs, axis=0)

    # Generate sparse probabilities for SP given IR and EI (some probabilities set to zero)
    sp_probs = np.random.rand(3, 3, 3)
    sp_probs[np.random.rand(3, 3, 3) < 0.5] = 0  # Introduce sparsity by setting 50% of values to 0
    sp_probs = replace_zeros_with_uniform(sp_probs, axis=0)

    return ir_probs, ei_given_ir_probs, sp_probs

# Function to generate and save samples (Sparse Version)
def generate_and_save_sparse_samples(ir_probs, ei_probs, sp_probs, sample_size, filename):
    output_data = []

    # Generate `sample_size` random samples
    for _ in range(sample_size):
        # Sample `IR` state based on `IR` probabilities
        ir_state_idx = np.random.choice(3, p=ir_probs)
        ir_state = ['low', 'medium', 'high'][ir_state_idx]
        ir_prob = ir_probs[ir_state_idx]

        # Sample `EI` state based on `EI` probabilities given `IR`
        ei_probs_given_ir = ei_probs[:, ir_state_idx]
        ei_state_idx = np.random.choice(3, p=ei_probs_given_ir)
        ei_state = ['poor', 'average', 'good'][ei_state_idx]
        ei_prob = ei_probs_given_ir[ei_state_idx]

        # Sample `SP` state based on `SP` probabilities given `IR` and `EI`
        sp_probs_given_ir_ei = sp_probs[:, ir_state_idx, ei_state_idx]
        sp_state_idx = np.random.choice(3, p=sp_probs_given_ir_ei)
        sp_state = ['decrease', 'stable', 'increase'][sp_state_idx]
        sp_prob = sp_probs_given_ir_ei[sp_state_idx]

        # Append sample data to output list
        output_data.append({
            'IR_State': ir_state,
            'IR_Prob': f'{ir_prob:.4f}',
            'EI_State': ei_state,
            'EI_Prob': f'{ei_prob:.4f}',
            'SP_Probabilities (decrease, stable, increase)': ', '.join([f'{prob:.4f}' for prob in sp_probs_given_ir_ei]),
            'Chosen_SP_State': sp_state,
            'Chosen_SP_Probability': f'{sp_prob:.4f}'
        })

    # Create a DataFrame from the output data
    output_df = pd.DataFrame(output_data)

    # Save the output DataFrame to a CSV file
    output_df.to_csv(filename, index=False)

    # Print the first few rows for visual confirmation
    print(f"\nSample size: {sample_size} - First few rows of generated sparse samples:\n")
    print(tabulate(output_df.head(), headers='keys', tablefmt='grid'))

# Generate and save sparse samples for sample sizes from 1000 to 10000 every 1000
sample_sizes = range(1000, 11000, 1000)

for size in sample_sizes:
    # Generate the sparse CPDs
    ir_probs, ei_given_ir_probs, sp_probs = generate_sparse_cpds()

    # Generate and save individual sparse samples for the given sample size
    generate_and_save_sparse_samples(ir_probs, ei_given_ir_probs, sp_probs, size, f'combined_probabilities_sparse_{size}.csv')

# Notify the user that the process is done
print("\nGeneration and saving of individual sparse samples complete for all sample sizes!")


Sample size: 1000 - First few rows of generated sparse samples:

+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|    | IR_State   |   IR_Prob | EI_State   |   EI_Prob | SP_Probabilities (decrease, stable, increase)   | Chosen_SP_State   |   Chosen_SP_Probability |
|  0 | high       |    0.3354 | poor       |    1      | 0.3908, 0.1945, 0.4147                          | increase          |                  0.4147 |
+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|  1 | low        |    0.245  | good       |    0.569  | 0.3492, 0.6508, 0.0000                          | decrease          |                  0.3492 |
+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|  2 | low      

# LBN Sparse BIC

In [43]:
# Sample sizes to loop through
sample_sizes = range(1000, 11000, 1000)

# Loop through each sample size
for sample_size in sample_sizes:
    print(f"\nProcessing sample size: {sample_size}")

    # Load the sparse dataset for the current sample size
    sparse_data_file = f'combined_probabilities_sparse_{sample_size}.csv'
    df_sparse = pd.read_csv(sparse_data_file)

    # Manually encode categorical variables for IR, EI, and SP
    ir_map = {'low': 0, 'medium': 1, 'high': 2}
    ei_map = {'poor': 0, 'average': 1, 'good': 2}
    sp_map = {'decrease': 0, 'stable': 1, 'increase': 2}

    df_sparse['IR_encoded'] = df_sparse['IR_State'].map(ir_map)
    df_sparse['EI_encoded'] = df_sparse['EI_State'].map(ei_map)
    df_sparse['SP_encoded'] = df_sparse['Chosen_SP_State'].map(sp_map)

    # Split the data into training, validation, and test sets
    X = df_sparse[['IR_encoded', 'EI_encoded']]
    y = df_sparse['SP_encoded']

    # Split into training (70%) and temp (30%) for validation and test
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42, shuffle=True)
    # Split temp into validation (50%) and test (50%)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42, shuffle=True)

    # Concatenate X and y to form the training set for learning the BN structure
    df_train = pd.concat([X_train, y_train], axis=1)

    # Define the Hill-Climb structure learning algorithm
    hc = HillClimbSearch(df_train)
    scoring_method = BicScore(df_train)

    # Estimate the best structure
    best_dag = hc.estimate(scoring_method=scoring_method)
    best_model = BayesianNetwork(best_dag.edges())

    # Display the learned structure (edges of the Bayesian Network)
    print(f"\nLearned Structure (Edges) for {sample_size} samples:")
    print(best_model.edges())

    # Learn the CPDs using Maximum Likelihood Estimation (MLE)
    best_model.fit(df_train, estimator=MaximumLikelihoodEstimator)

    # Check if the model is valid after learning the parameters
    assert best_model.check_model()

    # Print the learned CPDs (Conditional Probability Distributions)
    for cpd in best_model.get_cpds():
        print("\nCPD of", cpd.variable)
        print(cpd)

print("\nProcessing complete for all sample sizes.")


Processing sample size: 1000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 1000 samples:
[('IR_encoded', 'EI_encoded'), ('IR_encoded', 'SP_encoded'), ('SP_encoded', 'EI_encoded')]

CPD of IR_encoded
+---------------+----------+
| IR_encoded(0) | 0.241429 |
+---------------+----------+
| IR_encoded(1) | 0.414286 |
+---------------+----------+
| IR_encoded(2) | 0.344286 |
+---------------+----------+

CPD of EI_encoded
+---------------+---------------------+-----+---------------+---------------+
| IR_encoded    | IR_encoded(0)       | ... | IR_encoded(2) | IR_encoded(2) |
+---------------+---------------------+-----+---------------+---------------+
| SP_encoded    | SP_encoded(0)       | ... | SP_encoded(1) | SP_encoded(2) |
+---------------+---------------------+-----+---------------+---------------+
| EI_encoded(0) | 0.0                 | ... | 1.0           | 1.0           |
+---------------+---------------------+-----+---------------+---------------+
| EI_encoded(1) | 0.16216216216216217 | ... | 0.0           | 0.0           |

  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 2000 samples:
[('IR_encoded', 'SP_encoded'), ('IR_encoded', 'EI_encoded'), ('EI_encoded', 'SP_encoded')]

CPD of IR_encoded
+---------------+------------+
| IR_encoded(0) | 0.00642857 |
+---------------+------------+
| IR_encoded(1) | 0.604286   |
+---------------+------------+
| IR_encoded(2) | 0.389286   |
+---------------+------------+

CPD of SP_encoded
+---------------+--------------------+-----+---------------+---------------------+
| EI_encoded    | EI_encoded(0)      | ... | EI_encoded(2) | EI_encoded(2)       |
+---------------+--------------------+-----+---------------+---------------------+
| IR_encoded    | IR_encoded(0)      | ... | IR_encoded(1) | IR_encoded(2)       |
+---------------+--------------------+-----+---------------+---------------------+
| SP_encoded(0) | 0.3333333333333333 | ... | 1.0           | 0.15018315018315018 |
+---------------+--------------------+-----+---------------+---------------------+
| SP_encoded(1) | 0.33333333

  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 3000 samples:
[('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]

CPD of IR_encoded
+---------------+---------------------+-------------------+---------------------+
| EI_encoded    | EI_encoded(0)       | EI_encoded(1)     | EI_encoded(2)       |
+---------------+---------------------+-------------------+---------------------+
| IR_encoded(0) | 0.23815620998719592 | 0.0               | 0.24128686327077747 |
+---------------+---------------------+-------------------+---------------------+
| IR_encoded(1) | 0.3854033290653009  | 0.506108202443281 | 0.3672922252010724  |
+---------------+---------------------+-------------------+---------------------+
| IR_encoded(2) | 0.3764404609475032  | 0.493891797556719 | 0.3914209115281501  |
+---------------+---------------------+-------------------+---------------------+

CPD of SP_encoded
+---------------+---------------+-----+---------------------+--------------------+
| EI_

  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 4000 samples:
[('IR_encoded', 'EI_encoded'), ('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded')]

CPD of IR_encoded
+---------------+------------+
| IR_encoded(0) | 0.2925     |
+---------------+------------+
| IR_encoded(1) | 0.698571   |
+---------------+------------+
| IR_encoded(2) | 0.00892857 |
+---------------+------------+

CPD of EI_encoded
+---------------+---------------+--------------------+---------------+
| IR_encoded    | IR_encoded(0) | IR_encoded(1)      | IR_encoded(2) |
+---------------+---------------+--------------------+---------------+
| EI_encoded(0) | 0.0           | 0.9729038854805726 | 0.0           |
+---------------+---------------+--------------------+---------------+
| EI_encoded(1) | 0.0           | 0.0270961145194274 | 1.0           |
+---------------+---------------+--------------------+---------------+
| EI_encoded(2) | 1.0           | 0.0                | 0.0           |
+---------------+---------------+--------

  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 5000 samples:
[('IR_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded'), ('EI_encoded', 'SP_encoded')]

CPD of IR_encoded
+---------------+---------------+--------------------+---------------+
| EI_encoded    | EI_encoded(0) | EI_encoded(1)      | EI_encoded(2) |
+---------------+---------------+--------------------+---------------+
| IR_encoded(0) | 1.0           | 0.1241307371349096 | 1.0           |
+---------------+---------------+--------------------+---------------+
| IR_encoded(1) | 0.0           | 0.4937413073713491 | 0.0           |
+---------------+---------------+--------------------+---------------+
| IR_encoded(2) | 0.0           | 0.3821279554937413 | 0.0           |
+---------------+---------------+--------------------+---------------+

CPD of SP_encoded
+---------------+---------------+-----+--------------------+--------------------+
| EI_encoded    | EI_encoded(0) | ... | EI_encoded(2)      | EI_encoded(2)      |
+---------------+------

  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 6000 samples:
[('IR_encoded', 'SP_encoded'), ('IR_encoded', 'EI_encoded'), ('EI_encoded', 'SP_encoded')]

CPD of IR_encoded
+---------------+----------+
| IR_encoded(0) | 0.459762 |
+---------------+----------+
| IR_encoded(1) | 0.517857 |
+---------------+----------+
| IR_encoded(2) | 0.022381 |
+---------------+----------+

CPD of SP_encoded
+---------------+---------------------+-----+--------------------+--------------------+
| EI_encoded    | EI_encoded(0)       | ... | EI_encoded(2)      | EI_encoded(2)      |
+---------------+---------------------+-----+--------------------+--------------------+
| IR_encoded    | IR_encoded(0)       | ... | IR_encoded(1)      | IR_encoded(2)      |
+---------------+---------------------+-----+--------------------+--------------------+
| SP_encoded(0) | 0.4461538461538462  | ... | 0.2099056603773585 | 0.3723404255319149 |
+---------------+---------------------+-----+--------------------+--------------------+
| SP_en

  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 7000 samples:
[('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded'), ('SP_encoded', 'IR_encoded')]

CPD of EI_encoded
+---------------+----------+
| EI_encoded(0) | 0.645714 |
+---------------+----------+
| EI_encoded(1) | 0.174286 |
+---------------+----------+
| EI_encoded(2) | 0.18     |
+---------------+----------+

CPD of SP_encoded
+---------------+---------------------+-------------------+---------------------+
| EI_encoded    | EI_encoded(0)       | EI_encoded(1)     | EI_encoded(2)       |
+---------------+---------------------+-------------------+---------------------+
| SP_encoded(0) | 0.09766118836915297 | 0.734192037470726 | 0.9807256235827665  |
+---------------+---------------------+-------------------+---------------------+
| SP_encoded(1) | 0.5894437420986094  | 0.0               | 0.01927437641723356 |
+---------------+---------------------+-------------------+---------------------+
| SP_encoded(2) | 0.31289506953223767 | 0.2658079

  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 8000 samples:
[('EI_encoded', 'IR_encoded'), ('SP_encoded', 'EI_encoded')]

CPD of EI_encoded
+---------------+---------------+---------------------+-----------------------+
| SP_encoded    | SP_encoded(0) | SP_encoded(1)       | SP_encoded(2)         |
+---------------+---------------+---------------------+-----------------------+
| EI_encoded(0) | 0.0           | 0.9624494511842865  | 0.0012531328320802004 |
+---------------+---------------+---------------------+-----------------------+
| EI_encoded(1) | 1.0           | 0.0                 | 0.0                   |
+---------------+---------------+---------------------+-----------------------+
| EI_encoded(2) | 0.0           | 0.03755054881571346 | 0.9987468671679198    |
+---------------+---------------+---------------------+-----------------------+

CPD of IR_encoded
+---------------+----------------------+---------------+---------------+
| EI_encoded    | EI_encoded(0)        | EI_encoded(1) | EI_enc

  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 9000 samples:
[('IR_encoded', 'EI_encoded'), ('SP_encoded', 'IR_encoded'), ('SP_encoded', 'EI_encoded')]

CPD of IR_encoded
+---------------+---------------+---------------------+-------------------+
| SP_encoded    | SP_encoded(0) | SP_encoded(1)       | SP_encoded(2)     |
+---------------+---------------+---------------------+-------------------+
| IR_encoded(0) | 0.0           | 0.1549738219895288  | 0.721001221001221 |
+---------------+---------------+---------------------+-------------------+
| IR_encoded(1) | 0.0           | 0.8083769633507853  | 0.0               |
+---------------+---------------+---------------------+-------------------+
| IR_encoded(2) | 1.0           | 0.03664921465968586 | 0.278998778998779 |
+---------------+---------------+---------------------+-------------------+

CPD of EI_encoded
+---------------+--------------------+-----+---------------+--------------------+
| IR_encoded    | IR_encoded(0)      | ... | IR_encoded(2) |

  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 10000 samples:
[('IR_encoded', 'EI_encoded'), ('IR_encoded', 'SP_encoded'), ('SP_encoded', 'EI_encoded')]

CPD of IR_encoded
+---------------+----------+
| IR_encoded(0) | 0.405571 |
+---------------+----------+
| IR_encoded(1) | 0.388429 |
+---------------+----------+
| IR_encoded(2) | 0.206    |
+---------------+----------+

CPD of EI_encoded
+---------------+---------------+-----+---------------+---------------+
| IR_encoded    | IR_encoded(0) | ... | IR_encoded(2) | IR_encoded(2) |
+---------------+---------------+-----+---------------+---------------+
| SP_encoded    | SP_encoded(0) | ... | SP_encoded(1) | SP_encoded(2) |
+---------------+---------------+-----+---------------+---------------+
| EI_encoded(0) | 1.0           | ... | 0.0           | 0.0           |
+---------------+---------------+-----+---------------+---------------+
| EI_encoded(1) | 0.0           | ... | 1.0           | 1.0           |
+---------------+---------------+-----+-------

# Entropy

In [44]:
inference = VariableElimination(best_model)

# Sample sizes to loop through
sample_sizes = range(1000, 11000, 1000)

# Prepare a list to store K-L divergence results
kl_divergence_results = []

# Loop through each sample size
for sample_size in sample_sizes:
    print(f"\nProcessing K-L Divergence for sample size: {sample_size}")

    # Load the sparse dataset used in the LBN part
    sparse_data_file = f'combined_probabilities_sparse_{sample_size}.csv'
    df_sparse = pd.read_csv(sparse_data_file)

    # Manually encode categorical variables for IR, EI, and SP
    ir_map = {'low': 0, 'medium': 1, 'high': 2}
    ei_map = {'poor': 0, 'average': 1, 'good': 2}
    sp_map = {'decrease': 0, 'stable': 1, 'increase': 2}

    df_sparse['IR_encoded'] = df_sparse['IR_State'].map(ir_map)
    df_sparse['EI_encoded'] = df_sparse['EI_State'].map(ei_map)
    df_sparse['SP_encoded'] = df_sparse['Chosen_SP_State'].map(sp_map)

    # Use the test data split obtained from the LBN part
    X_test = df_sparse[['IR_encoded', 'EI_encoded']]
    y_test = df_sparse['SP_encoded']

    # Placeholder to store K-L divergence values
    kl_divergences = []

    # Loop through each row in the test data to make predictions
    for index, row in X_test.iterrows():
        sample_input = {'IR_encoded': int(row['IR_encoded']), 'EI_encoded': int(row['EI_encoded'])}

        # Perform inference using the learned Bayesian model
        predicted_sp_distribution = inference.query(variables=['SP_encoded'], evidence=sample_input)
        predicted_probs = predicted_sp_distribution.values

        # Extract the ground truth probabilities for SP from `y_test`
        ground_truth_probabilities_str = df_sparse['SP_Probabilities (decrease, stable, increase)'].iloc[index]
        ground_truth_probs = np.array(list(map(float, ground_truth_probabilities_str.strip('[]').split(','))))

        # Ensure the probabilities are non-zero to avoid division by zero
        epsilon = 1e-10
        ground_truth_probs = np.clip(ground_truth_probs, epsilon, 1)
        predicted_probs = np.clip(predicted_probs, epsilon, 1)

        # Normalize both probability distributions
        ground_truth_probs /= ground_truth_probs.sum()
        predicted_probs /= predicted_probs.sum()

        # Calculate the K-L divergence (Learned BN vs Ground Truth)
        kl_div = entropy(ground_truth_probs, predicted_probs)
        kl_divergences.append(kl_div)

    # Calculate the average K-L divergence and standard deviation over all test samples
    average_kl_divergence = np.mean(kl_divergences)
    std_kl_divergence = np.std(kl_divergences)

    # Append the results to the list for saving later
    kl_divergence_results.append({
        'Sample_Size': sample_size,
        'Average_KL_Divergence': average_kl_divergence,
        'Std_Dev': std_kl_divergence
    })

    # Print confirmation and result for this sample size
    print(f"\nAverage K-L Divergence for {sample_size} samples: {average_kl_divergence:.4f}, Std Dev: {std_kl_divergence:.4f}")

# Save the K-L divergence results to a CSV file
kl_divergence_df = pd.DataFrame(kl_divergence_results)
kl_divergence_df.to_csv('kl_div_LBN_sparse_bic.csv', index=False)

print("\nK-L divergence calculations complete and results saved to 'kl_div_LBN_sparse_bic.csv'.")


Processing K-L Divergence for sample size: 1000


  phi.values = phi.values / phi.values.sum()



Average K-L Divergence for 1000 samples: nan, Std Dev: nan

Processing K-L Divergence for sample size: 2000


  phi.values = phi.values / phi.values.sum()



Average K-L Divergence for 2000 samples: nan, Std Dev: nan

Processing K-L Divergence for sample size: 3000


  phi.values = phi.values / phi.values.sum()



Average K-L Divergence for 3000 samples: nan, Std Dev: nan

Processing K-L Divergence for sample size: 4000


  phi.values = phi.values / phi.values.sum()



Average K-L Divergence for 4000 samples: nan, Std Dev: nan

Processing K-L Divergence for sample size: 5000


  phi.values = phi.values / phi.values.sum()



Average K-L Divergence for 5000 samples: nan, Std Dev: nan

Processing K-L Divergence for sample size: 6000


  phi.values = phi.values / phi.values.sum()



Average K-L Divergence for 6000 samples: nan, Std Dev: nan

Processing K-L Divergence for sample size: 7000


  phi.values = phi.values / phi.values.sum()



Average K-L Divergence for 7000 samples: nan, Std Dev: nan

Processing K-L Divergence for sample size: 8000


  phi.values = phi.values / phi.values.sum()



Average K-L Divergence for 8000 samples: nan, Std Dev: nan

Processing K-L Divergence for sample size: 9000


  phi.values = phi.values / phi.values.sum()



Average K-L Divergence for 9000 samples: nan, Std Dev: nan

Processing K-L Divergence for sample size: 10000

Average K-L Divergence for 10000 samples: 0.0012, Std Dev: 0.0014

K-L divergence calculations complete and results saved to 'kl_div_LBN_sparse_bic.csv'.
