<a href="https://colab.research.google.com/github/nonyeezeh/Research-Project-Code/blob/main/LBN_Sparse_BIC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [5]:
pip install pgmpy



In [6]:
import numpy as np
import pandas as pd
from pgmpy.estimators import HillClimbSearch, BicScore, MaximumLikelihoodEstimator
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import BayesianEstimator
from sklearn.model_selection import train_test_split
from scipy.stats import entropy
from tabulate import tabulate

# Bayesian Network Data Generation 500, ..., 20000 Samples (dense)

In [37]:
# Function to generate sparse CPDs
def generate_sparse_cpds():
    # Generate random probabilities for IR
    ir_probs = np.random.rand(3)
    ir_probs /= ir_probs.sum()  # Normalize to make it a valid probability distribution

    # Generate random sparse probabilities for EI given IR (introduce zeros or very small values)
    ei_given_ir_probs = np.random.rand(3, 3)
    ei_given_ir_probs[ei_given_ir_probs < 0.5] = 0  # Introduce sparsity; any values less than 0.5 are = 0
    ei_given_ir_probs_sum = ei_given_ir_probs.sum(axis=0, keepdims=True)

    # If any column sums to zero, replace it with uniform probabilities to avoid NaN
    ei_given_ir_probs[:, ei_given_ir_probs_sum[0] == 0] = 1 / 3
    ei_given_ir_probs /= ei_given_ir_probs.sum(axis=0, keepdims=True)

    # Generate random sparse probabilities for SP given IR and EI
    sp_probs = np.random.rand(3, 3, 3)
    sp_probs[sp_probs < 0.2] = 0  # Introduce sparsity
    sp_probs_sum = sp_probs.sum(axis=0, keepdims=True)

    # If any column sums to zero, replace it with uniform probabilities to avoid NaN
    sp_probs[:, sp_probs_sum[0] == 0] = 1 / 3
    sp_probs /= sp_probs.sum(axis=0, keepdims=True)

    return ir_probs, ei_given_ir_probs, sp_probs

# Function to generate and save samples
def generate_and_save_sparse_samples(ir_probs, ei_probs, sp_probs, sample_size, filename):
    output_data = []

    # Generate `sample_size` random samples
    for _ in range(sample_size):
        # Sample `IR` state based on `IR` probabilities
        ir_state_idx = np.random.choice(3, p=ir_probs)
        ir_state = ['low', 'medium', 'high'][ir_state_idx]
        ir_prob = ir_probs[ir_state_idx]

        # Sample `EI` state based on `EI` probabilities given `IR`
        ei_probs_given_ir = ei_probs[:, ir_state_idx]
        ei_state_idx = np.random.choice(3, p=ei_probs_given_ir)
        ei_state = ['poor', 'average', 'good'][ei_state_idx]
        ei_prob = ei_probs_given_ir[ei_state_idx]

        # Sample `SP` state based on `SP` probabilities given `IR` and `EI`
        sp_probs_given_ir_ei = sp_probs[:, ir_state_idx, ei_state_idx]
        sp_state_idx = np.random.choice(3, p=sp_probs_given_ir_ei)
        sp_state = ['decrease', 'stable', 'increase'][sp_state_idx]
        sp_prob = sp_probs_given_ir_ei[sp_state_idx]

        # Append sample data to output list
        output_data.append({
            'IR_State': ir_state,
            'IR_Prob': f'{ir_prob:.4f}',
            'EI_State': ei_state,
            'EI_Prob': f'{ei_prob:.4f}',
            'SP_Probabilities (decrease, stable, increase)': ', '.join([f'{prob:.4f}' for prob in sp_probs_given_ir_ei]),
            'Chosen_SP_State': sp_state,
            'Chosen_SP_Probability': f'{sp_prob:.4f}'
        })

    # Create a DataFrame from the output data
    output_df = pd.DataFrame(output_data)

    # Save the output DataFrame to a CSV file
    output_df.to_csv(filename, index=False)

    # Print the first few rows for visual confirmation
    print(f"\nSample size: {sample_size} - First few rows of generated sparse samples:\n")
    print(tabulate(output_df.head(), headers='keys', tablefmt='grid'))

# Generate and save samples for sample sizes from 50 to 20000
sample_sizes = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000]

for size in sample_sizes:
    # Generate the sparse CPDs
    ir_probs, ei_given_ir_probs, sp_probs = generate_sparse_cpds()

    # Generate and save individual sparse samples for the given sample size
    generate_and_save_sparse_samples(ir_probs, ei_given_ir_probs, sp_probs, size, f'combined_probabilities_sparse_{size}.csv')

# Notify the user that the process is done
print("\nGeneration and saving of individual sparse samples complete for all sample sizes!")


Sample size: 50 - First few rows of generated sparse samples:

+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|    | IR_State   |   IR_Prob | EI_State   |   EI_Prob | SP_Probabilities (decrease, stable, increase)   | Chosen_SP_State   |   Chosen_SP_Probability |
|  0 | medium     |    0.1264 | average    |    0.4106 | 0.1751, 0.4234, 0.4015                          | stable            |                  0.4234 |
+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|  1 | low        |    0.8463 | poor       |    0.4436 | 0.5266, 0.0000, 0.4734                          | decrease          |                  0.5266 |
+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|  2 | low        

# LBN Sparse BIC & Entropy

In [38]:
# Sample sizes to loop through
sample_sizes = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000]

# Initialize list to store K-L divergence and standard deviation values for each sample size
results = []

# Small smoothing value to avoid zero probabilities
epsilon = 1e-10

# Loop through each sample size
for sample_size in sample_sizes:
    print(f"\nProcessing sample size: {sample_size}")

    # Load the dense dataset for the current sample size
    sparse_data_file = f'combined_probabilities_sparse_{sample_size}.csv'
    df_sparse = pd.read_csv(sparse_data_file)

    # Manually encode categorical variables for IR, EI, and SP
    ir_map = {'low': 0, 'medium': 1, 'high': 2}
    ei_map = {'poor': 0, 'average': 1, 'good': 2}
    sp_map = {'decrease': 0, 'stable': 1, 'increase': 2}

    df_sparse['IR_encoded'] = df_sparse['IR_State'].map(ir_map)
    df_sparse['EI_encoded'] = df_sparse['EI_State'].map(ei_map)
    df_sparse['SP_encoded'] = df_sparse['Chosen_SP_State'].map(sp_map)

    # Split the data into training, validation, and test sets
    X = df_sparse[['IR_encoded', 'EI_encoded']]
    y = df_sparse['SP_encoded']

    # Split into training (70%) and temp (30%) for validation and test
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42, shuffle=True)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42, shuffle=True)

    # Concatenate X and y to form the training set for learning the BN structure
    df_train = pd.concat([X_train, y_train], axis=1)
    print("Training data:")
    print(df_train.head())

    # Enforce the inclusion of `SP_encoded` using priors (fixed edges)
    priors = [
        ('IR_encoded', 'SP_encoded'),
        ('EI_encoded', 'SP_encoded')
    ]

    # Perform Hill Climb Search with priors (fixed edges)
    hc = HillClimbSearch(df_train)
    best_dag = hc.estimate(scoring_method=BicScore(df_train), fixed_edges=priors)

    # Initialize BayesianNetwork and print edges
    best_model = BayesianNetwork(best_dag.edges())
    print("Learned structure (edges):", best_model.edges())

    if len(best_model.edges()) == 0:
        print("No edges learned. Skipping to next sample size.")
        continue

    # Try fitting parameters using BayesianEstimator
    try:
        best_model.fit(df_train, estimator=BayesianEstimator, prior_type="BDeu", equivalent_sample_size=5)
        print("Model fitted successfully.")
    except Exception as e:
        print("Error during fitting:", str(e))
        continue

    # Check model validity
    if not best_model.check_model():
        print("Model check failed.")
        continue

    # --- K-L Divergence and Standard Deviation Calculation Block ---
    # Predict on test data
    try:
        inference = best_model.predict(X_test)
        predicted_probabilities = inference['SP_encoded']

        # Calculate K-L divergence and standard deviation between the ground truth and learned BN
        ground_truth_probabilities = y_test.value_counts(normalize=True).sort_index()
        predicted_probabilities = predicted_probabilities.value_counts(normalize=True).sort_index()

        # Reindex both distributions to have the same set of categories and add smoothing
        all_categories = sorted(set(ground_truth_probabilities.index).union(set(predicted_probabilities.index)))
        ground_truth_probabilities = ground_truth_probabilities.reindex(all_categories, fill_value=epsilon)
        predicted_probabilities = predicted_probabilities.reindex(all_categories, fill_value=epsilon)

        # Calculate K-L divergence with smoothing
        kl_divergence = entropy(pk=ground_truth_probabilities, qk=predicted_probabilities)

        # Standard deviation between predicted probabilities and actual probabilities
        std_dev = np.std(predicted_probabilities - ground_truth_probabilities)

        # Append results for this sample size
        results.append({
            'Sample_Size': sample_size,
            'K-L_Divergence': kl_divergence,
            'Standard_Deviation': std_dev
        })

        # Print the K-L divergence and standard deviation for this sample size
        print(f"\nResults for sample size {sample_size}:")
        print(f"K-L Divergence: {kl_divergence:.4f}")
        print(f"Standard Deviation: {std_dev:.4f}")

    except Exception as e:
        print("Error during prediction:", str(e))
        continue

# After processing all sample sizes, save results to CSV and display them
results_df = pd.DataFrame(results)
results_df.to_csv('kl_std_bic_sparse_results.csv', index=False)

# Print all results after saving to CSV
print("\nAll results have been saved to 'kl_std_bic_sparse_results.csv'.")
print(results_df)


Processing sample size: 50
Training data:
    IR_encoded  EI_encoded  SP_encoded
6            0           2           2
41           1           0           1
46           0           1           1
47           0           1           0
15           0           2           2


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded')]
Model fitted successfully.


  0%|          | 0/4 [00:00<?, ?it/s]


Results for sample size 50:
K-L Divergence: 19.7875
Standard Deviation: 0.6693

Processing sample size: 100
Training data:
    IR_encoded  EI_encoded  SP_encoded
11           0           0           1
47           0           0           2
85           1           2           0
28           0           0           1
93           0           0           1


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/4 [00:00<?, ?it/s]


Results for sample size 100:
K-L Divergence: 10.1200
Standard Deviation: 0.3810

Processing sample size: 150
Training data:
     IR_encoded  EI_encoded  SP_encoded
81            1           0           1
133           2           0           2
137           1           0           0
75            1           0           1
109           2           0           2


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/5 [00:00<?, ?it/s]


Results for sample size 150:
K-L Divergence: 0.3530
Standard Deviation: 0.2485

Processing sample size: 200
Training data:
     IR_encoded  EI_encoded  SP_encoded
169           2           2           0
97            1           1           1
31            0           1           2
12            0           0           2
35            2           2           0


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('IR_encoded', 'EI_encoded'), ('EI_encoded', 'SP_encoded')]
Model fitted successfully.


  0%|          | 0/5 [00:00<?, ?it/s]


Results for sample size 200:
K-L Divergence: 4.8326
Standard Deviation: 0.1700

Processing sample size: 250
Training data:
     IR_encoded  EI_encoded  SP_encoded
82            2           0           0
29            0           2           2
126           0           2           1
79            2           0           2
86            0           2           1


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/3 [00:00<?, ?it/s]


Results for sample size 250:
K-L Divergence: 12.8939
Standard Deviation: 0.4388

Processing sample size: 300
Training data:
     IR_encoded  EI_encoded  SP_encoded
194           1           2           0
101           2           0           1
68            1           2           0
224           2           0           1
37            2           0           0


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/7 [00:00<?, ?it/s]


Results for sample size 300:
K-L Divergence: 0.0855
Standard Deviation: 0.1133

Processing sample size: 350
Training data:
     IR_encoded  EI_encoded  SP_encoded
139           0           1           2
79            2           0           2
116           2           0           2
18            0           1           0
223           0           1           2


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/6 [00:00<?, ?it/s]


Results for sample size 350:
K-L Divergence: 5.6436
Standard Deviation: 0.2853

Processing sample size: 400
Training data:
     IR_encoded  EI_encoded  SP_encoded
157           2           1           2
109           1           1           0
17            1           1           2
347           0           2           1
24            1           0           0


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('IR_encoded', 'EI_encoded'), ('EI_encoded', 'SP_encoded')]
Model fitted successfully.


  0%|          | 0/5 [00:00<?, ?it/s]


Results for sample size 400:
K-L Divergence: 2.3622
Standard Deviation: 0.1421

Processing sample size: 450
Training data:
     IR_encoded  EI_encoded  SP_encoded
409           2           0           0
108           2           0           2
229           0           0           1
420           1           1           2
118           0           0           1


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('IR_encoded', 'EI_encoded'), ('EI_encoded', 'SP_encoded')]
Model fitted successfully.


  0%|          | 0/8 [00:00<?, ?it/s]


Results for sample size 450:
K-L Divergence: 0.0549
Standard Deviation: 0.1183

Processing sample size: 500
Training data:
     IR_encoded  EI_encoded  SP_encoded
5             0           0           0
116           1           2           2
45            1           2           2
16            0           0           1
462           0           0           1


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('IR_encoded', 'EI_encoded'), ('EI_encoded', 'SP_encoded')]
Model fitted successfully.


  0%|          | 0/5 [00:00<?, ?it/s]


Results for sample size 500:
K-L Divergence: 3.2457
Standard Deviation: 0.1256

Processing sample size: 550
Training data:
     IR_encoded  EI_encoded  SP_encoded
42            1           1           0
349           0           1           0
523           1           0           2
469           1           2           2
399           0           1           1


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('IR_encoded', 'EI_encoded'), ('EI_encoded', 'SP_encoded')]
Model fitted successfully.


  0%|          | 0/8 [00:00<?, ?it/s]


Results for sample size 550:
K-L Divergence: 0.0193
Standard Deviation: 0.0743

Processing sample size: 600
Training data:
     IR_encoded  EI_encoded  SP_encoded
108           0           0           2
272           0           2           2
599           2           0           0
479           2           0           0
436           2           2           0


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/7 [00:00<?, ?it/s]


Results for sample size 600:
K-L Divergence: 2.2263
Standard Deviation: 0.1133

Processing sample size: 650
Training data:
     IR_encoded  EI_encoded  SP_encoded
580           0           2           1
211           1           1           0
9             1           1           2
613           0           2           1
300           1           1           1


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('IR_encoded', 'EI_encoded'), ('EI_encoded', 'SP_encoded')]
Model fitted successfully.


  0%|          | 0/4 [00:00<?, ?it/s]


Results for sample size 650:
K-L Divergence: 0.0979
Standard Deviation: 0.1323

Processing sample size: 700
Training data:
     IR_encoded  EI_encoded  SP_encoded
357           0           1           1
649           2           2           0
291           0           2           1
420           1           0           0
177           2           2           1


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('IR_encoded', 'EI_encoded'), ('EI_encoded', 'SP_encoded')]
Model fitted successfully.


  0%|          | 0/6 [00:00<?, ?it/s]


Results for sample size 700:
K-L Divergence: 0.5236
Standard Deviation: 0.3368

Processing sample size: 750
Training data:
     IR_encoded  EI_encoded  SP_encoded
382           2           1           2
407           0           1           0
235           0           1           2
456           0           1           0
209           2           0           2


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/6 [00:00<?, ?it/s]


Results for sample size 750:
K-L Divergence: 0.0050
Standard Deviation: 0.0261

Processing sample size: 800
Training data:
     IR_encoded  EI_encoded  SP_encoded
404           1           1           1
522           1           1           1
749           1           1           1
426           1           1           1
41            2           0           1


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('IR_encoded', 'EI_encoded'), ('EI_encoded', 'SP_encoded')]
Model fitted successfully.


  0%|          | 0/5 [00:00<?, ?it/s]


Results for sample size 800:
K-L Divergence: 0.4909
Standard Deviation: 0.0670

Processing sample size: 850
Training data:
     IR_encoded  EI_encoded  SP_encoded
136           1           0           1
703           0           1           2
425           1           2           0
140           0           1           0
173           2           2           2


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('IR_encoded', 'EI_encoded'), ('EI_encoded', 'SP_encoded')]
Model fitted successfully.


  0%|          | 0/7 [00:00<?, ?it/s]


Results for sample size 850:
K-L Divergence: 3.5343
Standard Deviation: 0.1587

Processing sample size: 900
Training data:
     IR_encoded  EI_encoded  SP_encoded
687           0           0           0
705           0           0           0
296           1           1           1
172           2           0           2
863           0           0           1


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('IR_encoded', 'EI_encoded'), ('EI_encoded', 'SP_encoded')]
Model fitted successfully.


  0%|          | 0/6 [00:00<?, ?it/s]


Results for sample size 900:
K-L Divergence: 5.3770
Standard Deviation: 0.3562

Processing sample size: 950
Training data:
     IR_encoded  EI_encoded  SP_encoded
886           2           1           0
694           0           2           0
212           2           2           0
79            1           2           1
148           0           2           0


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/5 [00:00<?, ?it/s]


Results for sample size 950:
K-L Divergence: 7.0100
Standard Deviation: 0.3010

Processing sample size: 1000
Training data:
     IR_encoded  EI_encoded  SP_encoded
541           0           2           0
440           2           2           1
482           2           2           0
422           2           1           2
778           2           1           2


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/7 [00:00<?, ?it/s]


Results for sample size 1000:
K-L Divergence: 0.5850
Standard Deviation: 0.2391

Processing sample size: 2000
Training data:
      IR_encoded  EI_encoded  SP_encoded
836            0           0           2
575            0           0           1
557            1           0           0
1235           2           1           1
1360           0           0           1


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/5 [00:00<?, ?it/s]


Results for sample size 2000:
K-L Divergence: 4.4455
Standard Deviation: 0.2125

Processing sample size: 3000
Training data:
      IR_encoded  EI_encoded  SP_encoded
611            1           2           0
530            2           0           2
2787           2           2           0
49             2           0           2
1883           2           1           0


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/6 [00:00<?, ?it/s]


Results for sample size 3000:
K-L Divergence: 0.2431
Standard Deviation: 0.1692

Processing sample size: 4000
Training data:
      IR_encoded  EI_encoded  SP_encoded
3215           0           2           1
3126           1           0           2
697            1           0           2
3613           0           0           1
2374           0           0           2


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/4 [00:00<?, ?it/s]


Results for sample size 4000:
K-L Divergence: 0.1436
Standard Deviation: 0.1800

Processing sample size: 5000
Training data:
      IR_encoded  EI_encoded  SP_encoded
1840           0           1           2
2115           0           2           2
4437           0           2           2
1146           0           2           0
2486           0           1           1


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('IR_encoded', 'EI_encoded'), ('EI_encoded', 'SP_encoded')]
Model fitted successfully.


  0%|          | 0/7 [00:00<?, ?it/s]


Results for sample size 5000:
K-L Divergence: 0.0381
Standard Deviation: 0.0710

Processing sample size: 6000
Training data:
      IR_encoded  EI_encoded  SP_encoded
4775           0           0           2
927            0           0           1
3258           1           0           1
45             1           2           1
1260           1           2           0


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/5 [00:00<?, ?it/s]


Results for sample size 6000:
K-L Divergence: 0.0517
Standard Deviation: 0.1128

Processing sample size: 7000
Training data:
      IR_encoded  EI_encoded  SP_encoded
1612           1           2           1
978            0           0           1
3050           1           0           2
4960           0           0           2
2548           1           1           0


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/7 [00:00<?, ?it/s]


Results for sample size 7000:
K-L Divergence: 5.3248
Standard Deviation: 0.2088

Processing sample size: 8000
Training data:
      IR_encoded  EI_encoded  SP_encoded
6760           2           0           0
4623           1           1           2
7841           2           0           0
3704           1           2           0
981            2           0           2


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('IR_encoded', 'EI_encoded'), ('EI_encoded', 'SP_encoded')]
Model fitted successfully.


  0%|          | 0/6 [00:00<?, ?it/s]


Results for sample size 8000:
K-L Divergence: 0.4123
Standard Deviation: 0.2736

Processing sample size: 9000
Training data:
      IR_encoded  EI_encoded  SP_encoded
7581           1           0           1
8484           1           0           2
6215           1           0           1
6884           1           1           2
3647           1           1           2


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('IR_encoded', 'EI_encoded'), ('EI_encoded', 'SP_encoded')]
Model fitted successfully.


  0%|          | 0/6 [00:00<?, ?it/s]


Results for sample size 9000:
K-L Divergence: 0.0316
Standard Deviation: 0.0893

Processing sample size: 10000
Training data:
      IR_encoded  EI_encoded  SP_encoded
9069           0           0           1
2603           0           1           2
7738           2           0           2
1579           1           2           2
5058           2           0           2


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/4 [00:00<?, ?it/s]


Results for sample size 10000:
K-L Divergence: 0.4716
Standard Deviation: 0.2934

Processing sample size: 11000
Training data:
       IR_encoded  EI_encoded  SP_encoded
8957            2           2           2
1507            2           1           2
6383            0           0           1
1559            0           0           1
10017           2           2           0


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/7 [00:00<?, ?it/s]


Results for sample size 11000:
K-L Divergence: 0.1587
Standard Deviation: 0.1821

Processing sample size: 12000
Training data:
       IR_encoded  EI_encoded  SP_encoded
7729            2           1           0
3546            1           1           1
3527            1           1           0
10159           1           1           1
6959            0           2           0


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/5 [00:00<?, ?it/s]


Results for sample size 12000:
K-L Divergence: 0.2584
Standard Deviation: 0.2250

Processing sample size: 13000
Training data:
       IR_encoded  EI_encoded  SP_encoded
10345           2           0           1
4983            0           0           1
10368           1           1           1
3219            1           1           1
3284            1           1           1


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('IR_encoded', 'EI_encoded'), ('EI_encoded', 'SP_encoded')]
Model fitted successfully.


  0%|          | 0/3 [00:00<?, ?it/s]


Results for sample size 13000:
K-L Divergence: 1.1958
Standard Deviation: 0.0761

Processing sample size: 14000
Training data:
       IR_encoded  EI_encoded  SP_encoded
12959           1           2           2
5502            0           1           1
5890            0           0           1
323             0           0           0
1344            0           2           2


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/5 [00:00<?, ?it/s]


Results for sample size 14000:
K-L Divergence: 0.0728
Standard Deviation: 0.1305

Processing sample size: 15000
Training data:
       IR_encoded  EI_encoded  SP_encoded
11797           2           1           0
5899            0           0           1
9513            0           0           1
1572            0           0           1
12995           2           1           2


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('IR_encoded', 'EI_encoded'), ('EI_encoded', 'SP_encoded')]
Model fitted successfully.


  0%|          | 0/7 [00:00<?, ?it/s]


Results for sample size 15000:
K-L Divergence: 0.0175
Standard Deviation: 0.0537

Processing sample size: 16000
Training data:
       IR_encoded  EI_encoded  SP_encoded
14741           2           2           2
11539           0           0           0
14634           2           2           0
11678           2           0           2
12645           0           0           0


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/4 [00:00<?, ?it/s]


Results for sample size 16000:
K-L Divergence: 0.0628
Standard Deviation: 0.1247

Processing sample size: 17000
Training data:
       IR_encoded  EI_encoded  SP_encoded
9173            2           2           2
16528           2           1           2
4819            0           2           1
6818            2           2           0
7717            0           2           2


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/6 [00:00<?, ?it/s]


Results for sample size 17000:
K-L Divergence: 0.6078
Standard Deviation: 0.2803

Processing sample size: 18000
Training data:
       IR_encoded  EI_encoded  SP_encoded
5553            1           1           0
15006           1           1           0
3312            0           2           1
479             0           0           0
1596            0           2           1


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/5 [00:00<?, ?it/s]


Results for sample size 18000:
K-L Divergence: 5.4643
Standard Deviation: 0.1923

Processing sample size: 19000
Training data:
       IR_encoded  EI_encoded  SP_encoded
800             2           0           1
13405           1           1           0
747             0           2           0
4029            0           2           2
8730            2           2           0


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/7 [00:00<?, ?it/s]


Results for sample size 19000:
K-L Divergence: 0.2898
Standard Deviation: 0.2095

Processing sample size: 20000
Training data:
       IR_encoded  EI_encoded  SP_encoded
17218           2           2           0
15188           1           2           1
11295           0           2           1
19772           1           2           2
13072           1           2           1


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure (edges): [('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]
Model fitted successfully.


  0%|          | 0/5 [00:00<?, ?it/s]


Results for sample size 20000:
K-L Divergence: 6.4566
Standard Deviation: 0.2526

All results have been saved to 'kl_std_bic_sparse_results.csv'.
    Sample_Size  K-L_Divergence  Standard_Deviation
0            50       19.787541            0.669266
1           100       10.119972            0.381032
2           150        0.353020            0.248499
3           200        4.832556            0.169967
4           250       12.893943            0.438772
5           300        0.085453            0.113312
6           350        5.643569            0.285316
7           400        2.362227            0.142075
8           450        0.054902            0.118258
9           500        3.245729            0.125551
10          550        0.019331            0.074270
11          600        2.226348            0.113312
12          650        0.097885            0.132260
13          700        0.523554            0.336807
14          750        0.005000            0.026052
15          800      