<a href="https://colab.research.google.com/github/nonyeezeh/Research-Project-Code/blob/main/LBN_Dense_BIC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [40]:
pip install pgmpy



In [41]:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.sampling import BayesianModelSampling
from sklearn.preprocessing import LabelEncoder
from pgmpy.estimators import HillClimbSearch, BicScore, MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
from sklearn.metrics import accuracy_score
from scipy.stats import entropy
import os
from tabulate import tabulate
from sklearn.model_selection import train_test_split

# Bayesian Network Data Generation 1000, 2000, ..., 10000 Samples (dense)

In [200]:
# Function to generate CPDs
def generate_cpds():
    # Generate random probabilities for IR
    ir_probs = np.random.rand(3)
    ir_probs /= ir_probs.sum()  # Normalize to make it a valid probability distribution

    # Generate random probabilities for EI given IR
    ei_given_ir_probs = np.random.rand(3, 3)
    ei_given_ir_probs /= ei_given_ir_probs.sum(axis=0, keepdims=True)

    # Generate random probabilities for SP given IR and EI
    sp_probs = np.random.rand(3, 3, 3)
    sp_probs /= sp_probs.sum(axis=0, keepdims=True)

    return ir_probs, ei_given_ir_probs, sp_probs

# Function to generate and save samples
def generate_and_save_samples(ir_probs, ei_probs, sp_probs, sample_size, filename):
    output_data = []

    # Generate `sample_size` random samples
    for _ in range(sample_size):
        # Sample `IR` state based on `IR` probabilities
        ir_state_idx = np.random.choice(3, p=ir_probs)
        ir_state = ['low', 'medium', 'high'][ir_state_idx]
        ir_prob = ir_probs[ir_state_idx]

        # Sample `EI` state based on `EI` probabilities given `IR`
        ei_probs_given_ir = ei_probs[:, ir_state_idx]
        ei_state_idx = np.random.choice(3, p=ei_probs_given_ir)
        ei_state = ['poor', 'average', 'good'][ei_state_idx]
        ei_prob = ei_probs_given_ir[ei_state_idx]

        # Sample `SP` state based on `SP` probabilities given `IR` and `EI`
        sp_probs_given_ir_ei = sp_probs[:, ir_state_idx, ei_state_idx]
        sp_state_idx = np.random.choice(3, p=sp_probs_given_ir_ei)
        sp_state = ['decrease', 'stable', 'increase'][sp_state_idx]
        sp_prob = sp_probs_given_ir_ei[sp_state_idx]

        # Append sample data to output list
        output_data.append({
            'IR_State': ir_state,
            'IR_Prob': f'{ir_prob:.4f}',
            'EI_State': ei_state,
            'EI_Prob': f'{ei_prob:.4f}',
            'SP_Probabilities (decrease, stable, increase)': ', '.join([f'{prob:.4f}' for prob in sp_probs_given_ir_ei]),
            'Chosen_SP_State': sp_state,
            'Chosen_SP_Probability': f'{sp_prob:.4f}'
        })

    # Create a DataFrame from the output data
    output_df = pd.DataFrame(output_data)

    # Save the output DataFrame to a CSV file
    output_df.to_csv(filename, index=False)

    # Print the first few rows for visual confirmation
    print(f"\nSample size: {sample_size} - First few rows of generated samples:\n")
    print(tabulate(output_df.head(), headers='keys', tablefmt='grid'))

# Generate and save samples for sample sizes from 1000 to 10000 every 1000
sample_sizes = range(1000, 11000, 1000)

for size in sample_sizes:
    # Generate the CPDs
    ir_probs, ei_given_ir_probs, sp_probs = generate_cpds()

    # Generate and save individual samples for the given sample size
    generate_and_save_samples(ir_probs, ei_given_ir_probs, sp_probs, size, f'combined_probabilities_{size}.csv')

# Notify the user that the process is done
print("\nGeneration and saving of individual samples complete for all sample sizes!")


Sample size: 1000 - First few rows of generated samples:

+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|    | IR_State   |   IR_Prob | EI_State   |   EI_Prob | SP_Probabilities (decrease, stable, increase)   | Chosen_SP_State   |   Chosen_SP_Probability |
|  0 | high       |    0.2881 | poor       |    0.2814 | 0.3054, 0.4201, 0.2745                          | increase          |                  0.2745 |
+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|  1 | medium     |    0.4664 | poor       |    0.4235 | 0.3464, 0.6040, 0.0496                          | decrease          |                  0.3464 |
+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|  2 | high       |    

# LBN Dense BIC & Entropy

## MMHC

In [201]:
import numpy as np
import pandas as pd
from pgmpy.estimators import MmhcEstimator, BicScore, MaximumLikelihoodEstimator
from pgmpy.models import BayesianNetwork
from sklearn.model_selection import train_test_split
from scipy.stats import entropy

# Sample sizes to loop through
sample_sizes = range(1000, 11000, 1000)

# Initialize list to store K-L divergence and standard deviation values for each sample size
results = []

# Small smoothing value to avoid zero probabilities
epsilon = 1e-10

# Loop through each sample size
for sample_size in sample_sizes:
    print(f"\nProcessing sample size: {sample_size}")

    # Load the dense dataset for the current sample size
    dense_data_file = f'combined_probabilities_{sample_size}.csv'
    df_dense = pd.read_csv(dense_data_file)

    # Manually encode categorical variables for IR, EI, and SP
    ir_map = {'low': 0, 'medium': 1, 'high': 2}
    ei_map = {'poor': 0, 'average': 1, 'good': 2}
    sp_map = {'decrease': 0, 'stable': 1, 'increase': 2}

    df_dense['IR_encoded'] = df_dense['IR_State'].map(ir_map)
    df_dense['EI_encoded'] = df_dense['EI_State'].map(ei_map)
    df_dense['SP_encoded'] = df_dense['Chosen_SP_State'].map(sp_map)

    # Split the data into training, validation, and test sets
    X = df_dense[['IR_encoded', 'EI_encoded']]
    y = df_dense['SP_encoded']

    # Split into training (70%) and temp (30%) for validation and test
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42, shuffle=True)
    # Split temp into validation (50%) and test (50%)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42, shuffle=True)

    # Concatenate X and y to form the training set for learning the BN structure
    df_train = pd.concat([X_train, y_train], axis=1)

    # Define the Max-Min Hill Climbing (MMHC) structure learning algorithm
    mmhc = MmhcEstimator(df_train)
    scoring_method = BicScore(df_train)

    # Estimate the best structure using MMHC
    best_dag = mmhc.estimate(scoring_method=scoring_method)
    best_model = BayesianNetwork(best_dag.edges())

    # Display the learned structure (edges of the Bayesian Network)
    print(f"\nLearned Structure (Edges) for {sample_size} samples:")
    print(best_model.edges())

    # Learn the CPDs using Maximum Likelihood Estimation (MLE)
    best_model.fit(df_train, estimator=MaximumLikelihoodEstimator)

    # Check if the model is valid after learning the parameters
    assert best_model.check_model()

    # --- K-L Divergence and Standard Deviation Calculation Block ---
    # Calculate the probabilities using the learned BN and the test set
    inference = best_model.predict(X_test)
    predicted_probabilities = inference['SP_encoded']

    # Calculate K-L divergence and standard deviation between the ground truth and learned BN
    ground_truth_probabilities = y_test.value_counts(normalize=True).sort_index()
    predicted_probabilities = predicted_probabilities.value_counts(normalize=True).sort_index()

    # Reindex both distributions to have the same set of categories and add smoothing
    all_categories = sorted(set(ground_truth_probabilities.index).union(set(predicted_probabilities.index)))
    ground_truth_probabilities = ground_truth_probabilities.reindex(all_categories, fill_value=epsilon)
    predicted_probabilities = predicted_probabilities.reindex(all_categories, fill_value=epsilon)

    # Calculate K-L divergence with smoothing
    kl_divergence = entropy(pk=ground_truth_probabilities, qk=predicted_probabilities)

    # Standard deviation between predicted probabilities and actual probabilities
    std_dev = np.std(predicted_probabilities - ground_truth_probabilities)

    # Append results for this sample size
    results.append({
        'Sample_Size': sample_size,
        'K-L_Divergence': kl_divergence,
        'Standard_Deviation': std_dev
    })

    # Print the K-L divergence and standard deviation for this sample size
    print(f"\nResults for sample size {sample_size}:")
    print(f"K-L Divergence: {kl_divergence:.4f}")
    print(f"Standard Deviation: {std_dev:.4f}")

# After processing all sample sizes, save results to CSV and display them
results_df = pd.DataFrame(results)
results_df.to_csv('kl_std_results_mmhc.csv', index=False)

# Print all results after saving to CSV
print("\nAll results have been saved to 'kl_std_results_mmhc.csv'.")
print(results_df)


Processing sample size: 1000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 1000 samples:
[('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]


  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 1000:
K-L Divergence: 0.0050
Standard Deviation: 0.0327

Processing sample size: 2000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 2000 samples:
[('IR_encoded', 'EI_encoded'), ('SP_encoded', 'EI_encoded')]


  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 2000:
K-L Divergence: 0.1239
Standard Deviation: 0.1093

Processing sample size: 3000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 3000 samples:
[('IR_encoded', 'EI_encoded'), ('SP_encoded', 'EI_encoded'), ('SP_encoded', 'IR_encoded')]


  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 3000:
K-L Divergence: 0.5768
Standard Deviation: 0.1931

Processing sample size: 4000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 4000 samples:
[('EI_encoded', 'IR_encoded'), ('EI_encoded', 'SP_encoded'), ('SP_encoded', 'IR_encoded')]


  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 4000:
K-L Divergence: 0.0540
Standard Deviation: 0.0862

Processing sample size: 5000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 5000 samples:
[('EI_encoded', 'IR_encoded'), ('SP_encoded', 'IR_encoded'), ('SP_encoded', 'EI_encoded')]


  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 5000:
K-L Divergence: 0.0642
Standard Deviation: 0.0998

Processing sample size: 6000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 6000 samples:
[('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]


  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 6000:
K-L Divergence: 4.0749
Standard Deviation: 0.1624

Processing sample size: 7000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 7000 samples:
[('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]


  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 7000:
K-L Divergence: 0.0441
Standard Deviation: 0.0950

Processing sample size: 8000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 8000 samples:
[('EI_encoded', 'IR_encoded'), ('SP_encoded', 'EI_encoded'), ('SP_encoded', 'IR_encoded')]


  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 8000:
K-L Divergence: 6.0439
Standard Deviation: 0.2415

Processing sample size: 9000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 9000 samples:
[('EI_encoded', 'IR_encoded'), ('SP_encoded', 'EI_encoded'), ('SP_encoded', 'IR_encoded')]


  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 9000:
K-L Divergence: 0.0009
Standard Deviation: 0.0150

Processing sample size: 10000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 10000 samples:
[('IR_encoded', 'EI_encoded'), ('SP_encoded', 'EI_encoded'), ('SP_encoded', 'IR_encoded')]


  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 10000:
K-L Divergence: 0.0491
Standard Deviation: 0.0887

All results have been saved to 'kl_std_results_mmhc.csv'.
   Sample_Size  K-L_Divergence  Standard_Deviation
0         1000        0.005007            0.032660
1         2000        0.123945            0.109274
2         3000        0.576811            0.193073
3         4000        0.053969            0.086249
4         5000        0.064171            0.099819
5         6000        4.074937            0.162400
6         7000        0.044060            0.094955
7         8000        6.043879            0.241466
8         9000        0.000942            0.015048
9        10000        0.049057            0.088731


## HC

In [100]:
# Sample sizes to loop through
sample_sizes = range(1000, 11000, 1000)

# Initialize list to store K-L divergence and standard deviation values for each sample size
results = []

# Small smoothing value to avoid zero probabilities
epsilon = 1e-10

# Loop through each sample size
for sample_size in sample_sizes:
    print(f"\nProcessing sample size: {sample_size}")

    # Load the dense dataset for the current sample size
    dense_data_file = f'combined_probabilities_{sample_size}.csv'
    df_dense = pd.read_csv(dense_data_file)

    # Manually encode categorical variables for IR, EI, and SP
    ir_map = {'low': 0, 'medium': 1, 'high': 2}
    ei_map = {'poor': 0, 'average': 1, 'good': 2}
    sp_map = {'decrease': 0, 'stable': 1, 'increase': 2}

    df_dense['IR_encoded'] = df_dense['IR_State'].map(ir_map)
    df_dense['EI_encoded'] = df_dense['EI_State'].map(ei_map)
    df_dense['SP_encoded'] = df_dense['Chosen_SP_State'].map(sp_map)

    # Split the data into training, validation, and test sets
    X = df_dense[['IR_encoded', 'EI_encoded']]
    y = df_dense['SP_encoded']

    # Split into training (70%) and temp (30%) for validation and test
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42, shuffle=True)
    # Split temp into validation (50%) and test (50%)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42, shuffle=True)

    # Concatenate X and y to form the training set for learning the BN structure
    df_train = pd.concat([X_train, y_train], axis=1)

    # Define the Hill-Climb structure learning algorithm
    hc = HillClimbSearch(df_train)
    scoring_method = BicScore(df_train)

    # Estimate the best structure
    best_dag = hc.estimate(scoring_method=scoring_method)
    best_model = BayesianNetwork(best_dag.edges())

    # Display the learned structure (edges of the Bayesian Network)
    print(f"\nLearned Structure (Edges) for {sample_size} samples:")
    print(best_model.edges())

    # Learn the CPDs using Maximum Likelihood Estimation (MLE)
    best_model.fit(df_train, estimator=MaximumLikelihoodEstimator)

    # Check if the model is valid after learning the parameters
    assert best_model.check_model()

    # Print the learned CPDs (Conditional Probability Distributions)
    for cpd in best_model.get_cpds():
        print("\nCPD of", cpd.variable)
        print(cpd)

    # --- K-L Divergence and Standard Deviation Calculation Block ---
    # Calculate the probabilities using the learned BN and the test set
    inference = best_model.predict(X_test)
    predicted_probabilities = inference['SP_encoded']

    # Calculate K-L divergence and standard deviation between the ground truth and learned BN
    ground_truth_probabilities = y_test.value_counts(normalize=True).sort_index()
    predicted_probabilities = predicted_probabilities.value_counts(normalize=True).sort_index()

    # Reindex both distributions to have the same set of categories and add smoothing
    all_categories = sorted(set(ground_truth_probabilities.index).union(set(predicted_probabilities.index)))
    ground_truth_probabilities = ground_truth_probabilities.reindex(all_categories, fill_value=epsilon)
    predicted_probabilities = predicted_probabilities.reindex(all_categories, fill_value=epsilon)

    # Calculate K-L divergence with smoothing
    kl_divergence = entropy(pk=ground_truth_probabilities, qk=predicted_probabilities)

    # Standard deviation between predicted probabilities and actual probabilities
    std_dev = np.std(predicted_probabilities - ground_truth_probabilities)

    # Append results for this sample size
    results.append({
        'Sample_Size': sample_size,
        'K-L_Divergence': kl_divergence,
        'Standard_Deviation': std_dev
    })

    # Print the K-L divergence and standard deviation for this sample size
    print(f"\nResults for sample size {sample_size}:")
    print(f"K-L Divergence: {kl_divergence:.4f}")
    print(f"Standard Deviation: {std_dev:.4f}")

# After processing all sample sizes, save results to CSV and display them
results_df = pd.DataFrame(results)
results_df.to_csv('kl_std_results.csv', index=False)

# Print all results after saving to CSV
print("\nAll results have been saved to 'kl_std_results.csv'.")
print(results_df)


Processing sample size: 1000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 1000 samples:
[('IR_encoded', 'EI_encoded'), ('IR_encoded', 'SP_encoded'), ('SP_encoded', 'EI_encoded')]

CPD of IR_encoded
+---------------+----------+
| IR_encoded(0) | 0.365714 |
+---------------+----------+
| IR_encoded(1) | 0.364286 |
+---------------+----------+
| IR_encoded(2) | 0.27     |
+---------------+----------+

CPD of EI_encoded
+---------------+---------------------+-----+---------------------+---------------------+
| IR_encoded    | IR_encoded(0)       | ... | IR_encoded(2)       | IR_encoded(2)       |
+---------------+---------------------+-----+---------------------+---------------------+
| SP_encoded    | SP_encoded(0)       | ... | SP_encoded(1)       | SP_encoded(2)       |
+---------------+---------------------+-----+---------------------+---------------------+
| EI_encoded(0) | 0.5714285714285714  | ... | 0.2727272727272727  | 0.21666666666666667 |
+---------------+---------------------+-----+---------------------+----------------

  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 1000:
K-L Divergence: 0.0446
Standard Deviation: 0.0993

Processing sample size: 2000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 2000 samples:
[('EI_encoded', 'IR_encoded'), ('SP_encoded', 'IR_encoded'), ('SP_encoded', 'EI_encoded')]

CPD of EI_encoded
+---------------+---------------------+---------------------+---------------------+
| SP_encoded    | SP_encoded(0)       | SP_encoded(1)       | SP_encoded(2)       |
+---------------+---------------------+---------------------+---------------------+
| EI_encoded(0) | 0.21584158415841584 | 0.45893719806763283 | 0.24740124740124741 |
+---------------+---------------------+---------------------+---------------------+
| EI_encoded(1) | 0.49306930693069306 | 0.2584541062801932  | 0.3783783783783784  |
+---------------+---------------------+---------------------+---------------------+
| EI_encoded(2) | 0.29108910891089107 | 0.2826086956521739  | 0.37422037422037424 |
+---------------+---------------------+---------------------+---------------------+

CPD of IR_encoded
+---------------+----------------------+-----+---------------------+--

  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 2000:
K-L Divergence: 0.0080
Standard Deviation: 0.0395

Processing sample size: 3000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 3000 samples:
[('EI_encoded', 'IR_encoded'), ('SP_encoded', 'IR_encoded'), ('SP_encoded', 'EI_encoded')]

CPD of EI_encoded
+---------------+--------------------+---------------------+---------------------+
| SP_encoded    | SP_encoded(0)      | SP_encoded(1)       | SP_encoded(2)       |
+---------------+--------------------+---------------------+---------------------+
| EI_encoded(0) | 0.2518597236981934 | 0.1933139534883721  | 0.3673036093418259  |
+---------------+--------------------+---------------------+---------------------+
| EI_encoded(1) | 0.5334750265674814 | 0.5799418604651163  | 0.33121019108280253 |
+---------------+--------------------+---------------------+---------------------+
| EI_encoded(2) | 0.2146652497343252 | 0.22674418604651161 | 0.30148619957537154 |
+---------------+--------------------+---------------------+---------------------+

CPD of IR_encoded
+---------------+----------------------+-----+----------------------+
| EI_enco

  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 3000:
K-L Divergence: 0.1954
Standard Deviation: 0.1367

Processing sample size: 4000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 4000 samples:
[('EI_encoded', 'IR_encoded'), ('SP_encoded', 'IR_encoded'), ('SP_encoded', 'EI_encoded')]

CPD of EI_encoded
+---------------+---------------------+---------------------+---------------------+
| SP_encoded    | SP_encoded(0)       | SP_encoded(1)       | SP_encoded(2)       |
+---------------+---------------------+---------------------+---------------------+
| EI_encoded(0) | 0.10580204778156997 | 0.13432835820895522 | 0.09937332139659803 |
+---------------+---------------------+---------------------+---------------------+
| EI_encoded(1) | 0.42662116040955633 | 0.5385572139303483  | 0.4556848701880036  |
+---------------+---------------------+---------------------+---------------------+
| EI_encoded(2) | 0.46757679180887374 | 0.3271144278606965  | 0.4449418084153984  |
+---------------+---------------------+---------------------+---------------------+

CPD of IR_encoded
+---------------+---------------------+-----+----------------------+--

  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 4000:
K-L Divergence: 0.2817
Standard Deviation: 0.1809

Processing sample size: 5000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 5000 samples:
[('IR_encoded', 'EI_encoded'), ('SP_encoded', 'EI_encoded'), ('SP_encoded', 'IR_encoded')]

CPD of IR_encoded
+---------------+---------------------+---------------------+---------------------+
| SP_encoded    | SP_encoded(0)       | SP_encoded(1)       | SP_encoded(2)       |
+---------------+---------------------+---------------------+---------------------+
| IR_encoded(0) | 0.35148514851485146 | 0.27325102880658436 | 0.40059790732436473 |
+---------------+---------------------+---------------------+---------------------+
| IR_encoded(1) | 0.1342821782178218  | 0.23045267489711935 | 0.13004484304932734 |
+---------------+---------------------+---------------------+---------------------+
| IR_encoded(2) | 0.5142326732673267  | 0.4962962962962963  | 0.4693572496263079  |
+---------------+---------------------+---------------------+---------------------+

CPD of EI_encoded
+---------------+---------------------+-----+---------------------+---

  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 5000:
K-L Divergence: 4.1348
Standard Deviation: 0.1708

Processing sample size: 6000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 6000 samples:
[('IR_encoded', 'EI_encoded'), ('IR_encoded', 'SP_encoded'), ('SP_encoded', 'EI_encoded')]

CPD of IR_encoded
+---------------+----------+
| IR_encoded(0) | 0.322619 |
+---------------+----------+
| IR_encoded(1) | 0.295952 |
+---------------+----------+
| IR_encoded(2) | 0.381429 |
+---------------+----------+

CPD of EI_encoded
+---------------+---------------------+-----+---------------------+---------------------+
| IR_encoded    | IR_encoded(0)       | ... | IR_encoded(2)       | IR_encoded(2)       |
+---------------+---------------------+-----+---------------------+---------------------+
| SP_encoded    | SP_encoded(0)       | ... | SP_encoded(1)       | SP_encoded(2)       |
+---------------+---------------------+-----+---------------------+---------------------+
| EI_encoded(0) | 0.01809954751131222 | ... | 0.25203252032520324 | 0.275974025974026   |
+---------------+---------------------+-----+---------------------+----------------

  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 6000:
K-L Divergence: 0.0343
Standard Deviation: 0.0791

Processing sample size: 7000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 7000 samples:
[('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]

CPD of IR_encoded
+---------------+---------------------+---------------------+---------------------+
| EI_encoded    | EI_encoded(0)       | EI_encoded(1)       | EI_encoded(2)       |
+---------------+---------------------+---------------------+---------------------+
| IR_encoded(0) | 0.7046783625730995  | 0.5877437325905293  | 0.7287878787878788  |
+---------------+---------------------+---------------------+---------------------+
| IR_encoded(1) | 0.16885964912280702 | 0.27541782729805014 | 0.12121212121212122 |
+---------------+---------------------+---------------------+---------------------+
| IR_encoded(2) | 0.12646198830409358 | 0.1368384401114206  | 0.15                |
+---------------+---------------------+---------------------+---------------------+

CPD of SP_encoded
+---------------+---------------------+-----+---------------+---------

  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 7000:
K-L Divergence: 0.2627
Standard Deviation: 0.1670

Processing sample size: 8000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 8000 samples:
[('IR_encoded', 'EI_encoded'), ('IR_encoded', 'SP_encoded'), ('SP_encoded', 'EI_encoded')]

CPD of IR_encoded
+---------------+----------+
| IR_encoded(0) | 0.278214 |
+---------------+----------+
| IR_encoded(1) | 0.429464 |
+---------------+----------+
| IR_encoded(2) | 0.292321 |
+---------------+----------+

CPD of EI_encoded
+---------------+--------------------+-----+---------------------+---------------------+
| IR_encoded    | IR_encoded(0)      | ... | IR_encoded(2)       | IR_encoded(2)       |
+---------------+--------------------+-----+---------------------+---------------------+
| SP_encoded    | SP_encoded(0)      | ... | SP_encoded(1)       | SP_encoded(2)       |
+---------------+--------------------+-----+---------------------+---------------------+
| EI_encoded(0) | 0.2460136674259681 | ... | 0.33248730964467005 | 0.39072847682119205 |
+---------------+--------------------+-----+---------------------+---------------------+


  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 8000:
K-L Divergence: 5.8663
Standard Deviation: 0.2899

Processing sample size: 9000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 9000 samples:
[('EI_encoded', 'IR_encoded'), ('SP_encoded', 'IR_encoded'), ('SP_encoded', 'EI_encoded')]

CPD of EI_encoded
+---------------+---------------------+--------------------+---------------------+
| SP_encoded    | SP_encoded(0)       | SP_encoded(1)      | SP_encoded(2)       |
+---------------+---------------------+--------------------+---------------------+
| EI_encoded(0) | 0.6710526315789473  | 0.3134496342149691 | 0.5999077065066912  |
+---------------+---------------------+--------------------+---------------------+
| EI_encoded(1) | 0.1536502546689304  | 0.2954417557681486 | 0.08121827411167512 |
+---------------+---------------------+--------------------+---------------------+
| EI_encoded(2) | 0.17529711375212223 | 0.3911086100168824 | 0.3188740193816336  |
+---------------+---------------------+--------------------+---------------------+

CPD of IR_encoded
+---------------+---------------------+-----+----------------------+
| EI_encod

  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 9000:
K-L Divergence: 0.1667
Standard Deviation: 0.1664

Processing sample size: 10000


  0%|          | 0/1000000 [00:00<?, ?it/s]


Learned Structure (Edges) for 10000 samples:
[('IR_encoded', 'SP_encoded'), ('EI_encoded', 'SP_encoded'), ('EI_encoded', 'IR_encoded')]

CPD of IR_encoded
+---------------+---------------------+---------------------+---------------------+
| EI_encoded    | EI_encoded(0)       | EI_encoded(1)       | EI_encoded(2)       |
+---------------+---------------------+---------------------+---------------------+
| IR_encoded(0) | 0.6401515151515151  | 0.5453031312458361  | 0.39248895434462444 |
+---------------+---------------------+---------------------+---------------------+
| IR_encoded(1) | 0.2606060606060606  | 0.20519653564290474 | 0.3711340206185567  |
+---------------+---------------------+---------------------+---------------------+
| IR_encoded(2) | 0.09924242424242424 | 0.24950033311125916 | 0.23637702503681884 |
+---------------+---------------------+---------------------+---------------------+

CPD of SP_encoded
+---------------+---------------------+-----+---------------------+--

  0%|          | 0/9 [00:00<?, ?it/s]


Results for sample size 10000:
K-L Divergence: 0.1931
Standard Deviation: 0.1618

All results have been saved to 'kl_std_results.csv'.
   Sample_Size  K-L_Divergence  Standard_Deviation
0         1000        0.044567            0.099331
1         2000        0.008019            0.039534
2         3000        0.195419            0.136710
3         4000        0.281708            0.180883
4         5000        4.134781            0.170816
5         6000        0.034266            0.079136
6         7000        0.262657            0.167046
7         8000        5.866333            0.289900
8         9000        0.166714            0.166418
9        10000        0.193146            0.161765
