<a href="https://colab.research.google.com/github/nonyeezeh/Research-Project-Code/blob/main/NN_Sparse_1_3_Relu_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [None]:
pip install pgmpy

Collecting pgmpy
  Downloading pgmpy-0.1.26-py3-none-any.whl.metadata (9.1 kB)
Downloading pgmpy-0.1.26-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pgmpy
Successfully installed pgmpy-0.1.26


In [None]:
import numpy as np
import pandas as pd
from pgmpy.estimators import HillClimbSearch, BicScore, MaximumLikelihoodEstimator
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import BayesianEstimator
from sklearn.model_selection import train_test_split
from scipy.stats import entropy
from tabulate import tabulate

from tensorflow.keras import models, layers, regularizers, callbacks
from sklearn.model_selection import train_test_split

# Bayesian Network Data Generation 50, ..., 20000 Samples (sparse)

In [None]:
# Function to generate CPDs for the sparse structure with 2 nodes influencing SP
def generate_cpds_sparse_3_total_nodes(rng=None):
    """Draw random probability tables for the three-node IR / EI / SP network.

    Every node has three states. IR is a root node, EI is conditioned on IR,
    and SP is conditioned on both IR and EI. Each table is filled with
    uniform random numbers and then normalised along axis 0, so axis 0 is
    always the distribution over the node's own states.

    Parameters
    ----------
    rng : numpy.random.Generator, optional
        Source of randomness. When omitted, NumPy's global random state is
        used (seed it with ``np.random.seed`` for repeatable tables).

    Returns
    -------
    tuple of numpy.ndarray
        ``(ir_probs, ei_given_ir_probs, sp_probs)`` with shapes ``(3,)``,
        ``(3, 3)`` and ``(3, 3, 3)``. ``ir_probs`` sums to 1, every column
        ``ei_given_ir_probs[:, i]`` sums to 1, and every slice
        ``sp_probs[:, i, j]`` sums to 1.
    """
    # The np.random module and a Generator both expose ``random(size)``, and the
    # module-level call draws from the same stream as the legacy ``rand``.
    sampler = np.random if rng is None else rng

    # Marginal distribution of the root node IR.
    ir_probs = sampler.random(3)
    ir_probs /= ir_probs.sum()

    # One EI distribution per IR state (columns are the conditioning state).
    ei_given_ir_probs = sampler.random((3, 3))
    ei_given_ir_probs /= ei_given_ir_probs.sum(axis=0, keepdims=True)

    # One SP distribution per (second axis, third axis) parent combination.
    sp_probs = sampler.random((3, 3, 3))
    sp_probs /= sp_probs.sum(axis=0, keepdims=True)

    return ir_probs, ei_given_ir_probs, sp_probs

# Function to generate and save samples with the sparse structure of 3 nodes total
def generate_and_save_samples_sparse_3_total_nodes(ir_probs, ei_probs, sp_probs, sample_size, filename, rng=None):
    """Sample the IR / EI / SP network row by row and write the rows to a CSV.

    For every sample, IR is drawn from ``ir_probs``, EI from the column of
    ``ei_probs`` selected by the drawn IR state, and SP from
    ``sp_probs[:, ir, ei]``. The SP distribution that was used for the draw is
    stored next to the chosen state, formatted to four decimals.

    Parameters
    ----------
    ir_probs : array-like, shape (3,)
        Distribution over the IR states.
    ei_probs : array-like, shape (3, 3)
        ``ei_probs[:, i]`` is the EI distribution given IR state ``i``.
    sp_probs : array-like, shape (3, 3, 3)
        ``sp_probs[:, i, j]`` is the SP distribution given IR state ``i`` and
        EI state ``j``.
    sample_size : int
        Number of rows to generate. With 0 the CSV still gets its header row.
    filename : str
        Destination path of the CSV file.
    rng : numpy.random.Generator, optional
        Source of randomness. When omitted, NumPy's global random state is used.

    Returns
    -------
    None
        The samples are written to ``filename`` and a preview is printed.
    """
    # Loop-invariant lookups, built once instead of on every iteration.
    ir_labels = ['low', 'medium', 'high']
    ei_labels = ['poor', 'average', 'good']
    sp_labels = ['decrease', 'stable', 'increase']
    columns = [
        'IR_State',
        'EI_State',
        'SP_Probabilities (decrease, stable, increase)',
        'Chosen_SP_State',
    ]

    # The np.random module and a Generator both expose ``choice(a, p=...)``.
    sampler = np.random if rng is None else rng

    output_data = []
    for _ in range(sample_size):
        # Ancestral sampling: parents first, then the child conditioned on them.
        ir_state_idx = sampler.choice(3, p=ir_probs)
        ei_state_idx = sampler.choice(3, p=ei_probs[:, ir_state_idx])

        sp_probs_given_all = sp_probs[:, ir_state_idx, ei_state_idx]
        sp_state_idx = sampler.choice(3, p=sp_probs_given_all)

        output_data.append({
            'IR_State': ir_labels[ir_state_idx],
            'EI_State': ei_labels[ei_state_idx],
            'SP_Probabilities (decrease, stable, increase)': ', '.join([f'{prob:.4f}' for prob in sp_probs_given_all]),
            'Chosen_SP_State': sp_labels[sp_state_idx]
        })

    # Explicit columns keep the schema (and the CSV header) even with no rows.
    output_df = pd.DataFrame(output_data, columns=columns)

    # Save the output DataFrame to a CSV file
    output_df.to_csv(filename, index=False)

    # Print the first few rows for visual confirmation
    print(f"\nSample size: {sample_size} - First few rows of generated samples:\n")
    print(tabulate(output_df.head(), headers='keys', tablefmt='grid'))

# Generate and save samples for sample sizes
# Seed NumPy's global random state so the generated CSV files are identical on
# every Restart & Run All (the generation helpers draw from np.random by default).
np.random.seed(42)

# 50..1000 in steps of 50, then 2000..20000 in steps of 1000 (39 sizes in total).
sample_sizes = [*range(50, 1001, 50), *range(2000, 20001, 1000)]

# NOTE: a fresh set of CPDs is drawn for every sample size, so each CSV is
# sampled from a different random network.
for size in sample_sizes:
    ir_probs, ei_probs, sp_probs = generate_cpds_sparse_3_total_nodes()
    generate_and_save_samples_sparse_3_total_nodes(ir_probs, ei_probs, sp_probs, size, f'combined_probabilities_{size}.csv')

print("\nGeneration and saving of individual samples complete for all sample sizes!")


Sample size: 50 - First few rows of generated samples:

+----+------------+------------+-------------------------------------------------+-------------------+
|    | IR_State   | EI_State   | SP_Probabilities (decrease, stable, increase)   | Chosen_SP_State   |
|  0 | medium     | good       | 0.2485, 0.6565, 0.0950                          | increase          |
+----+------------+------------+-------------------------------------------------+-------------------+
|  1 | low        | good       | 0.3822, 0.1741, 0.4437                          | increase          |
+----+------------+------------+-------------------------------------------------+-------------------+
|  2 | medium     | good       | 0.2485, 0.6565, 0.0950                          | stable            |
+----+------------+------------+-------------------------------------------------+-------------------+
|  3 | high       | good       | 0.6848, 0.2334, 0.0818                          | decrease          |
+----+----------

# NN & KL-Div

In [None]:
# Dataset sizes to train on: 50..1000 in steps of 50, then 2000..20000 in steps of 1000
sample_sizes = list(range(50, 1001, 50)) + list(range(2000, 20001, 1000))

# Define the Neural Network architecture with L2 regularization
def create_nn_model(hidden_layers=1, nodes_per_layer=3, l2_lambda=0.01,
                    input_dim=2, num_classes=3, dropout_rate=0.2):
    """Build and compile a small fully connected softmax classifier.

    The network is ``Input -> [Dense(relu, L2) -> Dropout] * hidden_layers ->
    Dense(softmax, L2)`` and is compiled with the Adam optimizer and sparse
    categorical cross-entropy, so labels must be integer class indices.

    Parameters
    ----------
    hidden_layers : int
        Number of Dense + Dropout pairs before the output layer.
    nodes_per_layer : int
        Units in every hidden Dense layer.
    l2_lambda : float
        L2 penalty applied to the kernels of all Dense layers.
    input_dim : int
        Number of input features (default 2: the encoded IR and EI states).
    num_classes : int
        Number of output classes (default 3: decrease, stable, increase).
    dropout_rate : float
        Dropout rate applied after each hidden layer.

    Returns
    -------
    tensorflow.keras.Model
        The compiled Sequential model.
    """
    model = models.Sequential()
    # layers.Input(shape=...) replaces InputLayer(input_shape=...), whose
    # ``input_shape`` argument is deprecated in Keras 3.
    model.add(layers.Input(shape=(input_dim,)))

    # Hidden layers with L2 regularization, each followed by Dropout
    for layer_num in range(hidden_layers):
        model.add(layers.Dense(
            nodes_per_layer,
            activation='relu',
            kernel_regularizer=regularizers.l2(l2_lambda),
            name=f"hidden_layer_{layer_num + 1}"  # unique name per hidden layer
        ))
        model.add(layers.Dropout(dropout_rate))  # Dropout layer to reduce overfitting

    # Output layer: one softmax unit per class, also L2-regularized
    model.add(layers.Dense(
        num_classes,
        activation='softmax',
        kernel_regularizer=regularizers.l2(l2_lambda),
        name="output_layer"
    ))

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

# Prepare a dictionary to store extracted data for each sample size
extracted_data = {}

# Columns needed from each generated CSV and the integer codes for their
# categorical states. These never change between files, so define them once.
required_columns = ['IR_State', 'EI_State', 'Chosen_SP_State']
ir_map = {'low': 0, 'medium': 1, 'high': 2}
ei_map = {'poor': 0, 'average': 1, 'good': 2}
sp_map = {'decrease': 0, 'stable': 1, 'increase': 2}

# Extract the required columns from all sample sizes first
for size in sample_sizes:
    outcomes_file = f'combined_probabilities_{size}.csv'
    df = pd.read_csv(outcomes_file)

    # .assign returns a new frame, so the encoded columns are never written
    # into a slice of `df` (which is what triggers SettingWithCopyWarning).
    df_extracted = df[required_columns].assign(
        IR_encoded=lambda frame: frame['IR_State'].map(ir_map),
        EI_encoded=lambda frame: frame['EI_State'].map(ei_map),
        SP_encoded=lambda frame: frame['Chosen_SP_State'].map(sp_map),
    )

    # Series.map yields NaN for labels missing from the dict; fail here rather
    # than silently feeding NaN features or labels to the model.
    encoded_columns = ['IR_encoded', 'EI_encoded', 'SP_encoded']
    if df_extracted[encoded_columns].isna().any().any():
        raise ValueError(f"Unrecognised state label found in {outcomes_file}")

    extracted_data[size] = df_extracted

# Initialize list to store K-L divergence and standard deviation results
results = []
epsilon = 1e-10  # Small value for smoothing

# Class index -> SP label (inverse of the integer encoding used for SP_encoded).
sp_reverse_map = ['decrease', 'stable', 'increase']

# For every dataset size: split 70/15/15, train a fresh network, report
# accuracies, then compare the class frequencies of the test labels with the
# class frequencies of the model's hard (argmax) predictions.
for size in sample_sizes:
    df = extracted_data[size]

    # Features (IR and EI) and labels (SP)
    X = df[['IR_encoded', 'EI_encoded']]
    y = df['SP_encoded']

    # Split into training (70%), validation (15%), and test (15%) sets
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=True, random_state=42)

    # Create and train the Neural Network model; stop early when the validation
    # loss has not improved for 5 epochs and roll back to the best weights.
    nn_model = create_nn_model(hidden_layers=1, nodes_per_layer=3, l2_lambda=0.01)
    early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    nn_model.fit(X_train, y_train, epochs=25, batch_size=16, validation_data=(X_val, y_val), callbacks=[early_stopping], verbose=0)

    # Evaluate model accuracy
    train_loss, train_accuracy = nn_model.evaluate(X_train, y_train, verbose=0)
    val_loss, val_accuracy = nn_model.evaluate(X_val, y_val, verbose=0)
    test_loss, test_accuracy = nn_model.evaluate(X_test, y_test, verbose=0)

    print(f"\nSample size: {size}")
    print(f"Training Accuracy: {train_accuracy:.4f}")
    print(f"Validation Accuracy: {val_accuracy:.4f}")
    print(f"Test Accuracy: {test_accuracy:.4f}")

    # Predict on test data (silent, like the fit/evaluate calls above)
    predictions = nn_model.predict(X_test, verbose=0)
    predicted_classes = predictions.argmax(axis=1)

    # Empirical class frequencies: observed test labels vs. predicted classes.
    # These are marginal frequencies over the test set, not per-row conditionals.
    ground_truth_probabilities = y_test.value_counts(normalize=True).sort_index()
    predicted_probabilities = pd.Series(predicted_classes).value_counts(normalize=True).sort_index()

    # Put both distributions on the same categories; a class missing from one
    # side gets epsilon instead of 0 so the divergence stays finite.
    all_categories = sorted(set(ground_truth_probabilities.index).union(set(predicted_probabilities.index)))
    ground_truth_probabilities = ground_truth_probabilities.reindex(all_categories, fill_value=epsilon)
    predicted_probabilities = predicted_probabilities.reindex(all_categories, fill_value=epsilon)

    # K-L divergence D(ground truth || predicted) and the standard deviation of
    # the per-class frequency differences.
    kl_divergence = entropy(pk=ground_truth_probabilities, qk=predicted_probabilities)
    std_dev = np.std(predicted_probabilities - ground_truth_probabilities)

    results.append({
        'Sample_Size': size,
        'K-L_Divergence': kl_divergence,
        'Standard_Deviation': std_dev
    })

    print(f"K-L Divergence: {kl_divergence:.4f}")
    print(f"Standard Deviation: {std_dev:.4f}")

    # Map integers back to the original SP labels
    predicted_labels = [sp_reverse_map[label] for label in predicted_classes]

    # Create DataFrame for displaying nodes, predicted SP, and chosen SP.
    # X_test.index holds index *labels*, so select with .loc; .iloc would only
    # be right while the frame happens to carry a default 0..n-1 index.
    result_df = pd.DataFrame({
        'IR_State': df['IR_State'].loc[X_test.index],
        'EI_State': df['EI_State'].loc[X_test.index],
        'Chosen_SP': df['Chosen_SP_State'].loc[X_test.index],
        'Predicted_SP': predicted_labels
    })
    print(f"\nPredicted Results for {size} samples (First 10 rows):")
    print(result_df.head(10))

    # Save results for this sample size in a dedicated CSV
    result_df.to_csv(f'test_results_{size}.csv', index=False)

# Save only K-L and Standard Deviation results to a summary file
results_df = pd.DataFrame(results)
results_df.to_csv('kl_std_results_summary.csv', index=False)

print("\nAll K-L divergence and standard deviation results have been saved in 'kl_std_results_summary.csv'.")




Sample size: 50
Training Accuracy: 0.4286
Validation Accuracy: 0.5714
Test Accuracy: 0.3750
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
K-L Divergence: 7.9775
Standard Deviation: 0.2700

Predicted Results for 50 samples (First 10 rows):
   IR_State EI_State Chosen_SP Predicted_SP
19      low     good  increase     decrease
4      high     good    stable     decrease
13     high     good  decrease     decrease
8       low     poor    stable     increase
48     high     good  decrease     decrease
32     high  average    stable     decrease
30      low     poor  decrease     increase
39      low     good  decrease     decrease





Sample size: 100
Training Accuracy: 0.2571
Validation Accuracy: 0.2667
Test Accuracy: 0.5333
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
K-L Divergence: 3.0395
Standard Deviation: 0.2880

Predicted Results for 100 samples (First 10 rows):
   IR_State EI_State Chosen_SP Predicted_SP
96      low     good  decrease     decrease
4    medium     poor  decrease     decrease
42      low     good  decrease     decrease
77   medium     good  increase     decrease
10   medium     poor  decrease     decrease
0    medium     good  increase     decrease
9       low     good  increase     decrease
69      low  average  decrease     decrease
73      low     good  decrease     decrease
83      low     good  decrease     decrease





Sample size: 150
Training Accuracy: 0.4762
Validation Accuracy: 0.4091
Test Accuracy: 0.4348
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
K-L Divergence: 4.5589
Standard Deviation: 0.2217

Predicted Results for 150 samples (First 10 rows):
    IR_State EI_State Chosen_SP Predicted_SP
32    medium     poor  decrease     decrease
145   medium  average    stable       stable
108   medium     good  decrease       stable
16    medium     poor  decrease     decrease
146     high     good    stable     decrease
85    medium     good    stable       stable
76       low  average  decrease       stable
36       low  average    stable       stable
68    medium     good  decrease       stable
78    medium  average  increase       stable





Sample size: 200
Training Accuracy: 0.3929
Validation Accuracy: 0.5333
Test Accuracy: 0.4667
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
K-L Divergence: 2.7882
Standard Deviation: 0.1656

Predicted Results for 200 samples (First 10 rows):
    IR_State EI_State Chosen_SP Predicted_SP
95       low  average  increase     decrease
115   medium     poor  increase     increase
135   medium     good  increase     increase
195   medium     poor    stable     increase
78       low  average    stable     decrease
117   medium     good  increase     increase
75       low     poor  increase     increase
143   medium     good  decrease     increase
165      low     poor  increase     increase
98       low  average  increase     decrease





Sample size: 250
Training Accuracy: 0.5143
Validation Accuracy: 0.5135
Test Accuracy: 0.5000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
K-L Divergence: 10.5080
Standard Deviation: 0.3615

Predicted Results for 250 samples (First 10 rows):
    IR_State EI_State Chosen_SP Predicted_SP
112      low  average    stable     decrease
84      high  average  increase     decrease
200   medium     good  decrease     decrease
142   medium     good    stable     decrease
111     high  average    stable     decrease
156   medium     good  increase     decrease
30    medium     good    stable     decrease
236      low     good  decrease     decrease
234   medium     poor    stable     decrease
125   medium     poor  decrease     decrease





Sample size: 300
Training Accuracy: 0.4476
Validation Accuracy: 0.3333
Test Accuracy: 0.4000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
K-L Divergence: 12.7270
Standard Deviation: 0.4244

Predicted Results for 300 samples (First 10 rows):
    IR_State EI_State Chosen_SP Predicted_SP
93      high  average  decrease     decrease
249   medium  average    stable     decrease
278     high  average  increase     decrease
108     high  average  decrease     decrease
203   medium  average  decrease     decrease
281      low     good  decrease     decrease
73    medium     poor    stable     decrease
30      high  average  increase     decrease
237   medium     good  decrease     decrease
77    medium     good  increase     decrease





Sample size: 350
Training Accuracy: 0.4857
Validation Accuracy: 0.4038
Test Accuracy: 0.3396
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
K-L Divergence: 8.2521
Standard Deviation: 0.3961

Predicted Results for 350 samples (First 10 rows):
    IR_State EI_State Chosen_SP Predicted_SP
294   medium  average  decrease       stable
94       low     good    stable       stable
76    medium     good  decrease       stable
109     high     good  decrease       stable
152      low     good  decrease       stable
307     high     poor  decrease     increase
19       low     poor  increase       stable
288   medium  average  increase       stable
75      high     good  increase       stable
157   medium  average  decrease       stable





Sample size: 400
Training Accuracy: 0.5607
Validation Accuracy: 0.5333
Test Accuracy: 0.5500
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
K-L Divergence: 5.5603
Standard Deviation: 0.1891

Predicted Results for 400 samples (First 10 rows):
    IR_State EI_State Chosen_SP Predicted_SP
82    medium  average  decrease     increase
181      low     poor  decrease     decrease
93       low     poor  decrease     decrease
39       low  average  decrease     decrease
114   medium  average  increase     increase
101      low     good  decrease     decrease
45       low  average  decrease     decrease
9        low     poor  decrease     decrease
381     high     poor  decrease     increase
137     high     poor    stable     increase





Sample size: 450
Training Accuracy: 0.4381
Validation Accuracy: 0.4179
Test Accuracy: 0.3382
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
K-L Divergence: 14.1591
Standard Deviation: 0.4726

Predicted Results for 450 samples (First 10 rows):
    IR_State EI_State Chosen_SP Predicted_SP
406      low  average  decrease     decrease
362      low     good  increase     decrease
248      low     poor    stable     decrease
39    medium     poor    stable     decrease
72       low     poor  increase     decrease
287     high  average  increase     decrease
70      high     poor  decrease     decrease
45       low     good  increase     decrease
324   medium  average  decrease     decrease
284      low  average  decrease     decrease





Sample size: 500
Training Accuracy: 0.4943
Validation Accuracy: 0.5867
Test Accuracy: 0.4667
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
K-L Divergence: 6.1799
Standard Deviation: 0.2395

Predicted Results for 500 samples (First 10 rows):
    IR_State EI_State Chosen_SP Predicted_SP
290      low     poor  decrease     decrease
316      low     poor    stable     decrease
117     high  average  decrease       stable
455     high     good  decrease       stable
268     high     good    stable       stable
336     high  average  increase       stable
79      high     good  decrease       stable
208      low     poor    stable     decrease
238     high  average    stable       stable
477   medium  average  increase       stable





Sample size: 550
Training Accuracy: 0.4597
Validation Accuracy: 0.3537
Test Accuracy: 0.4699
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
K-L Divergence: 6.3325
Standard Deviation: 0.2288

Predicted Results for 550 samples (First 10 rows):
    IR_State EI_State Chosen_SP Predicted_SP
261   medium  average  decrease     decrease
54      high     poor  increase       stable
361     high     poor  increase       stable
55    medium     good  increase       stable
11    medium     good  decrease       stable
507   medium     good    stable       stable
395   medium  average  decrease     decrease
70    medium     good  decrease       stable
89    medium  average  decrease     decrease
305   medium  average  decrease     decrease





Sample size: 600
Training Accuracy: 0.5762
Validation Accuracy: 0.6556
Test Accuracy: 0.5333
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
K-L Divergence: 9.7444
Standard Deviation: 0.3331

Predicted Results for 600 samples (First 10 rows):
    IR_State EI_State Chosen_SP Predicted_SP
86      high  average  increase     increase
506     high  average  decrease     increase
332     high  average  decrease     increase
559     high  average    stable     increase
117   medium  average  increase     increase
234   medium  average  increase     increase
368      low     poor  decrease     increase
81    medium  average    stable     increase
584   medium  average  increase     increase
494     high     poor  increase     increase





Sample size: 650
Training Accuracy: 0.4857
Validation Accuracy: 0.5052
Test Accuracy: 0.4796
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
K-L Divergence: 0.0803
Standard Deviation: 0.1283

Predicted Results for 650 samples (First 10 rows):
    IR_State EI_State Chosen_SP Predicted_SP
430      low     good    stable     decrease
363     high     poor    stable       stable
248   medium     good  decrease     increase
646      low     poor  decrease     increase
163      low     good  increase     decrease
221   medium     poor  decrease       stable
328   medium     good  increase     increase
247      low     good    stable     decrease
70    medium     good    stable     increase
521   medium  average  decrease       stable





Sample size: 700
Training Accuracy: 0.5061
Validation Accuracy: 0.4857
Test Accuracy: 0.5048
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
K-L Divergence: 4.1464
Standard Deviation: 0.1910

Predicted Results for 700 samples (First 10 rows):
    IR_State EI_State Chosen_SP Predicted_SP
133      low     poor    stable       stable
231      low     poor    stable       stable
131      low     poor    stable       stable
42      high     good    stable       stable
31      high     good  decrease       stable
213     high  average  increase       stable
596      low     good    stable     decrease
104      low     poor  increase       stable
356      low  average  decrease     decrease
140      low     poor  increase       stable





Sample size: 750
Training Accuracy: 0.4248
Validation Accuracy: 0.4554
Test Accuracy: 0.4425
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
K-L Divergence: 11.7730
Standard Deviation: 0.3962

Predicted Results for 750 samples (First 10 rows):
    IR_State EI_State Chosen_SP Predicted_SP
736      low  average    stable     decrease
356      low     good  increase     decrease
404      low  average    stable     decrease
140      low  average  decrease     decrease
685      low     good  increase     decrease
29       low  average  increase     decrease
192      low  average  increase     decrease
709      low  average    stable     decrease
41       low  average  decrease     decrease
326      low     good    stable     decrease





Sample size: 800
Training Accuracy: 0.4196
Validation Accuracy: 0.5000
Test Accuracy: 0.3833
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
K-L Divergence: 13.1071
Standard Deviation: 0.4363

Predicted Results for 800 samples (First 10 rows):
    IR_State EI_State Chosen_SP Predicted_SP
360   medium     poor  increase     decrease
346     high     poor  decrease     decrease
393      low     good    stable     decrease
367      low     poor  increase     decrease
76       low  average  decrease     decrease
133     high     good  increase     decrease
292      low     good    stable     decrease
786     high     poor  decrease     decrease
456     high  average    stable     decrease
218     high     good  decrease     decrease





Sample size: 850
Training Accuracy: 0.3613
Validation Accuracy: 0.3386
Test Accuracy: 0.3672
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
K-L Divergence: 6.5461
Standard Deviation: 0.3219

Predicted Results for 850 samples (First 10 rows):
    IR_State EI_State Chosen_SP Predicted_SP
148   medium  average  increase     decrease
756   medium     good    stable       stable
266   medium     poor  decrease       stable
523     high     good    stable       stable
436      low     good    stable       stable
606     high  average    stable       stable
521     high  average    stable       stable
326      low     good    stable       stable
467   medium     good  increase       stable
731     high     good  increase       stable





Sample size: 900
Training Accuracy: 0.4000
Validation Accuracy: 0.4519
Test Accuracy: 0.4296
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
K-L Divergence: 12.0586
Standard Deviation: 0.4042

Predicted Results for 900 samples (First 10 rows):
    IR_State EI_State Chosen_SP Predicted_SP
260     high  average  increase     increase
807   medium     poor    stable     increase
664     high     poor    stable     increase
829     high  average  decrease     increase
629     high     poor  decrease     increase
857     high     poor  increase     increase
718   medium     good  decrease     increase
625      low     poor  decrease     increase
312     high  average  decrease     increase
408     high  average    stable     increase





Sample size: 950
Training Accuracy: 0.4677
Validation Accuracy: 0.4930
Test Accuracy: 0.5245
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
K-L Divergence: 5.1910
Standard Deviation: 0.2709

Predicted Results for 950 samples (First 10 rows):
    IR_State EI_State Chosen_SP Predicted_SP
174   medium  average  decrease     decrease
239     high     poor    stable       stable
78       low  average  increase       stable
54    medium     good  increase       stable
935      low     good  increase       stable
118     high     poor    stable       stable
323     high     poor    stable       stable
743     high     poor    stable       stable
266      low  average  decrease       stable
10    medium  average  decrease     decrease





Sample size: 1000
Training Accuracy: 0.4629
Validation Accuracy: 0.4133
Test Accuracy: 0.4533
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
K-L Divergence: 5.1223
Standard Deviation: 0.3492

Predicted Results for 1000 samples (First 10 rows):
    IR_State EI_State Chosen_SP Predicted_SP
557   medium     good  decrease     decrease
798      low     good  decrease     decrease
977     high     good    stable     decrease
136      low     poor  increase     decrease
575     high  average  increase     decrease
544     high  average  decrease     decrease
332     high  average  decrease     decrease
917     high  average  increase     decrease
678     high  average  decrease     decrease
363     high     good    stable     decrease





Sample size: 2000
Training Accuracy: 0.5336
Validation Accuracy: 0.5567
Test Accuracy: 0.4800
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
K-L Divergence: 6.8099
Standard Deviation: 0.2958

Predicted Results for 2000 samples (First 10 rows):
     IR_State EI_State Chosen_SP Predicted_SP
771      high     good  decrease     decrease
1788   medium     good  increase     decrease
1106      low     good  decrease     decrease
787       low     poor  increase     decrease
785    medium  average    stable     decrease
1301      low     good    stable     decrease
56       high  average    stable     decrease
931       low     poor    stable     decrease
368       low     good  decrease     decrease
978    medium     good  decrease     decrease





Sample size: 3000
Training Accuracy: 0.4238
Validation Accuracy: 0.4511
Test Accuracy: 0.4222
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
K-L Divergence: 12.2262
Standard Deviation: 0.4095

Predicted Results for 3000 samples (First 10 rows):
     IR_State EI_State Chosen_SP Predicted_SP
1694   medium  average  increase     decrease
1450   medium     good  decrease     decrease
1831     high     poor    stable     decrease
676    medium  average  increase     decrease
1298   medium     good  increase     decrease
1872   medium  average  decrease     decrease
402    medium  average    stable     decrease
2986   medium     poor  decrease     decrease
2748   medium     good  decrease     decrease
2525   medium  average  decrease     decrease





Sample size: 4000
Training Accuracy: 0.4107
Validation Accuracy: 0.4050
Test Accuracy: 0.4317
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
K-L Divergence: 3.7801
Standard Deviation: 0.2072

Predicted Results for 4000 samples (First 10 rows):
     IR_State EI_State Chosen_SP Predicted_SP
2659     high     poor    stable       stable
3800     high     poor    stable       stable
2515     high     poor    stable       stable
3493     high  average  decrease       stable
3224   medium  average  decrease       stable
48       high     poor  increase       stable
3987   medium     good  increase     increase
2552     high     poor    stable       stable
1916   medium  average  increase       stable
2111     high  average  decrease       stable





Sample size: 5000
Training Accuracy: 0.4743
Validation Accuracy: 0.5000
Test Accuracy: 0.4693
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
K-L Divergence: 11.1755
Standard Deviation: 0.3789

Predicted Results for 5000 samples (First 10 rows):
     IR_State EI_State Chosen_SP Predicted_SP
790    medium     good  decrease       stable
2879   medium     good  increase       stable
2372   medium     poor    stable       stable
1351   medium  average  decrease       stable
3382   medium     poor  decrease       stable
3433   medium     good  decrease       stable
1129   medium     good  decrease       stable
549    medium     poor    stable       stable
2835      low  average  decrease       stable
626    medium     good  increase       stable





Sample size: 6000
Training Accuracy: 0.3838
Validation Accuracy: 0.3822
Test Accuracy: 0.3889
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
K-L Divergence: 0.3881
Standard Deviation: 0.1777

Predicted Results for 6000 samples (First 10 rows):
     IR_State EI_State Chosen_SP Predicted_SP
2462   medium  average  decrease     increase
2223   medium     good  decrease     decrease
1242   medium  average  increase     increase
177    medium  average  increase     increase
5536   medium  average  increase     increase
3316     high     good    stable     decrease
2642   medium     poor  increase     increase
1188     high  average    stable     increase
4148   medium  average  increase     increase
952      high     good    stable     decrease





Sample size: 7000
Training Accuracy: 0.4490
Validation Accuracy: 0.4429
Test Accuracy: 0.4524
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
K-L Divergence: 4.2556
Standard Deviation: 0.1543

Predicted Results for 7000 samples (First 10 rows):
     IR_State EI_State Chosen_SP Predicted_SP
4006   medium     good  decrease       stable
434    medium     poor  increase       stable
4812     high     poor  increase       stable
1487   medium     good  increase       stable
6034   medium     good    stable       stable
4096      low     poor    stable     decrease
5671   medium     good  increase       stable
2569      low     good  increase     decrease
5985   medium     good  decrease       stable
3499     high     poor  decrease       stable





Sample size: 8000
Training Accuracy: 0.4407
Validation Accuracy: 0.4450
Test Accuracy: 0.4525
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
K-L Divergence: 3.9702
Standard Deviation: 0.2059

Predicted Results for 8000 samples (First 10 rows):
     IR_State EI_State Chosen_SP Predicted_SP
2750     high  average  decrease     increase
1498     high  average  decrease     increase
2758      low     good  decrease     decrease
6286     high  average  decrease     increase
4003      low  average  decrease     decrease
3182     high  average  increase     increase
4157      low     poor    stable     increase
6852      low     good  decrease     decrease
5219      low  average  decrease     decrease
1978      low     poor  increase     increase





Sample size: 9000
Training Accuracy: 0.5200
Validation Accuracy: 0.5141
Test Accuracy: 0.5274
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
K-L Divergence: 4.1240
Standard Deviation: 0.1693

Predicted Results for 9000 samples (First 10 rows):
     IR_State EI_State Chosen_SP Predicted_SP
7439     high     poor    stable     increase
4262     high     poor    stable     increase
6026      low     poor  increase     decrease
7692      low  average  decrease     decrease
2252      low     poor  decrease     decrease
6683   medium  average  decrease     increase
1561   medium     poor  increase     increase
6436   medium  average  increase     increase
8441   medium  average  increase     increase
5076   medium  average  increase     increase





Sample size: 10000
Training Accuracy: 0.4450
Validation Accuracy: 0.4420
Test Accuracy: 0.4433
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
K-L Divergence: 4.8091
Standard Deviation: 0.3530

Predicted Results for 10000 samples (First 10 rows):
     IR_State EI_State Chosen_SP Predicted_SP
2697   medium     good  decrease     decrease
6871   medium  average  increase     decrease
3487   medium     good  decrease     decrease
92       high  average    stable     decrease
9537     high     poor    stable     decrease
3205   medium     poor  decrease     decrease
6641      low  average    stable     decrease
8909   medium     poor  decrease     decrease
2884   medium  average  increase     decrease
7173      low  average  increase     decrease





Sample size: 11000
Training Accuracy: 0.5295
Validation Accuracy: 0.5358
Test Accuracy: 0.5127
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
K-L Divergence: 10.2578
Standard Deviation: 0.3596

Predicted Results for 11000 samples (First 10 rows):
      IR_State EI_State Chosen_SP Predicted_SP
8366       low  average    stable     increase
3445       low  average    stable     increase
10360     high  average  increase     increase
4631      high     poor  increase     increase
6947       low  average    stable     increase
4191       low  average  increase     increase
4740       low  average  increase     increase
1111       low  average  increase     increase
9803       low     good  increase     increase
7250      high     good  increase     increase





Sample size: 12000
Training Accuracy: 0.5086
Validation Accuracy: 0.5100
Test Accuracy: 0.5161
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
K-L Divergence: 10.1151
Standard Deviation: 0.3424

Predicted Results for 12000 samples (First 10 rows):
      IR_State EI_State Chosen_SP Predicted_SP
6423       low  average    stable     increase
4826    medium     poor    stable     increase
2413       low  average  increase     increase
4441    medium  average  increase     increase
5517       low  average  increase     increase
5090       low     good  decrease     increase
4562       low  average  increase     increase
4367       low  average    stable     increase
11612      low     poor  increase     increase
10987      low  average  decrease     increase





Sample size: 13000
Training Accuracy: 0.4874
Validation Accuracy: 0.5036
Test Accuracy: 0.4862
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
K-L Divergence: 5.1642
Standard Deviation: 0.3139

Predicted Results for 13000 samples (First 10 rows):
      IR_State EI_State Chosen_SP Predicted_SP
3353    medium  average    stable     decrease
6440       low     good    stable     decrease
12509      low     good  decrease     decrease
5076    medium     poor  decrease     decrease
9245       low  average    stable     decrease
4382       low  average  increase     decrease
852       high     poor    stable     decrease
6475    medium  average    stable     decrease
42        high     poor  decrease     decrease
613        low     poor  decrease     decrease





Sample size: 14000
Training Accuracy: 0.4566
Validation Accuracy: 0.4462
Test Accuracy: 0.4486
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
K-L Divergence: 11.6283
Standard Deviation: 0.3902

Predicted Results for 14000 samples (First 10 rows):
      IR_State EI_State Chosen_SP Predicted_SP
676     medium     good  decrease     decrease
11373      low     poor  increase     decrease
5987      high     poor  decrease     decrease
8331       low     good  decrease     decrease
5032       low     poor  decrease     decrease
10710   medium  average  decrease     decrease
3817       low     good  decrease     decrease
9118    medium  average  increase     decrease
1463       low     poor  decrease     decrease
3322    medium  average  decrease     decrease





Sample size: 15000
Training Accuracy: 0.4504
Validation Accuracy: 0.4196
Test Accuracy: 0.4453
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
K-L Divergence: 4.9598
Standard Deviation: 0.2414

Predicted Results for 15000 samples (First 10 rows):
      IR_State EI_State Chosen_SP Predicted_SP
8602      high  average  increase       stable
438       high  average  increase       stable
8094       low     poor    stable       stable
14355   medium     poor  decrease       stable
8581       low     poor  increase       stable
12358     high  average  increase       stable
511        low     good  increase     decrease
6594      high     good  increase       stable
5245      high  average    stable       stable
5437      high  average  decrease       stable





Sample size: 16000
Training Accuracy: 0.5267
Validation Accuracy: 0.5254
Test Accuracy: 0.5329
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
K-L Divergence: 5.5466
Standard Deviation: 0.2510

Predicted Results for 16000 samples (First 10 rows):
      IR_State EI_State Chosen_SP Predicted_SP
14628      low     poor    stable     decrease
12756   medium  average  decrease     decrease
6572       low  average    stable     decrease
10633   medium     poor  decrease     decrease
7872       low  average  decrease     decrease
4089       low  average    stable     decrease
3652      high  average    stable       stable
6954    medium  average  decrease     decrease
4438    medium  average  decrease     decrease
9053      high  average  increase       stable





Sample size: 17000
Training Accuracy: 0.4372
Validation Accuracy: 0.4412
Test Accuracy: 0.4306
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
K-L Divergence: 6.6484
Standard Deviation: 0.2267

Predicted Results for 17000 samples (First 10 rows):
      IR_State EI_State Chosen_SP Predicted_SP
1351       low     good  decrease     decrease
14452     high     good  decrease     decrease
10373     high     good    stable     decrease
2347      high  average    stable     increase
5134      high     poor  increase     increase
8270    medium     poor    stable     increase
10255      low     poor    stable     increase
805     medium     good  increase     decrease
2619    medium     good    stable     decrease
1295    medium     good  increase     decrease





Sample size: 18000
Training Accuracy: 0.4047
Validation Accuracy: 0.3963
Test Accuracy: 0.4159
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
K-L Divergence: 12.3650
Standard Deviation: 0.4130

Predicted Results for 18000 samples (First 10 rows):
      IR_State EI_State Chosen_SP Predicted_SP
9902    medium     poor  increase     increase
3778      high     good    stable     increase
12698     high     good  increase     increase
15657      low  average  increase     increase
5846      high     poor    stable     increase
17090   medium  average  increase     increase
13578   medium     poor    stable     increase
937       high     good  decrease     increase
12353   medium     poor    stable     increase
8515    medium  average    stable     increase





Sample size: 19000
Training Accuracy: 0.4553
Validation Accuracy: 0.4558
Test Accuracy: 0.4712
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
K-L Divergence: 5.7121
Standard Deviation: 0.2163

Predicted Results for 19000 samples (First 10 rows):
      IR_State EI_State Chosen_SP Predicted_SP
9595    medium  average  decrease     decrease
16583     high     poor  increase     increase
1935      high     poor  increase     increase
18613   medium     poor  decrease     decrease
15103   medium     poor  increase     decrease
2698      high     poor  increase     increase
6069      high  average    stable     increase
13434     high     good    stable     increase
11588   medium  average  decrease     decrease
15315      low     poor  decrease     decrease





Sample size: 20000
Training Accuracy: 0.4857
Validation Accuracy: 0.4857
Test Accuracy: 0.4917
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
K-L Divergence: 0.5846
Standard Deviation: 0.3020

Predicted Results for 20000 samples (First 10 rows):
      IR_State EI_State Chosen_SP Predicted_SP
5348       low     poor  increase     decrease
339       high  average  increase     increase
13591      low     poor    stable     decrease
8153    medium  average  increase     decrease
16345     high     poor    stable       stable
16404   medium  average    stable     decrease
17185      low     poor    stable     decrease
5709    medium     poor    stable     decrease
13020      low     good  decrease     decrease
7763    medium     good  decrease     decrease

All K-L divergence and standard deviation results have been saved in 'kl_std_results_summary.csv'.
