<a href="https://colab.research.google.com/github/nonyeezeh/Research-Project-Code/blob/main/NN_Dense_2_3_Relu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
pip install pgmpy

Collecting pgmpy
  Downloading pgmpy-0.1.26-py3-none-any.whl.metadata (9.1 kB)
Downloading pgmpy-0.1.26-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pgmpy
Successfully installed pgmpy-0.1.26


In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.sampling import BayesianModelSampling
from tabulate import tabulate

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras import models, layers, callbacks, regularizers

from scipy.stats import entropy

# Bayesian Network Data Generation 500, ..., 20000 Samples (dense)

In [3]:
# Function to generate CPDs
def generate_cpds():
    # Generate random probabilities for IR
    ir_probs = np.random.rand(3)
    ir_probs /= ir_probs.sum()  # Normalize to make it a valid probability distribution

    # Generate random probabilities for EI given IR
    ei_given_ir_probs = np.random.rand(3, 3)
    ei_given_ir_probs /= ei_given_ir_probs.sum(axis=0, keepdims=True)

    # Generate random probabilities for SP given IR and EI
    sp_probs = np.random.rand(3, 3, 3)
    sp_probs /= sp_probs.sum(axis=0, keepdims=True)

    return ir_probs, ei_given_ir_probs, sp_probs

# Function to generate and save samples
def generate_and_save_samples(ir_probs, ei_probs, sp_probs, sample_size, filename):
    output_data = []

    # Generate `sample_size` random samples
    for _ in range(sample_size):
        # Sample `IR` state based on `IR` probabilities
        ir_state_idx = np.random.choice(3, p=ir_probs)
        ir_state = ['low', 'medium', 'high'][ir_state_idx]
        ir_prob = ir_probs[ir_state_idx]

        # Sample `EI` state based on `EI` probabilities given `IR`
        ei_probs_given_ir = ei_probs[:, ir_state_idx]
        ei_state_idx = np.random.choice(3, p=ei_probs_given_ir)
        ei_state = ['poor', 'average', 'good'][ei_state_idx]
        ei_prob = ei_probs_given_ir[ei_state_idx]

        # Sample `SP` state based on `SP` probabilities given `IR` and `EI`
        sp_probs_given_ir_ei = sp_probs[:, ir_state_idx, ei_state_idx]
        sp_state_idx = np.random.choice(3, p=sp_probs_given_ir_ei)
        sp_state = ['decrease', 'stable', 'increase'][sp_state_idx]
        sp_prob = sp_probs_given_ir_ei[sp_state_idx]

        # Append sample data to output list
        output_data.append({
            'IR_State': ir_state,
            'IR_Prob': f'{ir_prob:.4f}',
            'EI_State': ei_state,
            'EI_Prob': f'{ei_prob:.4f}',
            'SP_Probabilities (decrease, stable, increase)': ', '.join([f'{prob:.4f}' for prob in sp_probs_given_ir_ei]),
            'Chosen_SP_State': sp_state,
            'Chosen_SP_Probability': f'{sp_prob:.4f}'
        })

    # Create a DataFrame from the output data
    output_df = pd.DataFrame(output_data)

    # Save the output DataFrame to a CSV file
    output_df.to_csv(filename, index=False)

    # Print the first few rows for visual confirmation
    print(f"\nSample size: {sample_size} - First few rows of generated samples:\n")
    print(tabulate(output_df.head(), headers='keys', tablefmt='grid'))

# Generate and save samples for sample sizes from 1000 to 10000 every 1000
sample_sizes = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000]

for size in sample_sizes:
    # Generate the CPDs
    ir_probs, ei_given_ir_probs, sp_probs = generate_cpds()

    # Generate and save individual samples for the given sample size
    generate_and_save_samples(ir_probs, ei_given_ir_probs, sp_probs, size, f'combined_probabilities_{size}.csv')

# Notify the user that the process is done
print("\nGeneration and saving of individual samples complete for all sample sizes!")


Sample size: 50 - First few rows of generated samples:

+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|    | IR_State   |   IR_Prob | EI_State   |   EI_Prob | SP_Probabilities (decrease, stable, increase)   | Chosen_SP_State   |   Chosen_SP_Probability |
|  0 | medium     |    0.3518 | good       |    0.3503 | 0.2147, 0.2567, 0.5285                          | decrease          |                  0.2147 |
+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|  1 | low        |    0.2676 | average    |    0.6651 | 0.1944, 0.4747, 0.3309                          | stable            |                  0.4747 |
+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|  2 | high       |    0.

# Hypothesis Model: 500, ..., 20000 Samples (dense) 2 hidden Layer, 3 Neurons Relu

In [4]:
# Sample sizes to loop through
sample_sizes = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000]

# Define the Neural Network architecture with L2 regularization
def create_nn_model(hidden_layers=2, nodes_per_layer=3, l2_lambda=0.01):
    model = models.Sequential()

    # Input layer (2 input features: IR_encoded and EI_encoded)
    model.add(layers.InputLayer(input_shape=(2,)))

    # Hidden layers with L2 regularization and Dropout
    for layer_num in range(hidden_layers):
        model.add(layers.Dense(
            nodes_per_layer,
            activation='relu',
            kernel_regularizer=regularizers.l2(l2_lambda),  # L2 regularization
            name=f"hidden_layer_{layer_num + 1}"
        ))
        model.add(layers.Dropout(0.2))  # Dropout layer to reduce overfitting

    # Output layer (3 classes: decrease, stable, increase) with L2 regularization
    model.add(layers.Dense(
        3,
        activation='softmax',
        kernel_regularizer=regularizers.l2(l2_lambda),  # L2 regularization
        name="output_layer"
    ))

    # Compile the model
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

# Prepare a dictionary to store the extracted data for each sample size
extracted_data = {}

# Extract the required columns from all sample sizes first
for size in sample_sizes:
    # Load data for the current sample size (adjust the file paths if necessary)
    outcomes_file = f'combined_probabilities_{size}.csv'
    df = pd.read_csv(outcomes_file)

    # Extract only the required columns
    required_columns = ['IR_State', 'EI_State', 'Chosen_SP_State']
    df_extracted = df[required_columns]

    # Manually encode categorical variables for IR, EI, and SP
    ir_map = {'low': 0, 'medium': 1, 'high': 2}
    ei_map = {'poor': 0, 'average': 1, 'good': 2}
    sp_map = {'decrease': 0, 'stable': 1, 'increase': 2}

    df_extracted['IR_encoded'] = df_extracted['IR_State'].map(ir_map)
    df_extracted['EI_encoded'] = df_extracted['EI_State'].map(ei_map)
    df_extracted['SP_encoded'] = df_extracted['Chosen_SP_State'].map(sp_map)

    # Store the extracted and encoded data for later use
    extracted_data[size] = df_extracted

# Loop through each sample size for NN training, validation, and testing
for size in sample_sizes:
    # Retrieve the extracted data for the current sample size
    df = extracted_data[size]

    # Features (IR and EI) and labels (SP)
    X = df[['IR_encoded', 'EI_encoded']]
    y = df['SP_encoded']

    # Refresh the data split for each iteration
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, shuffle=False, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=False, random_state=42)

    # Show split confirmation
    print(f"\nSample size: {size}")
    print("Training Data:", X_train.shape, y_train.shape)
    print("Validation Data:", X_val.shape, y_val.shape)
    print("Test Data:", X_test.shape, y_test.shape)

    # Create the Neural Network model with L2 regularization
    nn_model = create_nn_model(hidden_layers=2, nodes_per_layer=3, l2_lambda=0.01)

    # Early stopping callback to prevent overfitting
    early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Train the model
    history = nn_model.fit(X_train, y_train,
                           epochs=25,
                           batch_size=16,
                           validation_data=(X_val, y_val),
                           callbacks=[early_stopping],
                           verbose=0)  # Set verbose=0 to avoid too much output

    # Print training, validation, and test accuracy
    train_loss, train_accuracy = nn_model.evaluate(X_train, y_train, verbose=0)
    val_loss, val_accuracy = nn_model.evaluate(X_val, y_val, verbose=0)
    test_loss, test_accuracy = nn_model.evaluate(X_test, y_test, verbose=0)
    print(f"Training Accuracy for {size} samples: {train_accuracy:.4f}")
    print(f"Validation Accuracy for {size} samples: {val_accuracy:.4f}")
    print(f"Test Accuracy for {size} samples: {test_accuracy:.4f}")

    # Make predictions on the test set
    predictions = nn_model.predict(X_test)

    # Convert the predicted probabilities to class labels
    predicted_classes = predictions.argmax(axis=1)

    # Create a list to map integers back to the original SP labels
    sp_reverse_map = ['decrease', 'stable', 'increase']

    # Convert the predicted classes to the original labels
    predicted_labels = [sp_reverse_map[label] for label in predicted_classes]

    # Create a DataFrame for the predicted probabilities
    probs_df = pd.DataFrame(predictions, columns=['Prob_decrease', 'Prob_stable', 'Prob_increase'])

    # Output the IR, EI, predicted SP, and the NN probabilities
    result_df = pd.DataFrame({
        'IR': df['IR_State'].iloc[X_test.index],  # IR column from the original dataframe for the test set
        'EI': df['EI_State'].iloc[X_test.index],  # EI column from the original dataframe for the test set
        'Predicted_SP': predicted_labels           # Predicted SP labels
    })

    # Combine the result with the predicted probabilities
    combined_df = pd.concat([result_df.reset_index(drop=True), probs_df.reset_index(drop=True)], axis=1)

    # Save the test data with predictions to a CSV file
    combined_df.to_csv(f'test_data_nn_{size}.csv', index=False)

    # Show the first few rows of the results for this sample size
    print(f"\nPredicted Results and Probabilities for {size} samples (First 15 rows):")
    print(combined_df.head(15))

# After the loop is done, print this message
print("\nLooping through all sample sizes complete!")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_extracted['IR_encoded'] = df_extracted['IR_State'].map(ir_map)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_extracted['EI_encoded'] = df_extracted['EI_State'].map(ei_map)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_extracted['SP_encoded'] = df_extracted['Chosen_SP_State'].map(sp_map)



Sample size: 50
Training Data: (35, 2) (35,)
Validation Data: (7, 2) (7,)
Test Data: (8, 2) (8,)
Training Accuracy for 50 samples: 0.3429
Validation Accuracy for 50 samples: 0.1429
Test Accuracy for 50 samples: 0.5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step

Predicted Results and Probabilities for 50 samples (First 15 rows):
       IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0    high  average       stable       0.324114     0.339269       0.336616
1    high  average       stable       0.324114     0.339269       0.336616
2     low     poor     increase       0.323162     0.336611       0.340227
3  medium     poor       stable       0.304156     0.350853       0.344991
4    high     poor       stable       0.273710     0.381124       0.345166
5    high  average       stable       0.324114     0.339269       0.336616
6    high     poor       stable       0.273710     0.381124       0.345166
7  medium  average       stable       0.



Training Accuracy for 100 samples: 0.3857
Validation Accuracy for 100 samples: 0.2667
Test Accuracy for 100 samples: 0.5333
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step

Predicted Results and Probabilities for 100 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  average     increase       0.331874     0.333188       0.334937
1   medium     good     increase       0.331874     0.333188       0.334937
2      low  average     increase       0.331874     0.333188       0.334937
3   medium     good     increase       0.331874     0.333188       0.334937
4     high     poor     decrease       0.427650     0.370105       0.202245
5      low     poor     increase       0.331748     0.333105       0.335146
6     high  average     decrease       0.392630     0.359570       0.247800
7   medium     good     increase       0.331874     0.333188       0.334937
8   medium  average     decrease       0.343486   



Training Accuracy for 150 samples: 0.4952
Validation Accuracy for 150 samples: 0.3636
Test Accuracy for 150 samples: 0.4783
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step

Predicted Results and Probabilities for 150 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     good     decrease       0.434672     0.341228       0.224100
1   medium     poor       stable       0.286920     0.365644       0.347435
2      low  average       stable       0.315717     0.387320       0.296963
3     high  average       stable       0.327777     0.367735       0.304488
4   medium     good     decrease       0.436360     0.340687       0.222954
5     high     poor       stable       0.286920     0.365644       0.347435
6     high     poor       stable       0.286920     0.365644       0.347435
7   medium     good     decrease       0.436360     0.340687       0.222954
8      low  average       stable       0.315717   



Training Accuracy for 200 samples: 0.3714
Validation Accuracy for 200 samples: 0.2667
Test Accuracy for 200 samples: 0.4333
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step

Predicted Results and Probabilities for 200 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     good     increase       0.312592     0.343040       0.344368
1   medium     poor     increase       0.264544     0.357803       0.377654
2   medium     good     increase       0.312371     0.343218       0.344411
3   medium  average     increase       0.300832     0.347079       0.352089
4      low     good     increase       0.312592     0.343040       0.344368
5      low     good     increase       0.312592     0.343040       0.344368
6   medium  average     increase       0.300832     0.347079       0.352089
7     high     poor     increase       0.219911     0.368944       0.411145
8     high     poor     increase       0.219911   



Training Accuracy for 250 samples: 0.4743
Validation Accuracy for 250 samples: 0.4324
Test Accuracy for 250 samples: 0.5000
[1m1/2[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 60ms/step



[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step

Predicted Results and Probabilities for 250 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     poor       stable       0.290912     0.404077       0.305011
1     high     poor       stable       0.290912     0.404077       0.305011
2     high     good       stable       0.272636     0.402962       0.324403
3   medium     good       stable       0.278380     0.403420       0.318200
4   medium     poor       stable       0.290912     0.404077       0.305011
5   medium     good       stable       0.278380     0.403420       0.318200
6      low  average       stable       0.290912     0.404077       0.305011
7     high  average       stable       0.284530     0.403800       0.311670
8   medium     poor       stable       0.290912     0.404077       0.305011
9      low     good       stable       0.284178     0.403781




Sample size: 300
Training Data: (210, 2) (210,)
Validation Data: (45, 2) (45,)
Test Data: (45, 2) (45,)
Training Accuracy for 300 samples: 0.4333
Validation Accuracy for 300 samples: 0.4222
Test Accuracy for 300 samples: 0.3556
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step

Predicted Results and Probabilities for 300 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     good     decrease       0.389511     0.272506       0.337983
1   medium     good     decrease       0.389511     0.272506       0.337983
2   medium     poor     decrease       0.409001     0.262837       0.328163
3   medium  average     decrease       0.409001     0.262837       0.328163
4     high  average     decrease       0.409001     0.262837       0.328163
5   medium     poor     decrease       0.409001     0.262837       0.328163
6      low     good     decrease       0.395395     0.269578       0.335027
7     high     poor  



Training Accuracy for 350 samples: 0.3592
Validation Accuracy for 350 samples: 0.4038
Test Accuracy for 350 samples: 0.2830
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step

Predicted Results and Probabilities for 350 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  average     increase       0.305566     0.343324       0.351111
1   medium  average     decrease       0.340046     0.326709       0.333246
2      low     poor     increase       0.287313     0.352089       0.360598
3     high  average     decrease       0.402732     0.296342       0.300926
4      low     poor     increase       0.287313     0.352089       0.360598
5   medium     good     decrease       0.391936     0.301585       0.306479
6     high  average     decrease       0.402732     0.296342       0.300926
7      low     poor     increase       0.287313     0.352089       0.360598
8   medium     good     decrease       0.391936   



Training Accuracy for 400 samples: 0.4357
Validation Accuracy for 400 samples: 0.4833
Test Accuracy for 400 samples: 0.5167
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step

Predicted Results and Probabilities for 400 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     poor       stable       0.238639     0.433916       0.327445
1   medium  average       stable       0.278040     0.403899       0.318061
2     high     poor       stable       0.211945     0.455222       0.332833
3     high  average       stable       0.275880     0.405506       0.318614
4      low     poor       stable       0.267506     0.411776       0.320718
5   medium  average       stable       0.278040     0.403899       0.318061
6      low  average       stable       0.278040     0.403899       0.318061
7     high     poor       stable       0.211945     0.455222       0.332833
8   medium  average       stable       0.278040   



Training Accuracy for 450 samples: 0.4127
Validation Accuracy for 450 samples: 0.5373
Test Accuracy for 450 samples: 0.4853
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step

Predicted Results and Probabilities for 450 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     poor     increase       0.294125     0.282198       0.423676
1   medium  average     increase       0.317963     0.303498       0.378539
2     high     poor     increase       0.302810     0.297854       0.399336
3   medium     poor     increase       0.294125     0.282198       0.423676
4      low     poor     increase       0.294334     0.281900       0.423767
5   medium     poor     increase       0.294125     0.282198       0.423676
6   medium     poor     increase       0.294125     0.282198       0.423676
7      low     poor     increase       0.294334     0.281900       0.423767
8     high  average     decrease       0.349398   



Training Accuracy for 500 samples: 0.3714
Validation Accuracy for 500 samples: 0.3467
Test Accuracy for 500 samples: 0.3333
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step

Predicted Results and Probabilities for 500 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     good     increase       0.352314     0.284928       0.362758
1      low     poor     increase       0.347162     0.289396       0.363442
2   medium     good     increase       0.352314     0.284928       0.362758
3     high     good     increase       0.352314     0.284928       0.362758
4      low     poor     increase       0.347162     0.289396       0.363442
5   medium     good     increase       0.352314     0.284928       0.362758
6     high     good     increase       0.352314     0.284928       0.362758
7      low     poor     increase       0.347162     0.289396       0.363442
8   medium  average     increase       0.351814   



Training Accuracy for 550 samples: 0.3610
Validation Accuracy for 550 samples: 0.4634
Test Accuracy for 550 samples: 0.3012
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step

Predicted Results and Probabilities for 550 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  average     increase       0.338879     0.315484       0.345637
1   medium     good     increase       0.330399     0.317407       0.352194
2   medium     good     increase       0.330399     0.317407       0.352194
3   medium     poor       stable       0.183344     0.411407       0.405249
4      low  average     increase       0.338879     0.315484       0.345637
5      low     poor     increase       0.338632     0.315541       0.345827
6     high     good       stable       0.179914     0.415282       0.404804
7      low  average     increase       0.338879     0.315484       0.345637
8      low  average     increase       0.338879   



Training Accuracy for 600 samples: 0.3952
Validation Accuracy for 600 samples: 0.4000
Test Accuracy for 600 samples: 0.3111
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step

Predicted Results and Probabilities for 600 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     good       stable        0.30854     0.386914       0.304546
1   medium     poor       stable        0.30854     0.386914       0.304546
2      low  average       stable        0.30854     0.386914       0.304546
3   medium     good       stable        0.30854     0.386914       0.304546
4     high     poor       stable        0.30854     0.386914       0.304546
5     high     good       stable        0.30854     0.386914       0.304546
6     high     good       stable        0.30854     0.386914       0.304546
7     high     good       stable        0.30854     0.386914       0.304546
8     high     poor       stable        0.30854   



Training Accuracy for 650 samples: 0.4044
Validation Accuracy for 650 samples: 0.4330
Test Accuracy for 650 samples: 0.3469
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step

Predicted Results and Probabilities for 650 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     good       stable       0.226986     0.427781       0.345233
1   medium     poor       stable       0.308378     0.359697       0.331925
2     high     good       stable       0.196041     0.459443       0.344516
3     high  average       stable       0.237343     0.422646       0.340011
4      low     poor       stable       0.327222     0.344277       0.328501
5   medium  average       stable       0.259554     0.400144       0.340302
6     high  average       stable       0.237343     0.422646       0.340011
7      low     poor       stable       0.327222     0.344277       0.328501
8     high     good       stable       0.196041   



Training Accuracy for 700 samples: 0.4429
Validation Accuracy for 700 samples: 0.4381
Test Accuracy for 700 samples: 0.3619
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step

Predicted Results and Probabilities for 700 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     good     increase       0.310291     0.298017       0.391691
1     high     good     increase       0.310291     0.298017       0.391691
2      low  average     increase       0.309156     0.296841       0.394003
3     high     poor     increase       0.265572     0.241739       0.492689
4   medium     poor     increase       0.273081     0.251885       0.475034
5      low  average     increase       0.309156     0.296841       0.394003
6     high     poor     increase       0.265572     0.241739       0.492689
7     high     poor     increase       0.265572     0.241739       0.492689
8     high     good     increase       0.310291   



Training Accuracy for 750 samples: 0.4895
Validation Accuracy for 750 samples: 0.3929
Test Accuracy for 750 samples: 0.4425
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step

Predicted Results and Probabilities for 750 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high  average     increase       0.243425     0.222077       0.534499
1   medium     poor     increase       0.249877     0.286380       0.463742
2     high  average     increase       0.243425     0.222077       0.534499
3     high     poor     increase       0.237341     0.189519       0.573140
4     high  average     increase       0.243425     0.222077       0.534499
5   medium  average     increase       0.250194     0.327604       0.422201
6   medium     good     increase       0.248384     0.365200       0.386416
7     high  average     increase       0.243425     0.222077       0.534499
8     high  average     increase       0.243425   



Training Accuracy for 800 samples: 0.4518
Validation Accuracy for 800 samples: 0.4167
Test Accuracy for 800 samples: 0.4083
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step

Predicted Results and Probabilities for 800 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  average     increase       0.258506     0.305927       0.435567
1     high     poor     increase       0.241934     0.300305       0.457761
2   medium  average     increase       0.257650     0.305652       0.436698
3     high     poor     increase       0.241934     0.300305       0.457761
4     high  average     increase       0.256315     0.305222       0.438463
5     high     good     increase       0.255736     0.305031       0.439232
6   medium  average     increase       0.257650     0.305652       0.436698
7      low  average     increase       0.258506     0.305927       0.435567
8      low  average     increase       0.258506   



Training Accuracy for 850 samples: 0.3513
Validation Accuracy for 850 samples: 0.3150
Test Accuracy for 850 samples: 0.3203
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 

Predicted Results and Probabilities for 850 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high  average     decrease       0.354886     0.325523       0.319590
1   medium     poor     decrease       0.358661     0.322994       0.318345
2      low     good     decrease       0.350693     0.329644       0.319663
3      low  average     decrease       0.355201     0.325977       0.318822
4   medium     good     decrease       0.350590     0.329404       0.320007
5     high  average     decrease       0.354886     0.325523       0.319590
6      low     good     decrease       0.350693     0.329644       0.319663
7   medium  average     decrease       0.355075     0.325743       0.319182
8     high     poor     decrease       0.357903   



Training Accuracy for 900 samples: 0.4286
Validation Accuracy for 900 samples: 0.4963
Test Accuracy for 900 samples: 0.4444
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step

Predicted Results and Probabilities for 900 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     poor     increase       0.303081     0.287432       0.409487
1      low  average     increase       0.310932     0.258491       0.430578
2   medium     poor     increase       0.328487     0.260544       0.410969
3     high     poor     increase       0.303081     0.287432       0.409487
4   medium     poor     increase       0.328487     0.260544       0.410969
5   medium     good     increase       0.315271     0.256682       0.428046
6      low     poor     increase       0.325231     0.256663       0.418106
7      low     poor     increase       0.325231     0.256663       0.418106
8      low  average     increase       0.310932   



Training Accuracy for 950 samples: 0.5383
Validation Accuracy for 950 samples: 0.5211
Test Accuracy for 950 samples: 0.5524
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step

Predicted Results and Probabilities for 950 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     poor       stable       0.272093     0.522947       0.204960
1     high  average       stable       0.273528     0.531892       0.194580
2     high     good       stable       0.277672     0.531873       0.190455
3   medium     poor       stable       0.272093     0.522947       0.204960
4   medium     good       stable       0.279816     0.523653       0.196531
5     high  average       stable       0.273528     0.531892       0.194580
6     high  average       stable       0.273528     0.531892       0.194580
7     high     poor       stable       0.269405     0.531832       0.198763
8   medium  average       stable       0.276281   



Training Accuracy for 1000 samples: 0.4857
Validation Accuracy for 1000 samples: 0.5000
Test Accuracy for 1000 samples: 0.4867
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step

Predicted Results and Probabilities for 1000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor       stable       0.320119     0.377403       0.302478
1      low     poor       stable       0.320119     0.377403       0.302478
2   medium  average       stable       0.340166     0.373398       0.286436
3      low     good     decrease       0.418508     0.353678       0.227815
4      low     poor       stable       0.320119     0.377403       0.302478
5   medium  average       stable       0.340166     0.373398       0.286436
6   medium     poor       stable       0.317391     0.377797       0.304813
7      low     poor       stable       0.320119     0.377403       0.302478
8      low     poor       stable       0.32011



Training Accuracy for 2000 samples: 0.5071
Validation Accuracy for 2000 samples: 0.4967
Test Accuracy for 2000 samples: 0.4633
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step

Predicted Results and Probabilities for 2000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     good       stable       0.298624     0.438340       0.263036
1   medium     good       stable       0.298624     0.438340       0.263036
2      low     poor     decrease       0.436193     0.294145       0.269661
3     high     poor     decrease       0.566077     0.183579       0.250343
4      low  average       stable       0.315517     0.419390       0.265093
5   medium  average     decrease       0.384058     0.345897       0.270045
6      low     good       stable       0.296733     0.440345       0.262922
7      low     good       stable       0.296733     0.440345       0.262922
8      low     good       stable       0.2967



Training Accuracy for 3000 samples: 0.4019
Validation Accuracy for 3000 samples: 0.4133
Test Accuracy for 3000 samples: 0.4067
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step

Predicted Results and Probabilities for 3000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high  average       stable       0.368184     0.378727       0.253089
1   medium     good     decrease       0.383850     0.331767       0.284383
2      low     good     decrease       0.395531     0.295105       0.309365
3      low     poor     decrease       0.391843     0.307236       0.300921
4   medium  average     decrease       0.381553     0.338459       0.279988
5   medium  average     decrease       0.381553     0.338459       0.279988
6   medium     good     decrease       0.383850     0.331767       0.284383
7     high     poor       stable       0.365667     0.385941       0.248392
8      low     poor     decrease       0.3918



Training Accuracy for 4000 samples: 0.3793
Validation Accuracy for 4000 samples: 0.3867
Test Accuracy for 4000 samples: 0.3733
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step

Predicted Results and Probabilities for 4000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     good     decrease       0.378012     0.357732       0.264257
1      low     poor     decrease       0.377215     0.354676       0.268109
2   medium     poor     decrease       0.378035     0.357825       0.264140
3   medium     poor     decrease       0.378035     0.357825       0.264140
4   medium  average     decrease       0.378422     0.359351       0.262227
5   medium  average     decrease       0.378422     0.359351       0.262227
6     high     good     decrease       0.379559     0.364017       0.256423
7   medium  average     decrease       0.378422     0.359351       0.262227
8     high     good     decrease       0.3795



Training Accuracy for 5000 samples: 0.4823
Validation Accuracy for 5000 samples: 0.4827
Test Accuracy for 5000 samples: 0.4773
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 5000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor     increase       0.240941     0.276258       0.482801
1     high     poor     increase       0.240941     0.276258       0.482801
2      low     poor     increase       0.240941     0.276258       0.482801
3      low     poor     increase       0.240941     0.276258       0.482801
4      low     good     increase       0.240941     0.276258       0.482801
5      low     poor     increase       0.240941     0.276258       0.482801
6      low     poor     increase       0.240941     0.276258       0.482801
7     high     poor     increase       0.240941     0.276258       0.482801
8     high  average     increase       0.2409



Training Accuracy for 6000 samples: 0.4836
Validation Accuracy for 6000 samples: 0.4733
Test Accuracy for 6000 samples: 0.4744
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 6000 samples (First 15 rows):
      IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0    low  average       stable       0.313642     0.353722       0.332635
1   high  average     decrease       0.371982     0.267507       0.360511
2    low     poor       stable       0.321270     0.341704       0.337026
3    low  average       stable       0.313642     0.353722       0.332635
4    low  average       stable       0.313642     0.353722       0.332635
5    low  average       stable       0.313642     0.353722       0.332635
6   high     poor     decrease       0.421710     0.201330       0.376959
7    low  average       stable       0.313642     0.353722       0.332635
8    low     good     increase       0.305649     0.344935     



Training Accuracy for 7000 samples: 0.3939
Validation Accuracy for 7000 samples: 0.3933
Test Accuracy for 7000 samples: 0.3990
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step

Predicted Results and Probabilities for 7000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor       stable       0.254977     0.394351       0.350672
1      low  average       stable       0.254977     0.394351       0.350672
2      low     poor       stable       0.254977     0.394351       0.350672
3      low     good       stable       0.254977     0.394351       0.350672
4      low     good       stable       0.254977     0.394351       0.350672
5     high     good       stable       0.254977     0.394351       0.350672
6     high     poor       stable       0.254977     0.394351       0.350672
7     high     good       stable       0.254977     0.394351       0.350672
8     high     poor       stable       0.2549



Training Accuracy for 8000 samples: 0.5157
Validation Accuracy for 8000 samples: 0.5000
Test Accuracy for 8000 samples: 0.5125
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 8000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     good     decrease       0.428712     0.280578       0.290710
1     high     poor     decrease       0.739830     0.171511       0.088658
2   medium     poor     decrease       0.464850     0.272651       0.262498
3   medium     good     decrease       0.428712     0.280578       0.290710
4   medium     good     decrease       0.428712     0.280578       0.290710
5   medium  average     decrease       0.428712     0.280578       0.290710
6     high  average     decrease       0.587791     0.235978       0.176231
7   medium     good     decrease       0.428712     0.280578       0.290710
8      low     poor     decrease       0.4287



Training Accuracy for 9000 samples: 0.4557
Validation Accuracy for 9000 samples: 0.4452
Test Accuracy for 9000 samples: 0.4644
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 9000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium  average     decrease       0.453375     0.240302       0.306323
1   medium  average     decrease       0.453375     0.240302       0.306323
2   medium     poor     decrease       0.453375     0.240302       0.306323
3   medium     good     decrease       0.453375     0.240302       0.306323
4     high     good     decrease       0.453375     0.240302       0.306323
5     high     good     decrease       0.453375     0.240302       0.306323
6      low  average     decrease       0.453375     0.240302       0.306323
7      low     poor     decrease       0.453375     0.240302       0.306323
8   medium     poor     decrease       0.4533



Training Accuracy for 10000 samples: 0.4187
Validation Accuracy for 10000 samples: 0.4033
Test Accuracy for 10000 samples: 0.4127
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 10000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     good     increase       0.250520     0.332526       0.416955
1     high     good     increase       0.250520     0.332526       0.416955
2      low     good     increase       0.250523     0.332527       0.416951
3   medium     good     increase       0.250521     0.332526       0.416953
4     high  average     increase       0.250519     0.332525       0.416955
5      low  average     increase       0.250522     0.332526       0.416951
6      low     poor     increase       0.250522     0.332526       0.416952
7     high  average     increase       0.250519     0.332525       0.416955
8     high  average     increase       0.



Training Accuracy for 11000 samples: 0.4836
Validation Accuracy for 11000 samples: 0.4885
Test Accuracy for 11000 samples: 0.4782
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 11000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium  average     decrease       0.361106     0.313586       0.325307
1   medium  average     decrease       0.361106     0.313586       0.325307
2      low     poor       stable       0.332839     0.350080       0.317081
3   medium  average     decrease       0.361106     0.313586       0.325307
4      low     poor       stable       0.332839     0.350080       0.317081
5     high  average     decrease       0.441660     0.220016       0.338325
6   medium     good       stable       0.307530     0.384375       0.308094
7   medium     good       stable       0.307530     0.384375       0.308094
8     high     poor     decrease       0.



Training Accuracy for 12000 samples: 0.5454
Validation Accuracy for 12000 samples: 0.5411
Test Accuracy for 12000 samples: 0.5383
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 12000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     good     decrease       0.527116     0.203654       0.269231
1     high  average     decrease       0.379236     0.342189       0.278575
2     high  average     decrease       0.379236     0.342189       0.278575
3      low     poor     decrease       0.499269     0.289074       0.211658
4     high  average     decrease       0.379236     0.342189       0.278575
5      low     good     decrease       0.527116     0.203654       0.269231
6      low     good     decrease       0.527116     0.203654       0.269231
7     high     good     decrease       0.529236     0.193487       0.277277
8   medium     good     decrease       0.



Training Accuracy for 13000 samples: 0.5051
Validation Accuracy for 13000 samples: 0.5021
Test Accuracy for 13000 samples: 0.5164
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step

Predicted Results and Probabilities for 13000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor       stable       0.156366     0.498695       0.344940
1      low     poor       stable       0.156366     0.498695       0.344940
2   medium     poor       stable       0.156366     0.498695       0.344940
3      low     good       stable       0.219764     0.515376       0.264860
4      low  average       stable       0.156725     0.498867       0.344408
5      low  average       stable       0.156725     0.498867       0.344408
6      low  average       stable       0.156725     0.498867       0.344408
7   medium  average       stable       0.194499     0.511669       0.293832
8      low  average       stable       0.



Training Accuracy for 14000 samples: 0.4611
Validation Accuracy for 14000 samples: 0.4352
Test Accuracy for 14000 samples: 0.4476
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 14000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium  average       stable       0.266085     0.394979       0.338936
1     high     poor     increase       0.281128     0.356785       0.362087
2      low  average       stable       0.241845     0.455225       0.302930
3     high  average     increase       0.281128     0.356785       0.362087
4     high  average     increase       0.281128     0.356785       0.362087
5      low     good       stable       0.230603     0.482709       0.286688
6      low  average       stable       0.241845     0.455225       0.302930
7     high     poor     increase       0.281128     0.356785       0.362087
8   medium     good       stable       0.



Training Accuracy for 15000 samples: 0.4244
Validation Accuracy for 15000 samples: 0.4173
Test Accuracy for 15000 samples: 0.4173
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 15000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  average     increase       0.352534     0.278173       0.369293
1      low  average     increase       0.352534     0.278173       0.369293
2   medium     poor     increase       0.352534     0.278173       0.369293
3      low     poor     increase       0.352534     0.278173       0.369293
4      low     good     decrease       0.400661     0.355352       0.243987
5      low     good     decrease       0.400661     0.355352       0.243987
6   medium     good     decrease       0.410275     0.376507       0.213217
7   medium     poor     increase       0.352534     0.278173       0.369293
8   medium  average     decrease       0.



Training Accuracy for 16000 samples: 0.4830
Validation Accuracy for 16000 samples: 0.4725
Test Accuracy for 16000 samples: 0.4558
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 16000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     good     decrease       0.354952     0.299363       0.345684
1      low     poor       stable       0.277969     0.428510       0.293521
2   medium  average       stable       0.317025     0.361484       0.321491
3     high  average       stable       0.271632     0.439658       0.288711
4      low  average     decrease       0.360906     0.289903       0.349191
5   medium     poor       stable       0.272202     0.438652       0.289146
6      low     good     decrease       0.433379     0.182189       0.384432
7     high     good     decrease       0.354952     0.299363       0.345684
8   medium     poor       stable       0.



Training Accuracy for 17000 samples: 0.5690
Validation Accuracy for 17000 samples: 0.5475
Test Accuracy for 17000 samples: 0.5651
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 17000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  average       stable       0.355731     0.399703       0.244565
1     high  average       stable       0.295497     0.452473       0.252030
2   medium     good     decrease       0.422699     0.344796       0.232505
3      low     good     decrease       0.422699     0.344796       0.232505
4      low  average       stable       0.355731     0.399703       0.244565
5     high     poor       stable       0.191654     0.553915       0.254432
6      low  average       stable       0.355731     0.399703       0.244565
7      low  average       stable       0.355731     0.399703       0.244565
8     high  average       stable       0.



Training Accuracy for 18000 samples: 0.5870
Validation Accuracy for 18000 samples: 0.5778
Test Accuracy for 18000 samples: 0.5800
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 18000 samples (First 15 rows):
      IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0    low     good     decrease       0.565957     0.257569       0.176474
1   high     good     decrease       0.504796     0.329801       0.165403
2    low     good     decrease       0.565957     0.257569       0.176474
3   high     good     decrease       0.504796     0.329801       0.165403
4   high     poor       stable       0.271142     0.605077       0.123781
5   high  average       stable       0.383320     0.468094       0.148586
6    low  average     decrease       0.552403     0.277339       0.170258
7    low  average     decrease       0.552403     0.277339       0.170258
8   high     good     decrease       0.504796     0.329801 



Training Accuracy for 19000 samples: 0.5205
Validation Accuracy for 19000 samples: 0.5249
Test Accuracy for 19000 samples: 0.5011
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 19000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor     increase       0.363641     0.265046       0.371313
1      low     good     decrease       0.550084     0.304350       0.145566
2      low     poor     increase       0.363641     0.265046       0.371313
3      low     poor     increase       0.363641     0.265046       0.371313
4      low     good     decrease       0.550084     0.304350       0.145566
5   medium  average     increase       0.363641     0.265046       0.371313
6      low     poor     increase       0.363641     0.265046       0.371313
7      low     poor     increase       0.363641     0.265046       0.371313
8      low     poor     increase       0.



Training Accuracy for 20000 samples: 0.4023
Validation Accuracy for 20000 samples: 0.4140
Test Accuracy for 20000 samples: 0.4190
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 20000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     good     increase       0.367671     0.225242       0.407087
1   medium  average     increase       0.367671     0.225242       0.407087
2   medium  average     increase       0.367671     0.225242       0.407087
3     high     good     increase       0.367671     0.225242       0.407087
4   medium  average     increase       0.367671     0.225242       0.407087
5   medium     poor     increase       0.367671     0.225242       0.407087
6      low     good     increase       0.367671     0.225242       0.407087
7   medium     poor     increase       0.367671     0.225242       0.407087
8   medium     good     increase       0.

# K-L Divergence NN Dense Data

In [5]:
# Sample sizes to loop through
sample_sizes = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000]

# Prepare a list to store K-L divergence results
kl_divergence_results = []

# Loop through each sample size
for size in sample_sizes:
    print(f"\nProcessing sample size: {size}")

    # Load the combined BN data for the current sample size
    combined_data_bn = pd.read_csv(f'combined_probabilities_{size}.csv')

    # Split the data into train, validation, and test sets
    X = combined_data_bn[['IR_State', 'EI_State']]
    y = combined_data_bn[['Chosen_SP_State', 'SP_Probabilities (decrease, stable, increase)']]

    # Refresh the data split for each iteration
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, shuffle=False, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=False, random_state=42)

    # Get the test indices
    test_indices = X_test.index

    # Get the corresponding rows from the combined BN data using the test indices
    bn_test_data = combined_data_bn.loc[test_indices]

    # Load the corresponding NN test data for the current sample size
    nn_test_data = pd.read_csv(f'test_data_nn_{size}.csv')

    # Extract NN predicted probabilities and BN ground truth probabilities
    nn_probs = nn_test_data[['Prob_decrease', 'Prob_stable', 'Prob_increase']].values
    bn_probs = bn_test_data['SP_Probabilities (decrease, stable, increase)'].apply(
        lambda x: np.array(list(map(float, x.strip('[]').split(','))))
    ).values

    # Calculate K-L divergence between NN predicted probabilities and BN ground truth probabilities
    kl_divergences = []
    output_data = []  # For tabulating output

    for i in range(len(nn_probs)):
        nn_prob = nn_probs[i]
        bn_prob = bn_probs[i]

        # Ensure both are valid probability distributions
        epsilon = 1e-10
        nn_prob = np.clip(nn_prob, epsilon, 1)
        bn_prob = np.clip(bn_prob, epsilon, 1)

        # Normalize to ensure they sum to 1
        nn_prob /= nn_prob.sum()
        bn_prob /= bn_prob.sum()

        # Compute K-L divergence
        kl_div = entropy(bn_prob, nn_prob)
        kl_divergences.append(kl_div)

        # Add data to output for tabulation
        output_data.append({
            'Sample_Index': i,
            'IR': bn_test_data.iloc[i]['IR_State'],
            'EI': bn_test_data.iloc[i]['EI_State'],
            'Ground_Truth_Probs': ', '.join([f'{prob:.4f}' for prob in bn_prob]),
            'NN_Probs': ', '.join([f'{prob:.4f}' for prob in nn_prob]),
            'KL_Divergence': f'{kl_div:.4f}'
        })

    # Create a DataFrame for the output data and tabulate the first few rows
    output_df = pd.DataFrame(output_data)
    print(f"\nK-L Divergence Results for {size} samples (First 5 rows):\n")
    print(tabulate(output_df.head(5), headers='keys', tablefmt='grid'))

    # Calculate and display the average K-L divergence for this sample size
    average_kl_divergence = np.mean(kl_divergences)
    std_kl_divergence = np.std(kl_divergences)
    print(f"\nAverage K-L Divergence for {size} samples: {average_kl_divergence:.4f}, Std Dev: {std_kl_divergence:.4f}")

    # Append the results to the list
    kl_divergence_results.append({
        'Sample_Size': size,
        'Average_KL_Divergence': average_kl_divergence,
        'Std_Dev': std_kl_divergence
    })

# Save the K-L divergence results to a CSV file
kl_divergence_df = pd.DataFrame(kl_divergence_results)
kl_divergence_df.to_csv('kl_div_NN_2_3_dense.csv', index=False)

print("\nAll sample sizes have been processed and K-L divergences calculated. Results saved to 'kl_div_NN_2_3_dense.csv'.")


Processing sample size: 50

K-L Divergence Results for 50 samples (First 5 rows):

+----+----------------+--------+---------+------------------------+------------------------+-----------------+
|    |   Sample_Index | IR     | EI      | Ground_Truth_Probs     | NN_Probs               |   KL_Divergence |
|  0 |              0 | high   | average | 0.1519, 0.4715, 0.3767 | 0.3241, 0.3393, 0.3366 |          0.0823 |
+----+----------------+--------+---------+------------------------+------------------------+-----------------+
|  1 |              1 | high   | average | 0.1519, 0.4715, 0.3767 | 0.3241, 0.3393, 0.3366 |          0.0823 |
+----+----------------+--------+---------+------------------------+------------------------+-----------------+
|  2 |              2 | low    | poor    | 0.3985, 0.2272, 0.3743 | 0.3232, 0.3366, 0.3402 |          0.0299 |
+----+----------------+--------+---------+------------------------+------------------------+-----------------+
|  3 |              3 | medi