<a href="https://colab.research.google.com/github/nonyeezeh/Research-Project-Code/blob/main/NN_Dense_4_10_Relu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [3]:
pip install pgmpy

Collecting pgmpy
  Downloading pgmpy-0.1.26-py3-none-any.whl.metadata (9.1 kB)
Downloading pgmpy-0.1.26-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pgmpy
Successfully installed pgmpy-0.1.26


In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.sampling import BayesianModelSampling
from tabulate import tabulate

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras import models, layers, callbacks, regularizers

from scipy.stats import entropy

# Bayesian Network Data Generation 500, ..., 20000 Samples (dense)

In [20]:
# Function to generate CPDs
def generate_cpds():
    # Generate random probabilities for IR
    ir_probs = np.random.rand(3)
    ir_probs /= ir_probs.sum()  # Normalize to make it a valid probability distribution

    # Generate random probabilities for EI given IR
    ei_given_ir_probs = np.random.rand(3, 3)
    ei_given_ir_probs /= ei_given_ir_probs.sum(axis=0, keepdims=True)

    # Generate random probabilities for SP given IR and EI
    sp_probs = np.random.rand(3, 3, 3)
    sp_probs /= sp_probs.sum(axis=0, keepdims=True)

    return ir_probs, ei_given_ir_probs, sp_probs

# Function to generate and save samples
def generate_and_save_samples(ir_probs, ei_probs, sp_probs, sample_size, filename):
    output_data = []

    # Generate `sample_size` random samples
    for _ in range(sample_size):
        # Sample `IR` state based on `IR` probabilities
        ir_state_idx = np.random.choice(3, p=ir_probs)
        ir_state = ['low', 'medium', 'high'][ir_state_idx]
        ir_prob = ir_probs[ir_state_idx]

        # Sample `EI` state based on `EI` probabilities given `IR`
        ei_probs_given_ir = ei_probs[:, ir_state_idx]
        ei_state_idx = np.random.choice(3, p=ei_probs_given_ir)
        ei_state = ['poor', 'average', 'good'][ei_state_idx]
        ei_prob = ei_probs_given_ir[ei_state_idx]

        # Sample `SP` state based on `SP` probabilities given `IR` and `EI`
        sp_probs_given_ir_ei = sp_probs[:, ir_state_idx, ei_state_idx]
        sp_state_idx = np.random.choice(3, p=sp_probs_given_ir_ei)
        sp_state = ['decrease', 'stable', 'increase'][sp_state_idx]
        sp_prob = sp_probs_given_ir_ei[sp_state_idx]

        # Append sample data to output list
        output_data.append({
            'IR_State': ir_state,
            'IR_Prob': f'{ir_prob:.4f}',
            'EI_State': ei_state,
            'EI_Prob': f'{ei_prob:.4f}',
            'SP_Probabilities (decrease, stable, increase)': ', '.join([f'{prob:.4f}' for prob in sp_probs_given_ir_ei]),
            'Chosen_SP_State': sp_state,
            'Chosen_SP_Probability': f'{sp_prob:.4f}'
        })

    # Create a DataFrame from the output data
    output_df = pd.DataFrame(output_data)

    # Save the output DataFrame to a CSV file
    output_df.to_csv(filename, index=False)

    # Print the first few rows for visual confirmation
    print(f"\nSample size: {sample_size} - First few rows of generated samples:\n")
    print(tabulate(output_df.head(), headers='keys', tablefmt='grid'))

# Generate and save samples for sample sizes from 1000 to 10000 every 1000
sample_sizes = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000]

for size in sample_sizes:
    # Generate the CPDs
    ir_probs, ei_given_ir_probs, sp_probs = generate_cpds()

    # Generate and save individual samples for the given sample size
    generate_and_save_samples(ir_probs, ei_given_ir_probs, sp_probs, size, f'combined_probabilities_{size}.csv')

# Notify the user that the process is done
print("\nGeneration and saving of individual samples complete for all sample sizes!")


Sample size: 50 - First few rows of generated samples:

+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|    | IR_State   |   IR_Prob | EI_State   |   EI_Prob | SP_Probabilities (decrease, stable, increase)   | Chosen_SP_State   |   Chosen_SP_Probability |
|  0 | high       |    0.3408 | poor       |    0.2437 | 0.1806, 0.6933, 0.1261                          | stable            |                  0.6933 |
+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|  1 | low        |    0.546  | good       |    0.5268 | 0.1634, 0.5172, 0.3194                          | increase          |                  0.3194 |
+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|  2 | low        |    0.

# Hypothesis Model: 500,...,20000 Samples (dense) 4 hidden Layers, 10 Neurons Relu

In [21]:
# Sample sizes to loop through
sample_sizes = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000]

# Define the Neural Network architecture with L2 regularization
def create_nn_model(hidden_layers=4, nodes_per_layer=10, l2_lambda=0.01):
    model = models.Sequential()

    # Input layer (2 input features: IR_encoded and EI_encoded)
    model.add(layers.InputLayer(input_shape=(2,)))

    # Hidden layers with L2 regularization and Dropout
    for layer_num in range(hidden_layers):
        model.add(layers.Dense(
            nodes_per_layer,
            activation='relu',
            kernel_regularizer=regularizers.l2(l2_lambda),  # L2 regularization
            name=f"hidden_layer_{layer_num + 1}"
        ))
        model.add(layers.Dropout(0.2))  # Dropout layer to reduce overfitting

    # Output layer (3 classes: decrease, stable, increase) with L2 regularization
    model.add(layers.Dense(
        3,
        activation='softmax',
        kernel_regularizer=regularizers.l2(l2_lambda),  # L2 regularization
        name="output_layer"
    ))

    # Compile the model
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

# Prepare a dictionary to store the extracted data for each sample size
extracted_data = {}

# Extract the required columns from all sample sizes first
for size in sample_sizes:
    # Load data for the current sample size (adjust the file paths if necessary)
    outcomes_file = f'combined_probabilities_{size}.csv'
    df = pd.read_csv(outcomes_file)

    # Extract only the required columns
    required_columns = ['IR_State', 'EI_State', 'Chosen_SP_State']
    df_extracted = df[required_columns]

    # Manually encode categorical variables for IR, EI, and SP
    ir_map = {'low': 0, 'medium': 1, 'high': 2}
    ei_map = {'poor': 0, 'average': 1, 'good': 2}
    sp_map = {'decrease': 0, 'stable': 1, 'increase': 2}

    df_extracted['IR_encoded'] = df_extracted['IR_State'].map(ir_map)
    df_extracted['EI_encoded'] = df_extracted['EI_State'].map(ei_map)
    df_extracted['SP_encoded'] = df_extracted['Chosen_SP_State'].map(sp_map)

    # Store the extracted and encoded data for later use
    extracted_data[size] = df_extracted

# Loop through each sample size for NN training, validation, and testing
for size in sample_sizes:
    # Retrieve the extracted data for the current sample size
    df = extracted_data[size]

    # Features (IR and EI) and labels (SP)
    X = df[['IR_encoded', 'EI_encoded']]
    y = df['SP_encoded']

    # Refresh the data split for each iteration
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, shuffle=False, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=False, random_state=42)

    # Show split confirmation
    print(f"\nSample size: {size}")
    print("Training Data:", X_train.shape, y_train.shape)
    print("Validation Data:", X_val.shape, y_val.shape)
    print("Test Data:", X_test.shape, y_test.shape)

    # Create the Neural Network model with L2 regularization
    nn_model = create_nn_model(hidden_layers=4, nodes_per_layer=10, l2_lambda=0.01)

    # Early stopping callback to prevent overfitting
    early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Train the model
    history = nn_model.fit(X_train, y_train,
                           epochs=50,
                           batch_size=32,
                           validation_data=(X_val, y_val),
                           callbacks=[early_stopping],
                           verbose=0)  # Set verbose=0 to avoid too much output

    # Print training, validation, and test accuracy
    train_loss, train_accuracy = nn_model.evaluate(X_train, y_train, verbose=0)
    val_loss, val_accuracy = nn_model.evaluate(X_val, y_val, verbose=0)
    test_loss, test_accuracy = nn_model.evaluate(X_test, y_test, verbose=0)
    print(f"Training Accuracy for {size} samples: {train_accuracy:.4f}")
    print(f"Validation Accuracy for {size} samples: {val_accuracy:.4f}")
    print(f"Test Accuracy for {size} samples: {test_accuracy:.4f}")

    # Make predictions on the test set
    predictions = nn_model.predict(X_test)

    # Convert the predicted probabilities to class labels
    predicted_classes = predictions.argmax(axis=1)

    # Create a list to map integers back to the original SP labels
    sp_reverse_map = ['decrease', 'stable', 'increase']

    # Convert the predicted classes to the original labels
    predicted_labels = [sp_reverse_map[label] for label in predicted_classes]

    # Create a DataFrame for the predicted probabilities
    probs_df = pd.DataFrame(predictions, columns=['Prob_decrease', 'Prob_stable', 'Prob_increase'])

    # Output the IR, EI, predicted SP, and the NN probabilities
    result_df = pd.DataFrame({
        'IR': df['IR_State'].iloc[X_test.index],  # IR column from the original dataframe for the test set
        'EI': df['EI_State'].iloc[X_test.index],  # EI column from the original dataframe for the test set
        'Predicted_SP': predicted_labels           # Predicted SP labels
    })

    # Combine the result with the predicted probabilities
    combined_df = pd.concat([result_df.reset_index(drop=True), probs_df.reset_index(drop=True)], axis=1)

    # Save the test data with predictions to a CSV file
    combined_df.to_csv(f'test_data_nn_{size}.csv', index=False)

    # Show the first few rows of the results for this sample size
    print(f"\nPredicted Results and Probabilities for {size} samples (First 15 rows):")
    print(combined_df.head(15))

# After the loop is done, print this message
print("\nLooping through all sample sizes complete!")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_extracted['IR_encoded'] = df_extracted['IR_State'].map(ir_map)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_extracted['EI_encoded'] = df_extracted['EI_State'].map(ei_map)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_extracted['SP_encoded'] = df_extracted['Chosen_SP_State'].map(sp_map)



Sample size: 50
Training Data: (35, 2) (35,)
Validation Data: (7, 2) (7,)
Test Data: (8, 2) (8,)




Training Accuracy for 50 samples: 0.4571
Validation Accuracy for 50 samples: 0.4286
Test Accuracy for 50 samples: 0.3750
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step

Predicted Results and Probabilities for 50 samples (First 15 rows):
     IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   low  average       stable       0.302429     0.386032       0.311540
1   low     good       stable       0.269074     0.419443       0.311483
2  high     good       stable       0.237137     0.478903       0.283960
3   low     poor       stable       0.331220     0.345743       0.323037
4  high     good       stable       0.237137     0.478903       0.283960
5   low  average       stable       0.302429     0.386032       0.311540
6  high     poor       stable       0.298564     0.399516       0.301920
7   low     good       stable       0.269074     0.419443       0.311483

Sample size: 100
Training Data: (70, 2) (70,)
Validation Data: (15, 2) (15,)
Tes



Training Accuracy for 100 samples: 0.4286
Validation Accuracy for 100 samples: 0.4000
Test Accuracy for 100 samples: 0.3333
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step

Predicted Results and Probabilities for 100 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     good     increase       0.285742     0.309479       0.404780
1   medium     good     increase       0.285742     0.309479       0.404780
2     high     poor     increase       0.285612     0.315082       0.399306
3   medium     poor     increase       0.303461     0.311069       0.385470
4     high     good     increase       0.275373     0.311891       0.412736
5   medium  average     increase       0.299013     0.309710       0.391276
6   medium     good     increase       0.285742     0.309479       0.404780
7      low     poor     increase       0.311026     0.323293       0.365681
8   medium  average     increase       0.299013  



Training Accuracy for 150 samples: 0.6000
Validation Accuracy for 150 samples: 0.4545
Test Accuracy for 150 samples: 0.5652
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step

Predicted Results and Probabilities for 150 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor     increase       0.262580     0.301339       0.436081
1     high  average     increase       0.228839     0.285171       0.485990
2      low  average     increase       0.241426     0.307865       0.450709
3     high     poor     increase       0.238367     0.291563       0.470070
4      low  average     increase       0.241426     0.307865       0.450709
5      low  average     increase       0.241426     0.307865       0.450709
6     high  average     increase       0.228839     0.285171       0.485990
7      low     poor     increase       0.262580     0.301339       0.436081
8     high     good     increase       0.218158   



Training Accuracy for 200 samples: 0.4214
Validation Accuracy for 200 samples: 0.3667
Test Accuracy for 200 samples: 0.3333
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step

Predicted Results and Probabilities for 200 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor     increase       0.314358     0.339841       0.345802
1      low     good       stable       0.306218     0.348958       0.344824
2     high     poor       stable       0.306398     0.348414       0.345188
3   medium     good       stable       0.306791     0.348759       0.344450
4   medium     poor     increase       0.311082     0.343899       0.345019
5   medium     good       stable       0.306791     0.348759       0.344450
6   medium     poor     increase       0.311082     0.343899       0.345019
7      low     poor     increase       0.314358     0.339841       0.345802
8      low     good       stable       0.306218  



Training Accuracy for 250 samples: 0.4686
Validation Accuracy for 250 samples: 0.4324
Test Accuracy for 250 samples: 0.2895
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step

Predicted Results and Probabilities for 250 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     good     increase       0.395770     0.203429       0.400801
1     high  average     increase       0.362387     0.199240       0.438373
2   medium     good     increase       0.385015     0.208391       0.406595
3      low     poor     increase       0.326476     0.212355       0.461169
4     high     poor     increase       0.330247     0.187186       0.482567
5      low     good     increase       0.374992     0.209332       0.415675
6   medium     poor     increase       0.322097     0.196697       0.481205
7      low     good     increase       0.374992     0.209332       0.415675
8   medium     poor     increase       0.322097   



Training Accuracy for 300 samples: 0.3952
Validation Accuracy for 300 samples: 0.3111
Test Accuracy for 300 samples: 0.3333
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step

Predicted Results and Probabilities for 300 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium  average     increase       0.305967     0.322940       0.371093
1      low     poor     increase       0.309103     0.323628       0.367268
2   medium  average     increase       0.305967     0.322940       0.371093
3     high     good     increase       0.293531     0.300858       0.405611
4     high     poor     increase       0.301609     0.320121       0.378270
5      low     good     increase       0.291299     0.221069       0.487632
6   medium     poor     increase       0.308568     0.328239       0.363193
7     high  average     increase       0.296969     0.317346       0.385685
8     high     good     increase       0.293531   



Training Accuracy for 350 samples: 0.4245
Validation Accuracy for 350 samples: 0.3077
Test Accuracy for 350 samples: 0.3585
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step

Predicted Results and Probabilities for 350 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor     decrease       0.409752     0.365476       0.224772
1      low  average     decrease       0.410027     0.371434       0.218540
2   medium     good     decrease       0.424137     0.393645       0.182218
3      low     good     decrease       0.414594     0.384601       0.200805
4   medium     good     decrease       0.424137     0.393645       0.182218
5      low  average     decrease       0.410027     0.371434       0.218540
6     high  average     decrease       0.428058     0.397966       0.173976
7     high     good     decrease       0.431681     0.407821       0.160499
8     high     good     decrease       0.431681   



Training Accuracy for 400 samples: 0.4786
Validation Accuracy for 400 samples: 0.3833
Test Accuracy for 400 samples: 0.3500
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step

Predicted Results and Probabilities for 400 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     good     increase       0.373857     0.206385       0.419758
1     high     poor     increase       0.362679     0.144126       0.493195
2      low  average     increase       0.368505     0.177520       0.453975
3   medium     poor     increase       0.361969     0.142869       0.495162
4     high  average     increase       0.369982     0.174600       0.455418
5     high     poor     increase       0.362679     0.144126       0.493195
6      low     good     increase       0.373857     0.206385       0.419758
7   medium     poor     increase       0.361969     0.142869       0.495162
8      low  average     increase       0.368505   



Training Accuracy for 450 samples: 0.4159
Validation Accuracy for 450 samples: 0.3134
Test Accuracy for 450 samples: 0.3971
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step

Predicted Results and Probabilities for 450 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor     decrease       0.399561     0.315132       0.285307
1   medium     poor     decrease       0.405300     0.312671       0.282029
2   medium     poor     decrease       0.405300     0.312671       0.282029
3   medium     poor     decrease       0.405300     0.312671       0.282029
4     high     good     decrease       0.401907     0.314045       0.284048
5   medium  average     decrease       0.400611     0.314664       0.284726
6   medium  average     decrease       0.400611     0.314664       0.284726
7   medium  average     decrease       0.400611     0.314664       0.284726
8   medium     good     decrease       0.399623   



Training Accuracy for 500 samples: 0.4057
Validation Accuracy for 500 samples: 0.3200
Test Accuracy for 500 samples: 0.3600
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step

Predicted Results and Probabilities for 500 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     poor     increase       0.238082     0.357859       0.404059
1     high     poor     increase       0.238082     0.357859       0.404059
2     high     good     increase       0.247805     0.349130       0.403064
3      low     poor     increase       0.232249     0.362419       0.405332
4     high     good     increase       0.247805     0.349130       0.403064
5     high     good     increase       0.247805     0.349130       0.403064
6   medium     good     increase       0.264797     0.336955       0.398248
7      low     poor     increase       0.232249     0.362419       0.405332
8      low     good     increase       0.281815   



Training Accuracy for 550 samples: 0.3922
Validation Accuracy for 550 samples: 0.3780
Test Accuracy for 550 samples: 0.3494
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step

Predicted Results and Probabilities for 550 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     poor     increase       0.284792     0.340099       0.375109
1   medium     poor     increase       0.286282     0.338782       0.374936
2     high  average     increase       0.267531     0.348282       0.384187
3   medium     poor     increase       0.286282     0.338782       0.374936
4     high  average     increase       0.267531     0.348282       0.384187
5     high     good     increase       0.246433     0.357727       0.395839
6     high  average     increase       0.267531     0.348282       0.384187
7   medium     poor     increase       0.286282     0.338782       0.374936
8      low     poor     increase       0.284681   



Training Accuracy for 600 samples: 0.4905
Validation Accuracy for 600 samples: 0.4333
Test Accuracy for 600 samples: 0.5333
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step

Predicted Results and Probabilities for 600 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  average       stable       0.282846     0.370212       0.346942
1      low     good     increase       0.260556     0.352908       0.386536
2   medium     good     increase       0.226440     0.322409       0.451150
3   medium     good     increase       0.226440     0.322409       0.451150
4   medium     good     increase       0.226440     0.322409       0.451150
5     high     good     increase       0.189124     0.285487       0.525390
6      low     poor       stable       0.287478     0.371824       0.340698
7      low     good     increase       0.260556     0.352908       0.386536
8     high     good     increase       0.189124   



Training Accuracy for 650 samples: 0.5275
Validation Accuracy for 650 samples: 0.5155
Test Accuracy for 650 samples: 0.4898
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step

Predicted Results and Probabilities for 650 samples (First 15 rows):
      IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   high  average     increase       0.276846     0.241999       0.481155
1    low     poor     decrease       0.388819     0.349103       0.262078
2    low     poor     decrease       0.388819     0.349103       0.262078
3    low     poor     decrease       0.388819     0.349103       0.262078
4   high  average     increase       0.276846     0.241999       0.481155
5   high  average     increase       0.276846     0.241999       0.481155
6    low  average     increase       0.336230     0.300876       0.362894
7    low     poor     decrease       0.388819     0.349103       0.262078
8    low     poor     decrease       0.388819     0.349103       0.2



Training Accuracy for 700 samples: 0.4714
Validation Accuracy for 700 samples: 0.3714
Test Accuracy for 700 samples: 0.5238
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step

Predicted Results and Probabilities for 700 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor       stable       0.369507     0.388192       0.242301
1      low     good       stable       0.362886     0.389649       0.247465
2      low     poor       stable       0.369507     0.388192       0.242301
3     high     poor     decrease       0.423614     0.374240       0.202147
4     high     poor     decrease       0.423614     0.374240       0.202147
5   medium     good       stable       0.364811     0.389350       0.245839
6     high  average     decrease       0.408211     0.378708       0.213081
7     high     good     decrease       0.392875     0.382793       0.224332
8     high     poor     decrease       0.423614   



Training Accuracy for 750 samples: 0.4571
Validation Accuracy for 750 samples: 0.4821
Test Accuracy for 750 samples: 0.3186
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step

Predicted Results and Probabilities for 750 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     good     increase       0.258472     0.295310       0.446218
1      low     good     increase       0.258472     0.295310       0.446218
2     high     good     increase       0.262603     0.289811       0.447586
3   medium  average     increase       0.260350     0.292634       0.447016
4   medium  average     increase       0.260350     0.292634       0.447016
5   medium  average     increase       0.260350     0.292634       0.447016
6   medium     poor     increase       0.378478     0.168831       0.452691
7   medium     poor     increase       0.378478     0.168831       0.452691
8   medium     poor     increase       0.378478   



Training Accuracy for 800 samples: 0.3607
Validation Accuracy for 800 samples: 0.3667
Test Accuracy for 800 samples: 0.3833
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step

Predicted Results and Probabilities for 800 samples (First 15 rows):
      IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   high     good     decrease       0.358244     0.329440       0.312315
1    low  average     decrease       0.358243     0.329440       0.312317
2   high     poor     decrease       0.358209     0.329463       0.312328
3    low     poor     decrease       0.358227     0.329450       0.312323
4    low     good     decrease       0.358261     0.329428       0.312311
5    low     poor     decrease       0.358227     0.329450       0.312323
6    low  average     decrease       0.358243     0.329440       0.312317
7    low  average     decrease       0.358243     0.329440       0.312317
8   high     good     decrease       0.358244     0.329440       0.3



Training Accuracy for 850 samples: 0.3832
Validation Accuracy for 850 samples: 0.4252
Test Accuracy for 850 samples: 0.3906
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 

Predicted Results and Probabilities for 850 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor       stable       0.298342     0.381148       0.320510
1   medium  average       stable       0.298339     0.381158       0.320503
2     high     good       stable       0.298336     0.381168       0.320496
3   medium     poor       stable       0.298341     0.381152       0.320507
4     high     good       stable       0.298336     0.381168       0.320496
5     high     good       stable       0.298336     0.381168       0.320496
6   medium     poor       stable       0.298341     0.381152       0.320507
7     high     poor       stable       0.298339     0.381157       0.320503
8     high     good       stable       0.298336   



Training Accuracy for 900 samples: 0.4651
Validation Accuracy for 900 samples: 0.4741
Test Accuracy for 900 samples: 0.4963
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step

Predicted Results and Probabilities for 900 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium  average       stable       0.282264     0.454991       0.262745
1     high     good       stable       0.263357     0.436756       0.299887
2      low     poor       stable       0.289232     0.461225       0.249542
3     high  average       stable       0.364731     0.502812       0.132457
4     high     good       stable       0.263357     0.436756       0.299887
5      low  average       stable       0.296874     0.467349       0.235777
6     high  average       stable       0.364731     0.502812       0.132457
7     high     poor       stable       0.433460     0.507621       0.058919
8     high     poor       stable       0.433460   



Training Accuracy for 950 samples: 0.5038
Validation Accuracy for 950 samples: 0.4789
Test Accuracy for 950 samples: 0.5105
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step

Predicted Results and Probabilities for 950 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium  average     decrease       0.506346     0.287514       0.206141
1      low     good     decrease       0.471131     0.308868       0.220002
2     high     poor     decrease       0.544332     0.264629       0.191040
3     high  average     decrease       0.542193     0.265977       0.191830
4     high     good     decrease       0.539832     0.267462       0.192706
5     high     poor     decrease       0.544332     0.264629       0.191040
6      low     good     decrease       0.471131     0.308868       0.220002
7     high     good     decrease       0.539832     0.267462       0.192706
8      low     poor     decrease       0.472198   



Training Accuracy for 1000 samples: 0.4200
Validation Accuracy for 1000 samples: 0.4333
Test Accuracy for 1000 samples: 0.4667
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step

Predicted Results and Probabilities for 1000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     poor       stable       0.246888     0.414717       0.338394
1   medium  average       stable       0.246660     0.414988       0.338352
2   medium     good       stable       0.246869     0.414743       0.338389
3   medium     good       stable       0.246869     0.414743       0.338389
4      low     poor       stable       0.246060     0.415731       0.338208
5      low  average       stable       0.246244     0.415500       0.338256
6      low  average       stable       0.246244     0.415500       0.338256
7      low     good       stable       0.246437     0.415258       0.338305
8      low     poor       stable       0.24606



Training Accuracy for 2000 samples: 0.3479
Validation Accuracy for 2000 samples: 0.3833
Test Accuracy for 2000 samples: 0.2900
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step

Predicted Results and Probabilities for 2000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  average     decrease        0.34797     0.314347       0.337683
1   medium  average     decrease        0.34797     0.314347       0.337683
2      low  average     decrease        0.34797     0.314347       0.337683
3     high  average     decrease        0.34797     0.314347       0.337683
4   medium  average     decrease        0.34797     0.314347       0.337683
5      low  average     decrease        0.34797     0.314347       0.337683
6      low     poor     decrease        0.34797     0.314347       0.337683
7     high  average     decrease        0.34797     0.314347       0.337683
8      low  average     decrease        0.34



Training Accuracy for 3000 samples: 0.3933
Validation Accuracy for 3000 samples: 0.4178
Test Accuracy for 3000 samples: 0.3956
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step

Predicted Results and Probabilities for 3000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     poor     increase       0.244035     0.361395       0.394569
1      low     good     increase       0.244035     0.361395       0.394569
2   medium     poor     increase       0.244035     0.361395       0.394569
3     high     poor     increase       0.244035     0.361395       0.394569
4   medium     poor     increase       0.244035     0.361395       0.394569
5     high     poor     increase       0.244035     0.361395       0.394569
6   medium     poor     increase       0.244035     0.361395       0.394569
7     high     poor     increase       0.244035     0.361395       0.394569
8     high     good     increase       0.2440



Training Accuracy for 4000 samples: 0.4421
Validation Accuracy for 4000 samples: 0.4000
Test Accuracy for 4000 samples: 0.4217
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step

Predicted Results and Probabilities for 4000 samples (First 15 rows):
      IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0    low     poor     decrease       0.421841     0.188962       0.389197
1    low     poor     decrease       0.421841     0.188962       0.389197
2    low     poor     decrease       0.421841     0.188962       0.389197
3    low     poor     decrease       0.421841     0.188962       0.389197
4    low  average     decrease       0.421585     0.189105       0.389309
5   high  average     increase       0.231845     0.319903       0.448252
6   high  average     increase       0.231845     0.319903       0.448252
7   high  average     increase       0.231845     0.319903       0.448252
8   high     good     increase       0.111142     0.444060     



Training Accuracy for 5000 samples: 0.4151
Validation Accuracy for 5000 samples: 0.3973
Test Accuracy for 5000 samples: 0.3947
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step

Predicted Results and Probabilities for 5000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     poor     increase        0.18445     0.406021       0.409529
1      low  average     increase        0.18445     0.406021       0.409529
2     high     poor     increase        0.18445     0.406021       0.409529
3   medium  average     increase        0.18445     0.406021       0.409529
4   medium     good     increase        0.18445     0.406021       0.409529
5   medium     good     increase        0.18445     0.406021       0.409529
6   medium     poor     increase        0.18445     0.406021       0.409529
7     high     poor     increase        0.18445     0.406021       0.409529
8      low     poor     increase        0.184



Training Accuracy for 6000 samples: 0.3748
Validation Accuracy for 6000 samples: 0.3789
Test Accuracy for 6000 samples: 0.3633
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 6000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium  average     increase       0.276041     0.349359         0.3746
1     high  average     increase       0.276041     0.349359         0.3746
2     high     good     increase       0.276041     0.349359         0.3746
3   medium  average     increase       0.276041     0.349359         0.3746
4     high  average     increase       0.276041     0.349359         0.3746
5      low     good     increase       0.276041     0.349359         0.3746
6     high     poor     increase       0.276041     0.349359         0.3746
7     high  average     increase       0.276041     0.349359         0.3746
8   medium     good     increase       0.2760



Training Accuracy for 7000 samples: 0.3635
Validation Accuracy for 7000 samples: 0.3848
Test Accuracy for 7000 samples: 0.3895
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step

Predicted Results and Probabilities for 7000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  average     decrease       0.365674     0.313809       0.320517
1     high  average     decrease       0.365674     0.313809       0.320517
2     high     good     decrease       0.365674     0.313809       0.320517
3      low  average     decrease       0.365674     0.313809       0.320517
4   medium     good     decrease       0.365674     0.313809       0.320517
5      low     poor     decrease       0.365674     0.313809       0.320517
6     high     good     decrease       0.365674     0.313809       0.320517
7      low  average     decrease       0.365674     0.313809       0.320517
8      low  average     decrease       0.3656



Training Accuracy for 8000 samples: 0.5343
Validation Accuracy for 8000 samples: 0.5367
Test Accuracy for 8000 samples: 0.5217
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 8000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     good       stable       0.231743     0.471482       0.296775
1     high  average     decrease       0.521771     0.134411       0.343818
2     high     good     decrease       0.648627     0.053308       0.298065
3     high  average     decrease       0.521771     0.134411       0.343818
4   medium     poor       stable       0.231743     0.471482       0.296775
5   medium  average       stable       0.231743     0.471482       0.296775
6   medium  average       stable       0.231743     0.471482       0.296775
7   medium  average       stable       0.231743     0.471482       0.296775
8     high     poor     decrease       0.3517



Training Accuracy for 9000 samples: 0.4838
Validation Accuracy for 9000 samples: 0.4911
Test Accuracy for 9000 samples: 0.4881
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step

Predicted Results and Probabilities for 9000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high  average     decrease       0.482447     0.256847       0.260706
1      low  average     decrease       0.482447     0.256847       0.260706
2     high     good     decrease       0.482447     0.256847       0.260706
3     high  average     decrease       0.482447     0.256847       0.260706
4     high     good     decrease       0.482447     0.256847       0.260706
5      low     good     decrease       0.482447     0.256847       0.260706
6   medium     good     decrease       0.482447     0.256847       0.260706
7   medium     poor     decrease       0.482447     0.256847       0.260706
8      low  average     decrease       0.4824



Training Accuracy for 10000 samples: 0.5113
Validation Accuracy for 10000 samples: 0.5200
Test Accuracy for 10000 samples: 0.5127
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 10000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     good     increase       0.149171     0.308201       0.542628
1     high     poor     increase       0.372707     0.185268       0.442025
2   medium     good     increase       0.149171     0.308201       0.542628
3      low     good     increase       0.092940     0.357268       0.549792
4      low     good     increase       0.092940     0.357268       0.549792
5      low     good     increase       0.092940     0.357268       0.549792
6   medium     good     increase       0.149171     0.308201       0.542628
7     high     good     increase       0.234311     0.253432       0.512257
8   medium  average     increase       0.



Training Accuracy for 11000 samples: 0.5453
Validation Accuracy for 11000 samples: 0.5230
Test Accuracy for 11000 samples: 0.5303
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 11000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high  average     decrease       0.384731     0.349683       0.265586
1     high  average     decrease       0.384731     0.349683       0.265586
2     high     good     increase       0.109108     0.056087       0.834805
3   medium  average     decrease       0.374818     0.331011       0.294171
4     high  average     decrease       0.384731     0.349683       0.265586
5   medium     poor     decrease       0.395983     0.374176       0.229840
6      low  average     decrease       0.367161     0.317839       0.314999
7     high     good     increase       0.109108     0.056087       0.834805
8   medium  average     decrease       0.



Training Accuracy for 12000 samples: 0.5310
Validation Accuracy for 12000 samples: 0.5044
Test Accuracy for 12000 samples: 0.5367
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 12000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     good       stable       0.267055     0.527714       0.205231
1   medium  average       stable       0.267055     0.527714       0.205231
2     high     good       stable       0.267055     0.527714       0.205231
3      low     poor       stable       0.267055     0.527714       0.205231
4   medium  average       stable       0.267055     0.527714       0.205231
5   medium     poor       stable       0.267055     0.527714       0.205231
6     high     good       stable       0.267055     0.527714       0.205231
7   medium     good       stable       0.267055     0.527714       0.205231
8   medium     poor       stable       0.



Training Accuracy for 13000 samples: 0.3599
Validation Accuracy for 13000 samples: 0.3615
Test Accuracy for 13000 samples: 0.3795
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 13000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor     decrease       0.362872     0.291278        0.34585
1      low     poor     decrease       0.362872     0.291278        0.34585
2      low     poor     decrease       0.362872     0.291278        0.34585
3      low  average     decrease       0.362872     0.291278        0.34585
4     high  average     decrease       0.362872     0.291278        0.34585
5     high     poor     decrease       0.362872     0.291278        0.34585
6   medium  average     decrease       0.362872     0.291278        0.34585
7   medium  average     decrease       0.362872     0.291278        0.34585
8   medium  average     decrease       0.



Training Accuracy for 14000 samples: 0.3853
Validation Accuracy for 14000 samples: 0.3981
Test Accuracy for 14000 samples: 0.3957
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 14000 samples (First 15 rows):
      IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   high     poor     increase       0.341747     0.272721       0.385532
1   high     good     increase       0.341747     0.272721       0.385532
2   high     poor     increase       0.341747     0.272721       0.385532
3    low     poor     increase       0.341747     0.272721       0.385532
4    low  average     increase       0.341747     0.272721       0.385532
5   high     poor     increase       0.341747     0.272721       0.385532
6   high     good     increase       0.341747     0.272721       0.385532
7    low     poor     increase       0.341747     0.272721       0.385532
8    low  average     increase       0.341747     0.272721 



Training Accuracy for 15000 samples: 0.4072
Validation Accuracy for 15000 samples: 0.4222
Test Accuracy for 15000 samples: 0.4080
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 15000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor       stable       0.378381     0.410625       0.210994
1     high     good       stable       0.378381     0.410625       0.210994
2     high     good       stable       0.378381     0.410625       0.210994
3      low     poor       stable       0.378381     0.410625       0.210994
4     high     good       stable       0.378381     0.410625       0.210994
5      low  average       stable       0.378381     0.410625       0.210994
6      low  average       stable       0.378381     0.410625       0.210994
7     high     good       stable       0.378381     0.410625       0.210994
8      low     poor       stable       0.



Training Accuracy for 16000 samples: 0.3714
Validation Accuracy for 16000 samples: 0.3812
Test Accuracy for 16000 samples: 0.3592
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 16000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     good       stable       0.364705     0.372576       0.262719
1     high  average       stable       0.364705     0.372576       0.262719
2   medium     poor       stable       0.364705     0.372576       0.262719
3   medium  average       stable       0.364705     0.372576       0.262719
4   medium  average       stable       0.364705     0.372576       0.262719
5   medium     poor       stable       0.364705     0.372576       0.262719
6     high     good       stable       0.364705     0.372576       0.262719
7   medium  average       stable       0.364705     0.372576       0.262719
8   medium     good       stable       0.



Training Accuracy for 17000 samples: 0.3839
Validation Accuracy for 17000 samples: 0.3961
Test Accuracy for 17000 samples: 0.3647
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 17000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     poor       stable       0.262538     0.383453       0.354009
1      low  average       stable       0.262538     0.383453       0.354009
2     high  average       stable       0.262538     0.383453       0.354009
3   medium     poor       stable       0.262538     0.383453       0.354009
4      low     good       stable       0.262538     0.383453       0.354009
5      low  average       stable       0.262538     0.383453       0.354009
6      low  average       stable       0.262538     0.383453       0.354009
7     high     poor       stable       0.262538     0.383453       0.354009
8     high     poor       stable       0.



Training Accuracy for 18000 samples: 0.4897
Validation Accuracy for 18000 samples: 0.4896
Test Accuracy for 18000 samples: 0.4667
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 18000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     poor       stable       0.252335     0.384234       0.363430
1     high     good     decrease       0.604333     0.108684       0.286983
2     high     good     decrease       0.604333     0.108684       0.286983
3   medium     good     decrease       0.494957     0.173058       0.331985
4     high     good     decrease       0.604333     0.108684       0.286983
5   medium  average       stable       0.252335     0.384234       0.363430
6      low  average       stable       0.252335     0.384234       0.363430
7   medium     poor       stable       0.252335     0.384234       0.363430
8   medium     good     decrease       0.



Training Accuracy for 19000 samples: 0.3882
Validation Accuracy for 19000 samples: 0.3744
Test Accuracy for 19000 samples: 0.3912
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 19000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     good     decrease       0.385362     0.280864       0.333774
1      low     good     decrease       0.385362     0.280864       0.333774
2      low     good     decrease       0.385362     0.280864       0.333774
3      low     poor     decrease       0.385362     0.280864       0.333774
4      low     good     decrease       0.385362     0.280864       0.333774
5   medium     poor     decrease       0.385362     0.280864       0.333774
6      low     good     decrease       0.385362     0.280864       0.333774
7      low     good     decrease       0.385362     0.280864       0.333774
8     high     good     decrease       0.



Training Accuracy for 20000 samples: 0.4989
Validation Accuracy for 20000 samples: 0.5200
Test Accuracy for 20000 samples: 0.5043
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 20000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     good     decrease       0.648281     0.221238       0.130482
1   medium     good     decrease       0.648281     0.221238       0.130482
2   medium  average       stable       0.285600     0.373753       0.340647
3     high     good     decrease       0.730197     0.176623       0.093181
4   medium     poor     increase       0.222755     0.388562       0.388683
5     high     good     decrease       0.730197     0.176623       0.093181
6      low     good     decrease       0.551890     0.269558       0.178552
7      low     good     decrease       0.551890     0.269558       0.178552
8     high  average     decrease       0.

# K-L Divergence NN Dense Data

In [22]:
# Sample sizes to loop through
sample_sizes = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000]

# Prepare a list to store K-L divergence results
kl_divergence_results = []

# Loop through each sample size
for size in sample_sizes:
    print(f"\nProcessing sample size: {size}")

    # Load the combined BN data for the current sample size
    combined_data_bn = pd.read_csv(f'combined_probabilities_{size}.csv')

    # Split the data into train, validation, and test sets
    X = combined_data_bn[['IR_State', 'EI_State']]
    y = combined_data_bn[['Chosen_SP_State', 'SP_Probabilities (decrease, stable, increase)']]

    # Refresh the data split for each iteration
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, shuffle=False, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=False, random_state=42)

    # Get the test indices
    test_indices = X_test.index

    # Get the corresponding rows from the combined BN data using the test indices
    bn_test_data = combined_data_bn.loc[test_indices]

    # Load the corresponding NN test data for the current sample size
    nn_test_data = pd.read_csv(f'test_data_nn_{size}.csv')

    # Extract NN predicted probabilities and BN ground truth probabilities
    nn_probs = nn_test_data[['Prob_decrease', 'Prob_stable', 'Prob_increase']].values
    bn_probs = bn_test_data['SP_Probabilities (decrease, stable, increase)'].apply(
        lambda x: np.array(list(map(float, x.strip('[]').split(','))))
    ).values

    # Calculate K-L divergence between NN predicted probabilities and BN ground truth probabilities
    kl_divergences = []
    output_data = []  # For tabulating output

    for i in range(len(nn_probs)):
        nn_prob = nn_probs[i]
        bn_prob = bn_probs[i]

        # Ensure both are valid probability distributions
        epsilon = 1e-10
        nn_prob = np.clip(nn_prob, epsilon, 1)
        bn_prob = np.clip(bn_prob, epsilon, 1)

        # Normalize to ensure they sum to 1
        nn_prob /= nn_prob.sum()
        bn_prob /= bn_prob.sum()

        # Compute K-L divergence
        kl_div = entropy(bn_prob, nn_prob)
        kl_divergences.append(kl_div)

        # Add data to output for tabulation
        output_data.append({
            'Sample_Index': i,
            'IR': bn_test_data.iloc[i]['IR_State'],
            'EI': bn_test_data.iloc[i]['EI_State'],
            'Ground_Truth_Probs': ', '.join([f'{prob:.4f}' for prob in bn_prob]),
            'NN_Probs': ', '.join([f'{prob:.4f}' for prob in nn_prob]),
            'KL_Divergence': f'{kl_div:.4f}'
        })

    # Create a DataFrame for the output data and tabulate the first few rows
    output_df = pd.DataFrame(output_data)
    print(f"\nK-L Divergence Results for {size} samples (First 5 rows):\n")
    print(tabulate(output_df.head(5), headers='keys', tablefmt='grid'))

    # Calculate and display the average K-L divergence for this sample size
    average_kl_divergence = np.mean(kl_divergences)
    std_kl_divergence = np.std(kl_divergences)
    print(f"\nAverage K-L Divergence for {size} samples: {average_kl_divergence:.4f}, Std Dev: {std_kl_divergence:.4f}")

    # Append the results to the list
    kl_divergence_results.append({
        'Sample_Size': size,
        'Average_KL_Divergence': average_kl_divergence,
        'Std_Dev': std_kl_divergence
    })

# Save the K-L divergence results to a CSV file
kl_divergence_df = pd.DataFrame(kl_divergence_results)
kl_divergence_df.to_csv('kl_div_NN_4_10_dense.csv', index=False)

print("\nAll sample sizes have been processed and K-L divergences calculated. Results saved to 'kl_div_NN_4_10_dense.csv'.")


Processing sample size: 50

K-L Divergence Results for 50 samples (First 5 rows):

+----+----------------+------+---------+------------------------+------------------------+-----------------+
|    |   Sample_Index | IR   | EI      | Ground_Truth_Probs     | NN_Probs               |   KL_Divergence |
|  0 |              0 | low  | average | 0.4588, 0.1882, 0.3530 | 0.3024, 0.3860, 0.3115 |          0.1001 |
+----+----------------+------+---------+------------------------+------------------------+-----------------+
|  1 |              1 | low  | good    | 0.1634, 0.5172, 0.3194 | 0.2691, 0.4194, 0.3115 |          0.0349 |
+----+----------------+------+---------+------------------------+------------------------+-----------------+
|  2 |              2 | high | good    | 0.4202, 0.4960, 0.0838 | 0.2371, 0.4789, 0.2840 |          0.1555 |
+----+----------------+------+---------+------------------------+------------------------+-----------------+
|  3 |              3 | low  | poor    | 0.4