<a href="https://colab.research.google.com/github/nonyeezeh/Research-Project-Code/blob/main/NN_Dense_4_10_Relu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
pip install pgmpy

Collecting pgmpy
  Downloading pgmpy-0.1.26-py3-none-any.whl.metadata (9.1 kB)
Downloading pgmpy-0.1.26-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pgmpy
Successfully installed pgmpy-0.1.26


In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.sampling import BayesianModelSampling
from tabulate import tabulate

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras import models, layers, callbacks, regularizers

from scipy.stats import entropy

# Bayesian Network Data Generation 500, ..., 20000 Samples (dense)

In [8]:
# Function to generate CPDs
def generate_cpds():
    # Generate random probabilities for IR
    ir_probs = np.random.rand(3)
    ir_probs /= ir_probs.sum()  # Normalize to make it a valid probability distribution

    # Generate random probabilities for EI given IR
    ei_given_ir_probs = np.random.rand(3, 3)
    ei_given_ir_probs /= ei_given_ir_probs.sum(axis=0, keepdims=True)

    # Generate random probabilities for SP given IR and EI
    sp_probs = np.random.rand(3, 3, 3)
    sp_probs /= sp_probs.sum(axis=0, keepdims=True)

    return ir_probs, ei_given_ir_probs, sp_probs

# Function to generate and save samples
def generate_and_save_samples(ir_probs, ei_probs, sp_probs, sample_size, filename):
    output_data = []

    # Generate `sample_size` random samples
    for _ in range(sample_size):
        # Sample `IR` state based on `IR` probabilities
        ir_state_idx = np.random.choice(3, p=ir_probs)
        ir_state = ['low', 'medium', 'high'][ir_state_idx]
        ir_prob = ir_probs[ir_state_idx]

        # Sample `EI` state based on `EI` probabilities given `IR`
        ei_probs_given_ir = ei_probs[:, ir_state_idx]
        ei_state_idx = np.random.choice(3, p=ei_probs_given_ir)
        ei_state = ['poor', 'average', 'good'][ei_state_idx]
        ei_prob = ei_probs_given_ir[ei_state_idx]

        # Sample `SP` state based on `SP` probabilities given `IR` and `EI`
        sp_probs_given_ir_ei = sp_probs[:, ir_state_idx, ei_state_idx]
        sp_state_idx = np.random.choice(3, p=sp_probs_given_ir_ei)
        sp_state = ['decrease', 'stable', 'increase'][sp_state_idx]
        sp_prob = sp_probs_given_ir_ei[sp_state_idx]

        # Append sample data to output list
        output_data.append({
            'IR_State': ir_state,
            'IR_Prob': f'{ir_prob:.4f}',
            'EI_State': ei_state,
            'EI_Prob': f'{ei_prob:.4f}',
            'SP_Probabilities (decrease, stable, increase)': ', '.join([f'{prob:.4f}' for prob in sp_probs_given_ir_ei]),
            'Chosen_SP_State': sp_state,
            'Chosen_SP_Probability': f'{sp_prob:.4f}'
        })

    # Create a DataFrame from the output data
    output_df = pd.DataFrame(output_data)

    # Save the output DataFrame to a CSV file
    output_df.to_csv(filename, index=False)

    # Print the first few rows for visual confirmation
    print(f"\nSample size: {sample_size} - First few rows of generated samples:\n")
    print(tabulate(output_df.head(), headers='keys', tablefmt='grid'))

# Generate and save samples for sample sizes from 1000 to 10000 every 1000
sample_sizes = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000]

for size in sample_sizes:
    # Generate the CPDs
    ir_probs, ei_given_ir_probs, sp_probs = generate_cpds()

    # Generate and save individual samples for the given sample size
    generate_and_save_samples(ir_probs, ei_given_ir_probs, sp_probs, size, f'combined_probabilities_{size}.csv')

# Notify the user that the process is done
print("\nGeneration and saving of individual samples complete for all sample sizes!")


Sample size: 50 - First few rows of generated samples:

+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|    | IR_State   |   IR_Prob | EI_State   |   EI_Prob | SP_Probabilities (decrease, stable, increase)   | Chosen_SP_State   |   Chosen_SP_Probability |
|  0 | high       |    0.4775 | poor       |    0.2992 | 0.0077, 0.4751, 0.5171                          | increase          |                  0.5171 |
+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|  1 | high       |    0.4775 | good       |    0.5353 | 0.3868, 0.1764, 0.4368                          | stable            |                  0.1764 |
+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|  2 | low        |    0.

# Hypothesis Model: 500,...,20000 Samples (dense) 4 hidden Layers, 10 Neurons Relu

In [9]:
# Sample sizes to loop through
sample_sizes = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000]

# Define the Neural Network architecture with L2 regularization
def create_nn_model(hidden_layers=4, nodes_per_layer=10, l2_lambda=0.01):
    model = models.Sequential()

    # Input layer (2 input features: IR_encoded and EI_encoded)
    model.add(layers.InputLayer(input_shape=(2,)))

    # Hidden layers with L2 regularization and Dropout
    for layer_num in range(hidden_layers):
        model.add(layers.Dense(
            nodes_per_layer,
            activation='relu',
            kernel_regularizer=regularizers.l2(l2_lambda),  # L2 regularization
            name=f"hidden_layer_{layer_num + 1}"
        ))
        model.add(layers.Dropout(0.2))  # Dropout layer to reduce overfitting

    # Output layer (3 classes: decrease, stable, increase) with L2 regularization
    model.add(layers.Dense(
        3,
        activation='softmax',
        kernel_regularizer=regularizers.l2(l2_lambda),  # L2 regularization
        name="output_layer"
    ))

    # Compile the model
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

# Prepare a dictionary to store the extracted data for each sample size
extracted_data = {}

# Extract the required columns from all sample sizes first
for size in sample_sizes:
    # Load data for the current sample size (adjust the file paths if necessary)
    outcomes_file = f'combined_probabilities_{size}.csv'
    df = pd.read_csv(outcomes_file)

    # Extract only the required columns
    required_columns = ['IR_State', 'EI_State', 'Chosen_SP_State']
    df_extracted = df[required_columns]

    # Manually encode categorical variables for IR, EI, and SP
    ir_map = {'low': 0, 'medium': 1, 'high': 2}
    ei_map = {'poor': 0, 'average': 1, 'good': 2}
    sp_map = {'decrease': 0, 'stable': 1, 'increase': 2}

    df_extracted['IR_encoded'] = df_extracted['IR_State'].map(ir_map)
    df_extracted['EI_encoded'] = df_extracted['EI_State'].map(ei_map)
    df_extracted['SP_encoded'] = df_extracted['Chosen_SP_State'].map(sp_map)

    # Store the extracted and encoded data for later use
    extracted_data[size] = df_extracted

# Loop through each sample size for NN training, validation, and testing
for size in sample_sizes:
    # Retrieve the extracted data for the current sample size
    df = extracted_data[size]

    # Features (IR and EI) and labels (SP)
    X = df[['IR_encoded', 'EI_encoded']]
    y = df['SP_encoded']

    # Refresh the data split for each iteration
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, shuffle=False, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=False, random_state=42)

    # Show split confirmation
    print(f"\nSample size: {size}")
    print("Training Data:", X_train.shape, y_train.shape)
    print("Validation Data:", X_val.shape, y_val.shape)
    print("Test Data:", X_test.shape, y_test.shape)

    # Create the Neural Network model with L2 regularization
    nn_model = create_nn_model(hidden_layers=4, nodes_per_layer=10, l2_lambda=0.01)

    # Early stopping callback to prevent overfitting
    early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Train the model
    history = nn_model.fit(X_train, y_train,
                           epochs=50,
                           batch_size=32,
                           validation_data=(X_val, y_val),
                           callbacks=[early_stopping],
                           verbose=0)  # Set verbose=0 to avoid too much output

    # Print training, validation, and test accuracy
    train_loss, train_accuracy = nn_model.evaluate(X_train, y_train, verbose=0)
    val_loss, val_accuracy = nn_model.evaluate(X_val, y_val, verbose=0)
    test_loss, test_accuracy = nn_model.evaluate(X_test, y_test, verbose=0)
    print(f"Training Accuracy for {size} samples: {train_accuracy:.4f}")
    print(f"Validation Accuracy for {size} samples: {val_accuracy:.4f}")
    print(f"Test Accuracy for {size} samples: {test_accuracy:.4f}")

    # Make predictions on the test set
    predictions = nn_model.predict(X_test)

    # Convert the predicted probabilities to class labels
    predicted_classes = predictions.argmax(axis=1)

    # Create a list to map integers back to the original SP labels
    sp_reverse_map = ['decrease', 'stable', 'increase']

    # Convert the predicted classes to the original labels
    predicted_labels = [sp_reverse_map[label] for label in predicted_classes]

    # Create a DataFrame for the predicted probabilities
    probs_df = pd.DataFrame(predictions, columns=['Prob_decrease', 'Prob_stable', 'Prob_increase'])

    # Output the IR, EI, predicted SP, and the NN probabilities
    result_df = pd.DataFrame({
        'IR': df['IR_State'].iloc[X_test.index],  # IR column from the original dataframe for the test set
        'EI': df['EI_State'].iloc[X_test.index],  # EI column from the original dataframe for the test set
        'Predicted_SP': predicted_labels           # Predicted SP labels
    })

    # Combine the result with the predicted probabilities
    combined_df = pd.concat([result_df.reset_index(drop=True), probs_df.reset_index(drop=True)], axis=1)

    # Save the test data with predictions to a CSV file
    combined_df.to_csv(f'test_data_nn_{size}.csv', index=False)

    # Show the first few rows of the results for this sample size
    print(f"\nPredicted Results and Probabilities for {size} samples (First 15 rows):")
    print(combined_df.head(15))

# After the loop is done, print this message
print("\nLooping through all sample sizes complete!")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_extracted['IR_encoded'] = df_extracted['IR_State'].map(ir_map)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_extracted['EI_encoded'] = df_extracted['EI_State'].map(ei_map)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_extracted['SP_encoded'] = df_extracted['Chosen_SP_State'].map(sp_map)



Sample size: 50
Training Data: (35, 2) (35,)
Validation Data: (7, 2) (7,)
Test Data: (8, 2) (8,)
Training Accuracy for 50 samples: 0.4000
Validation Accuracy for 50 samples: 0.4286
Test Accuracy for 50 samples: 0.8750
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step

Predicted Results and Probabilities for 50 samples (First 15 rows):
     IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   low     poor     increase       0.311394     0.328326       0.360280
1  high     good     increase       0.307561     0.325535       0.366904
2  high     good     increase       0.307561     0.325535       0.366904
3  high  average     increase       0.284996     0.338641       0.376363
4  high     good     increase       0.307561     0.325535       0.366904
5   low     poor     increase       0.311394     0.328326       0.360280
6   low     poor     increase       0.311394     0.328326       0.360280
7  high     poor     increase       0.293503     0.33335



Training Accuracy for 100 samples: 0.4571
Validation Accuracy for 100 samples: 0.2000
Test Accuracy for 100 samples: 0.5333
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step

Predicted Results and Probabilities for 100 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     poor     increase       0.268382     0.345051       0.386568
1     high  average     increase       0.229423     0.329008       0.441568
2      low  average     increase       0.266700     0.346227       0.387074
3   medium  average     increase       0.251739     0.339584       0.408677
4      low     good     increase       0.250747     0.337937       0.411316
5      low     good     increase       0.250747     0.337937       0.411316
6     high  average     increase       0.229423     0.329008       0.441568
7     high     poor     increase       0.254425     0.336857       0.408717
8     high     poor     increase       0.254425  



Training Accuracy for 150 samples: 0.5810
Validation Accuracy for 150 samples: 0.5909
Test Accuracy for 150 samples: 0.3913
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step

Predicted Results and Probabilities for 150 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high  average     increase       0.293139     0.343823       0.363039
1      low     poor     increase       0.259621     0.302003       0.438376
2      low  average       stable       0.285873     0.360469       0.353658
3      low     good       stable       0.307765     0.414668       0.277567
4      low     good       stable       0.307765     0.414668       0.277567
5      low     poor     increase       0.259621     0.302003       0.438376
6      low     good       stable       0.307765     0.414668       0.277567
7   medium     good       stable       0.312630     0.405766       0.281604
8      low     good       stable       0.307765   



Training Accuracy for 200 samples: 0.4429
Validation Accuracy for 200 samples: 0.4333
Test Accuracy for 200 samples: 0.4667
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step

Predicted Results and Probabilities for 200 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium  average       stable       0.279603     0.362113       0.358283
1   medium     poor     decrease       0.346988     0.335542       0.317470
2     high  average       stable       0.253847     0.405688       0.340466
3     high     good       stable       0.223955     0.392539       0.383505
4     high  average       stable       0.253847     0.405688       0.340466
5     high     poor     decrease       0.349801     0.341830       0.308369
6     high     good       stable       0.223955     0.392539       0.383505
7     high     good       stable       0.223955     0.392539       0.383505
8   medium     good     increase       0.294980   



Training Accuracy for 250 samples: 0.4743
Validation Accuracy for 250 samples: 0.4324
Test Accuracy for 250 samples: 0.4211
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step

Predicted Results and Probabilities for 250 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     poor       stable       0.283486     0.394245       0.322269
1   medium  average       stable       0.284353     0.397427       0.318219
2     high  average       stable       0.283800     0.391912       0.324288
3     high  average       stable       0.283800     0.391912       0.324288
4   medium  average       stable       0.284353     0.397427       0.318219
5     high     good       stable       0.285478     0.398000       0.316523
6     high     poor       stable       0.284814     0.388964       0.326223
7   medium  average       stable       0.284353     0.397427       0.318219
8     high  average       stable       0.283800   



Training Accuracy for 300 samples: 0.4524
Validation Accuracy for 300 samples: 0.5333
Test Accuracy for 300 samples: 0.3556
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step

Predicted Results and Probabilities for 300 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  average     decrease       0.436951     0.315995       0.247054
1      low     poor     decrease       0.400352     0.327329       0.272319
2   medium     good     decrease       0.492007     0.301014       0.206978
3     high     poor     decrease       0.399021     0.327884       0.273095
4     high     poor     decrease       0.399021     0.327884       0.273095
5   medium     poor     decrease       0.398172     0.328138       0.273690
6     high     good     decrease       0.499957     0.298603       0.201441
7   medium     poor     decrease       0.398172     0.328138       0.273690
8   medium     poor     decrease       0.398172   



Training Accuracy for 350 samples: 0.4286
Validation Accuracy for 350 samples: 0.4615
Test Accuracy for 350 samples: 0.3962
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step

Predicted Results and Probabilities for 350 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  average       stable       0.232816     0.452843       0.314341
1      low  average       stable       0.232816     0.452843       0.314341
2     high  average       stable       0.333335     0.387406       0.279258
3      low  average       stable       0.232816     0.452843       0.314341
4      low     good       stable       0.214532     0.464807       0.320661
5     high     poor       stable       0.325946     0.392168       0.281886
6      low     good       stable       0.214532     0.464807       0.320661
7      low  average       stable       0.232816     0.452843       0.314341
8      low  average       stable       0.232816   



Training Accuracy for 400 samples: 0.4464
Validation Accuracy for 400 samples: 0.5667
Test Accuracy for 400 samples: 0.5500
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step

Predicted Results and Probabilities for 400 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high  average       stable       0.149560     0.431899       0.418541
1      low     poor     increase       0.303869     0.335758       0.360372
2     high  average       stable       0.149560     0.431899       0.418541
3   medium  average     increase       0.229994     0.380267       0.389739
4     high  average       stable       0.149560     0.431899       0.418541
5     high     poor       stable       0.132642     0.442978       0.424381
6   medium  average     increase       0.229994     0.380267       0.389739
7   medium  average     increase       0.229994     0.380267       0.389739
8      low  average     increase       0.307323   



Training Accuracy for 450 samples: 0.5492
Validation Accuracy for 450 samples: 0.5821
Test Accuracy for 450 samples: 0.5588
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step

Predicted Results and Probabilities for 450 samples (First 15 rows):
        IR    EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  good     decrease       0.397459     0.255192       0.347349
1      low  poor     decrease       0.416625     0.212496       0.370879
2      low  poor     decrease       0.416625     0.212496       0.370879
3   medium  good       stable       0.244824     0.438000       0.317176
4      low  poor     decrease       0.416625     0.212496       0.370879
5      low  poor     decrease       0.416625     0.212496       0.370879
6   medium  good       stable       0.244824     0.438000       0.317176
7     high  poor     decrease       0.406839     0.199850       0.393311
8     high  poor     decrease       0.406839     0.199850       0.393311
9   



Training Accuracy for 500 samples: 0.4943
Validation Accuracy for 500 samples: 0.5600
Test Accuracy for 500 samples: 0.4000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step

Predicted Results and Probabilities for 500 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor     decrease       0.388411     0.269172       0.342416
1      low     poor     decrease       0.388411     0.269172       0.342416
2      low     poor     decrease       0.388411     0.269172       0.342416
3     high  average     increase       0.282744     0.288075       0.429181
4   medium  average     increase       0.303175     0.287176       0.409649
5      low     poor     decrease       0.388411     0.269172       0.342416
6      low     good     increase       0.277683     0.294475       0.427843
7     high     poor     increase       0.323458     0.280824       0.395718
8   medium     good     increase       0.255754   



Training Accuracy for 550 samples: 0.4857
Validation Accuracy for 550 samples: 0.5000
Test Accuracy for 550 samples: 0.4699
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step

Predicted Results and Probabilities for 550 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     poor     decrease       0.467133     0.212404       0.320463
1   medium  average     decrease       0.469556     0.202546       0.327898
2     high  average     decrease       0.474289     0.195916       0.329795
3   medium     good     decrease       0.480330     0.187963       0.331707
4   medium  average     decrease       0.469556     0.202546       0.327898
5   medium  average     decrease       0.469556     0.202546       0.327898
6      low     good     decrease       0.475235     0.195220       0.329545
7     high     poor     decrease       0.467133     0.212404       0.320463
8     high     poor     decrease       0.467133   



Training Accuracy for 600 samples: 0.3857
Validation Accuracy for 600 samples: 0.3889
Test Accuracy for 600 samples: 0.4222
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step

Predicted Results and Probabilities for 600 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     poor       stable       0.336920     0.381672       0.281407
1     high     good       stable       0.369823     0.389560       0.240617
2   medium  average       stable       0.342433     0.384019       0.273548
3     high  average       stable       0.364134     0.389522       0.246344
4     high  average       stable       0.364134     0.389522       0.246344
5      low  average       stable       0.323450     0.375401       0.301150
6      low  average       stable       0.323450     0.375401       0.301150
7      low     poor       stable       0.317089     0.372275       0.310636
8   medium  average       stable       0.342433   



Training Accuracy for 650 samples: 0.6330
Validation Accuracy for 650 samples: 0.6495
Test Accuracy for 650 samples: 0.6531
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step

Predicted Results and Probabilities for 650 samples (First 15 rows):
      IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0    low     poor     increase       0.247036     0.243150       0.509814
1    low     good     decrease       0.663711     0.167262       0.169027
2   high  average     increase       0.236707     0.238857       0.524436
3   high     poor     increase       0.238193     0.239622       0.522185
4   high  average     increase       0.236707     0.238857       0.524436
5    low     good     decrease       0.663711     0.167262       0.169027
6   high  average     increase       0.236707     0.238857       0.524436
7    low  average     increase       0.381266     0.236536       0.382197
8   high     good     increase       0.237764     0.239472       0.5



Training Accuracy for 700 samples: 0.5551
Validation Accuracy for 700 samples: 0.5143
Test Accuracy for 700 samples: 0.4857
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step

Predicted Results and Probabilities for 700 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     poor     decrease       0.489272     0.151274       0.359454
1      low  average     decrease       0.513721     0.142531       0.343748
2      low  average     decrease       0.513721     0.142531       0.343748
3      low     good     increase       0.405375     0.187695       0.406930
4      low     poor     decrease       0.552368     0.126342       0.321290
5     high     poor     decrease       0.489272     0.151274       0.359454
6      low     good     increase       0.405375     0.187695       0.406930
7   medium     good     increase       0.294291     0.241840       0.463870
8   medium     poor     decrease       0.552641   



Training Accuracy for 750 samples: 0.3429
Validation Accuracy for 750 samples: 0.3839
Test Accuracy for 750 samples: 0.3274
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step

Predicted Results and Probabilities for 750 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  average     increase       0.327020     0.329985       0.342995
1     high     poor     increase       0.327032     0.329990       0.342978
2     high  average     increase       0.327033     0.329992       0.342975
3      low  average     increase       0.327020     0.329985       0.342995
4      low     good     increase       0.327022     0.329986       0.342992
5   medium     poor     increase       0.327025     0.329987       0.342988
6      low     good     increase       0.327022     0.329986       0.342992
7   medium     poor     increase       0.327025     0.329987       0.342988
8      low  average     increase       0.327020   



Training Accuracy for 800 samples: 0.4250
Validation Accuracy for 800 samples: 0.5000
Test Accuracy for 800 samples: 0.3833
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step

Predicted Results and Probabilities for 800 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     good     decrease       0.401472     0.378805       0.219723
1      low     poor     decrease       0.436777     0.325587       0.237636
2      low     poor     decrease       0.436777     0.325587       0.237636
3      low     good     decrease       0.410393     0.365303       0.224303
4     high     good     decrease       0.401472     0.378805       0.219723
5     high  average     decrease       0.415128     0.358219       0.226653
6     high  average     decrease       0.415128     0.358219       0.226653
7   medium     poor     decrease       0.432178     0.332563       0.235259
8      low     good     decrease       0.410393   



Training Accuracy for 850 samples: 0.4050
Validation Accuracy for 850 samples: 0.4567
Test Accuracy for 850 samples: 0.3750
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 

Predicted Results and Probabilities for 850 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor     increase       0.389355     0.218109       0.392536
1      low     good     increase       0.296270     0.288789       0.414941
2      low  average     increase       0.344519     0.250183       0.405298
3   medium  average     increase       0.326102     0.264403       0.409496
4     high     good     increase       0.269435     0.312045       0.418521
5   medium     good     increase       0.279901     0.302882       0.417217
6      low     poor     increase       0.389355     0.218109       0.392536
7   medium     good     increase       0.279901     0.302882       0.417217
8   medium  average     increase       0.326102   



Training Accuracy for 900 samples: 0.4016
Validation Accuracy for 900 samples: 0.4296
Test Accuracy for 900 samples: 0.4296
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step

Predicted Results and Probabilities for 900 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     poor     decrease       0.397739     0.246313       0.355948
1   medium  average     decrease       0.397742     0.246314       0.355945
2      low     poor     decrease       0.397739     0.246314       0.355947
3     high     poor     decrease       0.397739     0.246314       0.355947
4     high  average     decrease       0.397740     0.246317       0.355943
5     high     poor     decrease       0.397739     0.246314       0.355947
6     high     poor     decrease       0.397739     0.246314       0.355947
7      low     poor     decrease       0.397739     0.246314       0.355947
8   medium  average     decrease       0.397742   



Training Accuracy for 950 samples: 0.4541
Validation Accuracy for 950 samples: 0.4648
Test Accuracy for 950 samples: 0.4126
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step

Predicted Results and Probabilities for 950 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     good     decrease       0.382394     0.369369       0.248236
1   medium     poor       stable       0.380115     0.385255       0.234630
2      low     good     decrease       0.381047     0.379205       0.239749
3   medium  average     decrease       0.380957     0.379798       0.239245
4     high     good     decrease       0.382394     0.369369       0.248236
5   medium     poor       stable       0.380115     0.385255       0.234630
6      low  average       stable       0.380199     0.384734       0.235066
7   medium  average     decrease       0.380957     0.379798       0.239245
8     high     good     decrease       0.382394   



Training Accuracy for 1000 samples: 0.4129
Validation Accuracy for 1000 samples: 0.3667
Test Accuracy for 1000 samples: 0.4133
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step

Predicted Results and Probabilities for 1000 samples (First 15 rows):
      IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   high  average       stable       0.314752     0.357508       0.327740
1    low  average       stable       0.293797     0.417114       0.289089
2    low     good       stable       0.248240     0.529461       0.222299
3    low  average       stable       0.293797     0.417114       0.289089
4    low     poor       stable       0.313312     0.362093       0.324595
5   high  average       stable       0.314752     0.357508       0.327740
6    low  average       stable       0.293797     0.417114       0.289089
7    low  average       stable       0.293797     0.417114       0.289089
8    low     poor       stable       0.313312     0.362093      



Training Accuracy for 2000 samples: 0.4014
Validation Accuracy for 2000 samples: 0.3600
Test Accuracy for 2000 samples: 0.4167
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step

Predicted Results and Probabilities for 2000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     good       stable       0.350167      0.40019       0.249643
1     high     poor       stable       0.350167      0.40019       0.249643
2   medium  average       stable       0.350167      0.40019       0.249643
3      low     poor       stable       0.350167      0.40019       0.249643
4     high     good       stable       0.350167      0.40019       0.249643
5     high     poor       stable       0.350167      0.40019       0.249643
6     high     good       stable       0.350167      0.40019       0.249643
7     high     poor       stable       0.350167      0.40019       0.249643
8   medium     good       stable       0.3501



Training Accuracy for 3000 samples: 0.3624
Validation Accuracy for 3000 samples: 0.3978
Test Accuracy for 3000 samples: 0.3622
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step

Predicted Results and Probabilities for 3000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     good       stable       0.358223     0.363807       0.277971
1   medium     good       stable       0.358223     0.363807       0.277971
2   medium     good       stable       0.358223     0.363807       0.277971
3   medium     poor       stable       0.358223     0.363807       0.277971
4   medium  average       stable       0.358223     0.363807       0.277971
5   medium     good       stable       0.358223     0.363807       0.277971
6   medium  average       stable       0.358223     0.363807       0.277971
7   medium     poor       stable       0.358223     0.363807       0.277971
8      low  average       stable       0.3582



Training Accuracy for 4000 samples: 0.4661
Validation Accuracy for 4000 samples: 0.4850
Test Accuracy for 4000 samples: 0.4817
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step

Predicted Results and Probabilities for 4000 samples (First 15 rows):
        IR    EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  poor     decrease       0.399964     0.338840       0.261196
1      low  poor     decrease       0.399964     0.338840       0.261196
2   medium  poor     decrease       0.390324     0.268033       0.341642
3     high  good     increase       0.332440     0.139251       0.528309
4      low  poor     decrease       0.399964     0.338840       0.261196
5      low  poor     decrease       0.399964     0.338840       0.261196
6      low  poor     decrease       0.399964     0.338840       0.261196
7      low  good     increase       0.352233     0.171952       0.475815
8     high  poor     increase       0.366316     0.203870       0.429814



Training Accuracy for 5000 samples: 0.3977
Validation Accuracy for 5000 samples: 0.4320
Test Accuracy for 5000 samples: 0.4040
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step

Predicted Results and Probabilities for 5000 samples (First 15 rows):
      IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   high     good       stable       0.211169     0.398823       0.390008
1   high     good       stable       0.211169     0.398823       0.390008
2   high  average       stable       0.211169     0.398823       0.390008
3   high  average       stable       0.211169     0.398823       0.390008
4   high     good       stable       0.211169     0.398823       0.390008
5   high     good       stable       0.211169     0.398823       0.390008
6   high  average       stable       0.211169     0.398823       0.390008
7   high  average       stable       0.211169     0.398823       0.390008
8   high     good       stable       0.211169     0.398823     



Training Accuracy for 6000 samples: 0.3805
Validation Accuracy for 6000 samples: 0.3833
Test Accuracy for 6000 samples: 0.3556
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 6000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     poor       stable       0.375678     0.379134       0.245188
1      low     good       stable       0.375678     0.379134       0.245188
2   medium     poor       stable       0.375678     0.379134       0.245188
3      low  average       stable       0.375678     0.379134       0.245188
4      low  average       stable       0.375678     0.379134       0.245188
5   medium     good       stable       0.375678     0.379134       0.245188
6     high     good       stable       0.375678     0.379134       0.245188
7   medium     poor       stable       0.375678     0.379134       0.245188
8   medium     good       stable       0.3756



Training Accuracy for 7000 samples: 0.5261
Validation Accuracy for 7000 samples: 0.5295
Test Accuracy for 7000 samples: 0.5362
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step

Predicted Results and Probabilities for 7000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     good     increase       0.193146     0.281173       0.525681
1     high     good     increase       0.193146     0.281173       0.525681
2   medium     good     increase       0.193146     0.281173       0.525681
3     high  average     increase       0.193146     0.281173       0.525681
4     high     good     increase       0.193146     0.281173       0.525681
5   medium     poor     increase       0.193146     0.281173       0.525681
6     high  average     increase       0.193146     0.281173       0.525681
7     high     good     increase       0.193146     0.281173       0.525681
8     high     good     increase       0.1931



Training Accuracy for 8000 samples: 0.3775
Validation Accuracy for 8000 samples: 0.4017
Test Accuracy for 8000 samples: 0.3975
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 8000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     good     increase       0.287794     0.333989       0.378217
1   medium  average     increase       0.287794     0.333989       0.378217
2   medium     good     increase       0.287794     0.333989       0.378217
3   medium  average     increase       0.287794     0.333989       0.378217
4   medium     poor     increase       0.287794     0.333989       0.378217
5   medium     poor     increase       0.287794     0.333989       0.378217
6   medium     good     increase       0.287794     0.333989       0.378217
7   medium     poor     increase       0.287794     0.333989       0.378217
8   medium     poor     increase       0.2877



Training Accuracy for 9000 samples: 0.3838
Validation Accuracy for 9000 samples: 0.3696
Test Accuracy for 9000 samples: 0.3844
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 9000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     good       stable       0.270867     0.381407       0.347726
1     high     poor       stable       0.270867     0.381407       0.347726
2   medium     good       stable       0.270867     0.381407       0.347726
3     high     good       stable       0.270867     0.381407       0.347726
4   medium     good       stable       0.270867     0.381407       0.347726
5     high  average       stable       0.270867     0.381407       0.347726
6   medium     poor       stable       0.270867     0.381407       0.347726
7   medium  average       stable       0.270867     0.381407       0.347726
8      low     good       stable       0.2708



Training Accuracy for 10000 samples: 0.3990
Validation Accuracy for 10000 samples: 0.4127
Test Accuracy for 10000 samples: 0.3940
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 10000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     poor     increase       0.302862      0.29718       0.399958
1   medium  average     increase       0.302862      0.29718       0.399958
2     high     poor     increase       0.302862      0.29718       0.399958
3   medium     good     increase       0.302862      0.29718       0.399958
4      low     poor     increase       0.302862      0.29718       0.399958
5   medium     poor     increase       0.302862      0.29718       0.399958
6   medium     poor     increase       0.302862      0.29718       0.399958
7   medium     poor     increase       0.302862      0.29718       0.399958
8     high     good     increase       0.



Training Accuracy for 11000 samples: 0.5788
Validation Accuracy for 11000 samples: 0.5618
Test Accuracy for 11000 samples: 0.5788
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 11000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     good     decrease       0.594394     0.273159       0.132447
1     high     good     decrease       0.594394     0.273159       0.132447
2   medium  average     decrease       0.571703     0.287016       0.141280
3      low     poor     decrease       0.596068     0.272194       0.131738
4   medium     good       stable       0.119750     0.521566       0.358684
5   medium     good       stable       0.119750     0.521566       0.358684
6     high     good     decrease       0.594394     0.273159       0.132447
7   medium     good       stable       0.119750     0.521566       0.358684
8   medium  average     decrease       0.



Training Accuracy for 12000 samples: 0.3745
Validation Accuracy for 12000 samples: 0.3772
Test Accuracy for 12000 samples: 0.3556
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 12000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     poor     decrease       0.373892      0.25627       0.369838
1   medium  average     decrease       0.373892      0.25627       0.369838
2   medium  average     decrease       0.373892      0.25627       0.369838
3   medium  average     decrease       0.373892      0.25627       0.369838
4   medium     good     decrease       0.373892      0.25627       0.369838
5   medium     good     decrease       0.373892      0.25627       0.369838
6   medium  average     decrease       0.373892      0.25627       0.369838
7   medium  average     decrease       0.373892      0.25627       0.369838
8   medium     good     decrease       0.



Training Accuracy for 13000 samples: 0.3711
Validation Accuracy for 13000 samples: 0.3682
Test Accuracy for 13000 samples: 0.3779
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 13000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor     decrease        0.36901     0.298582       0.332408
1   medium  average     decrease        0.36901     0.298582       0.332408
2     high     good     decrease        0.36901     0.298582       0.332408
3     high  average     decrease        0.36901     0.298582       0.332408
4     high  average     decrease        0.36901     0.298582       0.332408
5   medium     poor     decrease        0.36901     0.298582       0.332408
6      low  average     decrease        0.36901     0.298582       0.332408
7   medium  average     decrease        0.36901     0.298582       0.332408
8     high  average     decrease        0



Training Accuracy for 14000 samples: 0.4097
Validation Accuracy for 14000 samples: 0.3914
Test Accuracy for 14000 samples: 0.3971
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 14000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  average     decrease        0.40542     0.365516       0.229064
1      low     good     decrease        0.40542     0.365516       0.229064
2      low     good     decrease        0.40542     0.365516       0.229064
3      low     good     decrease        0.40542     0.365516       0.229064
4     high     poor     decrease        0.40542     0.365516       0.229064
5      low     good     decrease        0.40542     0.365516       0.229064
6   medium  average     decrease        0.40542     0.365516       0.229064
7      low     good     decrease        0.40542     0.365516       0.229064
8      low     good     decrease        0



Training Accuracy for 15000 samples: 0.3611
Validation Accuracy for 15000 samples: 0.3751
Test Accuracy for 15000 samples: 0.3489
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 15000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium  average       stable       0.319279     0.362066       0.318655
1   medium  average       stable       0.319279     0.362066       0.318655
2   medium  average       stable       0.319279     0.362066       0.318655
3      low     good       stable       0.319279     0.362066       0.318655
4      low  average       stable       0.319279     0.362066       0.318655
5      low  average       stable       0.319279     0.362066       0.318655
6      low     good       stable       0.319279     0.362066       0.318655
7     high     good       stable       0.319279     0.362066       0.318655
8      low     good       stable       0.



Training Accuracy for 16000 samples: 0.5523
Validation Accuracy for 16000 samples: 0.5537
Test Accuracy for 16000 samples: 0.5688
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 16000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     good     decrease       0.503265     0.303545       0.193190
1   medium     good     increase       0.225509     0.360394       0.414096
2      low  average     increase       0.195034     0.352044       0.452922
3      low     good     decrease       0.503265     0.303545       0.193190
4     high  average     increase       0.150345     0.292741       0.556914
5   medium     good     increase       0.225509     0.360394       0.414096
6      low     good     decrease       0.503265     0.303545       0.193190
7      low     good     decrease       0.503265     0.303545       0.193190
8     high  average     increase       0.



Training Accuracy for 17000 samples: 0.3870
Validation Accuracy for 17000 samples: 0.3784
Test Accuracy for 17000 samples: 0.3710
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 17000 samples (First 15 rows):
      IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   high     good     decrease       0.383738     0.267707       0.348555
1   high     poor     decrease       0.383738     0.267707       0.348555
2    low     poor     decrease       0.383738     0.267707       0.348555
3   high     poor     decrease       0.383738     0.267707       0.348555
4   high     poor     decrease       0.383738     0.267707       0.348555
5    low  average     decrease       0.383738     0.267707       0.348555
6   high  average     decrease       0.383738     0.267707       0.348555
7   high     good     decrease       0.383738     0.267707       0.348555
8   high     good     decrease       0.383738     0.267707 



Training Accuracy for 18000 samples: 0.5284
Validation Accuracy for 18000 samples: 0.5256
Test Accuracy for 18000 samples: 0.5200
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 18000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  average       stable       0.357282     0.445208       0.197510
1   medium  average       stable       0.357269     0.445415       0.197317
2      low     poor       stable       0.357269     0.445431       0.197299
3   medium  average       stable       0.357269     0.445415       0.197317
4      low  average       stable       0.357282     0.445208       0.197510
5      low     good     increase       0.290374     0.044041       0.665584
6      low  average       stable       0.357282     0.445208       0.197510
7      low  average       stable       0.357282     0.445208       0.197510
8     high     good     decrease       0.



Training Accuracy for 19000 samples: 0.5241
Validation Accuracy for 19000 samples: 0.5042
Test Accuracy for 19000 samples: 0.5309
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 19000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     poor     increase       0.174025     0.310845        0.51513
1   medium     poor     increase       0.174025     0.310845        0.51513
2     high  average     increase       0.174025     0.310845        0.51513
3     high     poor     increase       0.174025     0.310845        0.51513
4     high  average     increase       0.174025     0.310845        0.51513
5     high  average     increase       0.174025     0.310845        0.51513
6     high     good     increase       0.174025     0.310845        0.51513
7     high  average     increase       0.174025     0.310845        0.51513
8     high  average     increase       0.



Training Accuracy for 20000 samples: 0.4214
Validation Accuracy for 20000 samples: 0.4273
Test Accuracy for 20000 samples: 0.4227
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 20000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium  average     decrease       0.425239     0.383719       0.191042
1      low     poor     decrease       0.425239     0.383719       0.191042
2     high  average     decrease       0.425239     0.383719       0.191042
3      low     good     decrease       0.425239     0.383719       0.191042
4     high     good     decrease       0.425239     0.383719       0.191042
5      low  average     decrease       0.425239     0.383719       0.191042
6      low  average     decrease       0.425239     0.383719       0.191042
7     high  average     decrease       0.425239     0.383719       0.191042
8      low     good     decrease       0.

# K-L Divergence NN Dense Data

In [10]:
# Sample sizes to loop through
sample_sizes = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000]

# Prepare a list to store K-L divergence results
kl_divergence_results = []

# Loop through each sample size
for size in sample_sizes:
    print(f"\nProcessing sample size: {size}")

    # Load the combined BN data for the current sample size
    combined_data_bn = pd.read_csv(f'combined_probabilities_{size}.csv')

    # Split the data into train, validation, and test sets
    X = combined_data_bn[['IR_State', 'EI_State']]
    y = combined_data_bn[['Chosen_SP_State', 'SP_Probabilities (decrease, stable, increase)']]

    # Refresh the data split for each iteration
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, shuffle=False, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=False, random_state=42)

    # Get the test indices
    test_indices = X_test.index

    # Get the corresponding rows from the combined BN data using the test indices
    bn_test_data = combined_data_bn.loc[test_indices]

    # Load the corresponding NN test data for the current sample size
    nn_test_data = pd.read_csv(f'test_data_nn_{size}.csv')

    # Extract NN predicted probabilities and BN ground truth probabilities
    nn_probs = nn_test_data[['Prob_decrease', 'Prob_stable', 'Prob_increase']].values
    bn_probs = bn_test_data['SP_Probabilities (decrease, stable, increase)'].apply(
        lambda x: np.array(list(map(float, x.strip('[]').split(','))))
    ).values

    # Calculate K-L divergence between NN predicted probabilities and BN ground truth probabilities
    kl_divergences = []
    output_data = []  # For tabulating output

    for i in range(len(nn_probs)):
        nn_prob = nn_probs[i]
        bn_prob = bn_probs[i]

        # Ensure both are valid probability distributions
        epsilon = 1e-10
        nn_prob = np.clip(nn_prob, epsilon, 1)
        bn_prob = np.clip(bn_prob, epsilon, 1)

        # Normalize to ensure they sum to 1
        nn_prob /= nn_prob.sum()
        bn_prob /= bn_prob.sum()

        # Compute K-L divergence
        kl_div = entropy(bn_prob, nn_prob)
        kl_divergences.append(kl_div)

        # Add data to output for tabulation
        output_data.append({
            'Sample_Index': i,
            'IR': bn_test_data.iloc[i]['IR_State'],
            'EI': bn_test_data.iloc[i]['EI_State'],
            'Ground_Truth_Probs': ', '.join([f'{prob:.4f}' for prob in bn_prob]),
            'NN_Probs': ', '.join([f'{prob:.4f}' for prob in nn_prob]),
            'KL_Divergence': f'{kl_div:.4f}'
        })

    # Create a DataFrame for the output data and tabulate the first few rows
    output_df = pd.DataFrame(output_data)
    print(f"\nK-L Divergence Results for {size} samples (First 5 rows):\n")
    print(tabulate(output_df.head(5), headers='keys', tablefmt='grid'))

    # Calculate and display the average K-L divergence for this sample size
    average_kl_divergence = np.mean(kl_divergences)
    std_kl_divergence = np.std(kl_divergences)
    print(f"\nAverage K-L Divergence for {size} samples: {average_kl_divergence:.4f}, Std Dev: {std_kl_divergence:.4f}")

    # Append the results to the list
    kl_divergence_results.append({
        'Sample_Size': size,
        'Average_KL_Divergence': average_kl_divergence,
        'Std_Dev': std_kl_divergence
    })

# Save the K-L divergence results to a CSV file
kl_divergence_df = pd.DataFrame(kl_divergence_results)
kl_divergence_df.to_csv('kl_div_NN_4_10_dense.csv', index=False)

print("\nAll sample sizes have been processed and K-L divergences calculated. Results saved to 'kl_div_NN_4_10_dense.csv'.")


Processing sample size: 50

K-L Divergence Results for 50 samples (First 5 rows):

+----+----------------+------+---------+------------------------+------------------------+-----------------+
|    |   Sample_Index | IR   | EI      | Ground_Truth_Probs     | NN_Probs               |   KL_Divergence |
|  0 |              0 | low  | poor    | 0.0545, 0.3651, 0.5804 | 0.3114, 0.3283, 0.3603 |          0.2205 |
+----+----------------+------+---------+------------------------+------------------------+-----------------+
|  1 |              1 | high | good    | 0.3868, 0.1764, 0.4368 | 0.3076, 0.3255, 0.3669 |          0.0568 |
+----+----------------+------+---------+------------------------+------------------------+-----------------+
|  2 |              2 | high | good    | 0.3868, 0.1764, 0.4368 | 0.3076, 0.3255, 0.3669 |          0.0568 |
+----+----------------+------+---------+------------------------+------------------------+-----------------+
|  3 |              3 | high | average | 0.2