<a href="https://colab.research.google.com/github/nonyeezeh/Research-Project-Code/blob/main/NN_Dense_2_3_Relu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
pip install pgmpy

Collecting pgmpy
  Downloading pgmpy-0.1.26-py3-none-any.whl.metadata (9.1 kB)
Downloading pgmpy-0.1.26-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pgmpy
Successfully installed pgmpy-0.1.26


In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.sampling import BayesianModelSampling
from tabulate import tabulate

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras import models, layers, callbacks, regularizers

from scipy.stats import entropy

# Bayesian Network Data Generation 500, ..., 20000 Samples (dense)

In [30]:
# Function to generate CPDs
def generate_cpds():
    # Generate random probabilities for IR
    ir_probs = np.random.rand(3)
    ir_probs /= ir_probs.sum()  # Normalize to make it a valid probability distribution

    # Generate random probabilities for EI given IR
    ei_given_ir_probs = np.random.rand(3, 3)
    ei_given_ir_probs /= ei_given_ir_probs.sum(axis=0, keepdims=True)

    # Generate random probabilities for SP given IR and EI
    sp_probs = np.random.rand(3, 3, 3)
    sp_probs /= sp_probs.sum(axis=0, keepdims=True)

    return ir_probs, ei_given_ir_probs, sp_probs

# Function to generate and save samples
def generate_and_save_samples(ir_probs, ei_probs, sp_probs, sample_size, filename):
    output_data = []

    # Generate `sample_size` random samples
    for _ in range(sample_size):
        # Sample `IR` state based on `IR` probabilities
        ir_state_idx = np.random.choice(3, p=ir_probs)
        ir_state = ['low', 'medium', 'high'][ir_state_idx]
        ir_prob = ir_probs[ir_state_idx]

        # Sample `EI` state based on `EI` probabilities given `IR`
        ei_probs_given_ir = ei_probs[:, ir_state_idx]
        ei_state_idx = np.random.choice(3, p=ei_probs_given_ir)
        ei_state = ['poor', 'average', 'good'][ei_state_idx]
        ei_prob = ei_probs_given_ir[ei_state_idx]

        # Sample `SP` state based on `SP` probabilities given `IR` and `EI`
        sp_probs_given_ir_ei = sp_probs[:, ir_state_idx, ei_state_idx]
        sp_state_idx = np.random.choice(3, p=sp_probs_given_ir_ei)
        sp_state = ['decrease', 'stable', 'increase'][sp_state_idx]
        sp_prob = sp_probs_given_ir_ei[sp_state_idx]

        # Append sample data to output list
        output_data.append({
            'IR_State': ir_state,
            'IR_Prob': f'{ir_prob:.4f}',
            'EI_State': ei_state,
            'EI_Prob': f'{ei_prob:.4f}',
            'SP_Probabilities (decrease, stable, increase)': ', '.join([f'{prob:.4f}' for prob in sp_probs_given_ir_ei]),
            'Chosen_SP_State': sp_state,
            'Chosen_SP_Probability': f'{sp_prob:.4f}'
        })

    # Create a DataFrame from the output data
    output_df = pd.DataFrame(output_data)

    # Save the output DataFrame to a CSV file
    output_df.to_csv(filename, index=False)

    # Print the first few rows for visual confirmation
    print(f"\nSample size: {sample_size} - First few rows of generated samples:\n")
    print(tabulate(output_df.head(), headers='keys', tablefmt='grid'))

# Generate and save samples for sample sizes from 1000 to 10000 every 1000
sample_sizes = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000]

for size in sample_sizes:
    # Generate the CPDs
    ir_probs, ei_given_ir_probs, sp_probs = generate_cpds()

    # Generate and save individual samples for the given sample size
    generate_and_save_samples(ir_probs, ei_given_ir_probs, sp_probs, size, f'combined_probabilities_{size}.csv')

# Notify the user that the process is done
print("\nGeneration and saving of individual samples complete for all sample sizes!")


Sample size: 50 - First few rows of generated samples:

+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|    | IR_State   |   IR_Prob | EI_State   |   EI_Prob | SP_Probabilities (decrease, stable, increase)   | Chosen_SP_State   |   Chosen_SP_Probability |
|  0 | medium     |    0.4177 | average    |    0.4435 | 0.1337, 0.4245, 0.4418                          | stable            |                  0.4245 |
+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|  1 | low        |    0.3039 | poor       |    0.2568 | 0.4775, 0.4027, 0.1198                          | stable            |                  0.4027 |
+----+------------+-----------+------------+-----------+-------------------------------------------------+-------------------+-------------------------+
|  2 | low        |    0.

# Hypothesis Model: 500, ..., 20000 Samples (dense) 2 hidden Layer, 3 Neurons Relu

In [31]:
# Sample sizes to loop through
sample_sizes = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000]

# Define the Neural Network architecture with L2 regularization
def create_nn_model(hidden_layers=2, nodes_per_layer=3, l2_lambda=0.01):
    model = models.Sequential()

    # Input layer (2 input features: IR_encoded and EI_encoded)
    model.add(layers.InputLayer(input_shape=(2,)))

    # Hidden layers with L2 regularization and Dropout
    for layer_num in range(hidden_layers):
        model.add(layers.Dense(
            nodes_per_layer,
            activation='relu',
            kernel_regularizer=regularizers.l2(l2_lambda),  # L2 regularization
            name=f"hidden_layer_{layer_num + 1}"
        ))
        model.add(layers.Dropout(0.2))  # Dropout layer to reduce overfitting

    # Output layer (3 classes: decrease, stable, increase) with L2 regularization
    model.add(layers.Dense(
        3,
        activation='softmax',
        kernel_regularizer=regularizers.l2(l2_lambda),  # L2 regularization
        name="output_layer"
    ))

    # Compile the model
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

# Prepare a dictionary to store the extracted data for each sample size
extracted_data = {}

# Extract the required columns from all sample sizes first
for size in sample_sizes:
    # Load data for the current sample size (adjust the file paths if necessary)
    outcomes_file = f'combined_probabilities_{size}.csv'
    df = pd.read_csv(outcomes_file)

    # Extract only the required columns
    required_columns = ['IR_State', 'EI_State', 'Chosen_SP_State']
    df_extracted = df[required_columns]

    # Manually encode categorical variables for IR, EI, and SP
    ir_map = {'low': 0, 'medium': 1, 'high': 2}
    ei_map = {'poor': 0, 'average': 1, 'good': 2}
    sp_map = {'decrease': 0, 'stable': 1, 'increase': 2}

    df_extracted['IR_encoded'] = df_extracted['IR_State'].map(ir_map)
    df_extracted['EI_encoded'] = df_extracted['EI_State'].map(ei_map)
    df_extracted['SP_encoded'] = df_extracted['Chosen_SP_State'].map(sp_map)

    # Store the extracted and encoded data for later use
    extracted_data[size] = df_extracted

# Loop through each sample size for NN training, validation, and testing
for size in sample_sizes:
    # Retrieve the extracted data for the current sample size
    df = extracted_data[size]

    # Features (IR and EI) and labels (SP)
    X = df[['IR_encoded', 'EI_encoded']]
    y = df['SP_encoded']

    # Refresh the data split for each iteration
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, shuffle=False, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=False, random_state=42)

    # Show split confirmation
    print(f"\nSample size: {size}")
    print("Training Data:", X_train.shape, y_train.shape)
    print("Validation Data:", X_val.shape, y_val.shape)
    print("Test Data:", X_test.shape, y_test.shape)

    # Create the Neural Network model with L2 regularization
    nn_model = create_nn_model(hidden_layers=2, nodes_per_layer=3, l2_lambda=0.01)

    # Early stopping callback to prevent overfitting
    early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Train the model
    history = nn_model.fit(X_train, y_train,
                           epochs=25,
                           batch_size=16,
                           validation_data=(X_val, y_val),
                           callbacks=[early_stopping],
                           verbose=0)  # Set verbose=0 to avoid too much output

    # Print training, validation, and test accuracy
    train_loss, train_accuracy = nn_model.evaluate(X_train, y_train, verbose=0)
    val_loss, val_accuracy = nn_model.evaluate(X_val, y_val, verbose=0)
    test_loss, test_accuracy = nn_model.evaluate(X_test, y_test, verbose=0)
    print(f"Training Accuracy for {size} samples: {train_accuracy:.4f}")
    print(f"Validation Accuracy for {size} samples: {val_accuracy:.4f}")
    print(f"Test Accuracy for {size} samples: {test_accuracy:.4f}")

    # Make predictions on the test set
    predictions = nn_model.predict(X_test)

    # Convert the predicted probabilities to class labels
    predicted_classes = predictions.argmax(axis=1)

    # Create a list to map integers back to the original SP labels
    sp_reverse_map = ['decrease', 'stable', 'increase']

    # Convert the predicted classes to the original labels
    predicted_labels = [sp_reverse_map[label] for label in predicted_classes]

    # Create a DataFrame for the predicted probabilities
    probs_df = pd.DataFrame(predictions, columns=['Prob_decrease', 'Prob_stable', 'Prob_increase'])

    # Output the IR, EI, predicted SP, and the NN probabilities
    result_df = pd.DataFrame({
        'IR': df['IR_State'].iloc[X_test.index],  # IR column from the original dataframe for the test set
        'EI': df['EI_State'].iloc[X_test.index],  # EI column from the original dataframe for the test set
        'Predicted_SP': predicted_labels           # Predicted SP labels
    })

    # Combine the result with the predicted probabilities
    combined_df = pd.concat([result_df.reset_index(drop=True), probs_df.reset_index(drop=True)], axis=1)

    # Save the test data with predictions to a CSV file
    combined_df.to_csv(f'test_data_nn_{size}.csv', index=False)

    # Show the first few rows of the results for this sample size
    print(f"\nPredicted Results and Probabilities for {size} samples (First 15 rows):")
    print(combined_df.head(15))

# After the loop is done, print this message
print("\nLooping through all sample sizes complete!")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_extracted['IR_encoded'] = df_extracted['IR_State'].map(ir_map)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_extracted['EI_encoded'] = df_extracted['EI_State'].map(ei_map)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_extracted['SP_encoded'] = df_extracted['Chosen_SP_State'].map(sp_map)



Sample size: 50
Training Data: (35, 2) (35,)
Validation Data: (7, 2) (7,)
Test Data: (8, 2) (8,)
Training Accuracy for 50 samples: 0.2286
Validation Accuracy for 50 samples: 0.5714
Test Accuracy for 50 samples: 0.6250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step

Predicted Results and Probabilities for 50 samples (First 15 rows):
       IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0    high     poor     increase       0.194799     0.264753       0.540448
1     low     poor       stable       0.333093     0.334513       0.332394
2     low  average     increase       0.266590     0.316166       0.417244
3    high     poor     increase       0.194799     0.264753       0.540448
4     low     good     increase       0.205176     0.287949       0.506875
5  medium     poor     increase       0.261424     0.305081       0.433495
6  medium  average     increase       0.226040     0.285332       0.488628
7  medium     poor     increase       0.



Training Accuracy for 100 samples: 0.3429
Validation Accuracy for 100 samples: 0.4667
Test Accuracy for 100 samples: 0.4667
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step

Predicted Results and Probabilities for 100 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     good       stable       0.325306     0.351687       0.323007
1   medium     poor     increase       0.332065     0.314310       0.353626
2      low     good       stable       0.325306     0.351687       0.323007
3     high  average       stable       0.323207     0.339780       0.337014
4     high  average       stable       0.323207     0.339780       0.337014
5   medium     poor     increase       0.332065     0.314310       0.353626
6   medium     poor     increase       0.332065     0.314310       0.353626
7   medium     good       stable       0.319039     0.358456       0.322505
8     high  average       stable       0.323207   



Training Accuracy for 150 samples: 0.5524
Validation Accuracy for 150 samples: 0.4545
Test Accuracy for 150 samples: 0.4348
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step

Predicted Results and Probabilities for 150 samples (First 15 rows):
      IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   high     poor     decrease       0.353349     0.319910       0.326742
1    low     good     decrease       0.511443     0.270388       0.218170
2   high  average     decrease       0.388698     0.302415       0.308887
3    low  average     decrease       0.473493     0.271861       0.254646
4   high     poor     decrease       0.353349     0.319910       0.326742
5   high     poor     decrease       0.353349     0.319910       0.326742
6    low  average     decrease       0.473493     0.271861       0.254646
7    low     poor     decrease       0.430126     0.281880       0.287993
8   high  average     decrease       0.388698     0.302415       0.3



Training Accuracy for 200 samples: 0.5357
Validation Accuracy for 200 samples: 0.4000
Test Accuracy for 200 samples: 0.5667
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step

Predicted Results and Probabilities for 200 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  average       stable       0.319640     0.403910       0.276450
1      low     poor       stable       0.319789     0.393495       0.286717
2   medium     good       stable       0.319714     0.382159       0.298127
3     high     poor     increase       0.332953     0.330103       0.336944
4     high     good       stable       0.319714     0.382159       0.298127
5     high     good       stable       0.319714     0.382159       0.298127
6     high     good       stable       0.319714     0.382159       0.298127
7   medium  average       stable       0.319714     0.382159       0.298127
8     high     poor     increase       0.332953   



Training Accuracy for 250 samples: 0.5086
Validation Accuracy for 250 samples: 0.4595
Test Accuracy for 250 samples: 0.4211
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step

Predicted Results and Probabilities for 250 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high  average       stable       0.293900     0.411172       0.294928
1   medium     good       stable       0.299467     0.398482       0.302051
2   medium  average       stable       0.293900     0.411172       0.294928
3      low  average       stable       0.319330     0.350130       0.330540
4     high  average       stable       0.293900     0.411172       0.294928
5      low  average       stable       0.319330     0.350130       0.330540
6   medium     poor       stable       0.293900     0.411172       0.294928
7      low  average       stable       0.319330     0.350130       0.330540
8   medium     poor       stable       0.293900   



Training Accuracy for 300 samples: 0.4524
Validation Accuracy for 300 samples: 0.4667
Test Accuracy for 300 samples: 0.5333
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step

Predicted Results and Probabilities for 300 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     good     increase       0.285637     0.302275       0.412088
1     high     good     increase       0.285637     0.302275       0.412088
2     high     good     increase       0.285637     0.302275       0.412088
3     high     good     increase       0.285637     0.302275       0.412088
4     high     good     increase       0.285637     0.302275       0.412088
5     high     good     increase       0.285637     0.302275       0.412088
6   medium     good     increase       0.268905     0.309170       0.421925
7   medium     poor     increase       0.322214     0.290129       0.387657
8   medium  average     increase       0.312606   



Training Accuracy for 350 samples: 0.4449
Validation Accuracy for 350 samples: 0.4038
Test Accuracy for 350 samples: 0.4717
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step

Predicted Results and Probabilities for 350 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor     decrease       0.390107     0.321866       0.288027
1   medium     poor     decrease       0.407929     0.302581       0.289490
2      low  average     decrease       0.404122     0.306653       0.289225
3   medium     poor     decrease       0.407929     0.302581       0.289490
4      low     poor     decrease       0.390107     0.321866       0.288027
5     high     good     decrease       0.453527     0.255804       0.290669
6     high  average     decrease       0.439663     0.269639       0.290698
7     high  average     decrease       0.439663     0.269639       0.290698
8      low     poor     decrease       0.390107   



Training Accuracy for 400 samples: 0.4750
Validation Accuracy for 400 samples: 0.4667
Test Accuracy for 400 samples: 0.4000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step

Predicted Results and Probabilities for 400 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     good     increase       0.390944     0.203960       0.405096
1   medium  average     increase       0.337299     0.288399       0.374302
2     high     poor       stable       0.295908     0.362735       0.341357
3      low  average     increase       0.325106     0.308918       0.365976
4   medium  average     increase       0.337299     0.288399       0.374302
5   medium     good     increase       0.390944     0.203960       0.405096
6   medium     poor       stable       0.296087     0.363391       0.340522
7     high     poor       stable       0.295908     0.362735       0.341357
8   medium     good     increase       0.390944   



Training Accuracy for 450 samples: 0.4190
Validation Accuracy for 450 samples: 0.4030
Test Accuracy for 450 samples: 0.4853
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step

Predicted Results and Probabilities for 450 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     good     increase       0.286208     0.205097       0.508695
1   medium     poor     increase       0.315134     0.264723       0.420144
2      low     good     increase       0.286208     0.205097       0.508695
3     high  average     increase       0.301350     0.274715       0.423935
4      low     poor     increase       0.322928     0.286127       0.390945
5     high     good     increase       0.283575     0.230719       0.485706
6     high  average     increase       0.301350     0.274715       0.423935
7     high  average     increase       0.301350     0.274715       0.423935
8   medium     good     increase       0.276541   



Training Accuracy for 500 samples: 0.4914
Validation Accuracy for 500 samples: 0.4933
Test Accuracy for 500 samples: 0.5200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step

Predicted Results and Probabilities for 500 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor       stable       0.316941     0.354051       0.329009
1   medium  average       stable       0.316941     0.354051       0.329009
2   medium  average       stable       0.316941     0.354051       0.329009
3      low     poor       stable       0.316941     0.354051       0.329009
4      low     good     decrease       0.366215     0.343838       0.289946
5   medium     good       stable       0.343045     0.349065       0.307890
6      low     good     decrease       0.366215     0.343838       0.289946
7      low     good     decrease       0.366215     0.343838       0.289946
8   medium     poor       stable       0.316941   



Training Accuracy for 550 samples: 0.4857
Validation Accuracy for 550 samples: 0.4512
Test Accuracy for 550 samples: 0.4458
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step

Predicted Results and Probabilities for 550 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     good       stable       0.295110     0.437010       0.267880
1   medium     poor       stable       0.261295     0.454824       0.283881
2     high     poor       stable       0.261295     0.454824       0.283881
3     high     good       stable       0.322824     0.422046       0.255130
4     high  average       stable       0.285643     0.442050       0.272307
5   medium     good       stable       0.295110     0.437010       0.267880
6   medium  average       stable       0.261295     0.454824       0.283881
7     high     poor       stable       0.261295     0.454824       0.283881
8     high     poor       stable       0.261295   



Training Accuracy for 600 samples: 0.5381
Validation Accuracy for 600 samples: 0.5000
Test Accuracy for 600 samples: 0.5333
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step

Predicted Results and Probabilities for 600 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     good       stable       0.287464     0.464117       0.248419
1     high     good       stable       0.287464     0.464117       0.248419
2     high     good       stable       0.287464     0.464117       0.248419
3   medium     good       stable       0.193199     0.591854       0.214946
4      low  average       stable       0.206894     0.559294       0.233812
5     high  average       stable       0.314663     0.396617       0.288720
6   medium     good       stable       0.193199     0.591854       0.214946
7   medium     good       stable       0.193199     0.591854       0.214946
8   medium     good       stable       0.193199   



Training Accuracy for 650 samples: 0.4330
Validation Accuracy for 650 samples: 0.3918
Test Accuracy for 650 samples: 0.4490
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step

Predicted Results and Probabilities for 650 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     poor     increase       0.337038     0.317556       0.345406
1   medium     good       stable       0.355931     0.358218       0.285851
2   medium  average     decrease       0.348369     0.345229       0.306402
3      low     good       stable       0.355093     0.355648       0.289259
4   medium     good       stable       0.355931     0.358218       0.285851
5     high     good       stable       0.351021     0.355383       0.293596
6   medium     good       stable       0.355931     0.358218       0.285851
7     high     poor     increase       0.332331     0.317078       0.350591
8   medium  average     decrease       0.348369   



Training Accuracy for 700 samples: 0.4755
Validation Accuracy for 700 samples: 0.4190
Test Accuracy for 700 samples: 0.4381
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step

Predicted Results and Probabilities for 700 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor       stable       0.341612     0.345754       0.312634
1   medium     good     decrease       0.429966     0.232197       0.337837
2      low     good     decrease       0.395365     0.275260       0.329375
3   medium     good     decrease       0.429966     0.232197       0.337837
4      low     good     decrease       0.395365     0.275260       0.329375
5   medium     good     decrease       0.429966     0.232197       0.337837
6   medium     good     decrease       0.429966     0.232197       0.337837
7     high     poor       stable       0.340880     0.346054       0.313065
8   medium  average     decrease       0.376013   



Training Accuracy for 750 samples: 0.3981
Validation Accuracy for 750 samples: 0.3661
Test Accuracy for 750 samples: 0.3982
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step

Predicted Results and Probabilities for 750 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium  average     decrease       0.384382     0.357953       0.257665
1   medium  average     decrease       0.384382     0.357953       0.257665
2      low     good     decrease       0.386233     0.340729       0.273038
3   medium     poor     decrease       0.379246     0.368222       0.252532
4      low     good     decrease       0.386233     0.340729       0.273038
5   medium     poor     decrease       0.379246     0.368222       0.252532
6      low     good     decrease       0.386233     0.340729       0.273038
7   medium  average     decrease       0.384382     0.357953       0.257665
8   medium  average     decrease       0.384382   



Training Accuracy for 800 samples: 0.4250
Validation Accuracy for 800 samples: 0.4500
Test Accuracy for 800 samples: 0.3583
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step

Predicted Results and Probabilities for 800 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     good     increase       0.272675     0.309801       0.417524
1   medium  average     increase       0.233775     0.351306       0.414920
2   medium  average     increase       0.233775     0.351306       0.414920
3      low     good     increase       0.281808     0.313352       0.404840
4   medium     poor     increase       0.202812     0.386913       0.410275
5   medium  average     increase       0.233775     0.351306       0.414920
6   medium     poor     increase       0.202812     0.386913       0.410275
7     high  average     increase       0.210113     0.358784       0.431103
8     high  average     increase       0.210113   



Training Accuracy for 850 samples: 0.4571
Validation Accuracy for 850 samples: 0.4252
Test Accuracy for 850 samples: 0.4531
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 

Predicted Results and Probabilities for 850 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  average     increase       0.181376     0.380848       0.437777
1     high     poor     increase       0.181376     0.380848       0.437777
2      low     poor     increase       0.181376     0.380848       0.437777
3     high  average     increase       0.181376     0.380848       0.437777
4     high  average     increase       0.181376     0.380848       0.437777
5     high     poor     increase       0.181376     0.380848       0.437777
6     high     good     increase       0.181376     0.380848       0.437777
7   medium  average     increase       0.181376     0.380848       0.437777
8     high     poor     increase       0.181376   



Training Accuracy for 900 samples: 0.5127
Validation Accuracy for 900 samples: 0.4296
Test Accuracy for 900 samples: 0.4296
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step

Predicted Results and Probabilities for 900 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     poor     decrease       0.347800     0.334765       0.317435
1      low     good     increase       0.291602     0.349636       0.358762
2   medium  average       stable       0.329090     0.340088       0.330822
3   medium  average       stable       0.329090     0.340088       0.330822
4     high     good       stable       0.310343     0.345060       0.344597
5     high  average     decrease       0.338965     0.337322       0.323713
6   medium  average       stable       0.329090     0.340088       0.330822
7      low     good     increase       0.291602     0.349636       0.358762
8     high     good       stable       0.310343   



Training Accuracy for 950 samples: 0.4331
Validation Accuracy for 950 samples: 0.4366
Test Accuracy for 950 samples: 0.4755
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step

Predicted Results and Probabilities for 950 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium  average     decrease       0.425618     0.278425       0.295958
1   medium     poor     decrease       0.426220     0.278011       0.295769
2      low  average     decrease       0.425582     0.278449       0.295969
3     high  average     decrease       0.426075     0.278111       0.295815
4   medium     poor     decrease       0.426220     0.278011       0.295769
5     high     good     decrease       0.425582     0.278449       0.295969
6   medium     good     decrease       0.425582     0.278449       0.295969
7     high     good     decrease       0.425582     0.278449       0.295969
8     high     good     decrease       0.425582   



Training Accuracy for 1000 samples: 0.3714
Validation Accuracy for 1000 samples: 0.3133
Test Accuracy for 1000 samples: 0.4200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step

Predicted Results and Probabilities for 1000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor     decrease       0.370075     0.297063       0.332862
1   medium     poor     increase       0.329383     0.326408       0.344210
2   medium     poor     increase       0.329383     0.326408       0.344210
3   medium     poor     increase       0.329383     0.326408       0.344210
4   medium     good     decrease       0.364482     0.300760       0.334758
5   medium     good     decrease       0.364482     0.300760       0.334758
6   medium  average     decrease       0.345972     0.313179       0.340849
7   medium     good     decrease       0.364482     0.300760       0.334758
8   medium  average     decrease       0.34597



Training Accuracy for 2000 samples: 0.3629
Validation Accuracy for 2000 samples: 0.3800
Test Accuracy for 2000 samples: 0.3767
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 

Predicted Results and Probabilities for 2000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     poor     decrease       0.364323     0.311757       0.323920
1   medium  average     decrease       0.364319     0.311760       0.323921
2      low  average     decrease       0.363818     0.312384       0.323798
3   medium  average     decrease       0.364319     0.311760       0.323921
4      low     good     decrease       0.362234     0.314364       0.323402
5   medium  average     decrease       0.364319     0.311760       0.323921
6   medium     good     decrease       0.362932     0.313491       0.323577
7   medium  average     decrease       0.364319     0.311760       0.323921
8      low     good     decrease       0.362



Training Accuracy for 3000 samples: 0.4086
Validation Accuracy for 3000 samples: 0.3756
Test Accuracy for 3000 samples: 0.3733
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step

Predicted Results and Probabilities for 3000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium  average       stable       0.299099     0.405771        0.29513
1   medium     poor       stable       0.299099     0.405771        0.29513
2   medium     good       stable       0.299099     0.405771        0.29513
3   medium  average       stable       0.299099     0.405771        0.29513
4      low     poor       stable       0.299099     0.405771        0.29513
5   medium     poor       stable       0.299099     0.405771        0.29513
6      low     poor       stable       0.299099     0.405771        0.29513
7      low     poor       stable       0.299099     0.405771        0.29513
8     high     poor       stable       0.2990



Training Accuracy for 4000 samples: 0.5218
Validation Accuracy for 4000 samples: 0.5167
Test Accuracy for 4000 samples: 0.5000
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step

Predicted Results and Probabilities for 4000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     good     decrease       0.434229     0.371794       0.193977
1      low     poor     decrease       0.484601     0.331559       0.183840
2   medium     good       stable       0.392765     0.405960       0.201274
3     high  average     decrease       0.425575     0.378843       0.195582
4   medium     good       stable       0.392765     0.405960       0.201274
5     high     good       stable       0.352660     0.440022       0.207318
6     high     good       stable       0.352660     0.440022       0.207318
7     high     good       stable       0.352660     0.440022       0.207318
8      low     poor     decrease       0.4846



Training Accuracy for 5000 samples: 0.4011
Validation Accuracy for 5000 samples: 0.3840
Test Accuracy for 5000 samples: 0.4240
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 5000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     poor     decrease       0.421688     0.194701       0.383611
1   medium     poor     decrease       0.421688     0.194701       0.383611
2      low  average     decrease       0.411950     0.217433       0.370618
3   medium  average     decrease       0.390963     0.264441       0.344596
4   medium  average     decrease       0.390963     0.264441       0.344596
5   medium     poor     decrease       0.421688     0.194701       0.383611
6      low     poor     decrease       0.421688     0.194701       0.383611
7   medium     good       stable       0.349412     0.352639       0.297949
8   medium  average     decrease       0.3909



Training Accuracy for 6000 samples: 0.5295
Validation Accuracy for 6000 samples: 0.5467
Test Accuracy for 6000 samples: 0.5122
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 6000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high  average       stable       0.274948     0.544459       0.180593
1      low     poor       stable       0.272093     0.487750       0.240157
2     high     good       stable       0.274871     0.551314       0.173815
3   medium  average       stable       0.274476     0.520355       0.205168
4     high     good       stable       0.274871     0.551314       0.173815
5     high  average       stable       0.274948     0.544459       0.180593
6   medium     poor       stable       0.274110     0.513020       0.212870
7     high     good       stable       0.274871     0.551314       0.173815
8     high     good       stable       0.2748



Training Accuracy for 7000 samples: 0.6888
Validation Accuracy for 7000 samples: 0.7267
Test Accuracy for 7000 samples: 0.6743
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 7000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     good     increase       0.249599     0.121329       0.629072
1   medium     good     increase       0.249599     0.121329       0.629072
2      low     good     increase       0.249599     0.121329       0.629072
3      low     good     increase       0.249599     0.121329       0.629072
4   medium  average     increase       0.249783     0.121385       0.628832
5   medium  average     increase       0.249783     0.121385       0.628832
6   medium  average     increase       0.249783     0.121385       0.628832
7      low     good     increase       0.249599     0.121329       0.629072
8      low     good     increase       0.2495



Training Accuracy for 8000 samples: 0.3546
Validation Accuracy for 8000 samples: 0.3233
Test Accuracy for 8000 samples: 0.3400
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 8000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     good     decrease       0.354273     0.338874       0.306853
1   medium  average     decrease       0.354273     0.338874       0.306853
2   medium  average     decrease       0.354273     0.338874       0.306853
3   medium     good     decrease       0.354273     0.338874       0.306853
4   medium     good     decrease       0.354273     0.338874       0.306853
5      low     poor     decrease       0.354273     0.338874       0.306853
6      low     good     decrease       0.354273     0.338874       0.306853
7   medium  average     decrease       0.354273     0.338874       0.306853
8     high  average     decrease       0.3542



Training Accuracy for 9000 samples: 0.4633
Validation Accuracy for 9000 samples: 0.4689
Test Accuracy for 9000 samples: 0.4807
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 9000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium  average       stable       0.315763     0.465121       0.219115
1     high     good       stable       0.315763     0.465121       0.219115
2     high     poor       stable       0.315763     0.465121       0.219115
3   medium  average       stable       0.315763     0.465121       0.219115
4     high     good       stable       0.315763     0.465121       0.219115
5     high     poor       stable       0.315763     0.465121       0.219115
6     high     poor       stable       0.315763     0.465121       0.219115
7     high     good       stable       0.315763     0.465121       0.219115
8     high  average       stable       0.3157



Training Accuracy for 10000 samples: 0.3614
Validation Accuracy for 10000 samples: 0.3440
Test Accuracy for 10000 samples: 0.3500
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 10000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     poor       stable        0.35588     0.359724       0.284395
1   medium  average       stable        0.35588     0.359724       0.284395
2   medium  average       stable        0.35588     0.359724       0.284395
3   medium     poor       stable        0.35588     0.359724       0.284395
4   medium  average       stable        0.35588     0.359724       0.284395
5   medium  average       stable        0.35588     0.359724       0.284395
6   medium     poor       stable        0.35588     0.359724       0.284395
7   medium  average       stable        0.35588     0.359724       0.284395
8   medium  average       stable        0



Training Accuracy for 11000 samples: 0.4666
Validation Accuracy for 11000 samples: 0.4739
Test Accuracy for 11000 samples: 0.4606
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 11000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium  average     decrease       0.465748     0.281518       0.252734
1      low     good     decrease       0.537149     0.214261       0.248590
2     high  average     decrease       0.391479     0.359196       0.249325
3      low  average     decrease       0.508527     0.240346       0.251127
4     high  average     decrease       0.391479     0.359196       0.249325
5   medium  average     decrease       0.465748     0.281518       0.252734
6     high  average     decrease       0.391479     0.359196       0.249325
7      low     good     decrease       0.537149     0.214261       0.248590
8      low  average     decrease       0.



Training Accuracy for 12000 samples: 0.4356
Validation Accuracy for 12000 samples: 0.4411
Test Accuracy for 12000 samples: 0.4289
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 12000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     poor     increase       0.209018     0.355207       0.435774
1      low  average     increase       0.209018     0.355207       0.435774
2   medium  average     increase       0.209018     0.355207       0.435774
3     high  average     increase       0.209018     0.355207       0.435774
4   medium     good     increase       0.209018     0.355207       0.435774
5     high     good     increase       0.209018     0.355207       0.435774
6   medium     poor     increase       0.209018     0.355207       0.435774
7   medium     poor     increase       0.209018     0.355207       0.435774
8      low     poor     increase       0.



Training Accuracy for 13000 samples: 0.3979
Validation Accuracy for 13000 samples: 0.3856
Test Accuracy for 13000 samples: 0.3774
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 13000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     poor       stable       0.318487     0.382641       0.298873
1   medium     poor       stable       0.318487     0.382641       0.298873
2   medium     good       stable       0.265286     0.432762       0.301952
3   medium  average       stable       0.293627     0.405507       0.300866
4   medium     poor       stable       0.318487     0.382641       0.298873
5   medium     poor       stable       0.318487     0.382641       0.298873
6   medium  average       stable       0.293627     0.405507       0.300866
7   medium     poor       stable       0.318487     0.382641       0.298873
8   medium     poor       stable       0.



Training Accuracy for 14000 samples: 0.5071
Validation Accuracy for 14000 samples: 0.5019
Test Accuracy for 14000 samples: 0.5033
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 14000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium  average     decrease       0.373959     0.325652       0.300389
1      low     good     increase       0.175153     0.358357       0.466491
2     high  average     decrease       0.506856     0.279137       0.214007
3   medium     poor     decrease       0.487403     0.286777       0.225821
4      low     good     increase       0.175153     0.358357       0.466491
5      low  average     increase       0.250172     0.353463       0.396365
6     high  average     decrease       0.506856     0.279137       0.214007
7   medium     good     increase       0.231743     0.355772       0.412486
8     high     good     decrease       0.



Training Accuracy for 15000 samples: 0.4540
Validation Accuracy for 15000 samples: 0.4458
Test Accuracy for 15000 samples: 0.4600
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Predicted Results and Probabilities for 15000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     poor     increase       0.363975     0.239792       0.396233
1   medium     good     decrease       0.423914     0.193693       0.382393
2   medium     good     decrease       0.423914     0.193693       0.382393
3     high     poor     increase       0.363975     0.239792       0.396233
4     high     good     increase       0.310794     0.287099       0.402107
5   medium     good     decrease       0.423914     0.193693       0.382393
6     high     poor     increase       0.363975     0.239792       0.396233
7   medium     good     decrease       0.423914     0.193693       0.382393
8     high     poor     increase       0.



Training Accuracy for 16000 samples: 0.4410
Validation Accuracy for 16000 samples: 0.4325
Test Accuracy for 16000 samples: 0.4387
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 16000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high  average     decrease       0.359154     0.286199       0.354647
1     high  average     decrease       0.359154     0.286199       0.354647
2     high  average     decrease       0.359154     0.286199       0.354647
3     high  average     decrease       0.359154     0.286199       0.354647
4     high     poor     increase       0.349269     0.284096       0.366635
5      low     poor     increase       0.350294     0.284323       0.365384
6      low  average     decrease       0.360182     0.286406       0.353412
7      low  average     decrease       0.360182     0.286406       0.353412
8     high     good     decrease       0.



Training Accuracy for 17000 samples: 0.4394
Validation Accuracy for 17000 samples: 0.4329
Test Accuracy for 17000 samples: 0.4353
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 17000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low     good       stable       0.308663     0.412555       0.278782
1      low     poor       stable       0.228851     0.445809       0.325340
2   medium     poor       stable       0.198521     0.509845       0.291634
3      low     good       stable       0.308663     0.412555       0.278782
4   medium     poor       stable       0.198521     0.509845       0.291634
5      low     good       stable       0.308663     0.412555       0.278782
6      low     good       stable       0.308663     0.412555       0.278782
7      low     poor       stable       0.228851     0.445809       0.325340
8      low  average       stable       0.



Training Accuracy for 18000 samples: 0.4694
Validation Accuracy for 18000 samples: 0.4637
Test Accuracy for 18000 samples: 0.4648
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 18000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0      low  average       stable       0.364955     0.384898       0.250146
1   medium  average       stable       0.364865     0.385535       0.249600
2   medium     good       stable       0.358969     0.420790       0.220241
3     high  average       stable       0.364775     0.386171       0.249054
4      low  average       stable       0.364955     0.384898       0.250146
5   medium  average       stable       0.364865     0.385535       0.249600
6     high  average       stable       0.364775     0.386171       0.249054
7      low  average       stable       0.364955     0.384898       0.250146
8     high     good       stable       0.



Training Accuracy for 19000 samples: 0.5374
Validation Accuracy for 19000 samples: 0.5558
Test Accuracy for 19000 samples: 0.5467
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 19000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0   medium     good       stable       0.334859     0.410356       0.254785
1   medium     good       stable       0.334859     0.410356       0.254785
2     high  average       stable       0.212224     0.584004       0.203771
3   medium     poor       stable       0.212184     0.584066       0.203750
4   medium     good       stable       0.334859     0.410356       0.254785
5   medium  average       stable       0.289669     0.471123       0.239208
6     high     good       stable       0.290723     0.469662       0.239616
7     high     poor       stable       0.203160     0.598036       0.198805
8   medium     poor       stable       0.



Training Accuracy for 20000 samples: 0.5280
Validation Accuracy for 20000 samples: 0.5210
Test Accuracy for 20000 samples: 0.5207
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Predicted Results and Probabilities for 20000 samples (First 15 rows):
        IR       EI Predicted_SP  Prob_decrease  Prob_stable  Prob_increase
0     high     poor       stable       0.348407     0.516262       0.135331
1     high     good     decrease       0.503564     0.386972       0.109464
2     high     good     decrease       0.503564     0.386972       0.109464
3   medium     good     decrease       0.494148     0.394701       0.111151
4   medium     poor       stable       0.348391     0.516275       0.135333
5   medium     poor       stable       0.348391     0.516275       0.135333
6     high     poor       stable       0.348407     0.516262       0.135331
7     high     good     decrease       0.503564     0.386972       0.109464
8     high  average       stable       0.

# K-L Divergence NN Dense Data

In [32]:
# Sample sizes to loop through
sample_sizes = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000]

# Prepare a list to store K-L divergence results
kl_divergence_results = []

# Loop through each sample size
for size in sample_sizes:
    print(f"\nProcessing sample size: {size}")

    # Load the combined BN data for the current sample size
    combined_data_bn = pd.read_csv(f'combined_probabilities_{size}.csv')

    # Split the data into train, validation, and test sets
    X = combined_data_bn[['IR_State', 'EI_State']]
    y = combined_data_bn[['Chosen_SP_State', 'SP_Probabilities (decrease, stable, increase)']]

    # Refresh the data split for each iteration
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, shuffle=False, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=False, random_state=42)

    # Get the test indices
    test_indices = X_test.index

    # Get the corresponding rows from the combined BN data using the test indices
    bn_test_data = combined_data_bn.loc[test_indices]

    # Load the corresponding NN test data for the current sample size
    nn_test_data = pd.read_csv(f'test_data_nn_{size}.csv')

    # Extract NN predicted probabilities and BN ground truth probabilities
    nn_probs = nn_test_data[['Prob_decrease', 'Prob_stable', 'Prob_increase']].values
    bn_probs = bn_test_data['SP_Probabilities (decrease, stable, increase)'].apply(
        lambda x: np.array(list(map(float, x.strip('[]').split(','))))
    ).values

    # Calculate K-L divergence between NN predicted probabilities and BN ground truth probabilities
    kl_divergences = []
    output_data = []  # For tabulating output

    for i in range(len(nn_probs)):
        nn_prob = nn_probs[i]
        bn_prob = bn_probs[i]

        # Ensure both are valid probability distributions
        epsilon = 1e-10
        nn_prob = np.clip(nn_prob, epsilon, 1)
        bn_prob = np.clip(bn_prob, epsilon, 1)

        # Normalize to ensure they sum to 1
        nn_prob /= nn_prob.sum()
        bn_prob /= bn_prob.sum()

        # Compute K-L divergence
        kl_div = entropy(bn_prob, nn_prob)
        kl_divergences.append(kl_div)

        # Add data to output for tabulation
        output_data.append({
            'Sample_Index': i,
            'IR': bn_test_data.iloc[i]['IR_State'],
            'EI': bn_test_data.iloc[i]['EI_State'],
            'Ground_Truth_Probs': ', '.join([f'{prob:.4f}' for prob in bn_prob]),
            'NN_Probs': ', '.join([f'{prob:.4f}' for prob in nn_prob]),
            'KL_Divergence': f'{kl_div:.4f}'
        })

    # Create a DataFrame for the output data and tabulate the first few rows
    output_df = pd.DataFrame(output_data)
    print(f"\nK-L Divergence Results for {size} samples (First 5 rows):\n")
    print(tabulate(output_df.head(5), headers='keys', tablefmt='grid'))

    # Calculate and display the average K-L divergence for this sample size
    average_kl_divergence = np.mean(kl_divergences)
    std_kl_divergence = np.std(kl_divergences)
    print(f"\nAverage K-L Divergence for {size} samples: {average_kl_divergence:.4f}, Std Dev: {std_kl_divergence:.4f}")

    # Append the results to the list
    kl_divergence_results.append({
        'Sample_Size': size,
        'Average_KL_Divergence': average_kl_divergence,
        'Std_Dev': std_kl_divergence
    })

# Save the K-L divergence results to a CSV file
kl_divergence_df = pd.DataFrame(kl_divergence_results)
kl_divergence_df.to_csv('kl_div_NN_2_3_dense.csv', index=False)

print("\nAll sample sizes have been processed and K-L divergences calculated. Results saved to 'kl_div_NN_2_3_dense.csv'.")


Processing sample size: 50

K-L Divergence Results for 50 samples (First 5 rows):

+----+----------------+------+---------+------------------------+------------------------+-----------------+
|    |   Sample_Index | IR   | EI      | Ground_Truth_Probs     | NN_Probs               |   KL_Divergence |
|  0 |              0 | high | poor    | 0.0743, 0.3827, 0.5430 | 0.1948, 0.2648, 0.5404 |          0.072  |
+----+----------------+------+---------+------------------------+------------------------+-----------------+
|  1 |              1 | low  | poor    | 0.4775, 0.4027, 0.1198 | 0.3331, 0.3345, 0.3324 |          0.1244 |
+----+----------------+------+---------+------------------------+------------------------+-----------------+
|  2 |              2 | low  | average | 0.2094, 0.4506, 0.3400 | 0.2666, 0.3162, 0.4172 |          0.0395 |
+----+----------------+------+---------+------------------------+------------------------+-----------------+
|  3 |              3 | high | poor    | 0.0