<a href="https://colab.research.google.com/github/nonyeezeh/Research-Project-Code/blob/main/NN_Sparse_1_3_Relu_12.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
pip install pgmpy

Collecting pgmpy
  Downloading pgmpy-0.1.26-py3-none-any.whl.metadata (9.1 kB)
Downloading pgmpy-0.1.26-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pgmpy
Successfully installed pgmpy-0.1.26


In [2]:
import numpy as np
import pandas as pd
from pgmpy.estimators import HillClimbSearch, BicScore, MaximumLikelihoodEstimator
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import BayesianEstimator
from sklearn.model_selection import train_test_split
from scipy.stats import entropy
from tabulate import tabulate

from tensorflow.keras import models, layers, regularizers, callbacks
from sklearn.model_selection import train_test_split

# Bayesian Network Data Generation 50, ..., 20000 Samples (sparse)

In [81]:
# Function to generate CPDs for the sparse structure: 11 parent nodes plus SP (12 nodes total)
def generate_cpds_sparse_12_total_nodes():
    """Draw a fresh set of random conditional probability tables.

    Every table is filled with uniform random weights from NumPy's global RNG
    and normalised along axis 0, so axis 0 is always the distribution of the
    node itself and the remaining axes index the states of its parent(s).

    Returns:
        A 12-tuple, in this order:
          0. IR                       -- shape (3,)
          1. EI  given IR             -- shape (3, 3)
          2. IRT given EI             -- shape (3, 3)
          3. MS  given IR             -- shape (3, 3)
          4. GEO given EI             -- shape (3, 3)
          5. UE  given IRT            -- shape (3, 3)
          6. GDP given GEO            -- shape (3, 3)
          7. INF given MS             -- shape (3, 3)
          8. IRP given UE             -- shape (3, 3)
          9. INV given GDP            -- shape (3, 3)
         10. CI  given INF            -- shape (3, 3)
         11. SP  given the 11 nodes above -- shape (3,) * 12
    """
    def random_cpd(*shape):
        # Random positive weights, scaled so that each slice along axis 0 sums to 1.
        weights = np.random.rand(*shape)
        return weights / weights.sum(axis=0, keepdims=True)

    # The only node without a parent.
    root_cpd = random_cpd(3)

    # Ten single-parent tables; they are drawn in the order documented above.
    single_parent_cpds = [random_cpd(3, 3) for _ in range(10)]

    # SP: one axis for its own three states plus one axis per parent node.
    sp_cpd = random_cpd(*([3] * 12))

    return (root_cpd, *single_parent_cpds, sp_cpd)

# Function to generate and save samples with the sparse structure of 12 nodes total
def generate_and_save_samples_sparse_12_total_nodes(ir_probs, ei_probs, irt_probs, ms_probs, geo_probs, ue_probs, gdp_probs, inf_probs, irp_probs, inv_probs, ci_probs, sp_probs, sample_size, filename):
    """Sample rows from the network, write them to a CSV and print a preview.

    Each row is drawn node by node: IR first, then every other node
    conditioned on the already-drawn state of its single parent, and finally
    SP conditioned on all eleven other nodes.

    Args:
        ir_probs: probabilities of the three IR states.
        ei_probs, irt_probs, ms_probs, geo_probs, ue_probs, gdp_probs,
        inf_probs, irp_probs, inv_probs, ci_probs: conditional tables indexed
            as ``table[:, parent_state_index]``.
        sp_probs: table indexed as ``sp_probs[:, ir, ei, irt, ms, geo, ue,
            gdp, inf, irp, inv, ci]``.
        sample_size: number of rows to generate.
        filename: path of the CSV file to write (without the index column).
    """
    levels = ['low', 'medium', 'high']
    sp_labels = ['decrease', 'stable', 'increase']

    # (output column, parent's output column, conditional table, state labels),
    # listed in the order the nodes are drawn and written to the row.
    child_specs = (
        ('EI_State', 'IR_State', ei_probs, ['poor', 'average', 'good']),
        ('IRT_State', 'EI_State', irt_probs, ['weak', 'moderate', 'strong']),
        ('MS_State', 'IR_State', ms_probs, levels),
        ('GEO_State', 'EI_State', geo_probs, ['urban', 'suburban', 'rural']),
        ('UE_State', 'IRT_State', ue_probs, levels),
        ('GDP_State', 'GEO_State', gdp_probs, levels),
        ('INF_State', 'MS_State', inf_probs, levels),
        ('IRP_State', 'UE_State', irp_probs, levels),
        ('INV_State', 'GDP_State', inv_probs, levels),
        ('CI_State', 'INF_State', ci_probs, levels),
    )

    rows = []
    for _ in range(sample_size):
        # `drawn` maps column name -> sampled state index, `row` maps column name -> label.
        drawn = {'IR_State': np.random.choice(3, p=ir_probs)}
        row = {'IR_State': levels[drawn['IR_State']]}

        for column, parent_column, table, labels in child_specs:
            drawn[column] = np.random.choice(3, p=table[:, drawn[parent_column]])
            row[column] = labels[drawn[column]]

        # Insertion order of `drawn` (IR, EI, IRT, MS, GEO, UE, GDP, INF, IRP,
        # INV, CI) is exactly the parent-axis order of `sp_probs`.
        sp_distribution = sp_probs[(slice(None),) + tuple(drawn.values())]
        sp_choice = np.random.choice(3, p=sp_distribution)

        # Keep the SP distribution that was sampled from next to the sampled state.
        row['SP_Probabilities (decrease, stable, increase)'] = ', '.join(
            [f'{prob:.4f}' for prob in sp_distribution]
        )
        row['Chosen_SP_State'] = sp_labels[sp_choice]
        rows.append(row)

    # One DataFrame built from all rows, then persisted without the index.
    output_df = pd.DataFrame(rows)
    output_df.to_csv(filename, index=False)

    # Preview of the first rows for visual confirmation.
    print(f"\nSample size: {sample_size} - First few rows of generated samples:\n")
    print(tabulate(output_df.head(), headers='keys', tablefmt='grid'))

# Generate and save samples for sample sizes
sample_sizes = [50, 100, 500, 1000, 5000, 10000, 15000, 20000]

# Seed NumPy's global RNG (used by both generator functions above) so that a
# fresh "Restart & Run All" regenerates the same CPDs and the same CSV files.
np.random.seed(42)

for size in sample_sizes:
    # NOTE(review): a new random set of CPDs is drawn for every sample size, so
    # each CSV is sampled from a different network -- confirm this is intended
    # before comparing metrics across sample sizes.
    (ir_probs, ei_probs, irt_probs, ms_probs, geo_probs, ue_probs, gdp_probs, inf_probs, irp_probs, inv_probs, ci_probs, sp_probs) = generate_cpds_sparse_12_total_nodes()
    generate_and_save_samples_sparse_12_total_nodes(ir_probs, ei_probs, irt_probs, ms_probs, geo_probs, ue_probs, gdp_probs, inf_probs, irp_probs, inv_probs, ci_probs, sp_probs, size, f'combined_probabilities_{size}.csv')

print("\nGeneration and saving of individual samples complete for all sample sizes!")


Sample size: 50 - First few rows of generated samples:

+----+------------+------------+-------------+------------+-------------+------------+-------------+-------------+-------------+-------------+------------+-------------------------------------------------+-------------------+
|    | IR_State   | EI_State   | IRT_State   | MS_State   | GEO_State   | UE_State   | GDP_State   | INF_State   | IRP_State   | INV_State   | CI_State   | SP_Probabilities (decrease, stable, increase)   | Chosen_SP_State   |
|  0 | medium     | poor       | weak        | high       | urban       | high       | medium      | medium      | medium      | high        | medium     | 0.4686, 0.5207, 0.0107                          | stable            |
+----+------------+------------+-------------+------------+-------------+------------+-------------+-------------+-------------+-------------+------------+-------------------------------------------------+-------------------+
|  1 | medium     | poor       | weak  

# NN & KL-Div

In [82]:
# Sample sizes to loop through. For every entry this cell reads the file
# 'combined_probabilities_{size}.csv', so the list has to match the files
# that were written to disk beforehand.
sample_sizes = [50, 100, 500, 1000, 5000, 10000, 15000, 20000]

# Define the Neural Network architecture with L2 regularization
def create_nn_model(hidden_layers=1, nodes_per_layer=3, l2_lambda=0.01, n_features=11, dropout_rate=0.2):
    """Build and compile a feed-forward softmax classifier for the three SP classes.

    Args:
        hidden_layers: number of Dense(relu) + Dropout pairs to stack.
        nodes_per_layer: units in each hidden Dense layer.
        l2_lambda: L2 penalty applied to the kernel of every Dense layer.
        n_features: width of the input vector (defaults to the 11 encoded
            parent-node columns).
        dropout_rate: rate of the Dropout layer that follows each hidden layer.

    Returns:
        A compiled Sequential model (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric) that expects integer class labels.
    """
    model = models.Sequential()
    # Declare the input with Input(shape=...); InputLayer(input_shape=...) is
    # deprecated in Keras 3 in favour of `shape`.
    model.add(layers.Input(shape=(n_features,)))

    # Hidden layers with L2 regularization and Dropout
    for layer_num in range(hidden_layers):
        model.add(layers.Dense(
            nodes_per_layer,
            activation='relu',
            kernel_regularizer=regularizers.l2(l2_lambda),  # L2 regularization
            name=f"hidden_layer_{layer_num + 1}"
        ))
        model.add(layers.Dropout(dropout_rate))  # Dropout layer to reduce overfitting

    # Output layer (3 classes: decrease, stable, increase) with L2 regularization
    model.add(layers.Dense(
        3,
        activation='softmax',
        kernel_regularizer=regularizers.l2(l2_lambda),
        name="output_layer"
    ))

    # Sparse loss: labels are passed as integers 0/1/2 rather than one-hot vectors.
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

# Prepare a dictionary to store extracted data for each sample size
extracted_data = {}

# Label -> integer encodings for every node (identical for all sample sizes,
# so they are defined once instead of on every loop iteration).
ir_map = {'low': 0, 'medium': 1, 'high': 2}
ei_map = {'poor': 0, 'average': 1, 'good': 2}
irt_map = {'weak': 0, 'moderate': 1, 'strong': 2}
ms_map = {'low': 0, 'medium': 1, 'high': 2}
geo_map = {'urban': 0, 'suburban': 1, 'rural': 2}
ue_probs_map = {'low': 0, 'medium': 1, 'high': 2}
gdp_probs_map = {'low': 0, 'medium': 1, 'high': 2}
inf_probs_map = {'low': 0, 'medium': 1, 'high': 2}
irp_probs_map = {'low': 0, 'medium': 1, 'high': 2}
inv_probs_map = {'low': 0, 'medium': 1, 'high': 2}
ci_probs_map = {'low': 0, 'medium': 1, 'high': 2}
sp_map = {'decrease': 0, 'stable': 1, 'increase': 2}

# (source column, encoded column, mapping), in the order the encoded columns
# are appended to the extracted frame.
column_encodings = [
    ('IR_State', 'IR_encoded', ir_map),
    ('EI_State', 'EI_encoded', ei_map),
    ('IRT_State', 'IRT_encoded', irt_map),
    ('MS_State', 'MS_encoded', ms_map),
    ('GEO_State', 'GEO_encoded', geo_map),
    ('UE_State', 'UE_encoded', ue_probs_map),
    ('GDP_State', 'GDP_encoded', gdp_probs_map),
    ('INF_State', 'INF_encoded', inf_probs_map),
    ('IRP_State', 'IRP_encoded', irp_probs_map),
    ('INV_State', 'INV_encoded', inv_probs_map),
    ('CI_State', 'CI_encoded', ci_probs_map),
    ('Chosen_SP_State', 'SP_encoded', sp_map),
]

# The eleven node-state columns plus the sampled SP state.
required_columns = [source_col for source_col, _, _ in column_encodings]

# Extract the required columns from all sample sizes first
for size in sample_sizes:
    outcomes_file = f'combined_probabilities_{size}.csv'
    df = pd.read_csv(outcomes_file)

    # Take an explicit copy: adding columns to a plain column-subset of `df`
    # is chained assignment and triggers pandas' SettingWithCopyWarning.
    df_extracted = df[required_columns].copy()

    # Encode categorical variables for all states
    for source_col, encoded_col, mapping in column_encodings:
        encoded = df_extracted[source_col].map(mapping)
        # `.map` turns any label missing from the mapping into NaN; fail loudly
        # instead of passing NaN features/labels on to the model.
        if encoded.isna().any():
            raise ValueError(
                f"{outcomes_file}: column '{source_col}' contains labels outside {list(mapping)}"
            )
        df_extracted[encoded_col] = encoded

    extracted_data[size] = df_extracted

# Initialize list to store K-L divergence and standard deviation results
results = []
epsilon = 1e-10  # Small value for smoothing

# Loop-invariant column lists and label lookup.
feature_columns = ['IR_encoded', 'EI_encoded', 'IRT_encoded', 'MS_encoded', 'GEO_encoded', 'UE_encoded',
                   'GDP_encoded', 'INF_encoded', 'IRP_encoded', 'INV_encoded', 'CI_encoded']
state_columns = ['IR_State', 'EI_State', 'IRT_State', 'MS_State', 'GEO_State', 'UE_State',
                 'GDP_State', 'INF_State', 'IRP_State', 'INV_State', 'CI_State']
sp_reverse_map = ['decrease', 'stable', 'increase']  # class index -> SP label

for size in sample_sizes:
    df = extracted_data[size]

    # Features (all encoded columns) and labels (SP)
    X = df[feature_columns]
    y = df['SP_encoded']

    # Split into training (70%), validation (15%), and test (15%) sets
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=True, random_state=42)

    # Create and train the Neural Network model
    nn_model = create_nn_model(hidden_layers=1, nodes_per_layer=3, l2_lambda=0.01)
    early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    nn_model.fit(X_train, y_train, epochs=25, batch_size=16, validation_data=(X_val, y_val), callbacks=[early_stopping], verbose=0)

    # Evaluate model accuracy
    train_loss, train_accuracy = nn_model.evaluate(X_train, y_train, verbose=0)
    val_loss, val_accuracy = nn_model.evaluate(X_val, y_val, verbose=0)
    test_loss, test_accuracy = nn_model.evaluate(X_test, y_test, verbose=0)

    print(f"\nSample size: {size}")
    print(f"Training Accuracy: {train_accuracy:.4f}")
    print(f"Validation Accuracy: {val_accuracy:.4f}")
    print(f"Test Accuracy: {test_accuracy:.4f}")

    # Predict on test data (silent, like the evaluate calls above)
    predictions = nn_model.predict(X_test, verbose=0)
    predicted_classes = predictions.argmax(axis=1)

    # Class frequencies of the true labels and of the predicted labels on the test set
    ground_truth_probabilities = y_test.value_counts(normalize=True).sort_index()
    predicted_probabilities = pd.Series(predicted_classes).value_counts(normalize=True).sort_index()

    # Reindex both distributions onto the same classes and add smoothing.
    # A class that is missing from one side gets `epsilon`, so when the model
    # never predicts a class that occurs in the test labels, the divergence is
    # dominated by the log(1/epsilon) term.
    all_categories = sorted(set(ground_truth_probabilities.index).union(set(predicted_probabilities.index)))
    ground_truth_probabilities = ground_truth_probabilities.reindex(all_categories, fill_value=epsilon)
    predicted_probabilities = predicted_probabilities.reindex(all_categories, fill_value=epsilon)

    # Calculate K-L divergence and standard deviation
    kl_divergence = entropy(pk=ground_truth_probabilities, qk=predicted_probabilities)
    std_dev = np.std(predicted_probabilities - ground_truth_probabilities)

    results.append({
        'Sample_Size': size,
        'K-L_Divergence': kl_divergence,
        'Standard_Deviation': std_dev
    })

    print(f"K-L Divergence: {kl_divergence:.4f}")
    print(f"Standard Deviation: {std_dev:.4f}")

    # Map integers back to the original SP labels
    predicted_labels = [sp_reverse_map[label] for label in predicted_classes]

    # Create DataFrame for displaying nodes, predicted SP, and chosen SP.
    # X_test.index holds index *labels*, so the rows are selected with .loc;
    # .iloc would treat them as positions and is only correct for a default
    # RangeIndex.
    result_df = (
        df.loc[X_test.index, state_columns + ['Chosen_SP_State']]
        .rename(columns={'Chosen_SP_State': 'Chosen_SP'})
        .assign(Predicted_SP=predicted_labels)
    )
    print(f"\nPredicted Results for {size} samples (First 10 rows):")
    print(result_df.head(10))

    # Save results for this sample size in a dedicated CSV
    result_df.to_csv(f'test_results_{size}.csv', index=False)

# Save only K-L and Standard Deviation results to a summary file
results_df = pd.DataFrame(results)
results_df.to_csv('kl_std_results_summary.csv', index=False)

print("\nAll K-L divergence and standard deviation results have been saved in 'kl_std_results_summary.csv'.")




Sample size: 50
Training Accuracy: 0.4286
Validation Accuracy: 0.2857
Test Accuracy: 0.7500
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
K-L Divergence: 5.0208
Standard Deviation: 0.1768

Predicted Results for 50 samples (First 10 rows):
   IR_State EI_State IRT_State MS_State GEO_State UE_State GDP_State  \
19   medium     good  moderate   medium     urban     high      high   
4    medium     poor  moderate   medium  suburban   medium      high   
13   medium     poor      weak   medium     urban   medium      high   
8    medium     poor  moderate     high     urban   medium      high   
48   medium  average    strong   medium  suburban     high      high   
32   medium     good  moderate     high     urban      low      high   
30      low  average      weak      low  suburban   medium    medium   
39   medium     poor  moderate      low     rural      low       low   

   INF_State IRP_State INV_State CI_State Chosen_SP Predicted_SP  
19       low      




Sample size: 100
Training Accuracy: 0.4286
Validation Accuracy: 0.2667
Test Accuracy: 0.1333
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
K-L Divergence: 10.4019
Standard Deviation: 0.4838

Predicted Results for 100 samples (First 10 rows):
   IR_State EI_State IRT_State MS_State GEO_State UE_State GDP_State  \
96     high     poor    strong      low  suburban      low       low   
4    medium     good    strong   medium     urban   medium      high   
42     high     good      weak      low     urban   medium    medium   
77   medium     poor    strong   medium  suburban      low    medium   
10     high     poor  moderate     high  suburban      low    medium   
0       low     poor      weak   medium  suburban   medium    medium   
9       low  average    strong      low     rural   medium      high   
69   medium     good      weak   medium  suburban      low    medium   
73     high  average      weak      low     urban      low       low   
83   medium




Sample size: 500
Training Accuracy: 0.4057
Validation Accuracy: 0.5067
Test Accuracy: 0.3467
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
K-L Divergence: 6.7479
Standard Deviation: 0.2304

Predicted Results for 500 samples (First 10 rows):
    IR_State EI_State IRT_State MS_State GEO_State UE_State GDP_State  \
290     high     poor      weak      low     urban   medium       low   
316     high     good  moderate      low  suburban   medium       low   
117     high     good      weak      low  suburban   medium    medium   
455     high     poor  moderate      low     rural     high      high   
268   medium     good    strong   medium  suburban     high    medium   
336   medium     good  moderate   medium  suburban   medium    medium   
79       low     poor      weak      low     urban      low    medium   
208     high  average      weak      low  suburban     high    medium   
238     high     poor  moderate      low     urban   medium       low   
47




Sample size: 1000
Training Accuracy: 0.3557
Validation Accuracy: 0.3000
Test Accuracy: 0.2667
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
K-L Divergence: 0.7222
Standard Deviation: 0.2548

Predicted Results for 1000 samples (First 10 rows):
    IR_State EI_State IRT_State MS_State GEO_State UE_State GDP_State  \
557     high  average  moderate      low     urban     high      high   
798   medium     good  moderate     high     urban      low    medium   
977     high     good  moderate      low     rural      low      high   
136      low     good      weak     high     rural   medium       low   
575      low     poor    strong     high  suburban     high    medium   
544      low     poor  moderate      low     rural      low    medium   
332   medium  average    strong      low     urban   medium    medium   
917      low  average    strong      low     urban      low       low   
678     high     poor    strong      low     urban     high    medium   





Sample size: 5000
Training Accuracy: 0.3431
Validation Accuracy: 0.3453
Test Accuracy: 0.3240
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
K-L Divergence: 14.4673
Standard Deviation: 0.4781

Predicted Results for 5000 samples (First 10 rows):
     IR_State EI_State IRT_State MS_State GEO_State UE_State GDP_State  \
790    medium     good    strong      low  suburban      low       low   
2879     high     good  moderate     high     rural      low      high   
2372   medium     poor      weak     high     urban   medium       low   
1351   medium  average  moderate   medium     rural     high       low   
3382      low  average    strong     high     rural     high       low   
3433     high     good  moderate     high     rural   medium    medium   
1129   medium     poor  moderate   medium  suburban   medium       low   
549    medium     poor  moderate      low  suburban   medium    medium   
2835   medium  average    strong      low     rural     high  




Sample size: 10000
Training Accuracy: 0.3406
Validation Accuracy: 0.3113
Test Accuracy: 0.3287
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
K-L Divergence: 14.3597
Standard Deviation: 0.4748

Predicted Results for 10000 samples (First 10 rows):
     IR_State EI_State IRT_State MS_State GEO_State UE_State GDP_State  \
2697      low  average  moderate      low     rural   medium      high   
6871     high     good    strong     high     urban     high    medium   
3487   medium     poor      weak     high     rural   medium    medium   
92       high     good    strong   medium     rural   medium       low   
9537     high  average      weak     high  suburban     high    medium   
3205   medium  average      weak      low  suburban     high       low   
6641   medium     poor      weak      low     urban   medium    medium   
8909     high     good  moderate     high     urban      low      high   
2884     high     good  moderate     high     urban   medium




Sample size: 15000
Training Accuracy: 0.3336
Validation Accuracy: 0.3480
Test Accuracy: 0.3200
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
K-L Divergence: 14.5594
Standard Deviation: 0.4808

Predicted Results for 15000 samples (First 10 rows):
      IR_State EI_State IRT_State MS_State GEO_State UE_State GDP_State  \
8602       low  average      weak   medium     urban      low      high   
438        low     good      weak      low     rural   medium      high   
8094      high  average  moderate   medium  suburban   medium       low   
14355      low  average    strong      low  suburban     high    medium   
8581      high     poor  moderate      low  suburban   medium    medium   
12358     high  average  moderate      low  suburban   medium      high   
511        low  average      weak      low  suburban      low    medium   
6594    medium     good    strong   medium     urban     high    medium   
5245      high  average      weak      low  suburba




Sample size: 20000
Training Accuracy: 0.3341
Validation Accuracy: 0.3357
Test Accuracy: 0.3423
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
K-L Divergence: 14.0449
Standard Deviation: 0.4650

Predicted Results for 20000 samples (First 10 rows):
      IR_State EI_State IRT_State MS_State GEO_State UE_State GDP_State  \
5348    medium     good    strong     high     rural   medium    medium   
339       high     good      weak     high     urban   medium       low   
13591   medium     poor  moderate      low     rural      low    medium   
8153    medium  average      weak      low  suburban      low      high   
16345      low     good      weak     high     rural   medium    medium   
16404   medium     poor      weak     high     urban     high       low   
17185      low  average  moderate      low     rural      low    medium   
5709    medium     poor    strong     high     rural   medium       low   
13020   medium     poor    strong      low  suburba