In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import time
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Disabling TensorFlow warnings
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Function to train and evaluate the model
def train_model(file_path, num_hidden_layers=1):
    # Load dataset
    data = pd.read_csv(file_path)
    X = data.drop(columns=['outcome']).values
    y = data['outcome'].values

    # Normalizing input features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Splitting into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

    # Defining the model
    model = Sequential()
    model.add(Dense(4, input_shape=(X.shape[1],), activation='relu'))

    if num_hidden_layers == 2:
        model.add(Dense(4, activation='relu'))

    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Tracking the time
    start_time = time.time()
    history = model.fit(X_train, y_train,
                        epochs=10,
                        batch_size=32,
                        verbose=0,
                        validation_data=(X_val, y_val))
    end_time = time.time()

    # Collecting the final metrics
    train_error = history.history['loss'][-1]
    val_error = history.history['val_loss'][-1]
    duration = round(end_time - start_time, 2)

    return train_error, val_error, duration


In [2]:
# File mapping
datasets = {
    1000: "synthetic_logistic_data_1000.csv",
    10000: "synthetic_logistic_data_10000.csv",
    100000: "synthetic_logistic_data_100000.csv"
}

# Store results
results = []

# Loop over datasets and configurations
for size, file_path in datasets.items():
    for layers in [1, 2]:
        print(f"Training on {size} rows, {layers} hidden layer(s)...")
        train_err, val_err, exec_time = train_model(file_path, num_hidden_layers=layers)
        results.append({
            "Data Size": size,
            "Configuration": f"{layers} hidden layer(s), 4 nodes",
            "Training Error": round(train_err, 4),
            "Validation Error": round(val_err, 4),
            "Time (sec)": exec_time
        })

# Display results
results_df = pd.DataFrame(results)
print("\n==== Final Summary ====\n")
print(results_df.to_string(index=False))


Training on 1000 rows, 1 hidden layer(s)...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on 1000 rows, 2 hidden layer(s)...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on 10000 rows, 1 hidden layer(s)...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on 10000 rows, 2 hidden layer(s)...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on 100000 rows, 1 hidden layer(s)...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on 100000 rows, 2 hidden layer(s)...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



==== Final Summary ====

 Data Size              Configuration  Training Error  Validation Error  Time (sec)
      1000 1 hidden layer(s), 4 nodes          0.5136            0.5098        2.09
      1000 2 hidden layer(s), 4 nodes          0.4695            0.4583        2.19
     10000 1 hidden layer(s), 4 nodes          0.0730            0.0699        4.90
     10000 2 hidden layer(s), 4 nodes          0.0242            0.0244        5.20
    100000 1 hidden layer(s), 4 nodes          0.0111            0.0104       33.07
    100000 2 hidden layer(s), 4 nodes          0.0060            0.0065       44.71
