In [1]:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy

In [5]:
# Function to build and evaluate the model
def train_deep_learning_model(X, y, layers_config, data_size):
    results = {}
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize features
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    # Build model
    model = Sequential()
    model.add(Dense(layers_config[0], input_dim=X_train.shape[1], activation='relu'))
    for units in layers_config[1:]:
        model.add(Dense(units, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer=Adam(),
                  loss=BinaryCrossentropy(),
                  metrics=[BinaryAccuracy()])

    start_time = time.time()
    history = model.fit(X_train, y_train,
                        epochs=30,
                        batch_size=32,
                        validation_data=(X_val, y_val),
                        verbose=0)
    end_time = time.time()
    
    # Record metrics
    training_error = 1 - history.history['binary_accuracy'][-1]
    validation_error = 1 - history.history['val_binary_accuracy'][-1]
    exec_time = end_time - start_time

    results['Data Size'] = data_size
    results['Hidden Layers'] = '×'.join(map(str, layers_config))
    results['Training Error'] = round(training_error, 4)
    results['Validation Error'] = round(validation_error, 4)
    results['Execution Time (s)'] = round(exec_time, 2)

    return results

# Run experiments
dataset_sizes = [1000, 10000, 100000]
layer_configs = [[4], [4, 4]]
all_results = []

for size in dataset_sizes:
    df = pd.read_csv(f"synthetic_data{size}.csv")
    X = df.drop('outcome', axis=1).values
    y = df['outcome'].values

    for config in layer_configs:
        result = train_deep_learning_model(X, y, config, size)
        all_results.append(result)

# Save and display results
results_df = pd.DataFrame(all_results)
print(results_df)
results_df.to_csv("deep_learning_results.csv", index=False)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


   Data Size Hidden Layers  Training Error  Validation Error  \
0       1000             4          0.0400            0.0300   
1       1000           4×4          0.0400            0.1000   
2      10000             4          0.0030            0.0040   
3      10000           4×4          0.0021            0.0070   
4     100000             4          0.2419            0.2425   
5     100000           4×4          0.2419            0.2425   

   Execution Time (s)  
0                1.19  
1                1.14  
2                3.19  
3                3.33  
4                9.10  
5                9.49  
