In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import time

# Read the raw dataset from the CSV file.
df = pd.read_csv("pima_11.csv")

# Split the table into the label column ("outcome") and the remaining
# predictor columns, both as plain arrays.
y = df["outcome"].values
X = df.drop(columns="outcome").values

# Standardize the predictors; the fitted scaler stays available as `scaler`.
# NOTE(review): the scaler is fitted on every row here, i.e. before
# train_model() carves out its validation split.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

# Function to expand dataset to desired size
def expand_data(X, y, size):
    """Return ``size`` samples obtained by cycling through ``X`` and ``y``.

    Samples are taken along the first axis in their original order and the
    sequence wraps around to the start once it is exhausted, so sample ``i``
    of the result is sample ``i % len(X)`` of the input.  When ``size`` is
    smaller than ``len(X)`` the result is simply the first ``size`` samples.

    Args:
        X: Array-like of samples, indexed along its first axis.
        y: Array-like of targets with the same number of entries as ``X``.
        size: Number of samples to return; must be non-negative.

    Returns:
        Tuple ``(X_exp, y_exp)`` of NumPy arrays, each with ``size`` entries
        along the first axis and the remaining dimensions and dtype of the
        corresponding input.

    Raises:
        ValueError: If ``size`` is negative, if ``X`` is empty, or if ``X``
            and ``y`` do not contain the same number of samples.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    n_samples = len(X)

    if size < 0:
        raise ValueError(f"size must be non-negative, got {size}")
    if n_samples == 0:
        raise ValueError("cannot expand an empty dataset")
    if len(y) != n_samples:
        raise ValueError(
            f"X and y must have the same number of samples, "
            f"got {n_samples} and {len(y)}"
        )

    # Index along axis 0 instead of tiling: this allocates exactly `size`
    # samples and keeps X and y aligned whatever their trailing dimensions.
    idx = np.arange(size) % n_samples
    return X[idx], y[idx]

# Function to build and train model
def train_model(X, y, hidden_layers=(4,), epochs=10, batch_size=32):
    """Train a small dense classifier and report its final losses and fit time.

    The data is split 80/20 into a training and a validation part with a
    fixed seed (``random_state=42``).  The network is a
    ``tf.keras.Sequential`` stack: one ReLU ``Dense`` layer per entry of
    ``hidden_layers`` followed by a single sigmoid output unit, compiled with
    the Adam optimizer and binary cross-entropy loss.

    Args:
        X: 2-D feature array; ``X.shape[1]`` determines the input width.
        y: Targets aligned with the rows of ``X`` (the model is compiled for
            binary cross-entropy).
        hidden_layers: Iterable of unit counts, one per hidden layer.
        epochs: Number of training epochs; must be at least 1.
        batch_size: Mini-batch size passed to ``model.fit``.

    Returns:
        Tuple ``(train_loss, val_loss, exec_time)``: the training and
        validation loss recorded for the last epoch, and the duration of the
        ``model.fit`` call in seconds.

    Raises:
        ValueError: If ``epochs`` is smaller than 1, because no loss would be
            recorded to report.

    Note:
        The split is a plain random split.  If ``X`` contains repeated rows
        (for example data produced by ``expand_data``), copies of the same
        row can land in both parts, so the validation loss is then not a
        measure of performance on unseen samples.
    """
    if epochs < 1:
        raise ValueError(f"epochs must be at least 1, got {epochs}")

    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    model = tf.keras.Sequential()
    model.add(tf.keras.Input(shape=(X.shape[1],)))
    for units in hidden_layers:
        model.add(tf.keras.layers.Dense(units, activation='relu'))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # perf_counter() is monotonic, so the measured interval cannot be
    # distorted by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                        epochs=epochs, batch_size=batch_size, verbose=0)
    exec_time = time.perf_counter() - start

    # Report only the metrics of the final epoch.
    train_loss = history.history['loss'][-1]
    val_loss = history.history['val_loss'][-1]

    return train_loss, val_loss, exec_time

# Experiment grid: dataset sizes to generate and hidden-layer layouts to try.
sizes = [1000, 10000, 100000]
configs = [(4,), (4, 4)]

# Train one model per (size, layout) pair and record its metrics.
results = []
for size in sizes:
    # The expanded dataset depends only on `size`, so build it once per size
    # and reuse it for every layout.
    X_exp, y_exp = expand_data(X, y, size)
    for hidden in configs:
        train_loss, val_loss, exec_time = train_model(X_exp, y_exp, hidden_layers=hidden)
        results.append((size, hidden, train_loss, val_loss, exec_time))

# Collect the metrics into a table, one row per run.  Nothing is printed
# here; pandas is already imported as `pd` at the top of the file.
result_df = pd.DataFrame(
    results,
    columns=[
        "Data Size",
        "Hidden Layers",
        "Train Error",
        "Validation Error",
        "Execution Time (s)",
    ],
)

ModuleNotFoundError: No module named 'ace_tools'

In [5]:
# Print the metrics table built from the training runs (one row per data
# size / hidden-layer layout pair).
print(result_df)

   Data Size Hidden Layers  Train Error  Validation Error  Execution Time (s)
0       1000          (4,)     0.447662          0.386414            3.975968
1       1000        (4, 4)     0.439978          0.421220            2.765510
2      10000          (4,)     0.067075          0.066131            9.446270
3      10000        (4, 4)     0.031297          0.028314            8.492913
4     100000          (4,)     0.010647          0.009515           74.007096
5     100000        (4, 4)     0.005884          0.004808           71.358379
