In [2]:
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import time

df = pd.read_csv("week11.csv")

# Separate features and target
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Preprocess data - standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Define configurations to test
configurations = [
    {"data_size": 1000, "hidden_layers": (4,), "name": "1 hidden layer 4 nodes"},
    {"data_size": 10000, "hidden_layers": (4,), "name": "1 hidden layer 4 nodes"},
    {"data_size": 100000, "hidden_layers": (4,), "name": "1 hidden layer 4 nodes"},
    {"data_size": 1000, "hidden_layers": (4, 4), "name": "2 hidden layers of 4 nodes each"},
    {"data_size": 10000, "hidden_layers": (4, 4), "name": "2 hidden layers of 4 nodes each"},
    {"data_size": 100000, "hidden_layers": (4, 4), "name": "2 hidden layers of 4 nodes each"},
]

# Print header
print(f"{'Data size':<10} {'Configuration':<30} {'Training error':<15} {'Validation error':<15} {'Time (s)':<15}")
print("-" * 80)

# Test each configuration
for config in configurations:
    data_size = min(config["data_size"], len(X_scaled))

    # Take a subset of data if needed
    if data_size < len(X_scaled):
        X_sample, _, y_sample, _ = train_test_split(
            X_scaled, y, train_size=data_size, random_state=42)
    else:
        X_sample, y_sample = X_scaled, y

    # Split into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(
        X_sample, y_sample, test_size=0.2, random_state=42)

    # Train model and time execution
    start_time = time.time()

    model = MLPClassifier(
        hidden_layer_sizes=config["hidden_layers"],
        max_iter=300,          # Reduced iterations for speed
        batch_size='auto',     # Faster with auto batch sizing
        solver='adam',         # Adam optimizer is usually faster
        early_stopping=True,   # Stop when validation doesn't improve
        n_iter_no_change=10,   # Stop after 10 iterations without improvement
        validation_fraction=0.1,
        random_state=42
    )

    model.fit(X_train, y_train)

    exec_time = time.time() - start_time

    # Calculate errors
    train_error = 1 - model.score(X_train, y_train)
    val_error = 1 - model.score(X_val, y_val)

    # Print results
    print(f"{data_size:<10} {config['name']:<30} {train_error:.6f}     {val_error:.6f}     {exec_time:.2f}")

Data size  Configuration                  Training error  Validation error Time (s)       
--------------------------------------------------------------------------------
1000       1 hidden layer 4 nodes         0.255000     0.250000     0.07
10000      1 hidden layer 4 nodes         0.011125     0.016000     0.84
100000     1 hidden layer 4 nodes         0.000500     0.000750     4.34
1000       2 hidden layers of 4 nodes each 0.237500     0.220000     0.04
10000      2 hidden layers of 4 nodes each 0.007250     0.005500     1.12
100000     2 hidden layers of 4 nodes each 0.000812     0.001250     5.16
