In [4]:
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import time
import random

In [5]:
random.seed(42)
np.random.seed(42)
def create_dataset(original_data, size):
    if size <= len(original_data):
        return original_data.sample(size, random_state=42)
    else:
        return original_data.sample(size, replace=True, random_state=42)

df_data = pd.read_csv("dfdata.csv")

X = df_data.drop('outcome', axis=1)
y = df_data['outcome']

configs = [
    {'size': 1000, 'hidden_layers': (4,)},
    {'size': 10000, 'hidden_layers': (4,)},
    {'size': 100000, 'hidden_layers': (4,)},
    {'size': 1000, 'hidden_layers': (4, 4)},
    {'size': 10000, 'hidden_layers': (4, 4)},
    {'size': 100000, 'hidden_layers': (4, 4)}
]

results = []


In [7]:
for config in configs:
    size = config['size']
    hidden_layers = config['hidden_layers']

    dataset = create_dataset(df_data, size)
    X_sample = dataset.drop('outcome', axis=1)
    y_sample = dataset['outcome']

    X_train, X_val, y_train, y_val = train_test_split(X_sample, y_sample, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)

    start_time = time.time()

    model = MLPClassifier(
        hidden_layer_sizes=hidden_layers,
        max_iter=100,
        early_stopping=True,
        n_iter_no_change=10,
        random_state=42,
        learning_rate_init=0.001
    )

    model.fit(X_train_scaled, y_train)

    execution_time = time.time() - start_time

    train_accuracy = accuracy_score(y_train, model.predict(X_train_scaled))
    train_error = 1 - train_accuracy

    val_accuracy = accuracy_score(y_val, model.predict(X_val_scaled))
    val_error = 1 - val_accuracy

    # Store results
    results.append({
        'Data size': size,
        'Configuration': f"{len(hidden_layers)} hidden layer{'s' if len(hidden_layers) > 1 else ''} {hidden_layers}",
        'Training error': f"{train_error:.4f}",
        'Validation error': f"{val_error:.4f}",
        'Time of execution': f"{execution_time:.2f} s"
    })

# Display results
results_df = pd.DataFrame(results)
print(results_df)

    Data size           Configuration Training error Validation error  \
0        1000     1 hidden layer (4,)         0.2400           0.2800   
1       10000     1 hidden layer (4,)         0.0111           0.0120   
2      100000     1 hidden layer (4,)         0.0008           0.0012   
3        1000  2 hidden layers (4, 4)         0.2212           0.2550   
4       10000  2 hidden layers (4, 4)         0.2389           0.2490   
5      100000  2 hidden layers (4, 4)         0.0008           0.0010   
6        1000     1 hidden layer (4,)         0.2400           0.2800   
7       10000     1 hidden layer (4,)         0.0111           0.0120   
8      100000     1 hidden layer (4,)         0.0008           0.0012   
9        1000  2 hidden layers (4, 4)         0.2212           0.2550   
10      10000  2 hidden layers (4, 4)         0.2389           0.2490   
11     100000  2 hidden layers (4, 4)         0.0008           0.0010   

   Time of execution  
0             0.07 s  
1   