In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
# from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from scikeras.wrappers import KerasClassifier


from sklearn.model_selection import GridSearchCV

2024-12-14 14:52:47.987971: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the Titanic dataset
data = pd.read_csv("../Titanic-Dataset.csv")

# Selecting relevant features and target variable
features = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]
target = "Survived"
X = data[features]
y = data[target]

# Preprocessing pipeline
numeric_features = ["Age", "SibSp", "Parch", "Fare"]
categorical_features = ["Pclass", "Sex", "Embarked"]

numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Apply preprocessing
X_processed = preprocessor.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)

In [3]:
# Build the MLP model
def create_mlp(learning_rate=0.001, activation='relu', num_layers=2, units_per_layer=32, dropout_rate=0.2):
    model = Sequential()
    model.add(Dense(units_per_layer, activation=activation, input_dim=X_train.shape[1]))
    for _ in range(num_layers - 1):
        model.add(Dense(units_per_layer, activation=activation))
        model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='sigmoid'))  # Binary output
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy', 'binary_accuracy'])
    return model

In [4]:
# Wrap the model for GridSearchCV
model = KerasClassifier(model=create_mlp, verbose=0)

# Hyperparameter grid
param_grid = {
    'model__learning_rate': [0.001, 0.01],
    'model__activation': ['relu', 'softmax', 'tanh'],
    'model__num_layers': [1, 2],
    'model__units_per_layer': [16, 32],
    'model__dropout_rate': [0.2, 0.4],
    'batch_size': [8, 16, 32],
    'epochs': [10]
}

# GridSearchCV setup
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, verbose=1, refit=True)

# Fit the model
grid_result = grid.fit(X_train, y_train)

Fitting 3 folds for each of 144 candidates, totalling 432 fits


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)




  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **

In [5]:
# Best parameters and results
print("Best parameters:", grid_result.best_params_)
print("Best score:", grid_result.best_score_)

print("Best Estimator:", grid_result.best_estimator_)

# Evaluate the best model on the test set
test_accuracy = grid_result.best_estimator_.score(X_test, y_test)
# test_loss = grid_result.best_estimator_.loss(, y_test)
print(f"Test Accuracy: {test_accuracy}")

Best parameters: {'batch_size': 8, 'epochs': 10, 'model__activation': 'relu', 'model__dropout_rate': 0.4, 'model__learning_rate': 0.01, 'model__num_layers': 2, 'model__units_per_layer': 16}
Best score: 0.828670708789845
Best Estimator: KerasClassifier(
	model=<function create_mlp at 0x1614585e0>
	build_fn=None
	warm_start=False
	random_state=None
	optimizer=rmsprop
	loss=None
	metrics=None
	batch_size=8
	validation_batch_size=None
	verbose=0
	callbacks=None
	validation_split=0.0
	shuffle=True
	run_eagerly=False
	epochs=10
	class_weight=None
	model__activation=relu
	model__dropout_rate=0.4
	model__learning_rate=0.01
	model__num_layers=2
	model__units_per_layer=16
)
Test Accuracy: 0.8044692737430168


In [6]:
# Wrap the model for GridSearchCV
model = KerasClassifier(model=create_mlp, verbose=0)

# Hyperparameter grid
param_grid = {
    'model__learning_rate': [0.001, 0.01],
    'model__activation': ['relu', 'softmax', 'tanh'],
    'model__num_layers': [1, 2],
    'model__units_per_layer': [16, 32],
    'model__dropout_rate': [0.2, 0.4],
    'batch_size': [8, 16, 32],
    'epochs': [50]
}

# GridSearchCV setup
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, verbose=1, refit=True)

# Fit the model
grid_result = grid.fit(X_train, y_train)

Fitting 3 folds for each of 144 candidates, totalling 432 fits


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **