In [None]:

#### ---- RandomForestClassifier VS  MLPClassifier on titanic dataset ---- ####

import warnings
import time
import pandas as pd
from sklearn.datasets import load_iris, load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

warnings.simplefilter(action = "ignore", category = Warning)
warnings.filterwarnings("ignore")

df_titanic = pd.read_csv("./titanic.csv")

# features
X = df_titanic.drop(['Survived', 'Name', 'Sex', 'Ticket', 'Cabin', 'Embarked' ], axis=1)
X['Pclass'] = X['Pclass'].fillna(5)
X['Age'] = X['Age'].fillna(X['Age'].mean())
X['Fare'] = X['Fare'].fillna(X['Fare'].mean())

# target
y = df_titanic['Survived']

# split train test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# standard scaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

""" 
    ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 
    > RandomForestClassifier
    ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
""" 

rf_param_grid = {

    'n_estimators': [50, 100, 200],
    'max_depth': [None, 3, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]

}

rf = RandomForestClassifier(random_state=42)

# grid search
start_time = time.time()
rf_grid_search = GridSearchCV(rf, rf_param_grid, cv=5, n_jobs=-1)
rf_grid_search.fit(X_train_scaled, y_train)
end_time = time.time()
rf_execution_time = end_time - start_time

# evaluate
best_rf = rf_grid_search.best_estimator_
y_pred_rf = best_rf.predict(X_test_scaled)
rf_accuracy = accuracy_score(y_test, y_pred_rf)

""" 
    ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 
    > MLPClassifier
    ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
""" 

mlp_param_grid = {

    'hidden_layer_sizes': [(50,), (100,), (100, 4)],
    'activation': ['logistic', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate': ['constant', 'adaptive'],
    'max_iter': [200, 500, 1000]

}

mlp = MLPClassifier(random_state=42)

# grid search
start_time = time.time()
mlp_grid_search = GridSearchCV(mlp, mlp_param_grid, cv=5,  n_jobs=-1)
mlp_grid_search.fit(X_train_scaled, y_train)
end_time = time.time()
mlp_execution_time = end_time - start_time

# evaluate
best_mlp = mlp_grid_search.best_estimator_
y_pred_mlp = best_mlp.predict(X_test_scaled)
mlp_accuracy = accuracy_score(y_test, y_pred_mlp)


In [7]:

""" 
    ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 
    > Results
    ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
""" 

print()
print()
print("    > " + f"Accuracy with RandomForestClassifier: {rf_accuracy * 100:.2f}%")
print("      " + str(rf_grid_search.best_params_))
print("      " + "Execution time:" + str(round(rf_execution_time)))
print()
print("    > " + f"Accuracy with MLPClassifier: {mlp_accuracy * 100:.2f}%")
print("      " + str(mlp_grid_search.best_params_))
print("      " + "Execution time:" + str(round(mlp_execution_time)))
print()
print()




    > Accuracy with RandomForestClassifier: 74.86%
      {'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
      Execution time:32

    > Accuracy with MLPClassifier: 74.86%
      {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (100,), 'learning_rate': 'constant', 'max_iter': 200, 'solver': 'adam'}
      Execution time:205


