In [47]:
import pandas as pd
from sklearn.model_selection import StratifiedKFold, train_test_split, GridSearchCV
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from xgboost import XGBClassifier
import numpy as np 
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier

# Import Data

In [48]:
train_df = pd.read_csv("../train.csv")
test_df = pd.read_csv("../test.csv")


In [49]:
def transform(df):
    object_df = df.select_dtypes(include=['object'])
    columns = object_df.columns
    return pd.get_dummies(object_df, prefix=columns), columns
def getAllColumns(dummy_1, dummy_2):
    return list(set(dummy_1).union(dummy_2))
def concatData(df, dummy, columns, all_columns):
    return pd.concat([df.drop(columns, axis=1), dummy.reindex(columns=all_columns, fill_value=False)], axis=1)
def split(title, df):
    return df.drop(title, axis=1), df[title]

In [52]:
train_df['Surname']

0         Okwudilichukwu
1          Okwudiliolisa
2                  Hsueh
3                    Kao
4              Chiemenam
               ...      
165029              Meng
165030         Okechukwu
165031              Hsia
165032            Hsiung
165033           Ulyanov
Name: Surname, Length: 165034, dtype: object

In [53]:
train_dummy, train_columns = transform(train_df.drop(['id', "CustomerId", "Surname"], axis=1))
test_dummy, test_columns = transform(test_df.drop(['id', "CustomerId", "Surname"], axis=1))
all_columns = getAllColumns(train_dummy, test_dummy)
train_concated = concatData(train_df, train_dummy, train_columns, all_columns)
test_concated = concatData(test_df, test_dummy, test_columns, all_columns)
train_x, train_y = split("Exited", train_concated)
test_x = test_concated

In [54]:

# Define your XGBoost parameters
xg_params = {'n_estimators': [40, 45, 100]}  

# Define your RandomForest parameters
rf_params = {'min_samples_leaf': [1, 5, 10], 'n_estimators': [100, 200]}  

# Define your MLP parameters
mlp_params = {'hidden_layer_sizes': [(128, 64)], 'solver': ['adam'], 'max_iter': [1500]}

# Create instances of your models with default parameters
xg_model = XGBClassifier()
rf_model = RandomForestClassifier()
mlp_model = MLPClassifier()

# Create a StackingClassifier
stacking_model = StackingClassifier(
    estimators=[
        ('xg', xg_model),
        ('rf', rf_model),
    ],
    final_estimator=mlp_model,
    cv=StratifiedKFold(n_splits=7)
)

# Set up parameter grid for GridSearchCV for all models
param_grid = {
    'xg__n_estimators': xg_params['n_estimators'],
    'rf__min_samples_leaf': rf_params['min_samples_leaf'],
    'rf__n_estimators': rf_params['n_estimators'],
    'final_estimator__hidden_layer_sizes': mlp_params['hidden_layer_sizes'],
    'final_estimator__solver': mlp_params['solver'],
    'final_estimator__max_iter': mlp_params['max_iter'],
}

# Perform Grid Search
grid_search = GridSearchCV(stacking_model, param_grid=param_grid, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(train_x, train_y)  # Replace train_x, train_y with your training data

# Print the best parameters
print("Best Parameters:", grid_search.best_params_)


ValueError: 
All the 180 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
180 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\model_selection\_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\ensemble\_stacking.py", line 658, in fit
    return super().fit(X, y_encoded, sample_weight)
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\base.py", line 1151, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\ensemble\_stacking.py", line 206, in fit
    self.estimators_ = Parallel(n_jobs=self.n_jobs)(
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\utils\parallel.py", line 65, in __call__
    return super().__call__(iterable_with_config)
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\joblib\parallel.py", line 1863, in __call__
    return output if self.return_generator else list(output)
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\joblib\parallel.py", line 1792, in _get_sequential_output
    res = func(*args, **kwargs)
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\utils\parallel.py", line 127, in __call__
    return self.function(*args, **kwargs)
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\ensemble\_base.py", line 36, in _fit_single_estimator
    estimator.fit(X, y)
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 730, in inner_f
    return func(**kwargs)
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\sklearn.py", line 1500, in fit
    train_dmatrix, evals = _wrap_evaluation_matrices(
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\sklearn.py", line 521, in _wrap_evaluation_matrices
    train_dmatrix = create_dmatrix(
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\sklearn.py", line 958, in _create_dmatrix
    return QuantileDMatrix(
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 730, in inner_f
    return func(**kwargs)
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 1529, in __init__
    self._init(
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 1588, in _init
    it.reraise()
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 576, in reraise
    raise exc  # pylint: disable=raising-bad-type
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 557, in _handle_exception
    return fn()
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 641, in <lambda>
    return self._handle_exception(lambda: self.next(input_data), 0)
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\data.py", line 1280, in next
    input_data(**self.kwargs)
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 730, in inner_f
    return func(**kwargs)
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\core.py", line 624, in input_data
    new, cat_codes, feature_names, feature_types = _proxy_transform(
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\data.py", line 1315, in _proxy_transform
    arr, feature_names, feature_types = _transform_pandas_df(
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\data.py", line 490, in _transform_pandas_df
    _invalid_dataframe_dtype(data)
  File "c:\Users\ss348\AppData\Local\Programs\Python\Python310\lib\site-packages\xgboost\data.py", line 308, in _invalid_dataframe_dtype
    raise ValueError(msg)
ValueError: DataFrame.dtypes for data must be int, float, bool or category. When categorical type is supplied, The experimental DMatrix parameter`enable_categorical` must be set to `True`.  Invalid columns:Surname: object
