In [24]:
from sklearn.ensemble import RandomForestClassifier, StackingClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import ShuffleSplit, train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder, MinMaxScaler
from sklearn.feature_selection import SelectFromModel, RFE
#from sklearn.decomposition import PCA
from imblearn.over_sampling import SMOTE
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from collections import Counter

In [25]:
df = pd.read_csv("./WA_Fn-UseC_-Telco-Customer-Churn.csv")
df_cleaned = df[~df['TotalCharges'].isin([' ', '', '  '])].drop(columns='customerID', axis=1)
df_cleaned['TotalCharges'] = df_cleaned['TotalCharges'].astype(float)
df_cleaned.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [26]:
label_ = LabelEncoder()

df1 = df_cleaned.copy(deep = True)
# to select the columns which are not nummerical
text_data_features = [i for i in list(df_cleaned.columns) if i not in list(df_cleaned.describe().columns)]
for i in text_data_features :
    df1[i] = label_.fit_transform(df1[i])
    print(i,' : ', df1[i].unique(),' = ', label_.inverse_transform(df1[i].unique()))

gender  :  [0 1]  =  ['Female' 'Male']
Partner  :  [1 0]  =  ['Yes' 'No']
Dependents  :  [0 1]  =  ['No' 'Yes']
PhoneService  :  [0 1]  =  ['No' 'Yes']
MultipleLines  :  [1 0 2]  =  ['No phone service' 'No' 'Yes']
InternetService  :  [0 1 2]  =  ['DSL' 'Fiber optic' 'No']
OnlineSecurity  :  [0 2 1]  =  ['No' 'Yes' 'No internet service']
OnlineBackup  :  [2 0 1]  =  ['Yes' 'No' 'No internet service']
DeviceProtection  :  [0 2 1]  =  ['No' 'Yes' 'No internet service']
TechSupport  :  [0 2 1]  =  ['No' 'Yes' 'No internet service']
StreamingTV  :  [0 2 1]  =  ['No' 'Yes' 'No internet service']
StreamingMovies  :  [0 2 1]  =  ['No' 'Yes' 'No internet service']
Contract  :  [0 1 2]  =  ['Month-to-month' 'One year' 'Two year']
PaperlessBilling  :  [1 0]  =  ['Yes' 'No']
PaymentMethod  :  [2 3 0 1]  =  ['Electronic check' 'Mailed check' 'Bank transfer (automatic)'
 'Credit card (automatic)']
Churn  :  [0 1]  =  ['No' 'Yes']


In [44]:
#df1.to_csv('/Users/yanhanjun/Desktop/DS_version.csv')

In [45]:
df1.info(0)

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7032 non-null   int64  
 1   SeniorCitizen     7032 non-null   float64
 2   Partner           7032 non-null   int64  
 3   Dependents        7032 non-null   int64  
 4   tenure            7032 non-null   float64
 5   PhoneService      7032 non-null   int64  
 6   MultipleLines     7032 non-null   int64  
 7   InternetService   7032 non-null   int64  
 8   OnlineSecurity    7032 non-null   int64  
 9   OnlineBackup      7032 non-null   int64  
 10  DeviceProtection  7032 non-null   int64  
 11  TechSupport       7032 non-null   int64  
 12  StreamingTV       7032 non-null   int64  
 13  StreamingMovies   7032 non-null   int64  
 14  Contract          7032 non-null   int64  
 15  PaperlessBilling  7032 non-null   int64  
 16  PaymentMethod     7032 non-null   int64  
 17  

In [27]:
df1.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,0,1,0,1,0,1,0,0,2,0,0,0,0,0,1,2,29.85,29.85,0
1,1,0,0,0,34,1,0,0,2,0,2,0,0,0,1,0,3,56.95,1889.5,0
2,1,0,0,0,2,1,0,0,2,2,0,0,0,0,0,1,3,53.85,108.15,1
3,1,0,0,0,45,0,1,0,2,0,2,2,0,0,1,0,0,42.3,1840.75,0
4,0,0,0,0,2,1,0,1,0,0,0,0,0,0,0,1,2,70.7,151.65,1


In [28]:
mms = MinMaxScaler() # Normalization
# ss = StandardScaler() # Standardization
"""
df1['tenure'] = mms.fit_transform(df1[['tenure']])
df1['MonthlyCharges'] = mms.fit_transform(df1[['MonthlyCharges']])
df1['TotalCharges'] = mms.fit_transform(df1[['TotalCharges']])

# using feature engineering to deselect the irrevelant varibale/feature
df1.drop(columns = ['PhoneService', 'gender','StreamingTV','StreamingMovies','MultipleLines','InternetService'],inplace = True)
df1.head()
"""
numerical_columns = df.describe().columns
df1[numerical_columns] = mms.fit_transform(df1[numerical_columns])
df1.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,0.0,1,0,0.0,0,1,0,0,2,0,0,0,0,0,1,2,0.115423,29.85,0
1,1,0.0,0,0,0.464789,1,0,0,2,0,2,0,0,0,1,0,3,0.385075,1889.5,0
2,1,0.0,0,0,0.014085,1,0,0,2,2,0,0,0,0,0,1,3,0.354229,108.15,1
3,1,0.0,0,0,0.619718,0,1,0,2,0,2,2,0,0,1,0,0,0.239303,1840.75,0
4,0,0.0,0,0,0.014085,1,0,1,0,0,0,0,0,0,0,1,2,0.521891,151.65,1


In [29]:
# SMOTE
SMOTE_ = SMOTE(sampling_strategy = 1, random_state=42)

X = df1.iloc[:, :-1].values
Y = df1['Churn'].values
X, Y = SMOTE_.fit_resample(X, Y)
Counter(Y)

Counter({0: 5163, 1: 5163})

In [30]:
# split the training and testing data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [7]:
"""
rf_selector = RandomForestClassifier(n_estimators=100, random_state=42)
rf_selector.fit(X_train, Y_train)

# 使用 feature_importances_ 进行特征选择
selector = SelectFromModel(rf_selector, prefit=True)
X_train_selected = selector.transform(X_train)
pd.DataFrame(df1.drop(columns='Churn')).loc[:, selector.get_support()]
"""

"\nrf_selector = RandomForestClassifier(n_estimators=100, random_state=42)\nrf_selector.fit(X_train, Y_train)\n\n# 使用 feature_importances_ 进行特征选择\nselector = SelectFromModel(rf_selector, prefit=True)\nX_train_selected = selector.transform(X_train)\npd.DataFrame(df1.drop(columns='Churn')).loc[:, selector.get_support()]\n"

In [31]:
def model_RFE_importance(BASE_CLF, BASE_CLF_PARA, scoring_method):

    best_score = -np.inf
    best_n_features = None
    best_params = None
    # create RF to select feature importance
    #BASE_RF = RandomForestClassifier()
    # Create the Selction-feature CLF outside the function

    for n_features in range(1, X_train.shape[1] + 1):
        
        # RFE followed by RandomsearchCV 
        RFE_ = RFE(estimator=BASE_CLF, n_features_to_select=n_features)
        
        X_train_rfe = RFE_.fit_transform(X_train, Y_train)
        random_search = RandomizedSearchCV(estimator=BASE_CLF, param_distributions=BASE_CLF_PARA, 
                                        n_iter=20, cv=5, scoring=scoring_method, # the scoring method can be chosen from accuracy to F1-score/recall
                                        random_state=42, n_jobs=-1)
        random_search.fit(X_train_rfe, Y_train)
        
        current_best_score = random_search.best_score_
        current_best_params = random_search.best_params_
        print(f"Number of features: {n_features}, Best F1 Score: {current_best_score}, Best Params: {current_best_params}")
        
        # Loop to get the best performance
        if current_best_score > best_score:
            best_score = current_best_score
            best_n_features = n_features
            best_params = current_best_params

        # adjust
        #break

    print("\nBest number of features:", best_n_features)
    print("Best hyperparameters:", best_params)
    print("Best cross-validation F1 score:", best_score)
    #print(f"Selected Feactures: {RFE_.support_}")

    return RFE_


In [32]:
# stacking
base_estimators = [
    ('rf', RandomForestClassifier(random_state=42)),
    ('gb', GradientBoostingClassifier(random_state=42))
]

stacking_clf = StackingClassifier(
    estimators=base_estimators,
    final_estimator=LogisticRegression(),
    cv=5
)

param_grid = {
    'rf__n_estimators': [50, 100, 200],
    'rf__max_depth': [None, 10, 20],
    'gb__n_estimators': [50, 100],
    'gb__learning_rate': [0.01, 0.1, 0.2],
    'final_estimator__C': [0.1, 1, 10]
}

RF_Base = RandomForestClassifier(random_state=42)

Base_param_grid = {
    'n_estimators': [50, 100, 200, 300, 400],
    'max_depth': [None, 10, 20, 30, 40],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}


In [33]:
RFE_result = model_RFE_importance(RF_Base, Base_param_grid, 'recall')
# adjust to find the RFE programming

Number of features: 1, Best F1 Score: 0.6560182813108437, Best Params: {'n_estimators': 200, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_depth': 20, 'bootstrap': True}
Number of features: 2, Best F1 Score: 0.8229347886552812, Best Params: {'n_estimators': 100, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_depth': 10, 'bootstrap': True}
Number of features: 3, Best F1 Score: 0.8785756739110703, Best Params: {'n_estimators': 200, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': False}
Number of features: 4, Best F1 Score: 0.8843794923744916, Best Params: {'n_estimators': 100, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_depth': 10, 'bootstrap': True}
Number of features: 5, Best F1 Score: 0.8710734268088807, Best Params: {'n_estimators': 200, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': False}
Number of features: 6, Best F1 Score: 0.8688939572713885, Best Params: {'n_estimators': 200, 'min_samples_split': 1

In [34]:
X_train_rfe = RFE_result.fit_transform(X_train, Y_train)
X_test_rfe = RFE_result.transform(X_test)

In [35]:
def model_optimize(Result_CLF, param_list, scoring_method):
    random_search = RandomizedSearchCV(estimator=Result_CLF, param_distributions=param_list, 
                                        n_iter=20, cv=5, scoring=scoring_method, # the scoring method can be chosen from accuracy to F1-score/recall
                                        random_state=42, n_jobs=-1)
    random_search.fit(X_train_rfe, Y_train)
    return random_search

In [36]:
Stacking_result = model_optimize(stacking_clf, param_grid, 'recall')

In [37]:
def model_evaluation(Evaluated_CLF, sample_test = X_test):
    Y_pred_RFE = Evaluated_CLF.predict(sample_test)
    print(f"Confusion matrix: \n{confusion_matrix(Y_test, Y_pred_RFE)}")
    print(f"Classification Report: \n{classification_report(Y_test, Y_pred_RFE)}")
    print(f"Accuracy: \n{accuracy_score(Y_test, Y_pred_RFE)}")

In [38]:
model_evaluation(Stacking_result, X_train_rfe, X_test_rfe)

Confusion matrix: 
[[882 155]
 [135 894]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.87      0.85      0.86      1037
           1       0.85      0.87      0.86      1029

    accuracy                           0.86      2066
   macro avg       0.86      0.86      0.86      2066
weighted avg       0.86      0.86      0.86      2066

Accuracy: 
0.8596321393998064


In [43]:
"""
df_rfe_columns = df1.copy().drop(columns='Churn', axis=1)
print(f"Remained columns: {list(df_rfe_columns.columns[RFE_result.support_])}")
print(f"Dropped columns: {list(set(list(df_rfe_columns)) - set(list(df_rfe_columns.columns[RFE_result.support_])))}")
"""
RFE_result.support_
################################################################


array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [87]:
# using RFE to select necessary features
#X_train.shape[1]

para_dist = {
    'n_estimators': [50, 100, 200, 300, 400],
    'max_depth': [None, 10, 20, 30, 40],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

rf_RFE = RandomForestClassifier(random_state=42)
best_score = -np.inf
best_n_features = None
best_params = None

for n_features in range(1, X_train.shape[1] + 1):
    # RFE followed by RandomsearchCV 
    rfe = RFE(estimator=rf_RFE, n_features_to_select=n_features)
    X_train_rfe = rfe.fit_transform(X_train, Y_train)
    random_search = RandomizedSearchCV(estimator=rf_RFE, param_distributions=para_dist, 
                                       n_iter=20, cv=5, scoring='f1_weighted',  # Use F1-weighted instead of accuracy
                                       random_state=42, n_jobs=-1)
    
    random_search.fit(X_train_rfe, Y_train)
    
    # Get the best score and parameters from the random search
    current_best_score = random_search.best_score_
    current_best_params = random_search.best_params_
    
    print(f"Number of features: {n_features}, Best F1 Score: {current_best_score}, Best Params: {current_best_params}")
    
    # Update the best score, params, and feature count if current model is better
    if current_best_score > best_score:
        best_score = current_best_score
        best_n_features = n_features
        best_params = current_best_params

print("\nBest number of features:", best_n_features)
print("Best hyperparameters:", best_params)
print("Best cross-validation F1 score:", best_score)



# Fit the model using the optimal number of features and best hyperparameters
rfe = RFE(estimator=rf_RFE, n_features_to_select=best_n_features)
X_train_rfe = rfe.fit_transform(X_train, Y_train)
X_test_rfe = rfe.transform(X_test)

# Initialize the RandomForestClassifier with the best hyperparameters
best_rf = RandomForestClassifier(**best_params, random_state=42)

# Fit the final model
best_rf.fit(X_train_rfe, Y_train)

Y_pred_RFE = best_rf.predict(X_test_rfe)
print(f"Confusion matrix: \n{confusion_matrix(Y_test, Y_pred_RFE)}")
print(f"Classification Report: \n{classification_report(Y_test, Y_pred_RFE)}")
print(f"Accuracy: \n{accuracy_score(Y_test, Y_pred_RFE)}")

Number of features: 1, Best F1 Score: 0.6540539507696599, Best Params: {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_depth': 20, 'bootstrap': True}
Number of features: 2, Best F1 Score: 0.7643790213957027, Best Params: {'n_estimators': 50, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_depth': None, 'bootstrap': True}
Number of features: 3, Best F1 Score: 0.8097431408116325, Best Params: {'n_estimators': 400, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_depth': None, 'bootstrap': True}
Number of features: 4, Best F1 Score: 0.8170509988919022, Best Params: {'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 30, 'bootstrap': True}
Number of features: 5, Best F1 Score: 0.8337041410606082, Best Params: {'n_estimators': 400, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_depth': None, 'bootstrap': True}
Number of features: 6, Best F1 Score: 0.8416542129438671, Best Params: {'n_estimators': 50, 'min_samples_split': 1

In [101]:
df_rfe_columns = df1.copy().drop(columns='Churn', axis=1)
print(f"Remained columns: {list(df_rfe_columns.columns[rfe.support_])}")
print(f"Dropped columns: {list(set(list(df_rfe_columns)) - set(list(df_rfe_columns.columns[rfe.support_])))}")

Remained columns: ['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'TechSupport', 'Contract', 'PaperlessBilling', 'PaymentMethod', 'MonthlyCharges', 'TotalCharges']
Dropped columns: ['PhoneService', 'DeviceProtection', 'StreamingTV', 'MultipleLines', 'StreamingMovies']


In [60]:
# Random Forest 
rfc = RandomForestClassifier(n_estimators=700,
        min_samples_split=2,
        min_samples_leaf=2,
        max_features='log2',
        max_depth=11,
        bootstrap=True,
        class_weight='balanced')

rfc.fit(X_train, Y_train)

# Confusion Matrix
Y_pred = rfc.predict(X_test)
print(f"Confusion matrix: \n{confusion_matrix(Y_test, Y_pred)}")
print(f"Classification Report: \n{classification_report(Y_test, Y_pred)}")
print(f"Accuracy: \n{accuracy_score(Y_test, Y_pred)}")

Confusion matrix: 
[[814 223]
 [147 882]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.85      0.78      0.81      1037
           1       0.80      0.86      0.83      1029

    accuracy                           0.82      2066
   macro avg       0.82      0.82      0.82      2066
weighted avg       0.82      0.82      0.82      2066

Accuracy: 
0.8209099709583737


In [62]:
gb = GradientBoostingClassifier(subsample=0.8,
        n_estimators=300, 
        min_samples_split=10,
        min_samples_leaf=2,
        max_features='log2',
        max_depth=7,
        learning_rate=0.5, 
        random_state=42)
gb.fit(X_train, Y_train)
gb_pred = gb.predict(X_test)
print(f"Confusion matrix: \n{confusion_matrix(Y_test, gb_pred)}")
print(f"Classification Report: \n{classification_report(Y_test, gb_pred)}")
print(f"Accuracy: \n{accuracy_score(Y_test, gb_pred)}")

Confusion matrix: 
[[864 173]
 [185 844]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.82      0.83      0.83      1037
           1       0.83      0.82      0.83      1029

    accuracy                           0.83      2066
   macro avg       0.83      0.83      0.83      2066
weighted avg       0.83      0.83      0.83      2066

Accuracy: 
0.8267182962245886


In [59]:
#  stacking

base_models = [
    ('rf',  RandomForestClassifier(
        n_estimators=700,
        min_samples_split=2,
        min_samples_leaf=2,
        max_features='log2',
        max_depth=11,
        bootstrap=True,
        class_weight='balanced')),

    ('gb', GradientBoostingClassifier(
        subsample=0.8,
        n_estimators=300, 
        min_samples_split=10,
        min_samples_leaf=2,
        max_features='log2',
        max_depth=7,
        learning_rate=0.5, 
        random_state=42))
]

meta_model = LogisticRegression()

stacking_clf = StackingClassifier(estimators=base_models, final_estimator=meta_model)


stacking_clf.fit(X_train, Y_train)
Y_pred_3 = stacking_clf.predict(X_test)
print(f"Confusion matrix: \n{confusion_matrix(Y_test, Y_pred_3)}")
print(f"Classification Report: \n{classification_report(Y_test, Y_pred_3)}")
print(f"Accuracy: \n{accuracy_score(Y_test, Y_pred_3)}")

Confusion matrix: 
[[838 199]
 [151 878]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.85      0.81      0.83      1037
           1       0.82      0.85      0.83      1029

    accuracy                           0.83      2066
   macro avg       0.83      0.83      0.83      2066
weighted avg       0.83      0.83      0.83      2066

Accuracy: 
0.8305905130687319


In [66]:
pd.Series(Y_test).value_counts()

0    1037
1    1029
Name: count, dtype: int64

In [69]:

base_models = [
    ('rf', RandomForestClassifier(random_state=42)),
    ('gb', GradientBoostingClassifier(random_state=42))
]

meta_model = LogisticRegression()


stack_clf = StackingClassifier(estimators=base_models, final_estimator=meta_model, cv=5)


param_grid = {
    # RandomForest 
    'rf__n_estimators': [50, 100, 200],
    'rf__max_depth': [None, 10, 20, 30],
    'rf__min_samples_split': [2, 5, 10],
    'rf__min_samples_leaf': [1, 2, 4],
    'rf__bootstrap': [True, False],
    
    # GradientBoostingClassifier
    'gb__n_estimators': [50, 100, 200],
    'gb__learning_rate': [0.01, 0.1, 0.2],
    'gb__max_depth': [3, 5, 7],
    'gb__subsample': [0.8, 1.0],
    
    # LogisticRegression (Meta Model)
    'final_estimator__C': [0.01, 0.1, 1.0, 10.0],
    'final_estimator__penalty': ['l2'],
    'final_estimator__solver': ['lbfgs', 'liblinear']
}


random_search = RandomizedSearchCV(estimator=stack_clf, param_distributions=param_grid,
                                   n_iter=10, cv=5, verbose=2, random_state=42, n_jobs=-1)


random_search.fit(X_train, Y_train)

# 打印最佳超参数组合
print("Best parameters found: ", random_search.best_params_)


Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END final_estimator__C=0.01, final_estimator__penalty=l2, final_estimator__solver=liblinear, gb__learning_rate=0.1, gb__max_depth=3, gb__n_estimators=50, gb__subsample=1.0, rf__bootstrap=True, rf__max_depth=10, rf__min_samples_leaf=1, rf__min_samples_split=2, rf__n_estimators=50; total time=   4.7s
[CV] END final_estimator__C=0.01, final_estimator__penalty=l2, final_estimator__solver=liblinear, gb__learning_rate=0.1, gb__max_depth=3, gb__n_estimators=50, gb__subsample=1.0, rf__bootstrap=True, rf__max_depth=10, rf__min_samples_leaf=1, rf__min_samples_split=2, rf__n_estimators=50; total time=   4.8s
[CV] END final_estimator__C=0.01, final_estimator__penalty=l2, final_estimator__solver=liblinear, gb__learning_rate=0.1, gb__max_depth=3, gb__n_estimators=50, gb__subsample=1.0, rf__bootstrap=True, rf__max_depth=10, rf__min_samples_leaf=1, rf__min_samples_split=2, rf__n_estimators=50; total time=   4.6s
[CV] END final_estimator

In [70]:
base_models = [
    ('rf', RandomForestClassifier(n_estimators=100, min_samples_split=2, min_samples_leaf=1, 
                                  max_depth=None, bootstrap=True, random_state=42)),
    ('gb', GradientBoostingClassifier(n_estimators=200, max_depth=5, learning_rate=0.1, 
                                      subsample=1.0, random_state=42))
]

meta_model = LogisticRegression(C=0.01, solver='lbfgs', penalty='l2', random_state=42)


stack_hyperparameter = StackingClassifier(estimators=base_models, final_estimator=meta_model, cv=5)


stack_hyperparameter.fit(X_train, Y_train)

# 进行预测
Y_pred_4 = stack_hyperparameter.predict(X_test)

# 评估模型表现
print(f"Confusion matrix: \n{confusion_matrix(Y_test, Y_pred_4)}")
print(f"Classification Report: \n{classification_report(Y_test, Y_pred_4)}")
print(f"Accuracy: \n{accuracy_score(Y_test, Y_pred_4)}")

Confusion matrix: 
[[855 182]
 [151 878]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.85      0.82      0.84      1037
           1       0.83      0.85      0.84      1029

    accuracy                           0.84      2066
   macro avg       0.84      0.84      0.84      2066
weighted avg       0.84      0.84      0.84      2066

Accuracy: 
0.8388189738625363
