In [54]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer


In [40]:
# Load the raw customer-churn CSV.
# NOTE(review): this is an absolute, machine-specific Windows path; the
# notebook will not run elsewhere until the path is adjusted.
DATA_PATH = r'D:\project\dataset\customer_churn_dataset-testing-master.csv'
data = pd.read_csv(DATA_PATH)

In [42]:
# Target vector: the 'Churn' column.
y = data['Churn']

# Feature matrix: every column except the target and 'CustomerID'.
non_feature_cols = ['Churn', 'CustomerID']
X = data.drop(columns=non_feature_cols)

In [44]:
# Columns handled by the categorical (impute + one-hot) pipeline.
categorical_cols = [
    'Gender',
    'Subscription Type',
    'Contract Length',
]

# Every remaining column of X is treated as numerical. Index.difference
# returns the leftover labels as an Index (sorted when possible), so the
# order is not necessarily the original column order of X.
numerical_cols = X.columns.difference(categorical_cols)

In [46]:
# Numerical branch: fill missing values with the column mean, then
# standardize with StandardScaler.
numerical_steps = [
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
]
numerical_transformer = Pipeline(steps=numerical_steps)

In [48]:
# Categorical branch: fill missing values with the most frequent category,
# then one-hot encode. handle_unknown='ignore' keeps transform() from raising
# on categories that were not present when the encoder was fitted.
categorical_steps = [
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
]
categorical_transformer = Pipeline(steps=categorical_steps)

In [50]:
# Route each column group through its own pipeline. The 'num' / 'cat' labels
# become the prefixes of the generated feature names (e.g. 'num__Age').
column_branches = [
    ('num', numerical_transformer, numerical_cols),
    ('cat', categorical_transformer, categorical_cols),
]
preprocessor = ColumnTransformer(transformers=column_branches)

In [52]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [56]:
# Fit the preprocessing on the training rows only, then apply the already
# fitted transformer to the test rows.
# NOTE(review): X_train / X_test are rebound to the transformed output, so
# this cell is not idempotent -- re-running it would feed already-transformed
# data back into the preprocessor. Re-run the split cell first.
X_train, X_test = (
    preprocessor.fit_transform(X_train),
    preprocessor.transform(X_test),
)

In [58]:
# Wrap the transformed data in DataFrames labelled with the preprocessor's
# output feature names, so the feature selectors can report names.
feature_names = preprocessor.get_feature_names_out()
X_train = pd.DataFrame(X_train, columns=feature_names)
X_test = pd.DataFrame(X_test, columns=feature_names)

In [60]:
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

# The estimator has to exist before the first selector is built. Previously
# `model` was only assigned in a later cell, so a fresh kernel
# (Restart & Run All) raised NameError here.
# NOTE(review): the definition mirrors the one in the exhaustive-search cell;
# confirm this is the estimator the stored outputs were produced with.
model = LogisticRegression(max_iter=50)

# Sequential forward selection: start from no features and add one at a time,
# scoring each candidate subset by 5-fold cross-validated accuracy.
# k_features='best' keeps the subset size with the best CV score.
sfs_forward = SFS(model,
                  k_features='best',
                  forward=True,
                  floating=False,
                  scoring='accuracy',
                  cv=5)
sfs_forward = sfs_forward.fit(X_train, y_train)

# Names of the selected columns (available because X_train is a DataFrame).
selected_features_forward = list(sfs_forward.k_feature_names_)
selected_features_forward




['num__Age',
 'num__Last Interaction',
 'num__Payment Delay',
 'num__Support Calls',
 'num__Tenure',
 'num__Total Spend',
 'num__Usage Frequency',
 'cat__Gender_Female',
 'cat__Gender_Male',
 'cat__Subscription Type_Basic',
 'cat__Subscription Type_Premium',
 'cat__Subscription Type_Standard',
 'cat__Contract Length_Annual',
 'cat__Contract Length_Monthly',
 'cat__Contract Length_Quarterly']

In [62]:
# Sequential backward selection (forward=False), scored by 5-fold
# cross-validated accuracy; k_features='best' keeps the best-scoring subset.
backward_options = {
    'k_features': 'best',
    'forward': False,
    'floating': False,
    'scoring': 'accuracy',
    'cv': 5,
}
sfs_backward = SFS(model, **backward_options).fit(X_train, y_train)

# Names of the columns that survived the search.
selected_features_backward = list(sfs_backward.k_feature_names_)
selected_features_backward




['num__Age',
 'num__Payment Delay',
 'num__Support Calls',
 'num__Tenure',
 'num__Total Spend',
 'num__Usage Frequency',
 'cat__Gender_Male',
 'cat__Contract Length_Monthly']

In [64]:
# Floating forward selection (forward=True, floating=True), scored by 5-fold
# cross-validated accuracy; k_features='best' keeps the best-scoring subset.
stepwise_options = {
    'k_features': 'best',
    'forward': True,
    'floating': True,
    'scoring': 'accuracy',
    'cv': 5,
}
sfs_stepwise = SFS(model, **stepwise_options).fit(X_train, y_train)

# Names of the columns chosen by the floating search.
selected_features_stepwise = list(sfs_stepwise.k_feature_names_)
selected_features_stepwise




['num__Age',
 'num__Last Interaction',
 'num__Payment Delay',
 'num__Support Calls',
 'num__Tenure',
 'num__Total Spend',
 'num__Usage Frequency',
 'cat__Gender_Male',
 'cat__Subscription Type_Standard',
 'cat__Contract Length_Monthly']

In [None]:
from mlxtend.feature_selection import ExhaustiveFeatureSelector as EFS

# Estimator evaluated on every candidate feature subset.
model = LogisticRegression(max_iter=50)

# Exhaustive search: every subset size from 1 up to all columns of X_train,
# each scored by 5-fold cross-validated accuracy.
# NOTE(review): the number of subsets is 2**n - 1 for n columns; with the 15
# features listed by the forward-selection output that would be 32767 subsets
# (x5 folds each) -- expect a very long run time.
n_features_total = X_train.shape[1]
efs = EFS(model,
          min_features=1,
          max_features=n_features_total,
          scoring='accuracy',
          cv=5).fit(X_train, y_train)

# Names of the columns in the best-scoring subset.
selected_features_exhaustive = list(efs.best_feature_names_)
selected_features_exhaustive


