In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split

In [None]:
# Load the Telco customer-churn CSV from the Kaggle input directory into a DataFrame.
df = pd.read_csv('/kaggle/input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv')

In [None]:
# Print the column names, dtypes and non-null counts of the loaded frame.
df.info()

In [None]:
# Label column, kept as a one-element list (downstream code reads target[0]).
target = ['Churn']
# Model inputs: every column of df except the label and the customerID column.
features = [col for col in df.columns if col not in ('Churn', 'customerID')]

In [None]:
# Columns that go through the numeric branch (cast -> impute -> scale).
numeric_features = ['tenure', 'MonthlyCharges', 'TotalCharges']
# Every remaining feature is treated as categorical and one-hot encoded.
categorical_features = [col for col in features if col not in numeric_features]

In [None]:
# Display the TotalCharges column on its own. The numeric pipeline defined
# further down passes it through pd.to_numeric(errors='coerce') before use.
df[['TotalCharges']]

# Create a Base Model

In [None]:
from sklearn.pipeline import Pipeline, FeatureUnion
import xgboost as xgb

In [None]:
# Feature matrix: the columns listed in `features`.
X = df[features]

# Encode the label as 0/1 integers. Recent xgboost releases reject string
# class labels (classes must be 0..n_classes-1), and integer labels also work
# with the sklearn metrics used for evaluation.
# NOTE(review): assumes the Churn column holds exactly 'No'/'Yes' - any other
# value maps to NaN and trips the assertion below; confirm with
# df['Churn'].unique().
y = df[target[0]].map({'No': 0, 'Yes': 1})
assert y.notna().all(), 'unexpected label value in Churn column'
y = y.astype(int)

# Fixed seed so that Restart & Run All reproduces the same split and therefore
# the same reported metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import OneHotEncoder, StandardScaler

class BasePipeStep(BaseEstimator, TransformerMixin):
    """Identity transformer used as the common base for the custom steps.

    ``fit`` learns nothing and ``transform`` hands back a copy of its input,
    so subclasses only need to override the method(s) they care about.
    """

    def fit(self, X, y=None):
        """No-op fit; returns ``self`` so calls can be chained."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` so the caller's object is left untouched."""
        return X.copy()
    
class SelectColumns(BasePipeStep):
    """Keep only a fixed list of columns.

    Parameters
    ----------
    columns : list
        Column labels to select from the incoming frame.
    """

    def __init__(self, columns):
        self.columns = columns

    def transform(self, X):
        """Return the ``self.columns`` subset taken from a copy of ``X``."""
        return X.copy()[self.columns]
    
class StuHotEncoder(SelectColumns):
    """One-hot encode ``self.columns`` and return a dense, labelled DataFrame.

    Wraps ``sklearn.preprocessing.OneHotEncoder`` configured with
    ``drop='first'`` and ``handle_unknown='error'``, so a category that was
    not present during ``fit`` makes ``transform`` raise.
    """

    def fit(self, X, y=None):
        """Fit the underlying encoder on the selected columns of ``X``.

        Returns
        -------
        self
        """
        self.one_hot = OneHotEncoder(handle_unknown='error', drop='first')
        self.one_hot.fit(X[self.columns])
        return self

    def transform(self, X):
        """Encode the selected columns of ``X``.

        Returns
        -------
        pandas.DataFrame
            Dense indicator columns named by the encoder, carrying the same
            row index as ``X`` so the result stays aligned with the input.
        """
        selected = X[self.columns]
        encoder = self.one_hot

        # get_feature_names was deprecated in scikit-learn 1.0 and removed in
        # 1.2; prefer the replacement and fall back only on old versions.
        if hasattr(encoder, 'get_feature_names_out'):
            encoded_names = encoder.get_feature_names_out(self.columns)
        else:
            encoded_names = encoder.get_feature_names(self.columns)

        return pd.DataFrame(
            encoder.transform(selected).toarray(),
            columns=encoded_names,
            # Keep the caller's index instead of silently resetting it to 0..n-1.
            index=selected.index,
        )

class FillNumericData(SelectColumns):
    """Fill missing values in ``self.columns`` with the mean seen at fit time."""

    def fit(self, X, y=None):
        """Record the mean of each configured column in ``self.means``."""
        column_means = {}
        for name in self.columns:
            column_means[name] = X[name].mean()
        self.means = column_means
        return self

    def transform(self, X):
        """Return a copy of ``X`` with NaNs in each configured column replaced
        by that column's stored mean. Other columns pass through unchanged."""
        filled = X.copy()
        for name in self.columns:
            filled[name] = filled[name].fillna(self.means[name])
        return filled

class ScaleNumeric(SelectColumns):
    """Standardise ``self.columns`` with a ``StandardScaler`` fitted in ``fit``."""

    def fit(self, X, y=None):
        """Fit a fresh ``StandardScaler`` on the configured columns."""
        self.scaler = StandardScaler().fit(X[self.columns])
        return self

    def transform(self, X):
        """Return a copy of ``X`` whose configured columns have been replaced
        by their scaled values. Other columns pass through unchanged."""
        scaled = X.copy()
        scaled[self.columns] = self.scaler.transform(scaled[self.columns])
        return scaled

class CastAsType(SelectColumns):
    """Coerce ``self.columns`` to numeric dtype.

    Values that cannot be parsed become NaN (``errors='coerce'``) rather than
    raising.
    """

    def transform(self, X):
        """Return a copy of ``X`` with each configured column passed through
        ``pd.to_numeric``."""
        converted = X.copy()
        for name in self.columns:
            converted[name] = pd.to_numeric(converted[name], errors='coerce')
        return converted

In [None]:

# Numeric branch: select -> coerce to numbers -> mean-impute -> standardise.
# (The step name 'scale_feautes' is kept exactly as originally spelled, since
# step names are part of the pipeline's parameter keys.)
numeric_prepipe = Pipeline(steps=[
    ('select_cols', SelectColumns(numeric_features)),
    ('cast_as_float', CastAsType(numeric_features)),
    ('impute', FillNumericData(numeric_features)),
    ('scale_feautes', ScaleNumeric(numeric_features)),
])

# Categorical branch: select -> one-hot encode.
cat_prepipe = Pipeline(steps=[
    ('select_cols', SelectColumns(categorical_features)),
    ('one_hot', StuHotEncoder(categorical_features)),
])

# Run both branches on the same input and concatenate their outputs
# column-wise, numeric block first.
preprocessing = FeatureUnion(transformer_list=[
    ('numeric_pipe', numeric_prepipe),
    ('cat_prepipe', cat_prepipe),
])

# Full model: preprocessing followed by an XGBoost classifier with default
# hyper-parameters.
model = Pipeline(steps=[
    ('pre', preprocessing),
    ('learn', xgb.XGBClassifier()),
])

In [None]:
# Fit the preprocessing steps and the classifier on the training split only.
model.fit(X_train, y_train)

In [None]:
from sklearn import metrics

In [None]:
# Hard class predictions feed the accuracy metric; the second column of
# predict_proba (the score for the classifier's second class) feeds ROC AUC.
predictions = model.predict(X_test)
test_prediction = model.predict_proba(X_test)[:, 1]

accuracy = metrics.accuracy_score(y_true=y_test, y_pred=predictions)
score = metrics.roc_auc_score(y_true=y_test, y_score=test_prediction)

print(f'Area under ROC of Model On Test Set - {score:,.2%}')
print(f'Accuracy - {accuracy:,.2%}')
