<center> 

### Customer Churn Preprocessing, Feature Engineering, and Modeling

</center>

In [None]:
# Installing dependencies via requirements.txt 
#%pip install -r ../requirements.txt 


In [None]:
# Importing libraries 
import warnings 
warnings.filterwarnings('ignore') 

import sys 
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
from pathlib import Path 

from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV  
from sklearn.preprocessing import OneHotEncoder, StandardScaler, FunctionTransformer 
from sklearn.impute import SimpleImputer 
from sklearn.compose import ColumnTransformer 
from sklearn.pipeline import Pipeline 
from sklearn.linear_model import LogisticRegression 
from sklearn.ensemble import RandomForestClassifier 
from sklearn.metrics import (
    roc_auc_score, 
    average_precision_score, 
    roc_curve, 
    precision_recall_curve, 
    classification_report, 
    confusion_matrix
)
from imblearn.over_sampling import SMOTE 
from imblearn.pipeline import Pipeline as ImbPipeline 

import joblib 

try: 
    import shap 
except Exception: 
    shap = None 

# Getting the project root: the parent of the current working directory
# (assumes the kernel was started from the notebook's own folder — TODO confirm)
project_root = Path().resolve().parent
# Registering the root on sys.path so the `src` package can be imported below;
# the membership check keeps re-runs of this cell from appending duplicate entries
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

from src.feature_engineering import feature_engineering 
from src.utils import model_path, data_path, visuals_path 
from src.preprocessing import create_logistic_pipeline, create_random_forest_pipeline, create_preprocessing_pipeline 


In [None]:
# Pinning one seed for the whole notebook so NumPy's global random generator is repeatable
random_state = 42
np.random.seed(seed=random_state)


In [None]:
# Loading data
cleaned_raw_data_path = data_path / "cleaned_raw_data.csv"
cleaned_data = pd.read_csv(cleaned_raw_data_path)
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() added a stray "None" line to the output
cleaned_data.info()


In [None]:
# Working on an independent (deep) copy so the loaded frame stays untouched
data = cleaned_data.copy(deep=True)


In [None]:
# Dropping customerID column and mapping target column (Churn)
# Reassigning instead of inplace=True; errors='ignore' makes the drop a no-op
# when the column is already gone
data = data.drop(columns=['customerID'], errors='ignore')
# Mapping only while Churn still holds its 'Yes'/'No' labels: re-running this cell
# on the already-encoded 0/1 column would otherwise turn every value into NaN
if not pd.api.types.is_numeric_dtype(data['Churn']):
    data['Churn'] = data['Churn'].map({'Yes': 1, 'No': 0})


In [None]:
# Running the project's feature-engineering step over the prepared frame
data = feature_engineering(data)
# Previewing the first rows of the selected columns
feature_preview_cols = [
    'tenure', 'MonthlyCharges', 'TotalCharges', 'AvgChargesPerMonth',
    'fiber_internet', 'electronic_check', 'month_to_month', 'internet_services_count',
]
data[feature_preview_cols].head()


In [None]:
# Column 
