In [1]:
# %pip install mlflow

In [2]:
import numpy as np 
import pandas as pd 
data = pd.read_csv("/home/jampanasasank/Desktop/Customer Churn Prediction/Telecom Churn Prediction/archive (7)/WA_Fn-UseC_-Telco-Customer-Churn.csv")


In [3]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
import pickle as pkl

class DataPrep:
    def __init__(self, data, artifacts_path) -> None:
        self.data = data
        self.artifacts_path = artifacts_path
        print(self.artifacts_path)
        X = self.data.drop(['Churn'], axis=1)
        Y = self.data[['Churn']]
        self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(X, Y, random_state=42, test_size=0.2)
    
    def save_artifacts(self, feature, encoder):
        path = f'{self.artifacts_path}/{feature}_estimator.pkl'
        with open(file=path, mode='wb') as f: 
            pkl.dump(encoder, f)
            
    def label_encode(self):
        cat_cols = [
            'gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines', 'InternetService',
            'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
            'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
            'PaymentMethod'
        ]
        
        for col in cat_cols:
            label_encoder = LabelEncoder()
            self.X_train[col] = label_encoder.fit_transform(self.X_train[col].copy())
            self.X_test[col] = label_encoder.transform(self.X_test[col].copy())
            feature = f'{col}_label_encoder'
            self.save_artifacts(feature, label_encoder)
            
    def scaling(self):
        self.X_train['TotalCharges'] = pd.to_numeric(self.X_train['TotalCharges'], errors='coerce')
        self.X_test['TotalCharges'] = pd.to_numeric(self.X_test['TotalCharges'], errors='coerce')
        
        num_cols = ['tenure','MonthlyCharges', 'TotalCharges' ]

        
        for col in num_cols:
            scaler = MinMaxScaler()
            self.X_train[col] = scaler.fit_transform(self.X_train[[col]])
            self.X_test[col] = scaler.transform(self.X_test[[col]])
            feature = f'{col}_scaler'
            self.save_artifacts(feature, scaler)
        
    def drop_columns(self,):
        self.X_train.drop(['customerID'], axis=1, inplace=True)
        self.X_test.drop(['customerID'], axis=1, inplace=True)
        
    
    def target_replace(self, ):
        self.Y_train['Churn'].replace({'Yes':1, 'No': 0}, inplace=True)
        self.Y_test['Churn'].replace({'Yes':1, 'No': 0}, inplace=True)
        
            
    def process(self):
        self.label_encode()
        self.scaling()
        self.drop_columns()
        self.target_replace()
        return self.X_train, self.X_test, self.Y_train, self.Y_test


In [4]:
artifacts_path = '/home/jampanasasank/Documents/Deployment/Deployment/telecom_churn_prediction/model'
data = pd.read_csv("/home/jampanasasank/Documents/Deployment/Deployment/telecom_churn_prediction/archive (7)/WA_Fn-UseC_-Telco-Customer-Churn.csv")

In [5]:
obj = DataPrep(data=data, artifacts_path=artifacts_path)
X_train, X_test , Y_train, Y_test = obj.process()

/home/jampanasasank/Documents/Deployment/Deployment/telecom_churn_prediction/model


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  self.Y_train['Churn'].replace({'Yes':1, 'No': 0}, inplace=True)
  self.Y_train['Churn'].replace({'Yes':1, 'No': 0}, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  self.Y_test['Churn'].replace({'Yes':1, 'No': 0}, inplace=True)
  self.Y_test['Churn'].replace({'Yes':1

In [6]:
import xgboost as xgb
import mlflow
import mlflow.sklearn
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score
import pickle as pkl

class ModelBuilding:
    def __init__(self, X_train, X_test, Y_train, Y_test, path):
        self.X_train = X_train
        self.X_test = X_test
        self.Y_train = Y_train
        self.Y_test = Y_test
        self.path = path
        self.model = None  # Initialize model attribute

    def build_model(self):
        self.model = xgb.XGBClassifier()  # Create the model
        self.model.fit(self.X_train, self.Y_train)  # Train the model
        self.save_artifacts()  # Save the model

    def evaluate_model(self):
        if self.model is None:
            print("Model is not trained yet!")
            return

        # Make predictions
        predictions = self.model.predict(self.X_test)

        # Log metrics with MLflow
        mlflow.log_metric("accuracy", accuracy_score(self.Y_test, predictions))
        mlflow.log_metric("roc_auc", roc_auc_score(self.Y_test, predictions))

        # Log confusion matrix and classification report
        print("Accuracy score: ", accuracy_score(self.Y_test, predictions), '\n')
        print("Classification report: \n", classification_report(self.Y_test, predictions), '\n')
        print("Confusion Matrix: \n", confusion_matrix(self.Y_test, predictions), '\n')

    def save_artifacts(self):
        if self.model is None:
            print("No model to save!")
            return
        
        # Save the model to MLflow
        with mlflow.start_run():  # Track the experiment
            mlflow.log_param("model_type", "XGBClassifier")  # Log model type as a parameter
            mlflow.sklearn.log_model(self.model, "model")  # Log the trained model to MLflow
            print(f"Model saved to MLflow")

    def build(self):
        self.build_model()  # Build the model
        self.evaluate_model()  # Evaluate the model



In [7]:
path = '/home/jampanasasank/Documents/Deployment/Deployment/Telecom Churn Prediction/model'
obj1 = ModelBuilding(X_train, X_test , Y_train, Y_test, path=path)
obj1.build()



Model saved to MLflow
Accuracy score:  0.7920511000709723 

Classification report: 
               precision    recall  f1-score   support

           0       0.83      0.89      0.86      1036
           1       0.63      0.51      0.56       373

    accuracy                           0.79      1409
   macro avg       0.73      0.70      0.71      1409
weighted avg       0.78      0.79      0.78      1409
 

Confusion Matrix: 
 [[927 109]
 [184 189]] 

