In [2]:
import pandas as pd
import numpy as np
import joblib

In [3]:
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

In [4]:
# Read telco.csv (path is relative to the working directory) into the raw frame.
df = pd.read_csv("telco.csv")

In [5]:
# Target: the "Churn" column, taken as-is from the raw frame.
y = df["Churn"]

# Features: every remaining column except the customerID column and two
# further columns that are left out of the model.
# NOTE(review): the reason for excluding Contract and PaymentMethod is not
# recorded in this notebook — confirm this is intentional.
X = df.drop(columns=["Churn", "customerID", "Contract", "PaymentMethod"])

In [6]:
# Convert TotalCharges to a numeric column; errors="coerce" turns any value
# that cannot be parsed as a number into NaN instead of raising.
X = X.assign(TotalCharges=pd.to_numeric(X["TotalCharges"], errors="coerce"))

In [7]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
# NOTE(review): the split is not stratified on y — consider stratify=y if the
# class balance of the hold-out set matters.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)

In [8]:
# Split the feature columns by dtype for the two preprocessing branches.
# Selecting on "number" (rather than on "object") keeps the split independent
# of how pandas stores text columns (object dtype vs. a dedicated string
# dtype): numeric columns get the numeric branch, everything else the
# categorical branch. For frames holding only object/int/float columns this
# yields the same two groups as an object-based split.
numerical_cols = X_train.select_dtypes(include="number").columns
categorical_cols = X_train.select_dtypes(exclude="number").columns

In [9]:
# Numeric branch: fill missing values with the column median.
numeric_transformer = Pipeline([("imputer", SimpleImputer(strategy="median"))])

# Categorical branch: fill missing values with the most frequent value, then
# one-hot encode. handle_unknown="ignore" lets transform() accept categories
# that were not present when the encoder was fitted.
categorical_transformer = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore")),
])

# Route each column group to its branch.
preprocessor = ColumnTransformer([
    ("num", numeric_transformer, numerical_cols),
    ("cat", categorical_transformer, categorical_cols),
])


In [10]:
# Random forest classifier: 100 trees with depth capped at 10.
# random_state fixes the seed; n_jobs=-1 uses all available cores.
model = RandomForestClassifier(n_estimators=100, max_depth=10, n_jobs=-1, random_state=42)

In [11]:
# Chain preprocessing and the classifier into one estimator so the same
# transformations are applied at fit time and at predict time.
pipeline = Pipeline([("preprocessing", preprocessor), ("classifier", model)])

In [12]:
# Train the full pipeline (preprocessing + classifier) on the training split.
pipeline.fit(X_train, y_train)

# Score the held-out split.
pred = pipeline.predict(X_test)
test_accuracy = accuracy_score(y_test, pred)
print("Accuracy:", test_accuracy)

# Persist the fitted pipeline to the working directory.
joblib.dump(pipeline, "telco_churn_model.joblib")
print("Model saved successfully!")

Accuracy: 0.8012304779933743
Model saved successfully!


In [13]:
# Per-class precision / recall / F1 on the held-out split.
# classification_report is imported with the other sklearn imports in the
# notebook's import cell.
print(classification_report(y_test, pred))


              precision    recall  f1-score   support

          No       0.83      0.91      0.87      1539
         Yes       0.68      0.52      0.58       574

    accuracy                           0.80      2113
   macro avg       0.75      0.71      0.73      2113
weighted avg       0.79      0.80      0.79      2113

