In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split

from pathlib import Path

# Load the raw Telco customer-churn export.
df = pd.read_csv("../WA_Fn-UseC_-Telco-Customer-Churn.csv")

# customerID is removed before modelling (not useful for ML).
# Reassigning instead of inplace=True keeps the step explicit and chain-friendly.
df = df.drop(columns=["customerID"])

# Hold out 20% of the rows for testing. Stratifying on "Churn" keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df["Churn"],
)

# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists before writing (no-op when it is already there).
output_dir = Path("../dataset")
output_dir.mkdir(parents=True, exist_ok=True)

# Save both splits without the positional index column.
train_df.to_csv(output_dir / "telco_train.csv", index=False)
test_df.to_csv(output_dir / "telco_test.csv", index=False)

print("Train & Test CSV files created")


Train & Test CSV files created


In [4]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression


In [6]:
# Read the training split, convert TotalCharges to a numeric column (values that
# cannot be parsed become NaN), then discard every row that contains a NaN.
df = (
    pd.read_csv("../dataset/telco_train.csv")
    .assign(
        TotalCharges=lambda frame: pd.to_numeric(
            frame["TotalCharges"], errors="coerce"
        )
    )
    .dropna()
)

# Target: "No" -> 0, "Yes" -> 1. Features: every remaining column.
y = df["Churn"].map({"No": 0, "Yes": 1})
X = df.drop(columns="Churn")

# Column groups routed to the categorical / numeric preprocessing branches.
cat_cols = X.select_dtypes(include=["object"]).columns
num_cols = X.select_dtypes(include=["int64", "float64"]).columns


In [7]:
# Numeric branch: standardise each numeric column.
numeric_transformer = Pipeline([("scaler", StandardScaler())])

# Categorical branch: one-hot encode; categories not seen during fit are
# ignored at transform time instead of raising.
categorical_transformer = Pipeline(
    [("onehot", OneHotEncoder(handle_unknown="ignore"))]
)

# Send each column group through its own branch.
preprocessor = ColumnTransformer(
    [
        ("num", numeric_transformer, num_cols),
        ("cat", categorical_transformer, cat_cols),
    ]
)

# Full estimator: preprocessing followed by logistic regression
# (max_iter raised to 1000 for the solver).
model = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("classifier", LogisticRegression(max_iter=1000)),
    ]
)


In [8]:
# Fit the whole pipeline (preprocessing + classifier) on the training
# features X and the 0/1 churn labels y.
model.fit(X, y=y)
print("Model training completed")


Model training completed


In [9]:
import pickle

from pathlib import Path

# Destination for the serialized pipeline.
model_path = Path("../models/churn_model.pkl")

# Opening a file for writing does not create missing parent folders, so make
# sure the target directory exists first (no-op when it is already there).
model_path.parent.mkdir(parents=True, exist_ok=True)

# Persist the fitted pipeline (preprocessing + classifier) as one object.
# NOTE: pickle files should only ever be loaded from trusted sources.
with model_path.open("wb") as f:
    pickle.dump(model, f)

print("Model saved as churn_model.pkl")


Model saved as churn_model.pkl
