In [None]:

# Install required libraries
!pip install pandas scikit-learn joblib -q

import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib




In [None]:
# Load the Telco customer-churn dataset and split it into features / target.
url = "https://raw.githubusercontent.com/blastchar/telco-customer-churn/master/WA_Fn-UseC_-Telco-Customer-Churn.csv"
df = pd.read_csv(url)

# 'TotalCharges' is read as text because a few rows contain blank strings.
# Coerce it to numeric so those blanks become NaN; otherwise dropna() below
# removes nothing and the column ends up one-hot encoded as a categorical.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

df.dropna(inplace=True)
# customerID is a row identifier, not a predictive feature.
df.drop(['customerID'], axis=1, inplace=True)
df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0})

X = df.drop('Churn', axis=1)
y = df['Churn']

# Numeric columns get scaled; every remaining column is treated as
# categorical, so no column is left unlisted (the ColumnTransformer would
# silently drop any column that appears in neither list).
num_features = X.select_dtypes(include='number').columns.tolist()
cat_features = [col for col in X.columns if col not in num_features]

# Column-wise preprocessing: standardise the numeric features and one-hot
# encode the categorical ones. handle_unknown="ignore" lets the encoder
# accept categories at predict time that were not seen during fitting.
numeric_step = ("num", StandardScaler(), num_features)
categorical_step = ("cat", OneHotEncoder(handle_unknown="ignore"), cat_features)
preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

# Full modelling pipeline; the 'clf' step is a placeholder estimator that a
# hyper-parameter search may replace by name.
pipe = Pipeline(steps=[
    ('preprocess', preprocessor),
    ('clf', LogisticRegression(max_iter=1000)),
])

# Hold out a stratified test set so the reported metrics are measured on
# rows the model never saw during the search or the final refit.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# One sub-grid per estimator. A single dict would cross 'clf__C' with every
# candidate in 'clf', including RandomForestClassifier, which has no 'C'
# parameter; a list of dicts keeps each estimator's hyper-parameters separate.
params = [
    {
        'clf': [LogisticRegression(max_iter=1000)],
        'clf__C': [0.1, 1, 10],
    },
    {
        'clf': [RandomForestClassifier(random_state=42)],
    },
]

grid = GridSearchCV(pipe, param_grid=params, cv=3, scoring='accuracy', n_jobs=-1)
grid.fit(X_train, y_train)

print("Best Model:", grid.best_estimator_)
# Evaluate on the held-out split rather than on the training data.
print(classification_report(y_test, grid.predict(X_test)))

# Persist the winning pipeline (preprocessing + classifier) as refit on the
# training split.
joblib.dump(grid.best_estimator_, 'telco_churn_pipeline.pkl')
