In [1]:
import pandas as pd

# Replace 'yourfilename.csv' with the exact name of your uploaded file
df = pd.read_csv('telco.csv')

# Show first 5 rows to confirm file loaded properly
print(df.head())


   customerID  gender  SeniorCitizen Partner Dependents  tenure PhoneService  \
0  7590-VHVEG  Female              0     Yes         No       1           No   
1  5575-GNVDE    Male              0      No         No      34          Yes   
2  3668-QPYBK    Male              0      No         No       2          Yes   
3  7795-CFOCW    Male              0      No         No      45           No   
4  9237-HQITU  Female              0      No         No       2          Yes   

      MultipleLines InternetService OnlineSecurity  ... DeviceProtection  \
0  No phone service             DSL             No  ...               No   
1                No             DSL            Yes  ...              Yes   
2                No             DSL            Yes  ...               No   
3  No phone service             DSL            Yes  ...              Yes   
4                No     Fiber optic             No  ...               No   

  TechSupport StreamingTV StreamingMovies        Contract Pape

In [2]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
import joblib

# Prepare features and target
X = df.drop('Churn', axis=1)
y = df['Churn'].apply(lambda x: 1 if x=='Yes' else 0)

# Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Identify categorical and numerical columns
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()

# Preprocessing pipeline
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
numerical_transformer = StandardScaler()

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ])

# Pipeline with Logistic Regression
pipe_lr = Pipeline(steps=[('preprocessor', preprocessor),
                         ('classifier', LogisticRegression(max_iter=1000))])

# Hyperparameter grid
param_grid_lr = {
    'classifier__C': [0.1, 1.0, 10],
    'classifier__solver': ['lbfgs']
}

# Grid search
grid_lr = GridSearchCV(pipe_lr, param_grid_lr, cv=3, scoring='f1', n_jobs=-1)
grid_lr.fit(X_train, y_train)

print("Best Logistic Regression params:", grid_lr.best_params_)

# Evaluate function
def evaluate(model, X_test, y_test):
    y_pred = model.predict(X_test)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("F1 Score:", f1_score(y_test, y_pred))

print("Test set performance:")
evaluate(grid_lr.best_estimator_, X_test, y_test)

# Save model pipeline
joblib.dump(grid_lr.best_estimator_, 'churn_model_pipeline.joblib')
print("Model saved as churn_model_pipeline.joblib")


Best Logistic Regression params: {'classifier__C': 1.0, 'classifier__solver': 'lbfgs'}
Test set performance:
Accuracy: 0.8261178140525195
F1 Score: 0.6444121915820029
Model saved as churn_model_pipeline.joblib
