In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
import joblib

# Train and evaluate a logistic-regression churn classifier on the Telco
# customer-churn CSV, using seven selected features.

# 1. Load the dataset
# NOTE: absolute, machine-specific path -- change DATA_PATH when running elsewhere.
DATA_PATH = r'D:\Documents\Data Science\Dataset\WA_Fn-UseC_-Telco-Customer-Churn.csv'
df = pd.read_csv(DATA_PATH)

# 2. Handle 'TotalCharges' which may have empty strings
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')  # non-numeric entries become NaN

# 3. Drop rows with missing values
# The explicit .copy() avoids pandas' SettingWithCopyWarning on the column
# assignments in step 4.
df = df.dropna().copy()

# 4. Encode categorical variables (e.g., 'Contract', 'InternetService', 'PaymentMethod')
# A separate fitted encoder is kept per column in `encoders`: a single shared
# LabelEncoder only remembers the classes of the last column it was fitted on,
# which makes the mapping for the other columns unrecoverable at inference time.
categorical_columns = ['Contract', 'InternetService', 'PaymentMethod']
encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le

# 5. Select the relevant features (7 features)
selected_features = ['SeniorCitizen', 'tenure', 'MonthlyCharges', 'TotalCharges', 'Contract', 'InternetService', 'PaymentMethod']
X = df[selected_features]  # Use only these 7 features
y = df['Churn']

# 6. Train-test split (70% train, 30% test)
# Split BEFORE scaling so that the scaler's mean/variance are computed from the
# training rows only; fitting the scaler on all rows leaks test-set statistics
# into training and into the reported metrics.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 7. Feature scaling (fit on train, apply to test)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Whole feature matrix under the train-fitted scaling, for any later cell that needs it.
X_scaled = scaler.transform(X)

# 8. Train the logistic regression model
# class_weight='balanced' reweights the classes to compensate for class imbalance.
model = LogisticRegression(max_iter=300, class_weight='balanced')
model.fit(X_train, y_train)

# 9. Evaluate the model on the held-out 30%
y_pred = model.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))



Accuracy: 0.72
Classification Report:
              precision    recall  f1-score   support

          No       0.91      0.68      0.78      1549
         Yes       0.48      0.83      0.61       561

    accuracy                           0.72      2110
   macro avg       0.70      0.75      0.69      2110
weighted avg       0.80      0.72      0.73      2110



In [None]:
# 10. Save the trained model (uncomment to write the file; the output shown below is from an earlier run)
#joblib.dump(model, 'customer_churn_model.joblib')

['customer_churn_model.joblib']

In [None]:
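# Save the fitted scaler as well: the model was trained on scaled features, so inference must apply the same scaler (uncomment to write the file)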
#joblib.dump(scaler, 'customer_churn_scaler.joblib')

['customer_churn_scaler.joblib']