In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
import joblib


In [None]:
# Read the raw customer-churn table from a CSV in the working directory.
DATA_PATH = "customerChurn.csv"
df = pd.read_csv(DATA_PATH)

# Show the first rows as this cell's output for a quick sanity check.
df.head()


In [None]:
# Convert TotalCharges to numeric; entries that cannot be parsed (e.g. blank
# strings) are coerced to NaN so they can be dropped below.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

# Encode the target column as 1/0.
# Only map while the column still holds text labels: mapping an already-encoded
# 0/1 column through {'Yes': 1, 'No': 0} would turn every value into NaN, which
# is what happened when this cell was executed a second time.
if not pd.api.types.is_numeric_dtype(df['Churn']):
    df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0})

# Drop rows with missing values in key columns. This runs AFTER the encoding so
# that labels other than exactly 'Yes'/'No' (which map to NaN) are removed too
# instead of leaking NaN into the target. `.copy()` gives an independent frame
# so later column assignments do not raise SettingWithCopyWarning.
df = df.dropna(subset=['TotalCharges', 'Churn']).copy()

# The map can yield floats when NaN was present; restore an integer target.
df['Churn'] = df['Churn'].astype('int64')

# Invariant for the rest of the notebook: a clean binary target.
assert df['Churn'].isin([0, 1]).all(), "Churn must be encoded as 0/1"


In [None]:
# Drop customerID as it's not useful for prediction
df = df.drop(columns=['customerID'])

# Identify categorical columns (everything still stored as object dtype)
categorical_cols = df.select_dtypes(include=['object']).columns.tolist()

# Apply label encoding to categorical columns.
# Each column gets its OWN fitted encoder, kept in `label_encoders`, so the
# string -> integer mapping of every feature can be re-applied to new data or
# inverted later. Refitting one shared encoder in the loop would only retain
# the classes of the last column processed.
label_encoders = {}
le = LabelEncoder()  # `le` stays defined even if there are no categorical columns
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

df.head()


In [None]:
# Single seed shared by the split and the model so the run is reproducible.
RANDOM_STATE = 42

# Separate features and target
X = df.drop(columns=['Churn'])
y = df['Churn']

# Train-test split (80/20).
# `stratify=y` keeps the proportion of each Churn class the same in the train
# and test sets; without it a random split can shift the class balance of the
# binary target and distort the reported metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_STATE,
    stratify=y,
)

# Train a Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE)
model.fit(X_train, y_train)


In [None]:
# Score the fitted model on the held-out test split.
y_pred = model.predict(X_test)

# Overall fraction of correct predictions.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

# Per-class report as formatted by scikit-learn.
report_text = classification_report(y_test, y_pred)
print(report_text)


In [None]:
# Persist the trained classifier to disk with joblib.
model_path = "churn_model.pkl"
joblib.dump(model, model_path)
print("Model saved to churn_model.pkl")
