# Customer Churn Prediction
This notebook trains a model to predict customer churn based on usage patterns.

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

## Load and Explore Data

In [2]:
# Load data
df = pd.read_csv('data/customers.csv')
print(f"Dataset shape: {df.shape}")
print(df.head())

Dataset shape: (100, 8)
  customer_id  age  monthly_charges  total_usage  months_active  \
0        C001   34            65.50         2340             12   
1        C002   45            89.99         4560             24   
2        C003   28            45.00          890              8   
3        C004   52           120.50         8900             36   
4        C005   31            55.75         1200             10   

   support_calls contract_type  churned  
0              3       monthly        0  
1              1        annual        0  
2              7       monthly        1  
3              2      two_year        0  
4              5       monthly        1  


## Data Preprocessing

In [3]:
# Handle missing values
df = df.dropna()

# Feature engineering
df['usage_per_month'] = df['total_usage'] / df['months_active']
df['support_ratio'] = df['support_calls'] / df['months_active']

# Select features
features = ['age', 'monthly_charges', 'total_usage', 'usage_per_month',
            'support_ratio', 'contract_type']
X = df[features]
y = df['churned']

# Encode categorical variables
X = pd.get_dummies(X, columns=['contract_type'])

## Split and Scale Data

In [4]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

feature_names = X_train.columns.tolist()

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Train Model

In [5]:
# Train Random Forest model
model = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    random_state=42
)
model.fit(X_train_scaled, y_train)
print("Model trained successfully!")

Model trained successfully!


## Evaluate Model

In [6]:
# Make predictions
y_pred = model.predict(X_test_scaled)

# Evaluate
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.3f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 1.000

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      1.00      1.00         8

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



## Save Model and Scaler

In [7]:
# Save trained model
joblib.dump(model, 'models/churn_model.pkl')
joblib.dump(scaler, 'models/scaler.pkl')
joblib.dump(feature_names, 'models/feature_names.pkl')
print("Model and scaler saved!")

Model and scaler saved!


## Make Predictions on New Data

In [8]:
# Example prediction function
def predict_churn(customer_data):
    """
    Predict churn for a new customer
    customer_data: dict with customer features
    """
    # Load model and scaler
    model = joblib.load('models/churn_model.pkl')
    scaler = joblib.load('models/scaler.pkl')
    feature_names = joblib.load('models/feature_names.pkl')

    # Prepare features
    features_df = pd.DataFrame([customer_data])
    features_df = pd.get_dummies(features_df)

    for col in feature_names:
        if col not in features_df.columns:
            features_df[col] = 0

    features_df = features_df[feature_names]

    features_scaled = scaler.transform(features_df)

    # Predict
    prediction = model.predict(features_scaled)
    probability = model.predict_proba(features_scaled)

    return prediction[0], probability[0]

# Test prediction
new_customer = {
    'age': 35,
    'monthly_charges': 75.5,
    'total_usage': 450,
    'usage_per_month': 37.5,
    'support_ratio': 0.5,
    'contract_type': 'monthly'
}

pred, prob = predict_churn(new_customer)
print(f"Churn prediction: {pred}")
print(f"Churn probability: {prob[1]:.3f}")

Churn prediction: 1
Churn probability: 0.600
