# Customer Churn Prediction - Part 4: Predict New Data

## Overview
This notebook demonstrates how to use the trained model to predict churn for new customer data.

## Step 1: Import Libraries

In [None]:
import pandas as pd
import numpy as np
import joblib
import warnings
# Silence every warning category for the rest of the session so the notebook
# output stays uncluttered.
# NOTE(review): this blanket filter also hides deprecation / future warnings
# raised by the libraries used below — consider narrowing it to specific
# categories.
warnings.filterwarnings('ignore')

# Simple status message confirming the import cell ran.
print("Libraries imported successfully!")

## Step 2: Load Trained Model and Scaler

In [None]:
# Deserialize the saved Random Forest model and the scaler from models/.
# joblib.load unpickles the file, so only load artifacts from a trusted source.
# NOTE(review): `scaler` is loaded here but not applied by the preprocessing
# shown in this notebook — confirm whether the model was trained on scaled
# features.
model = joblib.load('models/random_forest.pkl')
scaler = joblib.load('models/scaler.pkl')

# Read the processed training matrix to recover the column names, in order,
# so new data can be aligned to the same layout.
X_train = pd.read_csv('data/X_train.csv')
feature_names = list(X_train.columns)

print("Model and scaler loaded successfully!")
print("\nModel type: Random Forest")
print(f"Number of features: {len(feature_names)}")

## Step 3: Preprocessing Function

In [None]:
def preprocess_new_data(new_customer_data, feature_names):
    """
    Convert raw customer records into the feature layout used for training.

    Args:
        new_customer_data: Either a dict describing a single customer
            (column name -> raw value) or a DataFrame with one row per
            customer. The input object is never modified.
        feature_names: Ordered list of column names the model was trained on.

    Returns:
        A DataFrame containing exactly the columns in ``feature_names``, in
        that order. Training columns that cannot be derived from the input
        are filled with 0; derived columns not in ``feature_names`` are
        dropped.

    Note:
        Yes/No and gender values outside the mappings below become NaN.
    """
    if isinstance(new_customer_data, dict):
        df = pd.DataFrame([new_customer_data])
    else:
        df = new_customer_data.copy()

    # 1. Handle Missing Values
    if 'TotalCharges' in df.columns:
        # Assign the filled column back instead of calling
        # fillna(inplace=True) on the selected column: that form is chained
        # assignment and does not update `df` under pandas Copy-on-Write.
        df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce').fillna(0)

    # 2. Standardize Categorical Values
    columns_to_fix = ['OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
                      'TechSupport', 'StreamingTV', 'StreamingMovies', 'MultipleLines']
    for col in columns_to_fix:
        if col in df.columns:
            df[col] = df[col].replace(['No internet service', 'No phone service'], 'No')

    # 3. Feature Engineering
    if 'tenure' in df.columns and 'TotalCharges' in df.columns:
        # Vectorised average charge; rows without a positive tenure get 0
        # rather than the inf/NaN produced by dividing by zero.
        has_tenure = df['tenure'] > 0
        df['AvgChargePerMonth'] = (df['TotalCharges'] / df['tenure']).where(has_tenure, 0)

    # 4. Encoding
    yes_no = {'Yes': 1, 'No': 0}
    binary_cols = ['Partner', 'Dependents', 'PhoneService', 'PaperlessBilling',
                   'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
                   'TechSupport', 'StreamingTV', 'StreamingMovies', 'MultipleLines']
    for col in binary_cols:
        if col in df.columns:
            df[col] = df[col].map(yes_no)

    if 'gender' in df.columns:
        df['gender'] = df['gender'].map({'Male': 1, 'Female': 0})

    # 5. One-Hot Encoding and Alignment
    df = pd.get_dummies(df)

    # A single reindex selects the training columns in order and adds any
    # missing dummy columns as 0, without growing the frame column by column.
    return df.reindex(columns=feature_names, fill_value=0)

print("Preprocessing function created!")

## Step 4: Prediction Function

In [None]:
def predict_churn(new_data, model, feature_names):
    """
    Score new customer data with the given model.

    The raw input is first passed through ``preprocess_new_data`` so that its
    columns line up with ``feature_names``.

    Returns:
        A ``(labels, probabilities)`` tuple: the result of ``model.predict``
        and the second column of ``model.predict_proba`` (presumably the
        churn class — confirm against ``model.classes_``).
    """
    features = preprocess_new_data(new_data, feature_names)
    labels = model.predict(features)
    churn_scores = model.predict_proba(features)[:, 1]
    return labels, churn_scores

print("Prediction function created!")

## Step 5: Example Predictions

In [None]:
# One sample customer expressed with raw (un-encoded) column values.
example_customer = {
    # Customer profile
    'gender': 'Male',
    'SeniorCitizen': 0,
    'Partner': 'No',
    'Dependents': 'No',
    'tenure': 2,
    # Subscribed services
    'PhoneService': 'Yes',
    'MultipleLines': 'No',
    'InternetService': 'Fiber optic',
    'OnlineSecurity': 'No',
    'OnlineBackup': 'No',
    'DeviceProtection': 'No',
    'TechSupport': 'No',
    'StreamingTV': 'Yes',
    'StreamingMovies': 'Yes',
    # Contract and billing
    'Contract': 'Month-to-month',
    'PaperlessBilling': 'Yes',
    'PaymentMethod': 'Electronic check',
    'MonthlyCharges': 100.0,
    'TotalCharges': 200.0,
}

# Run the full pipeline on the sample; each returned array holds one entry
# because a single customer was supplied.
pred, prob = predict_churn(example_customer, model, feature_names)

outcome = 'Churn' if pred[0] == 1 else 'Stay'
print(f"Prediction: {outcome}")
print(f"Probability: {prob[0]:.2%}")

## Summary
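This pipeline loads the trained model, preprocesses raw customer records to match the training features, and returns a churn prediction with its probability, so it can be used to score new customers in real time.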



### Next Steps:
- Wrap this code into a **FastAPI** or **Flask** endpoint.
- Monitor model performance over time to detect **Data Drift**.