## Testing

In [7]:
import pandas as pd
import joblib

# Load the model, the scaler and the list of columns from disk.
# NOTE: joblib.load unpickles its input, so these files must come from a trusted source.
loaded_model, loaded_scaler, model_columns = (
    joblib.load(artifact_path)
    for artifact_path in (
        '../models/churn_prediction_model.pkl',
        '../models/scaler.pkl',
        '../models/model_columns.pkl',
    )
)


In [None]:
# Scenario 1: long-tenured customer on a two-year contract without internet service.
case_loyal = {
    'gender': 'Male',
    'SeniorCitizen': 0,
    'Partner': 'Yes',
    'Dependents': 'Yes',
    'tenure': 72,  # max tenure -> long-term customer
    'PhoneService': 'Yes',
    'MultipleLines': 'Yes',
    'InternetService': 'No',  # no internet service -> lower risk
    # All internet add-ons share the same placeholder value when there is no internet.
    **dict.fromkeys(
        (
            'OnlineSecurity',
            'OnlineBackup',
            'DeviceProtection',
            'TechSupport',
            'StreamingTV',
            'StreamingMovies',
        ),
        'No internet service',
    ),
    'Contract': 'Two year',  # long-term contract
    'PaperlessBilling': 'No',
    'PaymentMethod': 'Mailed check',
    'MonthlyCharges': 20.0,  # low charges
    'TotalCharges': 1440.0,
}

In [None]:
# Scenario 2: brand-new senior customer on a month-to-month fiber plan with high charges.
case_risky = {
    'gender': 'Female',
    'SeniorCitizen': 1,  # elderly customers tend to churn more
    'Partner': 'No',
    'Dependents': 'No',
    'tenure': 1,  # new customer
    'PhoneService': 'Yes',
    'MultipleLines': 'No',
    'InternetService': 'Fiber optic',  # service often complained about
    # No protective add-ons subscribed ...
    **dict.fromkeys(
        ('OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport'),
        'No',
    ),
    # ... but both streaming services are active.
    **dict.fromkeys(('StreamingTV', 'StreamingMovies'), 'Yes'),
    'Contract': 'Month-to-month',  # highest risk
    'PaperlessBilling': 'Yes',
    'PaymentMethod': 'Electronic check',
    'MonthlyCharges': 100.0,  # high charges
    'TotalCharges': 100.0,
}

In [None]:
# Scenario 3: mixed signals - long tenure, yet a short-term contract on fiber without support.
case_confused = dict(
    gender='Female',
    SeniorCitizen=0,
    Partner='Yes',
    Dependents='Yes',
    tenure=60,  # stayed long
    PhoneService='Yes',
    MultipleLines='Yes',
    InternetService='Fiber optic',  # (bad)
    OnlineSecurity='No',
    OnlineBackup='Yes',
    DeviceProtection='Yes',
    TechSupport='No',  # no tech support (bad)
    StreamingTV='Yes',
    StreamingMovies='Yes',
    Contract='Month-to-month',  # short-term contract (bad) - contradicts the long tenure
    PaperlessBilling='Yes',
    PaymentMethod='Bank transfer (automatic)',
    MonthlyCharges=105.0,
    TotalCharges=6300.0,
)

In [15]:
def predict_churn_with_confidence(input_data, model, scaler, columns,
                                  high_risk_threshold=0.7, churn_threshold=0.4):
    """Predict the churn probability of one customer and print a short verdict.

    The record is one-hot encoded, aligned to ``columns`` (columns missing
    from the record are filled with 0, extra ones are dropped), the three
    numeric columns are scaled with ``scaler``, and the probability at index 1
    of ``model.predict_proba`` is used as the churn probability.

    Args:
        input_data: Mapping of raw feature name -> value for a single customer.
        model: Object exposing ``predict_proba(X)``.
            NOTE(review): index 1 of the returned row is assumed to be the
            churn ("Yes") class - confirm against ``model.classes_``.
        scaler: Object exposing ``transform(X)`` for the columns
            ``tenure``, ``MonthlyCharges`` and ``TotalCharges``.
        columns: Feature columns, in the order the model expects them.
        high_risk_threshold: Probability at or above which the customer is
            reported as "Very risky". Defaults to 0.7.
        churn_threshold: Probability at or above which the customer is
            reported as CHURN at all. Defaults to 0.4.

    Returns:
        The churn probability taken from ``predict_proba``.

    Raises:
        ValueError: If the thresholds are not ordered as
            ``0 <= churn_threshold <= high_risk_threshold <= 1``, or if a
            numeric field holds a string that cannot be parsed as a number.
    """
    if not 0.0 <= churn_threshold <= high_risk_threshold <= 1.0:
        raise ValueError(
            "thresholds must satisfy 0 <= churn_threshold <= high_risk_threshold <= 1"
        )

    cols_to_scale = ['tenure', 'MonthlyCharges', 'TotalCharges']

    # Single-row frame built from the raw record.
    df_new = pd.DataFrame([input_data])

    # Numeric fields that arrive as strings (e.g. '1440.0' read from a CSV)
    # would otherwise be one-hot encoded by get_dummies and then silently
    # replaced with 0 by the reindex below, so coerce them first.
    for col in cols_to_scale:
        if col in df_new.columns:
            df_new[col] = pd.to_numeric(df_new[col])

    # One-hot encode the categorical fields.
    df_new = pd.get_dummies(df_new)

    # Align with the model's columns: a single row only produces the dummy
    # columns for the categories it contains, the remaining ones become 0.
    df_new = df_new.reindex(columns=columns, fill_value=0)

    # Scale the numeric columns with the provided scaler.
    df_new[cols_to_scale] = scaler.transform(df_new[cols_to_scale])

    # proba[0] is taken as P(stay) and proba[1] as P(churn).
    proba = model.predict_proba(df_new)[0]
    churn_prob = proba[1]

    # Report the verdict; branches are checked from highest risk downwards,
    # so each one only needs its lower bound.
    print("--- ANALYSIS RESULT ---")
    if churn_prob >= high_risk_threshold:
        print("Prediction: CHURN")
        print(f"Confidence: {churn_prob*100:.2f}% (Very risky)")
    elif churn_prob >= churn_threshold:
        print("Prediction: CHURN")
        print(f"Confidence: {churn_prob*100:.2f}% (Considering to leave)")
    else:
        print("Prediction: STAY")
        print(f"Confidence: {(1-churn_prob)*100:.2f}% (Safe customer)")

    return churn_prob

In [16]:

# Run every scenario through the same call instead of repeating it per case.
# Calling inside a loop also keeps the cell from echoing the return value of
# the last call as a stray output.
test_cases = [
    ("=== TEST CASE 1: LOYAL CUSTOMER ===", case_loyal),
    ("\n=== TEST CASE 2: HIGH-RISK CUSTOMER ===", case_risky),
    ("\n=== TEST CASE 3: CONFUSED CASE (MIXED) ===", case_confused),
]

for header, customer in test_cases:
    print(header)
    predict_churn_with_confidence(customer, loaded_model, loaded_scaler, model_columns)

=== TEST CASE 1: LOYAL CUSTOMER ===
--- ANALYSIS RESULT ---
Prediction: STAY
Confidence: 100.00% (Safe customer)

=== TEST CASE 2: HIGH-RISK CUSTOMER ===
--- ANALYSIS RESULT ---
Prediction: CHURN
Confidence: 89.98% (Very risky)

=== TEST CASE 3: CONFUSED CASE (MIXED) ===
--- ANALYSIS RESULT ---
Prediction: CHURN
Confidence: 62.68% (Considering to leave)


np.float64(0.6268470418470418)