In [16]:
import pandas as pd
import numpy as np
import joblib
import warnings
import sys

# Suppress minor warnings for cleaner output
warnings.filterwarnings('ignore')

In [15]:
# Sets pandas to display all columns without truncation
pd.set_option("display.max_columns", None)
# Sets pandas to display floating point numbers with 2 decimal places
pd.set_option('display.float_format', lambda x: '%.2f' % x)
# Sets pandas display width to 500 characters
pd.set_option('display.width', 500)

In [17]:
# --- 1. Define Assets and New Data (Simulation) ---

FINAL_COLUMNS = [
    'CreditScore', 'Age', 'Tenure', 'Balance', 'EstimatedSalary', 'Point Earned',
    'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'Complain', 'Satisfaction Score',
    'Geography_Germany', 'Geography_Spain', 'Gender_Male', 'Card Type_GOLD', 
    'Card Type_PLATINUM', 'Card Type_SILVER']

numeric_cols = ['CreditScore', 'Age', 'Tenure', 'Balance', 'EstimatedSalary', 'Point Earned']
categorical_cols_ohe = ['Geography', 'Gender', 'Card Type']
ordinal_col_name = 'Satisfaction Score'

In [21]:
# Raw data for a new customer (simulated input)
new_customer_raw = pd.DataFrame({
    'CustomerID': [15634602], 
    'CreditScore': [619], 'Age': [42], 'Tenure': [2], 'Balance': [0],
    'EstimatedSalary': [101348.88], 'Point Earned': [464], 'Geography': ['France'],
    'Gender': ['Female'], 'NumOfProducts': [1], 'HasCrCard': [1],
    'IsActiveMember': [1], 'Complain': [1], 'Satisfaction Score': [2], 
    'Card Type': ['DIAMOND']})
new_customer_raw

Unnamed: 0,CustomerID,CreditScore,Age,Tenure,Balance,EstimatedSalary,Point Earned,Geography,Gender,NumOfProducts,HasCrCard,IsActiveMember,Complain,Satisfaction Score,Card Type
0,15634602,619,42,2,0,101348.88,464,France,Female,1,1,1,1,2,DIAMOND


In [23]:
# Take only CustomerID
customer_id_df = new_customer_raw[['CustomerID']].copy() 
customer_id_df

Unnamed: 0,CustomerID
0,15634602


In [24]:
# --- 2. Load Deployment Assets (Model and Scaler) ---

try:
    best_model = joblib.load('model.pkl') 
    scaler = joblib.load('scaler.pkl')
    training_columns = FINAL_COLUMNS
    print("Assets (Model, Scaler, Columns) loaded successfully.")
except FileNotFoundError:
    print("Error: Ensure model.pkl and scaler.pkl files are in the same directory.")
    sys.exit(1)

Assets (Model, Scaler, Columns) loaded successfully.


In [25]:
# --- 3. Preprocessing New Data (Preventing Schema Drift) ---

new_data = new_customer_raw.drop(columns=['CustomerID']).copy() 

# A. Log1p Transformation 
new_data['Age'] = np.log1p(new_data['Age'])
new_data['Balance'] = np.log1p(new_data['Balance'])

# B. Ordinal Encoding 
new_data[ordinal_col_name] = new_data[ordinal_col_name].astype('category').cat.codes
new_data

Unnamed: 0,CreditScore,Age,Tenure,Balance,EstimatedSalary,Point Earned,Geography,Gender,NumOfProducts,HasCrCard,IsActiveMember,Complain,Satisfaction Score,Card Type
0,619,3.76,2,0.0,101348.88,464,France,Female,1,1,1,1,0,DIAMOND


In [26]:
# C. Scaling Numerical Features
new_data_scaled = scaler.transform(new_data[numeric_cols])
new_data_scaled_df = pd.DataFrame(new_data_scaled, columns=numeric_cols, index=new_data.index)
new_data_scaled_df

Unnamed: 0,CreditScore,Age,Tenure,Balance,EstimatedSalary,Point Earned
0,-0.34,0.45,-1.04,-1.32,0.02,-0.64


In [28]:
# D. Categorical Encoding and Concatenation
data_non_numeric = new_data.drop(columns=numeric_cols) 
new_data_ohe = pd.get_dummies(data_non_numeric, columns=categorical_cols_ohe, drop_first=True)
new_data_processed = pd.concat([new_data_scaled_df, new_data_ohe], axis=1)
new_data_processed

Unnamed: 0,CreditScore,Age,Tenure,Balance,EstimatedSalary,Point Earned,NumOfProducts,HasCrCard,IsActiveMember,Complain,Satisfaction Score
0,-0.34,0.45,-1.04,-1.32,0.02,-0.64,1,1,1,1,0


In [29]:
# E. Finalization: Reindex 
X_predict_final = new_data_processed.reindex(columns=training_columns, fill_value=0)
X_predict_final

Unnamed: 0,CreditScore,Age,Tenure,Balance,EstimatedSalary,Point Earned,NumOfProducts,HasCrCard,IsActiveMember,Complain,Satisfaction Score,Geography_Germany,Geography_Spain,Gender_Male,Card Type_GOLD,Card Type_PLATINUM,Card Type_SILVER
0,-0.34,0.45,-1.04,-1.32,0.02,-0.64,1,1,1,1,0,0,0,0,0,0,0


In [30]:
print("New customer data processed and aligned to model schema.")
print(f"Final Data Shape: {X_predict_final.shape}")

New customer data processed and aligned to model schema.
Final Data Shape: (1, 17)


In [33]:
# -------------------------------------------------------------
# --- 4. Prediction Using Loaded Model (MODIFIED) ---
# -------------------------------------------------------------
try:
    # Predict the class (0 or 1)
    prediction = best_model.predict(X_predict_final)[0]

    # Predict the probability array (e.g., [[0.9, 0.1]])
    all_probas = best_model.predict_proba(X_predict_final) 

    # Predict the probability of Churn (Class 1) - Used for summary
    proba = all_probas[0][1]

    # --- 5. Final Results (Summary) ---
    status = "CHURN" if prediction == 1 else "TIDAK CHURN (Stay)"

    print("\n--- MODEL PREDICTION RESULTS (SUMMARY) ---")
    print(f"Model ({type(best_model).__name__}) predicted:")
    print(f"Customer Status: {status}")
    print(f"Churn Probability (Class 1): {proba:.4f}")

    # -------------------------------------------------------------
    # --- 6. Create Prediction DataFrame (NEW STEP) ---
    # -------------------------------------------------------------
    
    # 1. Ambil kolom probabilitas untuk Class 1 (index 1) dan jadikan DataFrame
    predict_proba_xgb = pd.DataFrame(
        all_probas[:, 1], 
        columns=['PREDICT_PROBA_RESULT']
    )
    
    # 2. Gabungkan ID Pelanggan dengan Hasil Probabilitas
    df_final_proba = pd.concat([customer_id_df.reset_index(drop=True), predict_proba_xgb], axis=1)
    
   
    
    # Filter opsional (sesuai permintaan awal Anda, meskipun probabilitas 0 jarang terjadi di XGBoost)
    # df_filtered = df_final_proba[df_final_proba['PREDICT_PROBA_RESULT']>0]

except ValueError as e:
    print(f"\nPREDICTION FAILED.")
    print(f"Error: {e}")

In [34]:
print("\n--- PREDICTION OUTPUT DATAFRAME ---")
df_final_proba

Unnamed: 0,CustomerID,PREDICT_PROBA_RESULT
0,15634602,1.0
