# Escalation Model ML Pipeline

This notebook provides a complete feature engineering and prediction pipeline for the escalation model.

## Pipeline Overview:
1. **Load Dataset**: Load the raw dataset from CSV
2. **Feature Engineering**: Create derived features (IS_ND, IS_MnI, IS_PFQ, IS_OSI, etc.)
3. **Data Preprocessing**: Handle missing values, type conversions, and log transformations
4. **Model Loading**: Load the pre-trained LightGBM model
5. **Inference**: Generate escalation predictions
6. **Evaluation**: Compute performance metrics (if labels available)


## 1. Environment Setup


In [1]:
# Standard library
from datetime import datetime, timedelta
import math
import warnings
warnings.filterwarnings('ignore')

# Data manipulation
import numpy as np
import pandas as pd

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Machine Learning
from sklearn.metrics import (
    roc_curve,
    precision_recall_curve,
    average_precision_score,
    roc_auc_score,
    auc,
    confusion_matrix,
    classification_report
)
import lightgbm as lgb

# Utilities
from functools import reduce
import operator
import json
import os
import pickle

print("All packages imported successfully!")


All packages imported successfully!


## 2. Configuration


In [2]:
# ============================================================
# CONFIGURATION - Modify these paths as needed
# ============================================================

# Dataset path
DATASET_PATH = '/Users/shekhar.tanwar/Documents/Projects/NegotiatonAgent/dataset/processed_dataset/dataset.csv'

# Model path
MODEL_PATH = '/Users/shekhar.tanwar/Documents/Projects/NegotiatonAgent/model/v1_lgb_1125_fold3.pkl'

# Target column
TARGET_COLUMN = 'IS_ESCALATED'

# ============================================================
# Features from (Clone) 3-v2-training.ipynb - top30_features (actually 70 features)
# ============================================================

FINAL_FEATURES = [
    'SH_CNR', 'SH_IS_CREDITS', 'DEFECT_CATEGORY', 'SH_IS_REFUND', 'SH_IS_REDELIVERY',
    'IS_MnI', 'SH_FIRST_REPORT_ISSUE', 'SH_LATEST_REPORT_ISSUE', 'MTO_ORDER_COUNT_L90D',
    'FRAUD_CNR_REQUEST_RATIO_L60D', 'MTO_ORDER_COUNT_L12M', 'SH_IS_REJET',
    'FRAUD_CNR_APPROVED_REQUESTS_COUNT_L60D', 'AVG_VP_LIFETIME', 'IS_OSI',
    'MTO_ORDER_COUNT_L28D', 'FRAUD_CNR_AMOUNT_L60D', 'ACTUAL_VP_RAW_AMT_L12M',
    'MTO_ORDER_COUNT_LIFETIME', 'CREDIT_REFUND_ORDER_COUNT_L90D', 'IS_ND',
    'IS_20_MIN_LATE', 'MOST_FREQ_MTO_COUNT', 'FRAUD_CNR_REQUEST_RATIO_L180D',
    'NEVER_DELIVERED_COUNT_L90D', 'DEFECT_DELIVERY_COUNT_L12M',
    'HIGH_QUALITY_DELIVERY_COUNT_L12M', 'NEVER_DELIVERED_COUNT_L12M',
    'ORDER_COUNT_L12M', 'IS_PFQ', 'ACTUAL_DELIVERIES_COUNT_L12M',
    'FRAUD_CNR_AMOUNT_L180D', 'AVG_VP_LIFETIME_CATEGORY', 'ORDER_COUNT_L90D',
    'MOST_FREQ_MTO_ISSUE', 'FRAUD_CNR_REQUESTED_DELIVERIES_COUNT_L180D',
    'MTO_ORDER_COUNT_L7D', 'FRAUD_CNR_APPROVED_REQUESTS_COUNT_L180D',
    'CREDIT_REFUND_ORDER_COUNT_L12M', 'ORDER_COUNT_LIFETIME',
    'ML_CX_CNR_RISK_V1_SCORE', 'LATEST_MTO_ISSUE', 'SUBTOTAL',
    'HIGH_QUALITY_DELIVERY_COUNT_L90D', 'NEVER_DELIVERED_COUNT_L28D',
    'TOTAL_ITEM_COUNT', 'IS_TOP_95_PERCENT_VP', 'FRAUD_CNR_ND_ORDERS_COUNT_L60D',
    'NEVER_DELIVERED_COUNT_LIFETIME', 'ORDER_COUNT_L28D', 'PROMOTIONS',
    'AVG_SPEND_LIFETIME', 'DEFECT_DELIVERY_COUNT_L90D', 'DEFAULT_ZIP_CODE',
    'CREDIT_REFUND_ORDER_COUNT_L28D', 'AVG_GOV_LIFETIME',
    'CREDIT_REFUND_ORDER_COUNT_LIFETIME', 'TOTAL_MAIN_VISITOR_COUNT_L90D',
    'NEVER_DELIVERED_COUNT_L7D', 'FRAUD_CNR_ISSUANCE_AMOUNT_LIFETIME',
    'DEFECT_DELIVERY_COUNT_LIFETIME', 'HIGH_QUALITY_DELIVERY_COUNT_L28D',
    'FRAUD_CNR_GOV_AMOUNT_LIFETIME', 'SUBMIT_PLATFORM', 'AVG_SPEND_LIFETIME_CATEGORY',
    'IS_ELITE_CX', 'HOMEPAGE_SESSION_COUNT_L90D', 'EARLY_MORNING_COUNT_RATIO_LIFETIME',
    'CANCEL_COUNT_LIFETIME', 'HIGH_QUALITY_DELIVERY_COUNT_LIFETIME'
]

# Categorical features (intersection of top30_features and cat_features from training)
# These are the categorical features from the training that are present in FINAL_FEATURES
CATEGORICAL_FEATURES = [
    'DEFECT_CATEGORY', 'SH_FIRST_REPORT_ISSUE', 'SH_LATEST_REPORT_ISSUE',
    'DEFAULT_ZIP_CODE', 'IS_TOP_95_PERCENT_VP', 'AVG_SPEND_LIFETIME_CATEGORY',
    'AVG_VP_LIFETIME_CATEGORY', 'MOST_FREQ_MTO_ISSUE', 'LATEST_MTO_ISSUE', 'SUBMIT_PLATFORM'
]

# Numeric features (all FINAL_FEATURES except CATEGORICAL_FEATURES)
NUMERIC_FEATURES = [f for f in FINAL_FEATURES if f not in CATEGORICAL_FEATURES]

print(f"Total features: {len(FINAL_FEATURES)}")
print(f"Categorical features: {len(CATEGORICAL_FEATURES)}")
print(f"Numeric features: {len(NUMERIC_FEATURES)}")


Total features: 70
Categorical features: 10
Numeric features: 60


## 3. Load Dataset


In [3]:
def load_dataset(path):
    """
    Load dataset from CSV file with appropriate dtype handling.
    
    Args:
        path: Path to the CSV file
        
    Returns:
        pd.DataFrame: Loaded dataset
    """
    print(f"Loading dataset from: {path}")
    df = pd.read_csv(path, low_memory=False)
    
    # Convert DELIVERY_ID to float64 for consistency
    if 'DELIVERY_ID' in df.columns:
        df['DELIVERY_ID'] = df['DELIVERY_ID'].astype(np.float64)
    
    print(f"Dataset shape: {df.shape}")
    print(f"Columns: {len(df.columns)}")
    
    return df

# Load the dataset
df = load_dataset(DATASET_PATH)
df.head()


Loading dataset from: /Users/shekhar.tanwar/Documents/Projects/NegotiatonAgent/dataset/processed_dataset/dataset.csv
Dataset shape: (1414041, 195)
Columns: 195


Unnamed: 0.1,Unnamed: 0,DELIVERY_ID,CX_ID,STORE_ID,DEFECT_DATE,DEFECT_CATEGORY,DEFECT_TIMESTAMP_UTC,IS_DP_CX,IS_ELITE_CX,IS_WHALE_CX,...,IS_ESCALATED,ESCALATION_TIME,AGENT_ISSUED_AC,CHATBOT_ISSUED_AC,SH_ISSUED_AC,CONVERSATION,AGENT_ISSUED_AC_FLAG,ACTUAL_AC_CONVERSATION,Parsed_AC,CONVERSATION_HUMAN_AGENT
0,0,217985700000.0,1949223925,29736287,2025-09-05,Unknown or Unspecified Issue,2025-09-05 18:10:40.536000000,1,0,0,...,1,2025-09-05 18:14:48.000000000,-1.0,-1.0,-1.0,"Chatbot: Hi Deepak, I'm your DoorDash virtual ...",0,0,0.0,1
1,2,356023100000.0,1884080059,1130627,2025-09-03,Unknown or Unspecified Issue,2025-09-03 17:18:12.930000000,0,0,0,...,1,2025-09-03 19:59:40.000000000,-1.0,-1.0,-1.0,"Chatbot: Hi aj, I'm your DoorDash virtual assi...",0,0,0.0,1
2,11,262182600000.0,76381204,1130627,2025-09-03,Missing or Incorrect Items,2025-09-03 23:13:03.000000000,1,0,0,...,1,2025-09-03 23:13:03.000000000,-1.0,-1.0,-1.0,"Chatbot: Hi Luke, I'm your DoorDash virtual as...",0,0,0.0,1
3,12,276872300000.0,258188252,1130627,2025-09-03,Unknown or Unspecified Issue,2025-09-04 01:42:29.205000000,0,0,0,...,1,2025-09-04 01:43:06.000000000,-1.0,-1.0,-1.0,"Chatbot: Hi Colin, I'm your DoorDash virtual a...",0,0,0.0,1
4,43,318424200000.0,1965318695,281439,2025-09-06,Unknown or Unspecified Issue,2025-09-06 17:12:46.960000000,1,0,0,...,1,2025-09-07 14:43:45.988205000,-1.0,-1.0,-1.0,"Chatbot: Hi Cassandra, I'm your DoorDash virtu...",0,0,0.0,1


In [4]:
delivery_ids = df['DELIVERY_ID'].unique().tolist()

In [5]:
# Display basic dataset info
print("=" * 60)
print("DATASET INFO")
print("=" * 60)
print(f"Total rows: {len(df):,}")
print(f"Total columns: {len(df.columns)}")
print(f"\nTarget column distribution ({TARGET_COLUMN}):")
if TARGET_COLUMN in df.columns:
    print(df[TARGET_COLUMN].value_counts())
    print(f"\nEscalation rate: {df[TARGET_COLUMN].mean():.2%}")
else:
    print(f"Warning: Target column '{TARGET_COLUMN}' not found in dataset")


DATASET INFO
Total rows: 1,414,041
Total columns: 195

Target column distribution (IS_ESCALATED):
IS_ESCALATED
0    867967
1    546074
Name: count, dtype: int64

Escalation rate: 38.62%


## 4. Feature Engineering


In [6]:
def create_defect_category_features(df):
    """
    Create binary indicator features from DEFECT_CATEGORY.
    
    Features created:
    - IS_ND: Never Delivered
    - IS_MnI: Missing or Incorrect Items
    - IS_PFQ: Order Quality Issue (Poor Food Quality)
    - IS_OSI: Order Status Inquiry
    - IS_LATE: Delivery Too Late / Early
    - IS_WOD: Wrong Order Received
    
    Args:
        df: Input DataFrame
        
    Returns:
        pd.DataFrame: DataFrame with new features
    """
    df = df.copy()
    
    # IS_ND: Never Delivered
    df['IS_ND'] = (df['DEFECT_CATEGORY'] == 'Never Delivered').astype(int)
    
    # IS_MnI: Missing or Incorrect Items
    df['IS_MnI'] = (df['DEFECT_CATEGORY'] == 'Missing or Incorrect Items').astype(int)
    
    # IS_PFQ: Order Quality Issue (Poor Food Quality)
    df['IS_PFQ'] = (df['DEFECT_CATEGORY'] == 'Order Quality Issue').astype(int)
    
    # IS_OSI: Order Status Inquiry
    df['IS_OSI'] = (df['DEFECT_CATEGORY'] == 'Order Status Inquiry').astype(int)
    
    # IS_LATE: Delivery Too Late / Early
    df['IS_LATE'] = (df['DEFECT_CATEGORY'] == 'Delivery Too Late / Early').astype(int)
    
    # IS_WOD: Wrong Order Received
    df['IS_WOD'] = (df['DEFECT_CATEGORY'] == 'Wrong Order Received').astype(int)
    
    print("Created defect category features:")
    print(f"  - IS_ND (Never Delivered): {df['IS_ND'].sum():,} cases")
    print(f"  - IS_MnI (Missing/Incorrect): {df['IS_MnI'].sum():,} cases")
    print(f"  - IS_PFQ (Quality Issue): {df['IS_PFQ'].sum():,} cases")
    print(f"  - IS_OSI (Status Inquiry): {df['IS_OSI'].sum():,} cases")
    print(f"  - IS_LATE (Late/Early): {df['IS_LATE'].sum():,} cases")
    print(f"  - IS_WOD (Wrong Order): {df['IS_WOD'].sum():,} cases")
    
    return df


# Apply feature engineering
df = create_defect_category_features(df)


Created defect category features:
  - IS_ND (Never Delivered): 39,357 cases
  - IS_MnI (Missing/Incorrect): 78,908 cases
  - IS_PFQ (Quality Issue): 51,102 cases
  - IS_OSI (Status Inquiry): 155,767 cases
  - IS_LATE (Late/Early): 23,334 cases
  - IS_WOD (Wrong Order): 13,120 cases


In [7]:
def handle_missing_values(df, features):
    """
    Handle missing values for the specified features.
    
    Strategy:
    - Numeric features: Fill with 0 (or median if preferred)
    - Categorical features: Fill with 'Unknown' or mode
    
    Args:
        df: Input DataFrame
        features: List of feature columns to process
        
    Returns:
        pd.DataFrame: DataFrame with handled missing values
    """
    df = df.copy()
    
    # Check for missing features
    missing_features = [f for f in features if f not in df.columns]
    if missing_features:
        print(f"Warning: The following features are missing from the dataset:")
        for f in missing_features:
            print(f"  - {f}")
    
    # Get available features
    available_features = [f for f in features if f in df.columns]
    
    print(f"\nHandling missing values for {len(available_features)} features...")
    
    null_filled_count = 0
    for col in available_features:
        null_count = df[col].isnull().sum()
        if null_count > 0:
            null_pct = null_count / len(df) * 100
            null_filled_count += 1
            
            if col in CATEGORICAL_FEATURES:
                # For categorical: fill with 'Unknown'
                df[col] = df[col].fillna('Unknown')
                print(f"  {col}: {null_count:,} nulls ({null_pct:.1f}%) -> filled with 'Unknown'")
            else:
                # For numeric: fill with 0
                df[col] = df[col].fillna(0)
                print(f"  {col}: {null_count:,} nulls ({null_pct:.1f}%) -> filled with 0")
    
    if null_filled_count == 0:
        print("  No missing values found in the feature columns!")
    
    return df

# Handle missing values
df = handle_missing_values(df, FINAL_FEATURES)



Handling missing values for 70 features...
  SH_IS_CREDITS: 1,204,982 nulls (85.2%) -> filled with 0
  SH_IS_REFUND: 1,204,982 nulls (85.2%) -> filled with 0
  SH_IS_REDELIVERY: 1,204,982 nulls (85.2%) -> filled with 0
  SH_FIRST_REPORT_ISSUE: 1,204,982 nulls (85.2%) -> filled with 'Unknown'
  SH_LATEST_REPORT_ISSUE: 1,204,982 nulls (85.2%) -> filled with 'Unknown'
  MTO_ORDER_COUNT_L90D: 476,493 nulls (33.7%) -> filled with 0
  FRAUD_CNR_REQUEST_RATIO_L60D: 10,609 nulls (0.8%) -> filled with 0
  MTO_ORDER_COUNT_L12M: 476,493 nulls (33.7%) -> filled with 0
  SH_IS_REJET: 1,204,982 nulls (85.2%) -> filled with 0
  FRAUD_CNR_APPROVED_REQUESTS_COUNT_L60D: 9,625 nulls (0.7%) -> filled with 0
  AVG_VP_LIFETIME: 5,248 nulls (0.4%) -> filled with 0
  MTO_ORDER_COUNT_L28D: 476,493 nulls (33.7%) -> filled with 0
  FRAUD_CNR_AMOUNT_L60D: 60,099 nulls (4.3%) -> filled with 0
  ACTUAL_VP_RAW_AMT_L12M: 7,455 nulls (0.5%) -> filled with 0
  MTO_ORDER_COUNT_LIFETIME: 476,493 nulls (33.7%) -> filled 

## 5. Data Type Conversions


In [8]:
def convert_feature_dtypes(df, categorical_features, numeric_features):
    """
    Convert features to appropriate data types for LightGBM.
    
    NOTE: For LightGBM Booster objects (not LGBMClassifier), we need to be careful
    with categorical features. The categories must match exactly what was seen 
    during training. To avoid category mismatch errors, we keep categorical 
    features as strings and let LightGBM handle them during prediction.
    
    Args:
        df: Input DataFrame
        categorical_features: List of categorical feature names
        numeric_features: List of numeric feature names
        
    Returns:
        pd.DataFrame: DataFrame with converted dtypes
    """
    df = df.copy()
    
    # Convert categorical features to string (NOT pandas category)
    # This avoids "train and valid dataset categorical_feature do not match" error
    # when using LightGBM Booster objects
    available_cat = [f for f in categorical_features if f in df.columns]
    for col in available_cat:
        df[col] = df[col].astype(str)
    print(f"Converted {len(available_cat)} categorical features to 'str' dtype")
    
    # Convert numeric features
    available_num = [f for f in numeric_features if f in df.columns]
    for col in available_num:
        df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0).astype('float64')
    print(f"Converted {len(available_num)} numeric features to 'float64' dtype")
    
    return df

# Convert data types
df = convert_feature_dtypes(df, CATEGORICAL_FEATURES, NUMERIC_FEATURES)


Converted 10 categorical features to 'str' dtype
Converted 60 numeric features to 'float64' dtype


## 6. Log Transformations (For Skewed Features)


In [9]:
def identify_skewed_features(df, numeric_features, skew_threshold=1.0):
    """
    Identify highly skewed numeric features.
    
    Args:
        df: Input DataFrame
        numeric_features: List of numeric feature names
        skew_threshold: Threshold for considering a feature as skewed
        
    Returns:
        tuple: (skewed_cols, negative_cols)
    """
    available_num = [f for f in numeric_features if f in df.columns]
    
    # Calculate skewness
    skew_values = df[available_num].skew()
    
    # Identify skewed columns
    skewed_cols = skew_values[skew_values > skew_threshold].index.tolist()
    
    # Identify columns with negative values (need special handling)
    negative_cols = []
    for col in skewed_cols:
        if (df[col] < 0).any():
            negative_cols.append(col)
    
    # Remove negative columns from skewed_cols for standard log1p
    skewed_cols = [col for col in skewed_cols if col not in negative_cols]
    
    print(f"Identified {len(skewed_cols)} skewed features for log1p transformation")
    print(f"Identified {len(negative_cols)} features with negative values (sign-preserving log)")
    
    return skewed_cols, negative_cols

# Identify skewed features
skewed_cols, negative_cols = identify_skewed_features(df, NUMERIC_FEATURES)


Identified 52 skewed features for log1p transformation
Identified 5 features with negative values (sign-preserving log)


In [10]:
def apply_log_transformations(df, skewed_cols, negative_cols):
    """
    Apply log transformations to skewed features.
    
    - For non-negative skewed columns: log1p transformation
    - For columns with negative values: sign-preserving log transformation
    
    Args:
        df: Input DataFrame
        skewed_cols: Columns for standard log1p
        negative_cols: Columns needing sign-preserving transformation
        
    Returns:
        pd.DataFrame: Transformed DataFrame
    """
    df = df.copy()
    
    print("Applying log transformations...")
    
    # Standard log1p for non-negative skewed columns
    transformed_count = 0
    for col in skewed_cols:
        if col in df.columns:
            df[col] = np.log1p(df[col].astype(float))
            transformed_count += 1
    print(f"  Applied log1p to {transformed_count} features")
    
    # Sign-preserving log for columns with negative values
    sign_transformed_count = 0
    for col in negative_cols:
        if col in df.columns:
            df[col] = np.sign(df[col]) * np.log1p(np.abs(df[col].astype(float)))
            sign_transformed_count += 1
    print(f"  Applied sign-preserving log to {sign_transformed_count} features")
    
    return df

# Apply log transformations
df.to_csv('/Users/shekhar.tanwar/Documents/Projects/NegotiatonAgent/dataset/processed_dataset/dataset_non_log.csv')
df_transformed = apply_log_transformations(df, skewed_cols, negative_cols)


Applying log transformations...
  Applied log1p to 52 features
  Applied sign-preserving log to 5 features


## 7. Feature Validation


In [25]:
def validate_features(df, required_features):
    """
    Validate that all required features are present in the DataFrame.
    
    Args:
        df: Input DataFrame
        required_features: List of required feature names
        
    Returns:
        tuple: (available_features, missing_features)
    """
    available = [f for f in required_features if f in df.columns]
    missing = [f for f in required_features if f not in df.columns]
    
    print("=" * 60)
    print("FEATURE VALIDATION REPORT")
    print("=" * 60)
    print(f"Required features: {len(required_features)}")
    print(f"Available features: {len(available)}")
    print(f"Missing features: {len(missing)}")
    
    if missing:
        print(f"\nMissing features:")
        for f in missing:
            print(f"  - {f}")
    else:
        print("\n✓ All required features are available!")
    
    return available, missing

# Validate features
available_features, missing_features = validate_features(df_transformed, FINAL_FEATURES)


FEATURE VALIDATION REPORT
Required features: 70
Available features: 70
Missing features: 0

✓ All required features are available!


In [26]:
# Display feature statistics
print("\nFeature Statistics (Sample of first 10 features):")
print("=" * 60)
sample_features = available_features[:10]  # Show first 10 features
df_transformed[sample_features].describe()



Feature Statistics (Sample of first 10 features):


Unnamed: 0,SH_CNR,SH_IS_CREDITS,SH_IS_REFUND,SH_IS_REDELIVERY,IS_MnI,MTO_ORDER_COUNT_L90D,FRAUD_CNR_REQUEST_RATIO_L60D
count,1414041.0,1414041.0,1414041.0,1414041.0,1414041.0,1414041.0,1414041.0
mean,-0.2739978,0.07704495,0.02217811,0.003255344,0.03867982,0.3799907,0.1169246
std,1.048549,0.2178706,0.121987,0.04739026,0.1591059,0.7019965,0.1422012
min,-0.6931472,0.0,0.0,0.0,0.0,0.0,0.0
25%,-0.6931472,0.0,0.0,0.0,0.0,0.0,0.0
50%,-0.6931472,0.0,0.0,0.0,0.0,0.0,0.07550755
75%,-0.6931472,0.0,0.0,0.0,0.0,0.6931472,0.1670541
max,5.809883,0.6931472,0.6931472,0.6931472,0.6931472,5.384495,1.403994


## 8. Load Pre-trained Model


In [27]:
def load_model(model_path):
    """
    Load pre-trained LightGBM model from pickle file.
    
    Args:
        model_path: Path to the pickle file
        
    Returns:
        Loaded model object
    """
    print(f"Loading model from: {model_path}")
    
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model file not found: {model_path}")
    
    with open(model_path, 'rb') as f:
        model = pickle.load(f)
    
    print(f"Model loaded successfully!")
    print(f"Model type: {type(model).__name__}")
    
    return model

# Load the model
model = load_model(MODEL_PATH)


Loading model from: /Users/shekhar.tanwar/Documents/Projects/NegotiatonAgent/model/v1_lgb_1125_fold3.pkl
Model loaded successfully!
Model type: Booster


In [28]:
# Display model information
print("\nModel Information:")
print("=" * 60)

# Try to get model features (depends on model type)
try:
    if hasattr(model, 'feature_name_'):
        model_features = model.feature_name_
        print(f"Model expects {len(model_features)} features")
        print(f"First 10 features: {model_features[:10]}")
    elif hasattr(model, 'feature_name'):
        model_features = model.feature_name()
        print(f"Model expects {len(model_features)} features")
        print(f"First 10 features: {model_features[:10]}")
    else:
        print("Could not retrieve model feature names directly")
        print("Will use FINAL_FEATURES configuration")
except Exception as e:
    print(f"Error getting model info: {e}")



Model Information:
Model expects 48 features
First 10 features: ['SH_LATEST_REPORT_ISSUE', 'TOTAL_ITEM_COUNT', 'SH_FIRST_REPORT_ISSUE', 'PAYMENT_METHOD', 'DEFECT_CATEGORY', 'SH_CNR', 'FRAUD_CNR_APPROVED_REQUESTS_COUNT_L60D', 'SH_IS_CREDITS', 'SUBTOTAL', 'TIP']


In [29]:
# Inspect the model to understand its structure
print("=" * 60)
print("MODEL INSPECTION")
print("=" * 60)

# Get feature names from the model
model_feature_names = model.feature_name()
print(f"\nModel was trained with {len(model_feature_names)} features")

# Check if our features match the model features
model_feature_set = set(model_feature_names)
our_feature_set = set(FINAL_FEATURES)

matching = model_feature_set.intersection(our_feature_set)
in_model_not_ours = model_feature_set - our_feature_set
in_ours_not_model = our_feature_set - model_feature_set

print(f"\nFeature comparison:")
print(f"  - Features matching: {len(matching)}")
print(f"  - In model but not in our list: {len(in_model_not_ours)}")
print(f"  - In our list but not in model: {len(in_ours_not_model)}")

if in_model_not_ours:
    print(f"\nFeatures in model but not in our FINAL_FEATURES:")
    for f in sorted(in_model_not_ours):
        print(f"  - {f}")

if in_ours_not_model:
    print(f"\nFeatures in our FINAL_FEATURES but not in model:")
    for f in sorted(in_ours_not_model):
        print(f"  - {f}")

# IMPORTANT: Use the model's feature list for prediction
print(f"\n{'='*60}")
print("USING MODEL'S FEATURE LIST FOR PREDICTION")
print("="*60)
available_features = model_feature_names
print(f"Will use these {len(available_features)} features in exact order from model")


MODEL INSPECTION

Model was trained with 48 features

Feature comparison:
  - Features matching: 34
  - In model but not in our list: 14
  - In our list but not in model: 36

Features in model but not in our FINAL_FEATURES:
  - AVG_TIP_LIFETIME
  - CREDIT_REFUND_ORDER_COST_LIFETIME
  - FRAUD_CNR_REQUEST_RATIO_LD_L180D
  - FREQ_CATEGORY
  - IF_DASHER_MANUAL_ASSIGNED
  - IS_LATE
  - IS_NON_ABUSER
  - IS_WHALE_CX
  - ORDER_COUNT_L7D
  - ORDER_TIME_OF_DAY
  - PAYMENT_METHOD
  - SH_CNR_RATIO
  - TIP
  - TIP_PERCENTAGE

Features in our FINAL_FEATURES but not in model:
  - ACTUAL_DELIVERIES_COUNT_L12M
  - AVG_GOV_LIFETIME
  - AVG_SPEND_LIFETIME_CATEGORY
  - AVG_VP_LIFETIME
  - AVG_VP_LIFETIME_CATEGORY
  - CANCEL_COUNT_LIFETIME
  - CREDIT_REFUND_ORDER_COUNT_L12M
  - CREDIT_REFUND_ORDER_COUNT_L28D
  - CREDIT_REFUND_ORDER_COUNT_L90D
  - CREDIT_REFUND_ORDER_COUNT_LIFETIME
  - DEFECT_DELIVERY_COUNT_L12M
  - DEFECT_DELIVERY_COUNT_L90D
  - DEFECT_DELIVERY_COUNT_LIFETIME
  - EARLY_MORNING_COUNT_RATIO

## 9. Generate Predictions


In [30]:
def generate_predictions(model, df, features, categorical_features):
    """
    Generate predictions using the trained LightGBM Booster model.
    
    The key to avoiding "train and valid dataset categorical_feature do not match" error
    is to convert the DataFrame to a numpy array. LightGBM Booster objects store
    categorical feature information from training, and when you pass a pandas DataFrame,
    it checks if the categories match. By converting to numpy array, we bypass this check.
    
    Args:
        model: Trained LightGBM model (Booster object)
        df: DataFrame with features
        features: List of feature names to use (in exact order expected by model)
        categorical_features: List of categorical feature names
        
    Returns:
        np.array: Prediction probabilities
    """
    from sklearn.preprocessing import LabelEncoder
    
    # Check which features are available
    available = [f for f in features if f in df.columns]
    missing = [f for f in features if f not in df.columns]
    
    print(f"Required features: {len(features)}")
    print(f"Available features: {len(available)}")
    
    if missing:
        print(f"\nWARNING: Missing {len(missing)} features!")
        print("Missing features:", missing)
        print("\nCreating missing features with default values...")
        for f in missing:
            if f in categorical_features:
                df[f] = 'Unknown'
            else:
                df[f] = 0.0
        available = features  # Now all features should be available
    
    # Prepare feature matrix - IMPORTANT: maintain exact column order from model
    X = df[features].copy()
    
    # Encode categorical features as numeric (required for numpy array approach)
    label_encoders = {}
    cat_cols_in_features = [c for c in features if c in categorical_features]
    
    for col in features:
        if col in categorical_features:
            le = LabelEncoder()
            X[col] = X[col].fillna('Unknown').astype(str)
            X[col] = le.fit_transform(X[col])
            label_encoders[col] = le
        else:
            X[col] = pd.to_numeric(X[col], errors='coerce').fillna(0).astype(np.float64)
    
    print(f"\nGenerating predictions for {len(X):,} samples...")
    print(f"Using {len(features)} features in model's expected order")
    print(f"Encoded {len(label_encoders)} categorical features as numeric")
    
    # Convert to numpy array - this bypasses pandas categorical mismatch issues
    X_array = X.values.astype(np.float64)
    
    # Generate predictions
    pred_probs = model.predict(X_array)
    
    print(f"\n✓ Predictions generated successfully!")
    print(f"Prediction range: [{pred_probs.min():.4f}, {pred_probs.max():.4f}]")
    print(f"Mean prediction: {pred_probs.mean():.4f}")
    
    return pred_probs

# Generate predictions using the model's exact feature list
predictions = generate_predictions(model, df_transformed, available_features, CATEGORICAL_FEATURES)


Required features: 48
Available features: 47

Missing features: ['SH_CNR_RATIO']

Creating missing features with default values...

Generating predictions for 1,414,041 samples...
Using 48 features in model's expected order
Encoded 7 categorical features as numeric

✓ Predictions generated successfully!
Prediction range: [0.0016, 0.8534]
Mean prediction: 0.1323


In [31]:
# Add predictions to DataFrame
df_transformed['PREDICTED_ESCALATION_PROB'] = predictions
df_transformed['PREDICTED_ESCALATION'] = (predictions >= 0.5).astype(int)

print("\nPrediction Distribution:")
print(df_transformed['PREDICTED_ESCALATION'].value_counts())
print(f"\nPredicted escalation rate: {df_transformed['PREDICTED_ESCALATION'].mean():.2%}")



Prediction Distribution:
PREDICTED_ESCALATION
0    1400910
1      13131
Name: count, dtype: int64

Predicted escalation rate: 0.93%


In [38]:
df_transformed['DELIVERY_ID'] = delivery_ids
df_transformed[['DELIVERY_ID']+ FINAL_FEATURES + ['PREDICTED_ESCALATION_PROB','PREDICTED_ESCALATION']].to_csv('/Users/shekhar.tanwar/Documents/Projects/NegotiatonAgent/dataset/processed_dataset/dataset_predictions.csv')

In [39]:
df_transformed.head()

Unnamed: 0.1,Unnamed: 0,DELIVERY_ID,CX_ID,STORE_ID,DEFECT_DATE,DEFECT_CATEGORY,DEFECT_TIMESTAMP_UTC,IS_DP_CX,IS_ELITE_CX,IS_WHALE_CX,...,CONVERSATION_HUMAN_AGENT,IS_ND,IS_MnI,IS_PFQ,IS_OSI,IS_LATE,IS_WOD,SH_CNR_RATIO,PREDICTED_ESCALATION_PROB,PREDICTED_ESCALATION
0,0,217985700000.0,1949223925,29736287,2025-09-05,Unknown or Unspecified Issue,2025-09-05 18:10:40.536000000,1,0.0,0,...,1,0.0,0.0,0.0,0.0,0,0,0.0,0.102361,0
1,2,356023100000.0,1884080059,1130627,2025-09-03,Unknown or Unspecified Issue,2025-09-03 17:18:12.930000000,0,0.0,0,...,1,0.0,0.0,0.0,0.0,0,0,0.0,0.008399,0
2,11,262182600000.0,76381204,1130627,2025-09-03,Missing or Incorrect Items,2025-09-03 23:13:03.000000000,1,0.0,0,...,1,0.0,0.693147,0.0,0.0,0,0,0.0,0.349462,0
3,12,276872300000.0,258188252,1130627,2025-09-03,Unknown or Unspecified Issue,2025-09-04 01:42:29.205000000,0,0.0,0,...,1,0.0,0.0,0.0,0.0,0,0,0.0,0.198509,0
4,43,318424200000.0,1965318695,281439,2025-09-06,Unknown or Unspecified Issue,2025-09-06 17:12:46.960000000,1,0.0,0,...,1,0.0,0.0,0.0,0.0,0,0,0.0,0.18531,0


In [None]:
#df_transformed[['DELIVERY_ID','PREDICTED_ESCALATION']].to_csv('/Users/shekhar.tanwar/Documents/Projects/NegotiatonAgent/dataset/processed_dataset/dataset_predictions.csv')

## 10. Model Evaluation (If Labels Available)


In [None]:
def evaluate_model(df, target_col, pred_prob_col):
    """
    Evaluate model performance if true labels are available.
    
    Args:
        df: DataFrame with predictions and labels
        target_col: Name of the target column
        pred_prob_col: Name of the prediction probability column
        
    Returns:
        dict: Performance metrics
    """
    if target_col not in df.columns:
        print(f"Target column '{target_col}' not found. Skipping evaluation.")
        return None
    
    # Get valid samples (non-null labels)
    valid_mask = df[target_col].notna()
    y_true = df.loc[valid_mask, target_col].astype(int)
    y_pred_prob = df.loc[valid_mask, pred_prob_col]
    
    print(f"Evaluating on {len(y_true):,} samples with valid labels")
    print("=" * 60)
    
    # ROC AUC
    roc_auc = roc_auc_score(y_true, y_pred_prob)
    print(f"ROC AUC: {roc_auc:.4f}")
    
    # PR AUC
    precision, recall, _ = precision_recall_curve(y_true, y_pred_prob)
    pr_auc = auc(recall, precision)
    print(f"PR AUC: {pr_auc:.4f}")
    
    # Metrics at different thresholds
    print("\nPrecision & Recall at different thresholds:")
    threshold_list = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
    results = []
    
    for th in threshold_list:
        y_pred = (y_pred_prob >= th).astype(int)
        tp = ((y_pred == 1) & (y_true == 1)).sum()
        fp = ((y_pred == 1) & (y_true == 0)).sum()
        fn = ((y_pred == 0) & (y_true == 1)).sum()
        
        precision_val = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall_val = tp / (tp + fn) if (tp + fn) > 0 else 0
        
        results.append({
            'threshold': th,
            'precision': round(precision_val, 4),
            'recall': round(recall_val, 4)
        })
    
    metrics_df = pd.DataFrame(results)
    print(metrics_df.to_string(index=False))
    
    return {
        'roc_auc': roc_auc,
        'pr_auc': pr_auc,
        'threshold_metrics': results
    }

# Evaluate model
metrics = evaluate_model(df_transformed, TARGET_COLUMN, 'PREDICTED_ESCALATION_PROB')


In [None]:
# Plot ROC and PR curves
if TARGET_COLUMN in df_transformed.columns:
    valid_mask = df_transformed[TARGET_COLUMN].notna()
    y_true = df_transformed.loc[valid_mask, TARGET_COLUMN].astype(int)
    y_pred_prob = df_transformed.loc[valid_mask, 'PREDICTED_ESCALATION_PROB']
    
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    
    # ROC Curve
    fpr, tpr, _ = roc_curve(y_true, y_pred_prob)
    roc_auc = roc_auc_score(y_true, y_pred_prob)
    
    axes[0].plot(fpr, tpr, 'b-', linewidth=2, label=f'ROC (AUC = {roc_auc:.3f})')
    axes[0].plot([0, 1], [0, 1], 'r--', linewidth=1, label='Random')
    axes[0].set_xlabel('False Positive Rate', fontsize=12)
    axes[0].set_ylabel('True Positive Rate', fontsize=12)
    axes[0].set_title('ROC Curve', fontsize=14)
    axes[0].legend(loc='lower right')
    axes[0].grid(True, alpha=0.3)
    
    # PR Curve
    precision, recall, _ = precision_recall_curve(y_true, y_pred_prob)
    pr_auc = auc(recall, precision)
    
    axes[1].plot(recall, precision, 'b-', linewidth=2, label=f'PR (AUC = {pr_auc:.3f})')
    axes[1].set_xlabel('Recall', fontsize=12)
    axes[1].set_ylabel('Precision', fontsize=12)
    axes[1].set_title('Precision-Recall Curve', fontsize=14)
    axes[1].legend(loc='lower left')
    axes[1].grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()
else:
    print("Target column not available. Skipping curve plots.")


In [None]:
# Plot prediction probability distribution
if TARGET_COLUMN in df_transformed.columns:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    
    valid_mask = df_transformed[TARGET_COLUMN].notna()
    df_valid = df_transformed[valid_mask]
    
    # Histogram by class
    for label, color in [(0, 'blue'), (1, 'red')]:
        subset = df_valid[df_valid[TARGET_COLUMN] == label]['PREDICTED_ESCALATION_PROB']
        axes[0].hist(subset, bins=50, alpha=0.6, color=color, 
                     label=f'Class {label}', density=True)
    
    axes[0].set_xlabel('Predicted Probability', fontsize=12)
    axes[0].set_ylabel('Density', fontsize=12)
    axes[0].set_title('Prediction Probability Distribution', fontsize=14)
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)
    
    # Box plot by class
    df_valid.boxplot(column='PREDICTED_ESCALATION_PROB', by=TARGET_COLUMN, ax=axes[1])
    axes[1].set_xlabel('Actual Label', fontsize=12)
    axes[1].set_ylabel('Predicted Probability', fontsize=12)
    axes[1].set_title('Prediction Distribution by Class', fontsize=14)
    plt.suptitle('')  # Remove automatic title
    
    plt.tight_layout()
    plt.show()
else:
    # Just show overall distribution
    plt.figure(figsize=(8, 5))
    plt.hist(df_transformed['PREDICTED_ESCALATION_PROB'], bins=50, alpha=0.7, color='blue')
    plt.xlabel('Predicted Probability', fontsize=12)
    plt.ylabel('Count', fontsize=12)
    plt.title('Prediction Probability Distribution', fontsize=14)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()


## 11. Results Summary


In [None]:
# Display final results summary
print("=" * 60)
print("ESCALATION MODEL PIPELINE - RESULTS SUMMARY")
print("=" * 60)
print(f"\nDataset: {DATASET_PATH}")
print(f"Model: {MODEL_PATH}")
print(f"\nTotal samples processed: {len(df_transformed):,}")
print(f"Features used: {len(available_features)}")

print(f"\nPrediction Statistics:")
print(f"  - Mean probability: {df_transformed['PREDICTED_ESCALATION_PROB'].mean():.4f}")
print(f"  - Median probability: {df_transformed['PREDICTED_ESCALATION_PROB'].median():.4f}")
print(f"  - Std deviation: {df_transformed['PREDICTED_ESCALATION_PROB'].std():.4f}")
print(f"  - Min: {df_transformed['PREDICTED_ESCALATION_PROB'].min():.4f}")
print(f"  - Max: {df_transformed['PREDICTED_ESCALATION_PROB'].max():.4f}")

print(f"\nPredicted Escalations (threshold=0.5):")
print(f"  - Escalated: {(df_transformed['PREDICTED_ESCALATION'] == 1).sum():,}")
print(f"  - Not Escalated: {(df_transformed['PREDICTED_ESCALATION'] == 0).sum():,}")
print(f"  - Predicted escalation rate: {df_transformed['PREDICTED_ESCALATION'].mean():.2%}")

if metrics:
    print(f"\nModel Performance:")
    print(f"  - ROC AUC: {metrics['roc_auc']:.4f}")
    print(f"  - PR AUC: {metrics['pr_auc']:.4f}")


In [None]:
# Display sample predictions
print("\nSample Predictions:")
print("=" * 60)

display_cols = ['DELIVERY_ID', 'DEFECT_CATEGORY', 'PREDICTED_ESCALATION_PROB', 'PREDICTED_ESCALATION']
if TARGET_COLUMN in df_transformed.columns:
    display_cols.append(TARGET_COLUMN)

available_display = [c for c in display_cols if c in df_transformed.columns]
df_transformed[available_display].head(10)


## 12. Export Results (Optional)


In [None]:
# Uncomment to export predictions to CSV
# output_path = '/Users/shekhar.tanwar/Documents/Projects/NegotiatonAgent/dataset/predictions/escalation_predictions.csv'
# 
# export_cols = ['DELIVERY_ID', 'DEFECT_CATEGORY', 'PREDICTED_ESCALATION_PROB', 'PREDICTED_ESCALATION']
# if TARGET_COLUMN in df_transformed.columns:
#     export_cols.append(TARGET_COLUMN)
# 
# # Create output directory if it doesn't exist
# os.makedirs(os.path.dirname(output_path), exist_ok=True)
# 
# df_transformed[export_cols].to_csv(output_path, index=False)
# print(f"Predictions exported to: {output_path}")


---

## Pipeline Complete!

This notebook has:
1. ✅ Loaded the dataset from CSV
2. ✅ Created derived features (IS_ND, IS_MnI, IS_PFQ, IS_OSI, etc.)
3. ✅ Handled missing values
4. ✅ Converted data types for LightGBM
5. ✅ Applied log transformations to skewed features
6. ✅ Loaded the pre-trained model (v1_lgb_1125_fold3.pkl)
7. ✅ Generated escalation predictions
8. ✅ Evaluated model performance (if labels available)
9. ✅ Visualized results
