# Modeling and interpretability

In [4]:
from sklearn.compose import ColumnTransformer 
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import numpy as np
import plotly.express as px

In [8]:
import numpy as np
import pandas as pd

# Load the raw bookings table. The encoding is passed explicitly
# (presumably the file is not valid UTF-8 -- confirm against the data source).
# read_csv already returns a fresh DataFrame, so no extra .copy() is needed.
df = pd.read_csv("customer_booking.csv", encoding="ISO-8859-1")

# Normalise header whitespace *before* any column is looked up by name, so a
# stray space in a CSV header cannot trigger a KeyError in the lines below.
df.columns = df.columns.str.strip()

# Lead bins (create as 'lead_bin' to match cat_cols).
# pd.cut uses right-closed intervals by default: (-inf, 3], (3, 7], (7, 14],
# (14, 30], (30, 90], (90, inf) -- so the '90+' label means strictly above 90.
# Negative values are clipped to 0 first and therefore land in '0-3'.
lead_bins = [-np.inf, 3, 7, 14, 30, 90, np.inf]
lead_bins_label = ['0-3', '4-7', '8-14', '15-30', '31-90', '90+']
df['lead_bin'] = pd.cut(
    df['purchase_lead'].clip(lower=0),
    bins=lead_bins,
    labels=lead_bins_label,
    include_lowest=True,
)

# Length of stay bins (already named 'stay_bin'); same edges and labels as the
# lead bins, kept as separate lists so either can be tuned independently.
stay_bins = [-np.inf, 3, 7, 14, 30, 90, np.inf]
stay_bins_label = ['0-3', '4-7', '8-14', '15-30', '31-90', '90+']
df['stay_bin'] = pd.cut(
    df['length_of_stay'].clip(lower=0),
    bins=stay_bins,
    labels=stay_bins_label,
    include_lowest=True,
)

# Part of day (consistent label case helps later one-hot)
def day_part(h):
    """Map an hour of the day to a coarse part-of-day label.

    Args:
        h: Hour of the day as a number. Values in 0-23 are expected;
            fractional hours are accepted.

    Returns:
        'Night' for [0, 6), 'Morning' for [6, 12), 'Afternoon' for [12, 18),
        and 'Evening' for everything else. "Everything else" includes
        hours >= 18 as well as negative or NaN inputs, which fail every
        range comparison.
    """
    # Half-open intervals leave no gaps between buckets, so a fractional hour
    # such as 5.5 is labelled 'Night' instead of falling through to 'Evening'.
    if 0 <= h < 6:
        return 'Night'
    if 6 <= h < 12:
        return 'Morning'
    if 12 <= h < 18:
        return 'Afternoon'
    return 'Evening'
# Strip stray whitespace from the headers before any column is looked up by
# name in this cell; doing it after the lookups would be too late to help.
df.columns = df.columns.str.strip()

# Derive the part-of-day label from the hour column.
df['daypart'] = df['flight_hour'].apply(day_part)

target = 'booking_complete'

# Columns treated as categorical (binned, derived and raw categorical fields).
cat_cols = [
    'lead_bin', 'stay_bin', 'daypart', 'sales_channel', 'trip_type', 'flight_day',
    'route', 'booking_origin', 'wants_extra_baggage', 'wants_preferred_seat',
    'wants_in_flight_meals',
]
# Columns passed through as numeric features.
num_cols = ['purchase_lead', 'length_of_stay', 'flight_hour', 'flight_duration', 'num_passengers']

# Fail early with one descriptive error listing every absent column. The
# target is included so a missing label is reported the same way as a
# missing feature.
missing = [c for c in cat_cols + num_cols + [target] if c not in df.columns]
if missing:
    raise KeyError(f"Missing columns: {missing}")

# Explicit copy so later in-place edits to X neither touch df nor raise
# SettingWithCopyWarning.
X = df[cat_cols + num_cols].copy()
y = df[target].astype(int)


## Train/test split (stratified)

## Pipeline and encoding

## Baseline: Logistic Regression

## Stronger Model: Random Forest/XGBoost

## Threshold Tuning for F1

## Feature Importance (LR + Tree)

## Top 5 Drivers + 1-line interpretations

## Business Actions

## Model Card (short)