# User Funnel & Drop-off Analysis

**Dataset:** `../Data/cleaned_ecommerce_dataset.csv`

## Goal:
Analyze how users move through a standard e-commerce conversion funnel:
1) homepage/browse → 2) product_view → 3) add_to_cart → 4) checkout → 5) payment_success/purchase

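**NOTE:** This code detects the user and action columns automatically by searching the column names for keywords and using the first match.
It works even if column names differ (e.g., "event", "page_name", "activity_type", "action"), as long as the name contains one of the expected keywords (such as "action", "activity", "event", "page", or "screen"); if no matching column is found, the analysis stops with an error.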


## 0. IMPORT LIBRARIES


In [None]:
import pandas as pd
import numpy as np


## 1. LOAD DATA AND IDENTIFY ACTION COLUMN


In [None]:
df = pd.read_csv("../Data/cleaned_ecommerce_dataset.csv")


def _columns_mentioning(columns, keywords):
    """Return the columns whose lowercased name contains any keyword, in column order."""
    return [name for name in columns if any(word in name.lower() for word in keywords)]


# Candidate user-identifier columns; the first candidate (if any) is used
user_candidates = _columns_mentioning(df.columns, ("user", "customer", "client", "buyer"))
user_col = next(iter(user_candidates), None)

# Candidate action/event/activity columns; again the first candidate wins
action_candidates = _columns_mentioning(
    df.columns,
    ("action", "activity", "event", "page", "screen", "activity_type"),
)
action_col = next(iter(action_candidates), None)

print("Detected user column   :", user_col)
print("Detected action column :", action_col)

# Both columns are required by the later cells, so stop here if either is missing
if None in (user_col, action_col):
    raise Exception("User or action column not detected. Cannot run funnel analysis.")


## 2. DEFINE FUNNEL STEPS (order matters)


In [None]:
# Ordered funnel definition: each entry is (step name, keywords identifying that step).
# Built from a dict literal, whose insertion order fixes the order of the funnel.
# NOTE(review): "add" is a substring of "address"; if keywords are matched by substring,
# an "address" action would count toward both add_to_cart and checkout — confirm intent.
funnel_steps = list(
    {
        "homepage": ["homepage", "home", "browse"],
        "product_view": ["product_view", "view_product", "view"],
        "add_to_cart": ["add_to_cart", "cart", "add"],
        "checkout": ["checkout", "billing", "address"],
        "payment_success": ["payment_success", "success", "purchase", "order_complete"],
    }.items()
)


## 3. ASSIGN USERS TO FUNNEL STEPS


In [None]:
# Normalise action values to lowercase strings so keyword matching ignores case
df[action_col] = df[action_col].astype(str).str.lower()


def _mentions_any(action, keywords):
    """Return True when at least one keyword occurs as a substring of the action."""
    for keyword in keywords:
        if keyword in action:
            return True
    return False


# Number of distinct users with at least one matching action, per funnel step.
# Each step is counted on its own: a user does not need to have reached the
# earlier steps to be counted at a later one.
step_counts = {}
for step_name, keywords in funnel_steps:
    row_matches = [_mentions_any(action, keywords) for action in df[action_col]]
    step_counts[step_name] = df.loc[row_matches, user_col].nunique()


## 4. CALCULATE DROP-OFFS BETWEEN STEPS


In [None]:
def _drop_between(before, current):
    """Return (drop-off count, drop-off percent) from the previous step to this one.

    ``before`` is None for the first step, which by definition has no drop-off.
    The percent is 0 when the previous step had no users, avoiding a division by zero.
    """
    if before is None:
        return 0, 0
    lost = before - current
    share = (lost / before * 100) if before > 0 else 0
    return lost, share


# Step names and their user counts, in the order the steps were recorded
ordered_steps = list(step_counts)
reached_counts = [step_counts[name] for name in ordered_steps]
# Shift the counts by one so every step is paired with its predecessor's count
preceding_counts = [None] + reached_counts[:-1]

funnel_data = []
for name, reached, before in zip(ordered_steps, reached_counts, preceding_counts):
    # The count can be negative when a later step has more users than the one before it
    lost, share = _drop_between(before, reached)
    funnel_data.append([name, reached, lost, round(share, 2)])

# One row per funnel step
funnel_df = pd.DataFrame(
    funnel_data,
    columns=["step_name", "users_reached", "drop_off_count", "drop_off_percent"],
)

print("\n===== USER FUNNEL ANALYSIS TABLE =====")
print(funnel_df)


## 5. INTERPRET RESULTS


In [None]:
# Identify the step with the highest drop-off %.
# The first row is skipped: it has no predecessor, so its drop-off is always 0.
max_drop_step = funnel_df.loc[1:, :].sort_values("drop_off_percent", ascending=False).iloc[0]

step = max_drop_step['step_name']
drop_pct = max_drop_step['drop_off_percent']

# Commentary per funnel step: (possible reasons, business area to improve).
# Keeping both in one table ensures the two printed sections always refer to the same step.
# The printed symbols were previously mis-encoded (UTF-8 bytes shown as cp1252 text);
# they are restored to the intended "🔹" and "→" characters.
step_insights = {
    "product_view": (
        [
            "- Users are not finding products interesting or relevant.",
            "- Product images/descriptions/pricing may be unclear.",
        ],
        "→ Recommendation system, product detail pages, pricing clarity.",
    ),
    "add_to_cart": (
        [
            "- Users are evaluating but not ready to commit.",
            "- Technical issues or friction in adding items (size, color, availability).",
        ],
        "→ Improve add-to-cart usability and prompts, reassure users of return/refund policy.",
    ),
    "checkout": (
        [
            "- Unexpected charges (shipping/taxes).",
            "- Account login requirement before checkout.",
            "- Complex form filling or missing payment options.",
        ],
        "→ Simplify checkout form, provide multiple fast payment options, reduce hidden costs.",
    ),
    "payment_success": (
        [
            "- Payment gateway failure or slow loading.",
            "- Limited payment modes / OTP errors / transaction timeout.",
        ],
        "→ Optimize payment gateway reliability and reduce transaction friction.",
    ),
}
# A step without an entry prints only the section headings
reasons, improvement_area = step_insights.get(step, ([], None))

print("\n===== INTERPRETATION =====")
print(f"🔹 Highest drop-off happens at step: **{step}** ({drop_pct}% users drop here).")

print("\nPossible reasons for this drop-off:")
for reason in reasons:
    print(reason)

print("\nBusiness Areas that may need improvement:")
if improvement_area is not None:
    print(improvement_area)


## 6. SAVE REPORT


In [None]:
# Destination of the exported funnel table
output_file = "../Data/user_funnel_report.csv"

# Write the table as CSV; index=False leaves the DataFrame's row index out of the file
funnel_df.to_csv(output_file, index=False)

# Confirmation banner followed by the path that was written
print("\n===== FILE EXPORTED SUCCESSFULLY =====", f"Saved as: {output_file}", sep="\n")
