In [None]:
import pandas as pd
import plotly.express as px

# Step 1: Read the ride-bookings CSV into a DataFrame
df = pd.read_csv("/content/ncr_ride_bookings.csv")

# Step 2: Clean and prepare data
# 'Revenue' mirrors 'Booking Value', with missing values counted as 0
df['Revenue'] = df['Booking Value'].fillna(0)

# Label missing categories as "Unknown" so the grouping columns hold no NaN
df = df.fillna({
    'Vehicle Type': "Unknown",
    'Pickup Location': "Unknown",
    'Booking Status': "Unknown",
})

# Keep only rows with positive revenue. Rows whose booking value was missing
# were set to 0 above, so they are dropped here as well. The copy makes the
# subset an independent DataFrame.
df_filtered = df.loc[df['Revenue'].gt(0)].copy()

# ---------------------------
# TreeMap Visualization
# ---------------------------
# Two-level hierarchy: vehicle type on the outside, pickup location nested
# inside it. 'Revenue' drives both the tile size and the continuous color.
# NOTE(review): with `path` set, Plotly Express is understood to color each
# sector by the `values`-weighted average of the `color` column rather than
# by its sum -- confirm against the Plotly docs before reading the color bar
# as total revenue.
fig_treemap = px.treemap(
    data_frame=df_filtered,
    title="Treemap of Revenue by Vehicle Type and Pickup Location",
    path=['Vehicle Type', 'Pickup Location'],
    values='Revenue',
    color='Revenue',
    color_continuous_scale='Viridis',
)
fig_treemap.show()

# ---------------------------
# Sunburst Visualization
# ---------------------------
# Inner ring: booking status; outer ring: vehicle type within each status.
# 'Revenue' drives both the sector size and the continuous color.
# NOTE(review): with `path` set, Plotly Express is understood to color each
# sector by the `values`-weighted average of the `color` column rather than
# by its sum -- confirm against the Plotly docs before reading the color bar
# as total revenue.
fig_sunburst = px.sunburst(
    data_frame=df_filtered,
    title="Sunburst of Revenue by Booking Status and Vehicle Type",
    path=['Booking Status', 'Vehicle Type'],
    values='Revenue',
    color='Revenue',
    color_continuous_scale='Plasma',
)
fig_sunburst.show()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the full, unfiltered frame. 'Revenue' has a full count and a lower
# mean than 'Booking Value' because its missing values were replaced with 0
# when the column was derived.
df.describe()

Unnamed: 0,Avg VTAT,Avg CTAT,Cancelled Rides by Customer,Cancelled Rides by Driver,Incomplete Rides,Booking Value,Ride Distance,Driver Ratings,Customer Rating,Revenue
count,139500.0,102000.0,10500.0,27000.0,9000.0,102000.0,102000.0,93000.0,93000.0,150000.0
mean,8.456352,29.149636,1.0,1.0,1.0,508.295912,24.637012,4.230992,4.404584,345.64122
std,3.773564,8.902577,0.0,0.0,0.0,395.805774,14.002138,0.436871,0.437819,403.423487
min,2.0,10.0,1.0,1.0,1.0,50.0,1.0,3.0,3.0,0.0
25%,5.3,21.6,1.0,1.0,1.0,234.0,12.46,4.1,4.2,0.0
50%,8.3,28.8,1.0,1.0,1.0,414.0,23.72,4.3,4.5,244.0
75%,11.3,36.8,1.0,1.0,1.0,689.0,36.82,4.6,4.8,521.0
max,20.0,45.0,1.0,1.0,1.0,4277.0,50.0,5.0,5.0,4277.0
