# In [None]:
import pandas as pd

# Load the ride-booking records from the CSV file (path is relative to the
# current working directory).
df = pd.read_csv('ncr_ride_bookings.csv')

# Inspect the data: first rows, then the column/dtype/non-null summary.
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()

import matplotlib.pyplot as plt
import seaborn as sns

# Parse the raw 'Date' column once, store the parsed values back on the frame,
# and derive both month columns from that same parsed Series.
ride_dates = pd.to_datetime(df['Date'])
df['Date'] = ride_dates
df['Month'] = ride_dates.dt.strftime('%b')  # abbreviated month name
df['Month_Num'] = ride_dates.dt.month  # numeric month, used for sorting

# 1. Ride Status Distribution
# Bar chart of the number of rows per 'Booking Status' value, written to
# ride_status_distribution.png.
plt.figure(figsize=(10, 6))
status_counts = df['Booking Status'].value_counts()
# NOTE(review): seaborn >= 0.13 emits a FutureWarning when `palette` is passed
# without `hue`; left unchanged here to stay compatible with older seaborn.
sns.barplot(x=status_counts.index, y=status_counts.values, palette='viridis')
plt.title('Ride Status Distribution')
plt.xlabel('Booking Status')
plt.ylabel('Number of Rides')
plt.xticks(rotation=45)
# Rotated tick labels extend below the default bottom margin; recompute the
# layout so they are not cut off in the saved image.
plt.tight_layout()
plt.savefig('ride_status_distribution.png')
plt.close()

# 2. Bookings by Vehicle Type
# Bar chart of the number of rows per 'Vehicle Type' value, written to
# vehicle_type_popularity.png.
plt.figure(figsize=(10, 6))
# value_counts() already returns the counts in descending order, so no extra
# sort is needed to put the most popular type first.
vehicle_counts = df['Vehicle Type'].value_counts()
# NOTE(review): seaborn >= 0.13 emits a FutureWarning when `palette` is passed
# without `hue`; left unchanged here to stay compatible with older seaborn.
sns.barplot(x=vehicle_counts.index, y=vehicle_counts.values, palette='magma')
plt.title('Most Popular Vehicle Types')
plt.xlabel('Vehicle Type')
plt.ylabel('Number of Bookings')
plt.xticks(rotation=45)
# Rotated tick labels extend below the default bottom margin; recompute the
# layout so they are not cut off in the saved image.
plt.tight_layout()
plt.savefig('vehicle_type_popularity.png')
plt.close()

# 3. Monthly Ride Volume Trend
# Count rows per (Month_Num, Month) pair, flatten the result into a frame with
# a 'Counts' column, and order it by month number before plotting.
rides_per_month = df.groupby(['Month_Num', 'Month']).size()
monthly_rides = rides_per_month.reset_index(name='Counts')
monthly_rides = monthly_rides.sort_values('Month_Num')

# Draw the line chart on an explicit figure/axes pair and save it to
# monthly_ride_trend.png.
trend_fig, trend_ax = plt.subplots(figsize=(10, 6))
sns.lineplot(
    data=monthly_rides,
    x='Month',
    y='Counts',
    marker='o',
    color='blue',
    ax=trend_ax,
)
trend_ax.set_title('Monthly Ride Bookings Trend (2024)')
trend_ax.set_xlabel('Month')
trend_ax.set_ylabel('Number of Rides')
trend_ax.grid(True)
trend_fig.savefig('monthly_ride_trend.png')
plt.close(trend_fig)

# 4. Total Revenue by Vehicle Type (only for Completed rides)
# Sum 'Booking Value' per vehicle type over rows whose status is 'Completed',
# largest total first, and save the bar chart to revenue_by_vehicle.png.
plt.figure(figsize=(10, 6))
completed_rides = df[df['Booking Status'] == 'Completed']
revenue_by_vehicle = (
    completed_rides.groupby('Vehicle Type')['Booking Value']
    .sum()
    .sort_values(ascending=False)
)
# NOTE(review): seaborn >= 0.13 emits a FutureWarning when `palette` is passed
# without `hue`; left unchanged here to stay compatible with older seaborn.
sns.barplot(x=revenue_by_vehicle.index, y=revenue_by_vehicle.values, palette='rocket')
plt.title('Total Revenue (Booking Value) by Vehicle Type')
plt.xlabel('Vehicle Type')
plt.ylabel('Total Revenue')
plt.xticks(rotation=45)
# Rotated tick labels extend below the default bottom margin; recompute the
# layout so they are not cut off in the saved image.
plt.tight_layout()
plt.savefig('revenue_by_vehicle.png')
plt.close()

# 5. Customer Cancellation Reasons
# Pie chart of how often each customer cancellation reason occurs, written to
# customer_cancellation_reasons.png. value_counts() leaves out missing values
# by default, so rows without a recorded reason do not get a slice.
cancel_reasons = df['Reason for cancelling by Customer'].value_counts()
pastel_colors = sns.color_palette('pastel')

reasons_fig, reasons_ax = plt.subplots(figsize=(8, 8))
reasons_ax.pie(
    cancel_reasons,
    labels=cancel_reasons.index,
    autopct='%1.1f%%',
    startangle=140,
    colors=pastel_colors,
)
reasons_ax.set_title('Reasons for Customer Cancellations')
reasons_fig.savefig('customer_cancellation_reasons.png')
plt.close(reasons_fig)

# Calculate some summary stats for the response
# Revenue is restricted to completed rides so the total is consistent with the
# completed-only revenue definition used for the per-vehicle revenue chart.
completed_mask = df['Booking Status'] == 'Completed'

# mode() returns an empty Series when a column has no non-null values, so the
# top locations fall back to None instead of raising on the first-element lookup.
top_pickups = df['Pickup Location'].mode()
top_drops = df['Drop Location'].mode()

summary_stats = {
    'Total Rides': len(df),
    'Total Completed Rides': int(completed_mask.sum()),
    'Total Revenue': df.loc[completed_mask, 'Booking Value'].sum(),
    'Avg Ride Distance': df['Ride Distance'].mean(),
    'Top Pickup Location': top_pickups.iloc[0] if not top_pickups.empty else None,
    'Top Drop Location': top_drops.iloc[0] if not top_drops.empty else None,
}
print(summary_stats)