# Railway Seat Occupancy Analysis

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Load Data
# Read the booking CSV and convert its 'Date' column with pd.to_datetime
# in a single chained expression, so `df` is bound exactly once.
df = (
    pd.read_csv('../data/railway_booking_data.csv')
    .assign(Date=lambda bookings: pd.to_datetime(bookings['Date']))
)
# Last expression of the cell: preview the first rows.
df.head()

In [None]:
# Basic Stats
# DataFrame.info() writes its summary directly to stdout and returns None,
# so it is called bare; wrapping it in print() would emit an extra "None".
df.info()
# Relative frequency of each Status value (normalize=True yields fractions
# instead of raw counts).
print(df['Status'].value_counts(normalize=True))

In [None]:
# Calculate Occupancy
# Occupancy rate = fraction of rows whose Status equals 'Booked', computed
# per (Train_ID, Date, Coach, Class) group.
group_keys = ['Train_ID', 'Date', 'Coach', 'Class']

# Build the boolean flag once for the whole column, then reduce it with the
# built-in grouped mean. This avoids invoking a Python lambda per group via
# .apply() while producing the same values (mean of booleans == share True).
is_booked = df['Status'].eq('Booked')
occupancy = (
    is_booked
    .groupby([df[key] for key in group_keys])
    .mean()
    .reset_index(name='Occupancy_Rate')
)
# Last expression of the cell: preview the aggregated frame.
occupancy.head()

In [None]:
# Heatmap: Occupancy by Coach
# Reshape to a Coach x Date grid; each cell holds the mean Occupancy_Rate
# of the `occupancy` rows sharing that coach and date.
pivot_table = occupancy.pivot_table(
    index='Coach',
    columns='Date',
    values='Occupancy_Rate',
    aggfunc='mean',
)

# Draw on an explicitly created Axes rather than the implicit current one.
fig, ax = plt.subplots(figsize=(15, 8))
sns.heatmap(
    pivot_table,
    cmap='YlOrRd',
    cbar_kws={'label': 'Occupancy Rate'},
    ax=ax,
)
ax.set_title('Average Seat Occupancy by Coach Over Time')
plt.show()

In [None]:
# Occupancy by Class
# One box per Class value, showing the spread of the per-group
# Occupancy_Rate values held in `occupancy`.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=occupancy, x='Class', y='Occupancy_Rate', ax=ax)
ax.set_title('Occupancy Distribution by Class')
plt.show()

In [None]:
# Create Day of Week column
# Derive the weekday name from the datetime 'Date' column. The assignment
# overwrites the column with the same values on re-run, so the cell is
# idempotent.
occupancy['Day_of_Week'] = occupancy['Date'].dt.day_name()
# Explicit Monday-to-Sunday ordering for the x axis; without it the bars
# would not be guaranteed to appear in calendar order.
order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

plt.figure(figsize=(12, 6))
# errorbar=None disables the error bars. It replaces the `ci=None` spelling,
# which seaborn deprecated in v0.12 (it now emits a FutureWarning).
# NOTE: requires seaborn >= 0.12.
sns.barplot(x='Day_of_Week', y='Occupancy_Rate', data=occupancy, order=order, errorbar=None)
plt.title('Average Occupancy by Day of Week')
plt.show()