#**Mapping the Pulse of a City: Traffic & Transit Trends Using Python**

---



#**1. Installing Required Libraries and Packages**

In [None]:
# Install the third-party packages for this notebook into the current runtime
# (IPython shell escape, so this only works inside a notebook/IPython session).
# NOTE(review): statsmodels is imported later for the ARIMA forecast but is not
# listed here - presumably it is preinstalled on the runtime; confirm.
!pip install geopandas folium plotly scikit-learn osmnx pyarrow fastparquet streamlit-folium

#**2. Mounting Drive**

In [None]:
import os
from google.colab import drive

# Attach Google Drive to the runtime, forcing a fresh mount each time
drive.mount('/content/drive', force_remount=True)

# Root of the project inside Drive (trailing slash: sub-paths are appended
# by plain string concatenation below)
project_path = "/content/drive/MyDrive/urban_mobility/"

# Create every working folder; exist_ok avoids an error on re-runs
for subfolder in ("data/raw", "data/processed", "outputs/maps", "outputs/plots"):
    os.makedirs(project_path + subfolder, exist_ok=True)

print("Project folders created at:", project_path)

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive
drive.mount('/content/drive')

# Project root inside Google Drive. The trailing slash is kept on purpose:
# the folder-creation cell builds sub-paths by string concatenation
# (project_path + "data/raw"), which would yield ".../urban_mobilitydata/raw"
# if this cell rebinds the variable without it.
project_path = "/content/drive/MyDrive/urban_mobility/"



---



#**3. Loading Datasets - Metro Station + Metro Daily Ridership**

In [None]:
# Read the metro station locations from the KML file stored on Drive
import geopandas as gpd

stations_path = "/content/drive/MyDrive/bengaluru-metro-stations.kml"
stations = gpd.read_file(stations_path, driver="KML")

# Confirm the load and preview the first rows
print("Metro stations loaded successfully.", stations.head(), sep="\n")

In [None]:
# Load the daily metro ridership table from Drive
import pandas as pd

ridership_path = "/content/drive/MyDrive/NammaMetro_Ridership_Dataset.csv"

# Parse 'Record Date' day-first. The dashboard cell reads this same file with
# dayfirst=True and formats dates as %d-%m-%Y; without the flag here,
# ambiguous dates (day <= 12) can be read month-first or fail to parse.
ridership = pd.read_csv(ridership_path, parse_dates=['Record Date'], dayfirst=True)

# Quick sanity check of what was loaded: preview, column names and dtypes
print("Daily ridership data loaded successfully.")
print(ridership.head())
print("\nColumns:", ridership.columns)
print("\nData types:\n", ridership.dtypes)



---



#**4. Dataset Cleaning**

In [None]:
# Metro Stations Dataset: inspect, rename, de-duplicate and drop incomplete rows

# Basic structure. info() prints its own report and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
stations.info()
print(stations.head())

# Rename the KML "Name" field to a clearer column name
stations = stations.rename(columns={"Name": "station_name"})

# Keep one row per station name, then drop rows with a missing value in any column
stations = stations.drop_duplicates(subset="station_name")
stations = stations.dropna()

# Rebuild a gap-free 0..n-1 index (as the ridership cleaning does) so later
# index-aligned assignments onto `stations` line up with positional data.
stations = stations.reset_index(drop=True)

print("Cleaned Metro Stations")
print(stations.head())

In [None]:
# Metro Daily Ridership: validate dates, drop unparseable rows, sort by date

# Basic structure. info() prints its own report and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
ridership.info()
print(ridership.head())

# Ensure 'Record Date' is datetime. dayfirst=True matches how the dashboard
# cell parses the same column; values that cannot be parsed become NaT.
# (A column that is already datetime passes through unchanged.)
ridership['Record Date'] = pd.to_datetime(
    ridership['Record Date'], dayfirst=True, errors='coerce'
)

# Drop rows whose date could not be parsed
ridership = ridership.dropna(subset=['Record Date'])

# Chronological order with a fresh 0..n-1 index
ridership = ridership.sort_values('Record Date')
ridership = ridership.reset_index(drop=True)

print("Cleaned Daily Ridership")
print(ridership.head())



---



#A small visual representation

In [None]:
# Ridership trend: daily total plotted over time

import matplotlib.pyplot as plt

# Daily total = smart cards + tokens + QR tickets
ridership['total_ridership'] = (
    ridership['Total Smart Cards']
    + ridership['Total Tokens']
    + ridership['Total QR']
)

# Single line chart using the object-oriented matplotlib API
fig, ax = plt.subplots(figsize=(12,6))
ax.plot(ridership['Record Date'], ridership['total_ridership'], color='olive')
ax.set_title("Bengaluru Metro - Daily Ridership Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Total Passengers")
ax.grid(True)
plt.show()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, ...) of the daily totals
print("Ridership Summary Stats", ridership['total_ridership'].describe(), sep="\n")



---



#**5. Exploratory Data Analysis (EDA) for Bengaluru Metro**

#Exploring Ridership Patterns

In [None]:
# 1. Monthly and weekday averages

# Tag every record with its calendar month (as a period) and weekday name
record_dates = ridership['Record Date']
ridership['month'] = record_dates.dt.to_period('M')
ridership['weekday'] = record_dates.dt.day_name()

# --- Monthly trend: mean daily ridership per month ---
monthly = (
    ridership.groupby('month')['total_ridership']
    .mean()
    .reset_index()
)
month_labels = monthly['month'].astype(str)  # periods -> plain strings for the x axis

plt.figure(figsize=(12,6))
plt.plot(month_labels, monthly['total_ridership'], marker='o', color='green')
plt.title("Average Monthly Ridership - Bengaluru Metro")
plt.xticks(rotation=45)
plt.ylabel("Passengers")
plt.grid(True)
plt.show()

# --- Weekday trend: mean daily ridership per weekday, Monday first ---
weekday_order = ["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"]
weekday = ridership.groupby('weekday')['total_ridership'].mean().reindex(weekday_order)

plt.figure(figsize=(10,5))
weekday.plot.bar(color='olive')
plt.title("Average Ridership by Weekday")
plt.ylabel("Passengers")
plt.show()

#Exploring Metro Stations Dataset

In [None]:
# 1. Plot every metro station on an interactive map

import folium

# Map centre for Bengaluru as [lat, lon]
bengaluru_center = [12.9716, 77.5946]

m = folium.Map(location=bengaluru_center, zoom_start=12)

# One train-icon marker per station; the popup shows the station name
for point, name in zip(stations['geometry'], stations['station_name']):
    marker = folium.Marker(
        location=[point.y, point.x],
        popup=name,
        icon=folium.Icon(color='blue', icon='train', prefix='fa'),
    )
    marker.add_to(m)

# Last expression of the cell: renders the map in the notebook
m



---



#**6. Advanced Analysis**

#A. Clustering Metro Stations (K-Means)

In [None]:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Group the stations into 3 spatial clusters from their coordinates.

# Extract (lat, lon) per station. The frame reuses stations' index so the
# cluster labels can be assigned back by index without misalignment: the
# cleaning step drops rows, so stations' index may have gaps, and a fresh
# 0..n-1 index here would attach labels to the wrong stations (or NaN).
coords = stations['geometry'].apply(lambda x: (x.y, x.x)).tolist()
X = pd.DataFrame(coords, columns=['lat','lon'], index=stations.index)

# Running K-Means (3 clusters); fixed seed for reproducible labels
kmeans = KMeans(n_clusters=3, random_state=42)
X['cluster'] = kmeans.fit_predict(X[['lat','lon']])

# Adding cluster back to stations (index-aligned; indexes match by construction)
stations['cluster'] = X['cluster']

print(stations.head())

# Scatter of station positions coloured by cluster label
plt.figure(figsize=(8,6))
plt.scatter(X['lon'], X['lat'], c=X['cluster'], cmap='tab10', s=80)
plt.xlabel("Longitude")
plt.ylabel("Latitude")
plt.title("Station Clusters - Bengaluru Metro")
plt.show()

#B. Forecasting Daily Ridership (Time Series)

In [None]:
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt

# Fit an ARIMA(5,1,0) model on all but the last 30 observations and compare
# its forecast with the held-out tail.

# Daily total ridership indexed by record date
ts = ridership.set_index('Record Date')['total_ridership']

# Train-test split: the last 30 observations are held out
train = ts[:-30]
test = ts[-30:]

# Fit ARIMA model
model = ARIMA(train, order=(5,1,0))
fit = model.fit()

# Forecast one step per held-out observation
forecast = fit.forecast(steps=len(test))

# When the date index has no usable frequency (e.g. missing days), statsmodels
# returns the forecast on an integer index. Re-label it with the held-out
# dates so it is drawn over the test period instead of at integer x positions.
forecast.index = test.index

plt.figure(figsize=(12,6))
plt.plot(train.index, train, label="Train")
plt.plot(test.index, test, label="Test", color='red')
plt.plot(forecast.index, forecast, label="Forecast", color='olive')
plt.legend()
plt.title("Daily Ridership Forecast - Bengaluru Metro")
plt.show()

#C. Heatmap of Metro Stations

In [None]:
from folium.plugins import HeatMap

# One [lat, lon] pair per station feeds the heat layer
heat_data = [[point.y, point.x] for point in stations['geometry']]

# Fresh base map centred on the city, with the heat layer on top
m_heat = folium.Map(location=bengaluru_center, zoom_start=12)
heat_layer = HeatMap(heat_data)
heat_layer.add_to(m_heat)

# Last expression of the cell: renders the map in the notebook
m_heat



---



#**7. Dashboard**

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Builds one Plotly figure (3x3 grid): six ridership charts plus three KPI
# indicator cards, from a fresh read of the ridership CSV.

# Loading & cleaning data
# NOTE: this rebinds `ridership` with snake_case column names; cells that use
# the original headers (e.g. 'Record Date') need the data reloaded before
# they are re-run.
ridership = pd.read_csv("/content/drive/MyDrive/NammaMetro_Ridership_Dataset.csv")

# Standardizing column names: trimmed, lower-case, spaces -> underscores
ridership.columns = ridership.columns.str.strip().str.lower().str.replace(" ", "_")

# Converting 'record_date' column to datetime (day-first; failures become NaT)
ridership['record_date'] = pd.to_datetime(ridership['record_date'], dayfirst=True, errors='coerce')

# Drop rows whose date failed to parse, as the cleaning cell does; otherwise
# a NaT on the busiest row would make strftime below raise.
ridership = ridership.dropna(subset=['record_date'])

# List of ticket columns
# NOTE(review): this list mixes 'total_*' columns with what look like their
# component columns (e.g. stored_value_card, qr_paytm). If the totals already
# include those components, summing every column double-counts riders in
# total_ridership and in the pie chart - confirm against the dataset. The
# earlier trend cell sums only Total Smart Cards + Total Tokens + Total QR.
ticket_cols = [
    "total_smart_cards", "stored_value_card", "one_day_pass", "three_day_pass",
    "five_day_pass", "total_tokens", "total_ncmc", "group_ticket",
    "total_qr", "qr_nammametro", "qr_whatsapp", "qr_paytm"
]

# Total ridership: row-wise sum over the ticket columns (NaN treated as 0)
ridership['total_ridership'] = ridership[ticket_cols].sum(axis=1)

# Additional breakdowns
ridership['month'] = ridership['record_date'].dt.to_period("M")
ridership['weekday'] = ridership['record_date'].dt.day_name()
ridership['week'] = ridership['record_date'].dt.to_period("W")

# Summary KPIs
# Keep the Timestamp itself: re-parsing the formatted "%d-%m-%Y" string
# without dayfirst would swap day and month whenever the day is <= 12.
busiest_ts = ridership.loc[ridership["total_ridership"].idxmax(), "record_date"]
busiest_day = busiest_ts.strftime("%d-%m-%Y")
max_ridership = ridership["total_ridership"].max()
popular_ticket = ridership[ticket_cols].sum().idxmax()

# Visualizations

# 1. Daily Ridership
fig_daily = px.line(ridership, x="record_date", y="total_ridership",
                    title="Daily Metro Ridership", labels={"total_ridership":"Passengers"},
                    template="plotly_white")

# 2. Monthly Ridership
monthly = ridership.groupby("month")["total_ridership"].sum().reset_index()
monthly["month"] = monthly["month"].astype(str)
fig_monthly = px.line(monthly, x="month", y="total_ridership",
                      title="Monthly Ridership", labels={"total_ridership":"Passengers"},
                      template="plotly_white")

# 3. Weekly Ridership
weekly = ridership.groupby("week")["total_ridership"].sum().reset_index()
weekly["week"] = weekly["week"].astype(str)
fig_weekly = px.line(weekly, x="week", y="total_ridership",
                     title="Weekly Ridership", labels={"total_ridership":"Passengers"},
                     template="plotly_white")

# 4. Weekday Average Ridership (Monday first)
weekday_avg = ridership.groupby("weekday")["total_ridership"].mean().reindex(
    ["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"]
)
fig_weekday = px.bar(x=weekday_avg.index, y=weekday_avg.values,
                     title="Average Ridership by Weekday",
                     labels={"x":"Weekday","y":"Avg Passengers"},
                     template="plotly_white")

# 5. Ticket Type Distribution
ticket_sum = ridership[ticket_cols].sum().sort_values(ascending=False)
fig_ticket = px.pie(names=ticket_sum.index, values=ticket_sum.values,
                    title="Ridership by Ticket Type", template="plotly_white")

# 6. Detailed Heatmap (Day vs Weekday)
heatmap_df = ridership.groupby(['record_date','weekday'])['total_ridership'].sum().reset_index()
fig_heatmap = px.density_heatmap(heatmap_df, x='record_date', y='weekday', z='total_ridership',
                                 title="Detailed Ridership Heatmap",
                                 template="plotly_white", nbinsx=30, nbinsy=7,
                                 color_continuous_scale="Viridis")


# Dashboard Layout (Card-style)
# Rows 1-2 hold charts ("domain" cell for the pie); row 3 holds the KPI cards.

dashboard = make_subplots(
    rows=3, cols=3,
    subplot_titles=("Daily Ridership","Monthly Ridership","Weekly Ridership",
                    "Weekday Avg","Ticket Type Distribution","Heatmap",
                    "Max Ridership","Busiest Day","Popular Ticket"),
    specs=[[{"type":"xy"},{"type":"xy"},{"type":"xy"}],
           [{"type":"xy"},{"type":"domain"},{"type":"xy"}],
           [{"type":"indicator"},{"type":"indicator"},{"type":"indicator"}]],
    horizontal_spacing=0.08,
    vertical_spacing=0.12
)

# Adding charts: only the traces are copied, not each figure's layout
for trace in fig_daily.data: dashboard.add_trace(trace, row=1, col=1)
for trace in fig_monthly.data: dashboard.add_trace(trace, row=1, col=2)
for trace in fig_weekly.data: dashboard.add_trace(trace, row=1, col=3)
for trace in fig_weekday.data: dashboard.add_trace(trace, row=2, col=1)
for trace in fig_ticket.data: dashboard.add_trace(trace, row=2, col=2)
for trace in fig_heatmap.data: dashboard.add_trace(trace, row=2, col=3)

# Adding KPI cards
dashboard.add_trace(go.Indicator(mode="number+delta", value=max_ridership,
                                 title={"text":"Max Ridership"}, delta={"reference":0}),
                    row=3, col=1)
# Card value is the day-of-month of the busiest date; the full date is in the title
dashboard.add_trace(go.Indicator(mode="number", value=busiest_ts.day,
                                 title={"text":f"Busiest Day ({busiest_day})"},
                                 number={"valueformat":".0f"}),
                    row=3, col=2)
dashboard.add_trace(go.Indicator(mode="number", value=ticket_sum[popular_ticket],
                                 title={"text":f"Popular Ticket: {popular_ticket}"}),
                    row=3, col=3)

# Layout updates

dashboard.update_layout(
    template="plotly_white",
    title="<b><span style='font-size:28px'>Bengaluru Metro Ridership Dashboard</span></b>",
    title_x=0.5,
    showlegend=True,
    height=1000, width=1500,
    # The px heatmap trace points at the shared layout color axis; its Viridis
    # scale lived in fig_heatmap's layout, which add_trace does not copy.
    coloraxis_colorscale="Viridis"
)

dashboard.show()

$$..........................Thank You..........................$$