# GTFS Analytics

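This notebook loads daily per-route aggregates of GTFS position data from BigQuery and visualizes them with Plotly: average speed by route, unmonitored movement versus speed, and day-by-day metrics for a single route.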

In [8]:
import pandas as pd
import plotly.express as px
from google.cloud import bigquery

# Construct a BigQuery client object.
client = bigquery.Client(project='regal-dynamo-470908-v9')

# Fetch every row of the daily route aggregate for the (up to) 10 routes that
# were observed on the most service days, newest day first.
sql = """
    WITH route_counts AS (
        SELECT
            route_short_name,
            -- Count distinct days rather than rows: route_id is finer-grained
            -- than route_short_name (e.g. 321-202 vs 321), so a short name may
            -- own more than one row per day and COUNT(*) would overstate it.
            COUNT(DISTINCT service_date) AS num_days
        FROM `auckland_data_dev.agg_position_route_day`
        GROUP BY route_short_name
        -- Secondary sort key makes the top-10 cut deterministic when several
        -- routes tie on num_days.
        ORDER BY num_days DESC, route_short_name
        LIMIT 10
    )
    SELECT
        t1.*
    FROM
        `auckland_data_dev.agg_position_route_day` AS t1
    JOIN
        route_counts ON t1.route_short_name = route_counts.route_short_name
    ORDER BY
        t1.service_date DESC,
        t1.route_short_name
"""

# Runs the query and blocks until the full result set is downloaded.
df = client.query(sql).to_dataframe()

# Preview only the first rows; the full frame grows with every service day.
df.head(10)

E0000 00:00:1762246629.636347 8140867 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


Unnamed: 0,service_date,route_id,route_short_name,route_long_name,route_mode,avg_speed_kmh,avg_update_interval_seconds,position_count,unmonitored_movement_count,unmonitored_movement_seconds,unmonitored_movement_distance_m
0,2025-11-04,321-202,321,321,bus,19.246654,8.553602,33549,2,277,1097.133598
1,2025-11-04,363-203,363,363,bus,22.087244,9.984774,5135,0,0,0.0
2,2025-11-04,38-203,38,38,bus,21.815772,7.95986,48934,0,0,0.0
3,2025-11-04,502-217,502,502,bus,22.443187,10.343879,9540,19,3030,11616.575711
4,2025-11-04,755-202,755,755,bus,20.213483,8.574266,19758,0,0,0.0
5,2025-11-04,814-202,814,814,bus,20.00378,8.682256,24341,0,0,0.0
6,2025-11-04,845-203,845,845,bus,18.593223,9.549975,6463,0,0,0.0
7,2025-11-04,966-203,966,966,bus,21.036315,8.301715,21864,2,312,2056.455115
8,2025-11-04,998-203,998,998,bus,46.368207,8.698378,5171,6,934,8238.368705


In [9]:
# df holds one row per route per service date, and px.bar draws one stacked
# segment per row. Plotting df directly would therefore show the SUM of the
# daily averages for any route with more than one day of data. Collapse to a
# single value per route first (unweighted mean of the daily averages).
avg_speed_by_route = (
    df.groupby('route_short_name', as_index=False)['avg_speed_kmh']
    .mean()
)

fig = px.bar(
    avg_speed_by_route,
    x='route_short_name',
    y='avg_speed_kmh',
    title='Average Speed by Route',
)
fig.show()

In [10]:
# Ratio of unmonitored_movement_count to position_count, scaled to percent.
# Stored on df itself because the per-route cell further down reads this column.
df['unmonitored_movement_percentage'] = (
    df['unmonitored_movement_count'].div(df['position_count']).mul(100)
)

# One marker per row of df; hovering reveals which route and day it belongs to.
fig = px.scatter(
    data_frame=df,
    x='avg_speed_kmh',
    y='unmonitored_movement_percentage',
    title='Unmonitored Movement vs. Average Speed',
    hover_data=['route_short_name', 'service_date'],
)
fig.show()

In [11]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Analyse whichever route appears in the first row of the dataframe
route_to_analyze = df['route_short_name'].iloc[0]
df_route = df.loc[df['route_short_name'] == route_to_analyze]

print(f"Data for route: {route_to_analyze}")
display(df_route)

# (column, label, use_secondary_axis) for each plotted metric; the label is
# used both as the legend entry and as the matching y-axis title.
metric_specs = (
    ('avg_speed_kmh', "Avg Speed (km/h)", False),
    ('unmonitored_movement_percentage', "Unmonitored Movement %", True),
)

# Single panel with a primary and a secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# One line per metric, both plotted against the service date
for metric_column, metric_label, on_secondary in metric_specs:
    fig.add_trace(
        go.Scatter(
            x=df_route['service_date'],
            y=df_route[metric_column],
            name=metric_label,
        ),
        secondary_y=on_secondary,
    )

# Titles: figure, shared x-axis, then each y-axis
fig.update_layout(title_text=f"Daily Metrics for Route {route_to_analyze}")
fig.update_xaxes(title_text="Service Date")
for _, metric_label, on_secondary in metric_specs:
    fig.update_yaxes(title_text=metric_label, secondary_y=on_secondary)

fig.show()

Data for route: 321


Unnamed: 0,service_date,route_id,route_short_name,route_long_name,route_mode,avg_speed_kmh,avg_update_interval_seconds,position_count,unmonitored_movement_count,unmonitored_movement_seconds,unmonitored_movement_distance_m,unmonitored_movement_percentage
0,2025-11-04,321-202,321,321,bus,19.246654,8.553602,33549,2,277,1097.133598,0.005961
