In [1]:
pip install pandas numpy shapely pyproj tqdm requests

Note: you may need to restart the kernel to use updated packages.


In [1]:
import io, os, zipfile, requests
from datetime import date
import pandas as pd
import numpy as np
from shapely.geometry import LineString
from shapely.ops import substring
from pyproj import Geod
from tqdm import tqdm

# -----------------------------
# CONFIG — edit these first
# -----------------------------
# Portland (TriMet) static GTFS
GTFS_URL = "https://developer.trimet.org/schedule/gtfs.zip"
GTFS_ZIP = "trimet_gtfs.zip"

# Pick a valid weekday in the feed window
SERVICE_DATE = "2025-09-10"        # YYYY-MM-DD

# Time window to render (None = full day)
TIME_START = "06:00:00"            # e.g., AM peak
TIME_END   = "10:00:00"

# Smoothness (smaller = smoother, larger = faster to compute)
TIME_STEP_SEC   = 5               # 5–10s looks great
TRAIL_MINUTES   = 2                # how long a dot lingers (for subtle trails)

# Filter which modes (None = all). Common GTFS route_type:
# 0 Tram/Streetcar • 1 Subway/Metro • 2 Rail/CR • 3 Bus • 4 Ferry • (often 0/1 used for MAX/Streetcar)
ROUTE_TYPES_INCLUDE = [0,1,2,3]      # streetcar+MAX+bus; add 2 or 4 if present and you want them

# Outputs
OUT_POINTS = "portland_transit_points.csv"   # QGIS points
OUT_FRAMES = "portland_frames.csv"           # one row per frame time (optional for Atlas)
# -----------------------------

ID_DTYPE = {
    "agency_id": str, "route_id": str, "trip_id": str, "service_id": str,
    "shape_id": str, "stop_id": str, "block_id": str
}

def download_gtfs(url, out_path):
    if os.path.exists(out_path):
        print(f"Using existing: {out_path}")
        return
    print(f"Downloading GTFS → {out_path}")
    r = requests.get(url, timeout=90)
    r.raise_for_status()
    with open(out_path, "wb") as f: f.write(r.content)

def _read_csv_from_zip(zf, name, extra_dtype=None, usecols=None):
    dtype = dict(ID_DTYPE)
    if extra_dtype: dtype.update(extra_dtype)
    with zf.open(name) as f:
        return pd.read_csv(f, dtype=dtype, usecols=usecols, low_memory=False)

def hms_to_sec(s: str) -> int:
    h, m, sec = map(int, s.split(":"))
    return h*3600 + m*60 + sec

def to_sec_or_none(hms):
    if hms is None: return None
    return hms_to_sec(hms)

def service_ids_by_date(cal, cald, the_date: date):
    wkcols = ["monday","tuesday","wednesday","thursday","friday","saturday","sunday"]
    wkcol  = wkcols[the_date.weekday()]
    active = set(cal[
        (cal["start_date"] <= pd.Timestamp(the_date)) &
        (cal["end_date"]   >= pd.Timestamp(the_date)) &
        (cal[wkcol] == 1)
    ]["service_id"].astype(str))
    if cald is not None and len(cald):
        sel = cald[cald["date"] == pd.Timestamp(the_date)]
        for _, r in sel.iterrows():
            sid = str(r["service_id"])
            if r["exception_type"] == 1: active.add(sid)     # added
            elif r["exception_type"] == 2: active.discard(sid) # removed
    return active

# 1) Download
download_gtfs(GTFS_URL, GTFS_ZIP)

# 2) Read GTFS (TriMet zip is flat)
with zipfile.ZipFile(GTFS_ZIP) as zf:
    names = set(zf.namelist())
    need = {"routes.txt","trips.txt","stop_times.txt","shapes.txt","calendar.txt","calendar_dates.txt"}
    missing = need - names
    if missing: raise RuntimeError(f"GTFS missing: {missing}")

    routes = _read_csv_from_zip(zf, "routes.txt", extra_dtype={"route_type": "Int64"})
    trips  = _read_csv_from_zip(zf, "trips.txt", usecols=["route_id","trip_id","service_id","shape_id","block_id"])
    st     = _read_csv_from_zip(zf, "stop_times.txt", extra_dtype={
        "arrival_time": str, "departure_time": str, "stop_sequence": "Int64",
        "shape_dist_traveled": "float64"
    })
    shapes = _read_csv_from_zip(zf, "shapes.txt", extra_dtype={
        "shape_pt_lat": "float64", "shape_pt_lon": "float64", "shape_pt_sequence": "Int64"
    })
    cal    = _read_csv_from_zip(zf, "calendar.txt")
    cald   = _read_csv_from_zip(zf, "calendar_dates.txt")

# 3) Calendar
for c in ("start_date","end_date"):
    cal[c] = pd.to_datetime(cal[c], format="%Y%m%d")
cald["date"] = pd.to_datetime(cald["date"], format="%Y%m%d")
svc_date = pd.to_datetime(SERVICE_DATE).date()
active_sids = service_ids_by_date(cal, cald, svc_date)

# 4) Filter routes (optional)
if ROUTE_TYPES_INCLUDE is not None and "route_type" in routes.columns:
    routes = routes[routes["route_type"].isin(ROUTE_TYPES_INCLUDE)]

# 5) Join trips/routes → keep active services
tr = trips.merge(routes[["route_id","route_type","agency_id"]], on="route_id", how="inner")
tr = tr[tr["service_id"].astype(str).isin(active_sids)].copy()

# 6) Stop times
has_sdt = "shape_dist_traveled" in st.columns
keep = ["trip_id","arrival_time","departure_time","stop_sequence"]
if has_sdt: keep.append("shape_dist_traveled")
st = st[keep].dropna(subset=["trip_id"]).copy()
st["stop_sequence"] = st["stop_sequence"].astype("Int64")
st = st.sort_values(["trip_id","stop_sequence"])

# 7) Shapes → LineStrings
shapes = shapes.sort_values(["shape_id","shape_pt_sequence"])
shape_lines = {}
for sid, grp in shapes.groupby("shape_id", sort=False):
    coords = grp[["shape_pt_lon","shape_pt_lat"]].to_numpy()
    shape_lines[str(sid)] = LineString(coords)

# 8) Geodesic shape length (meters)
geod = Geod(ellps="WGS84")
def geod_length_m(line: LineString) -> float:
    lons, lats = line.xy
    if len(lons) < 2: return 0.0
    return sum(geod.line_length([lons[i], lons[i+1]], [lats[i], lats[i+1]]) for i in range(len(lons)-1))
shape_lengths = {sid: geod_length_m(ln) for sid, ln in shape_lines.items()}

# 9) Window seconds
ws_sec = to_sec_or_none(TIME_START)
we_sec = to_sec_or_none(TIME_END)

def interpolate_trip(trip_id: str, shape_id: str, times_df: pd.DataFrame) -> pd.DataFrame:
    sid = str(shape_id)
    if sid not in shape_lines: return pd.DataFrame()
    line = shape_lines[sid]
    if line.length == 0: return pd.DataFrame()

    df = times_df.sort_values("stop_sequence").copy()
    df["t_str"] = df["departure_time"].where(df["departure_time"].notna(), df["arrival_time"])
    df = df[df["t_str"].notna()].copy()
    mask = df["t_str"].astype(str).str.match(r"^\d{1,2}:\d{2}:\d{2}$")
    df = df[mask].copy()
    if df.empty: return pd.DataFrame()
    df["t_sec"] = df["t_str"].astype(str).apply(hms_to_sec)

    # Fractions along shape
    if has_sdt and df.get("shape_dist_traveled") is not None:
        Lm = shape_lengths.get(sid, None)
        if Lm and Lm > 0 and df["shape_dist_traveled"].notna().any():
            df["f"] = df["shape_dist_traveled"].astype(float) / Lm
            df["f"] = df["f"].clip(0, 1)
        else:
            df["f"] = np.linspace(0, 1, len(df))
    else:
        df["f"] = np.linspace(0, 1, len(df))

    # Clamp to trip + window
    df = df.sort_values("t_sec").drop_duplicates(subset=["t_sec"])
    t0, tN = int(df["t_sec"].iloc[0]), int(df["t_sec"].iloc[-1])
    start_sec = max(t0, ws_sec if ws_sec is not None else t0)
    end_sec   = min(tN, we_sec if we_sec is not None else tN)
    if end_sec <= start_sec: return pd.DataFrame()

    T = df["t_sec"].to_numpy()
    F = df["f"].to_numpy()
    ts = np.arange(start_sec, end_sec+1, TIME_STEP_SEC, dtype=int)

    rows = []
    for tt in ts:
        j = np.searchsorted(T, tt) - 1
        if j < 0: j = 0
        if j >= len(T)-1: j = len(T)-2
        t1, t2 = T[j], T[j+1]
        f1, f2 = F[j], F[j+1]
        f = float(f2) if t2==t1 else float(f1 + (f2 - f1) * ((tt - t1) / (t2 - t1)))
        f = max(0.0, min(1.0, f))
        x, y = substring(shape_lines[sid], 0, f, normalized=True).coords[-1]
        rows.append((tt, x, y))
    out = pd.DataFrame(rows, columns=["t_sec","lon","lat"])
    out["trip_id"] = str(trip_id)
    out["shape_id"] = sid
    return out

# 10) Interpolate all trips
meta = tr.set_index("trip_id")[["route_id","route_type","agency_id","shape_id"]]
points = []
for tid, g in tqdm(st.groupby("trip_id"), desc="Interpolating trips"):
    tid = str(tid)
    if tid not in meta.index: continue
    shp = meta.loc[tid, "shape_id"]
    dfp = interpolate_trip(tid, shp, g)
    if len(dfp):
        dfp["route_id"]   = meta.loc[tid, "route_id"]
        dfp["route_type"] = meta.loc[tid, "route_type"]
        dfp["agency_id"]  = meta.loc[tid, "agency_id"]
        points.append(dfp)

if not points:
    raise SystemExit("No points generated. Check SERVICE_DATE/time window.")

pts = pd.concat(points, ignore_index=True)

# 11) Compose absolute times (+ short trail window)
service_day = pd.Timestamp(SERVICE_DATE)
pts["datetime"] = service_day + pd.to_timedelta(pts["t_sec"], unit="s")
pts["end_datetime"] = pts["datetime"] + pd.to_timedelta(TRAIL_MINUTES, unit="m")

# (Optional) drop truly static trips (never move)
static = (pts.groupby("trip_id").agg(lu=("lon","nunique"), uu=("lat","nunique")))
static_ids = static[(static.lu==1) & (static.uu==1)].index
pts = pts[~pts["trip_id"].isin(static_ids)].copy()

# 12) Save QGIS-ready points
pts = pts[["lon","lat","datetime","end_datetime","route_type","agency_id","trip_id","shape_id","route_id"]]
pts = pts.sort_values("datetime")
pts.to_csv(OUT_POINTS, index=False)
print(f"Wrote {len(pts):,} points → {OUT_POINTS}")

# 13) Build Atlas frames (optional)
tmin, tmax = pts["datetime"].min(), pts["datetime"].max()
frames = pd.date_range(tmin, tmax, freq=f"{TIME_STEP_SEC}s")
pd.DataFrame({"frame_time": frames}).to_csv(OUT_FRAMES, index=False)
print(f"Wrote {len(frames):,} frames → {OUT_FRAMES}")


Downloading GTFS → trimet_gtfs.zip


Interpolating trips: 100%|██████████| 55659/55659 [15:52<00:00, 58.45it/s]   


Wrote 880,623 points → portland_transit_points.csv
Wrote 2,881 frames → portland_frames.csv


In [1]:
# save as make_active_series.py (or run in a notebook)
# pip install pandas

import pandas as pd

POINTS_CSV = "portland_transit_points.csv"   # your interpolated output
BIN = "5min"   # match your sampling (5 seconds). Use "1min" if you prefer coarser bars.

df = pd.read_csv(POINTS_CSV, parse_dates=["datetime"])

# Bin to time slices and count distinct active vehicles
df["time_bin"] = df["datetime"].dt.floor(BIN)
active = (df.groupby("time_bin")["trip_id"]
            .nunique()
            .reset_index(name="active_vehicles"))

active.to_csv("portland_active_vehicles.csv", index=False)
print(f"Saved {len(active)} bins → portland_active_vehicles.csv")


Saved 49 bins → portland_active_vehicles.csv


In [2]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

ACTIVE_CSV = "portland_active_vehicles.csv"
OUT_DIR = "chart_frames"
ACCENT = "#00FFFF"   # cyan highlight
BAR_COLOR = "#FFFFFF"
AX_COLOR = "#FFFFFF"

FIG_W, FIG_H = 4, 2   # smaller to save memory
DPI = 100

os.makedirs(OUT_DIR, exist_ok=True)
active = pd.read_csv(ACTIVE_CSV, parse_dates=["time_bin"])

ymax = int(active["active_vehicles"].max() * 1.1)
labels = active["time_bin"].dt.strftime("%H:%M")
tick_every = max(1, len(labels)//6)
xticks_idx = list(range(0, len(labels), tick_every))

for i in tqdm(range(len(active)), desc="Rendering chart frames"):
    fig, ax = plt.subplots(figsize=(FIG_W, FIG_H), dpi=DPI)

    fig.patch.set_alpha(0.0)
    ax.set_facecolor((0, 0, 0, 0))

    ax.bar(range(len(active)), active["active_vehicles"],
           color=BAR_COLOR, width=0.9)
    ax.bar(i, active["active_vehicles"].iloc[i],
           color=ACCENT, width=0.9)

    ax.spines[:].set_color(AX_COLOR)
    ax.tick_params(colors=AX_COLOR, labelsize=8)
    ax.set_ylabel("Vehicles", color=AX_COLOR)
    ax.set_xlabel("Time", color=AX_COLOR)

    ax.set_xticks(xticks_idx)
    ax.set_xticklabels([labels[j] for j in xticks_idx], rotation=0, color=AX_COLOR)

    ax.set_ylim(0, ymax)

    # Save transparent
    plt.savefig(os.path.join(OUT_DIR, f"chart_{i:04d}.png"),
                dpi=DPI, transparent=True)
    plt.close(fig)

print(f"Wrote {len(active)} transparent PNGs → {OUT_DIR}")


Rendering chart frames: 100%|██████████| 49/49 [00:03<00:00, 15.14it/s]

Wrote 49 transparent PNGs → chart_frames





In [3]:
# make_active_series.py
# pip install pandas

import pandas as pd

POINTS_CSV = "portland_transit_points.csv"   # your interpolated output (5s cadence)
MAP_STEP = "5S"   # match QGIS Map time step (5 seconds)

df = pd.read_csv(POINTS_CSV, parse_dates=["datetime"])

# Count distinct active vehicles per MAP_STEP
df["frame_time"] = df["datetime"].dt.floor(MAP_STEP)
series = (df.groupby("frame_time")["trip_id"]
            .nunique()
            .rename("active_vehicles")
            .to_frame()
            .reset_index())

# Smooth line (rolling window). Try 5–15 minutes.
series["smooth"] = (series.set_index("frame_time")["active_vehicles"]
                    .rolling("5min", center=True, min_periods=1).mean()
                    .reset_index(drop=True))

series.to_csv("portland_active_series_5s.csv", index=False)
print(f"Saved series with {len(series)} frames → portland_active_series_5s.csv")


Saved series with 2881 frames → portland_active_series_5s.csv


  df["frame_time"] = df["datetime"].dt.floor(MAP_STEP)


In [5]:
# render_chart_frames_5s.py
# pip install pandas matplotlib tqdm

import os, pandas as pd, matplotlib.pyplot as plt
from tqdm import tqdm

CSV = "portland_active_series_5s.csv"
OUT = "chart_frames_5s"
ACCENT = "#047272"
LINE_COLOR = "#FFFFFF"
AX_COLOR = "#FFFFFF"

FIG_W, FIG_H, DPI = 4, 2, 100
os.makedirs(OUT, exist_ok=True)

s = pd.read_csv(CSV, parse_dates=["frame_time"])
ymax = int(max(s["active_vehicles"].max(), s["smooth"].max()) * 1.1)

# Build sparse x ticks ~6 labels
labels = s["frame_time"].dt.strftime("%H:%M")
tick_every = max(1, len(labels)//6)
xticks_idx = list(range(0, len(labels), tick_every))

for i in tqdm(range(len(s)), desc="Render (5-sec)"):
    fig, ax = plt.subplots(figsize=(FIG_W, FIG_H), dpi=DPI)

    fig.patch.set_alpha(0.0)
    ax.set_facecolor((0,0,0,0))

    # Full smooth curve (white line)
    ax.plot(range(len(s)), s["smooth"], color=LINE_COLOR, linewidth=2)

    # Moving point (cyan)
    ax.scatter(i, s["smooth"].iloc[i], s=25, color=ACCENT, zorder=3)
    
    # Faint area fill under the curve
    ax.fill_between(range(len(s)), 0, s["smooth"], color="#047272", alpha=0.15, linewidth=0)

    # Title
    ax.set_title("Public Transit Vehicles in Service", color=AX_COLOR, fontsize=10, pad=8)
    
    # Axes styling (white)
    for spine in ax.spines.values(): spine.set_color(AX_COLOR)
    ax.tick_params(colors=AX_COLOR, labelsize=8)
    ax.set_ylabel("Transit Vehicles in Service", color=AX_COLOR)
    ax.set_xlabel("Time", color=AX_COLOR)

    ax.set_xticks(xticks_idx)
    ax.set_xticklabels([labels[j] for j in xticks_idx], color=AX_COLOR)

    ax.set_ylim(0, ymax)

    plt.savefig(os.path.join(OUT, f"chart_{i:05d}.png"), transparent=True)
    plt.close(fig)

print(f"Wrote {len(s)} PNGs → {OUT}")


Render (5-sec): 100%|██████████| 2881/2881 [02:53<00:00, 16.62it/s]

Wrote 2881 PNGs → chart_frames_5s





In [6]:
# render_chart_frames_5s.py
# pip install pandas matplotlib tqdm
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

CSV = "portland_active_series_5s.csv"  # from make_active_series.py
OUT = "chart_frames_5s"
ACCENT = "#047272"     # moving point color
LINE_COLOR = "#FFFFFF" # curve + fill color
AX_COLOR = "#FFFFFF"   # axes/ticks/labels/title

# Figure size kept modest to save memory; adjust as needed
FIG_W, FIG_H, DPI = 4, 2, 100

os.makedirs(OUT, exist_ok=True)
s = pd.read_csv(CSV, parse_dates=["frame_time"])

# y-range (pad a bit)
ymax = float(max(s["active_vehicles"].max(), s["smooth"].max()) * 1.1)

# x axis ticks ~6 labels
labels = s["frame_time"].dt.strftime("%H:%M")
tick_every = max(1, len(labels)//6)
xticks_idx = list(range(0, len(labels), tick_every))

# Precompute arrays for speed
x = np.arange(len(s))
y = s["smooth"].to_numpy()

for i in tqdm(range(len(s)), desc="Rendering 5s frames"):
    fig, ax = plt.subplots(figsize=(FIG_W, FIG_H), dpi=DPI)

    # Transparent background
    fig.patch.set_alpha(0.0)
    ax.set_facecolor((0, 0, 0, 0))

    # Full curve (white)
    ax.plot(x, y, color=LINE_COLOR, linewidth=2)

    # Progressive fill up to current frame (soft)
    ax.fill_between(x[:i+1], 0, y[:i+1], color=LINE_COLOR, alpha=0.18, linewidth=0)

    # Optional: brighter short cap near the head (fade look)
    N = 8  # last N frames (~40s if 5s steps)
    j0 = max(0, i - N + 1)
    ax.fill_between(x[j0:i+1], 0, y[j0:i+1], color=LINE_COLOR, alpha=0.28, linewidth=0)

    # Moving point (cyan)
    ax.scatter(i, y[i], s=22, color=ACCENT, zorder=3)

    # Axes / labels / title (white)
    for spine in ax.spines.values():
        spine.set_color(AX_COLOR)
    ax.tick_params(colors=AX_COLOR, labelsize=8)
    ax.set_ylabel("Public Transit Vehicle Count", color=AX_COLOR, fontsize=8)
    ax.set_xlabel("Time", color=AX_COLOR)
    ax.set_title("Public Transit Vehicles in Service", color=AX_COLOR, fontsize=10, pad=8)

    # X ticks (sparse, readable)
    ax.set_xticks(xticks_idx)
    ax.set_xticklabels([labels[j] for j in xticks_idx], color=AX_COLOR)

    ax.set_ylim(0, ymax)

    # Save transparent PNG
    out_path = os.path.join(OUT, f"chart_{i:05d}.png")
    plt.savefig(out_path, transparent=True)
    plt.close(fig)

print(f"Wrote {len(s)} PNGs → {OUT}")


Rendering 5s frames: 100%|██████████| 2881/2881 [02:58<00:00, 16.18it/s]

Wrote 2881 PNGs → chart_frames_5s





In [7]:
import pandas as pd

POINTS_CSV = "portland_transit_points.csv"   # 5s interpolated points
FRAME = "5S"
SMOOTH_WIN = "10min"                          # ← 10-minute window

MODE_MAP = {0:"MAX", 1:"MAX", 2:"Commuter Rail", 3:"Bus"}

df = pd.read_csv(POINTS_CSV, parse_dates=["datetime"])
df["mode"] = df["route_type"].map(MODE_MAP)
df = df[df["mode"].notna()].copy()
df["frame_time"] = df["datetime"].dt.floor(FRAME)

# counts per frame & mode
wide = (df.groupby(["frame_time","mode"])["trip_id"]
          .nunique().unstack("mode").fillna(0).sort_index())

for col in ["MAX","Bus","Commuter Rail"]:
    if col not in wide.columns: wide[col] = 0

# total across modes
wide["Total"] = wide[["MAX","Bus","Commuter Rail"]].sum(axis=1)

# smooth with 10-min rolling window
smooth = wide.rolling(SMOOTH_WIN, min_periods=1).mean()
smooth.columns = [f"smooth_{c}" for c in smooth.columns]

out = pd.concat([wide, smooth], axis=1).reset_index()
out.to_csv("portland_active_modes_5s.csv", index=False)
print("Saved → portland_active_modes_5s.csv")


Saved → portland_active_modes_5s.csv


  df["frame_time"] = df["datetime"].dt.floor(FRAME)


In [8]:
import os, numpy as np, pandas as pd, matplotlib.pyplot as plt
from tqdm import tqdm

CSV   = "portland_active_modes_5s.csv"
OUT   = "chart_frames_modes_5s"
AX_C  = "#FFFFFF"
COLS  = {"MAX":"#46a6d7", "Bus":"#e6397e", "Commuter Rail":"#fff6a2"}  # per-mode colors
FIG_W, FIG_H, DPI = 4, 2, 100

os.makedirs(OUT, exist_ok=True)
s = pd.read_csv(CSV, parse_dates=["frame_time"]).reset_index(drop=True)

x = np.arange(len(s))
ys = {m: s[f"smooth_{m}"].to_numpy() for m in ["MAX","Bus","Commuter Rail","Total"]}
ymax = float(max(ys[m].max() for m in ys) * 1.1)

labels = s["frame_time"].dt.strftime("%H:%M")
tick_every = max(1, len(labels)//6)
xticks_idx = list(range(0, len(labels), tick_every))

for i in tqdm(range(len(s)), desc="Render 5s multi-mode + total"):
    fig, ax = plt.subplots(figsize=(FIG_W, FIG_H), dpi=DPI)
    fig.patch.set_alpha(0.0)
    ax.set_facecolor((0,0,0,0))

    # --- TOTAL as thin white background curve + subtle progressive fill ---
    ytot = ys["Total"]
    ax.fill_between(x[:i+1], 0, ytot[:i+1], color="#FFFFFF", alpha=0.06, linewidth=0)
    ax.plot(x, ytot, color="#FFFFFF", linewidth=1.5, alpha=0.9, zorder=1)

    # --- Per-mode curves, fills, and moving points (on top) ---
    for mode in ["MAX","Bus","Commuter Rail"]:
        y = ys[mode]
        color = COLS[mode]
        ax.fill_between(x[:i+1], 0, y[:i+1], color=color, alpha=0.12, linewidth=0)
        j0 = max(0, i-8)  # brighter head cap (~40s)
        ax.fill_between(x[j0:i+1], 0, y[j0:i+1], color=color, alpha=0.22, linewidth=0)
        ax.plot(x, y, color=color, linewidth=2, zorder=2)
        ax.scatter(i, y[i], s=22, color=color, zorder=3)

    # Axes / labels / title
    for sp in ax.spines.values(): sp.set_color(AX_C)
    ax.tick_params(colors=AX_C, labelsize=8)
    ax.set_ylabel("Public Transit Vehicle Count", color=AX_C)
    ax.set_xlabel("Time", color=AX_C)
    ax.set_title("Transit Vehicles in Service by Mode", color=AX_C, fontsize=10, pad=8)

    ax.set_xticks(xticks_idx)
    ax.set_xticklabels([labels[j] for j in xticks_idx], color=AX_C)
    ax.set_ylim(0, ymax)

    # Legend (includes Total)
    handles = [
        plt.Line2D([0],[0], color="#FFFFFF", lw=1.5, label="Total"),
        plt.Line2D([0],[0], color=COLS["MAX"], lw=2, label="MAX"),
        plt.Line2D([0],[0], color=COLS["Bus"], lw=2, label="Bus"),
        plt.Line2D([0],[0], color=COLS["Commuter Rail"], lw=2, label="Commuter Rail"),
    ]
    ax.legend(handles=handles, loc="upper left", frameon=False, fontsize=8, labelcolor=AX_C)

    plt.savefig(os.path.join(OUT, f"chart_{i:05d}.png"), transparent=True)
    plt.close(fig)

print(f"Wrote {len(s)} PNGs → {OUT}")


Render 5s multi-mode + total: 100%|██████████| 2881/2881 [05:02<00:00,  9.52it/s]

Wrote 2881 PNGs → chart_frames_modes_5s





Series by MAX line (Blue/Green/Orange/Red/Yellow) + Bus + Commuter Rail

This builds a new per-frame time series straight from your points CSV and TriMet GTFS zip to identify each MAX line by route_short_name.

In [12]:
import zipfile, pandas as pd

POINTS_CSV = "portland_transit_points.csv"
GTFS_ZIP   = "trimet_gtfs.zip"

pts = pd.read_csv(POINTS_CSV, parse_dates=["datetime"])
if "trip_id" in pts: pts["trip_id"] = pts["trip_id"].astype(str)
if "route_id" in pts: pts["route_id"] = pts["route_id"].astype(str)
pts["frame_time"] = pts["datetime"].dt.floor("5S")

with zipfile.ZipFile(GTFS_ZIP) as zf:
    routes = pd.read_csv(
        zf.open("routes.txt"),
        dtype={"route_id":str,"agency_id":str,"route_type":"Int64",
               "route_short_name":str,"route_long_name":str}
    )[["route_id","agency_id","route_type","route_short_name","route_long_name"]]
    if "route_id" not in pts.columns or pts["route_id"].isna().all():
        trips = pd.read_csv(zf.open("trips.txt"), dtype={"trip_id":str,"route_id":str})[["trip_id","route_id"]]
        pts = pts.merge(trips, on="trip_id", how="left")

pts["route_id"] = pts["route_id"].astype(str)
pts = pts.merge(routes, on="route_id", how="left")

import pandas as pd

# --- normalize merged columns ---
pts["route_type"] = pts.get("route_type_y")
pts.loc[pts["route_type"].isna(), "route_type"] = pts.get("route_type_x")

pts["agency_id"] = pts.get("agency_id_y")
pts.loc[pts["agency_id"].isna(), "agency_id"] = pts.get("agency_id_x")

# helpers
def as_lower(v):
    if pd.isna(v): return ""
    return str(v).lower()

def classify(row):
    ag  = (row.get("agency_id") or "").upper()
    rt  = row.get("route_type")
    rln = as_lower(row.get("route_long_name"))
    rsn = as_lower(row.get("route_short_name"))

    # robust text fallbacks (work even if rt/agency missing)
    if "wes" in rln or "commuter" in rln:
        return "Commuter Rail"
    if "streetcar" in rln or ag == "PSC":
        if "ns" in rln:      return "Streetcar NS"
        if "a loop" in rln:  return "Streetcar A"
        if "b loop" in rln:  return "Streetcar B"
        return "Streetcar"

    # if we can read route_type/agency, use them
    if not pd.isna(rt):
        try:
            rt = int(rt)
        except Exception:
            rt = None

    # TriMet MAX (light rail/tram)
    if ag == "TRIMET" and rt in (0, 1):
        name = rln or rsn
        if "max" in name:
            if   "blue"   in name: return "MAX Blue"
            elif "green"  in name: return "MAX Green"
            elif "orange" in name: return "MAX Orange"
            elif "red"    in name: return "MAX Red"
            elif "yellow" in name: return "MAX Yellow"
            else: return "MAX Other"

    # Bus (TriMet)
    if ag == "TRIMET" and rt == 3:
        return "Bus"

    # Last-chance text match for MAX if agency/rt missing but name is clear
    if "max" in rln:
        if   "blue"   in rln: return "MAX Blue"
        elif "green"  in rln: return "MAX Green"
        elif "orange" in rln: return "MAX Orange"
        elif "red"    in rln: return "MAX Red"
        elif "yellow" in rln: return "MAX Yellow"
        else: return "MAX Other"

    return None

pts["series"] = pts.apply(classify, axis=1)

# quick check before exporting
print("series counts:\n", pts["series"].value_counts(dropna=False).head(20))

# --- export QGIS-ready points with series ---
qgis_cols = ["lon","lat","datetime","trip_id","shape_id","route_id","series"]
missing = [c for c in qgis_cols if c not in pts.columns]
if missing:
    raise RuntimeError(f"Missing required columns: {missing}")

pts_qgis = pts[pts["series"].notna()][qgis_cols].copy()
pts_qgis.to_csv("portland_points_modes.csv", index=False)
print("Saved → portland_points_modes.csv rows:", len(pts_qgis))
print("Unique series:", sorted(pts_qgis["series"].unique()))


  pts["frame_time"] = pts["datetime"].dt.floor("5S")


series counts:
 series
Bus              748952
MAX Blue          36523
MAX Red           24809
MAX Green         17408
Streetcar NS      13380
MAX Orange        10887
MAX Yellow        10591
Streetcar B        7657
Streetcar A        7491
Commuter Rail      2925
Name: count, dtype: int64
Saved → portland_points_modes.csv rows: 880623
Unique series: ['Bus', 'Commuter Rail', 'MAX Blue', 'MAX Green', 'MAX Orange', 'MAX Red', 'MAX Yellow', 'Streetcar A', 'Streetcar B', 'Streetcar NS']


In [14]:
# trim_points_modes_6to9.py
# Input:  portland_points_modes.csv  (columns: lon, lat, datetime, trip_id, shape_id, route_id, series)
# Output: portland_points_modes_6to9.csv

import pandas as pd

IN_CSV  = "portland_points_modes.csv"
OUT_CSV = "portland_points_modes_6to9.csv"

df = pd.read_csv(IN_CSV, parse_dates=["datetime"])

# Infer the date from the data (uses the first row’s date)
date0 = df["datetime"].dt.normalize().iloc[0]

start = date0 + pd.Timedelta(hours=6)  # 06:00
end   = date0 + pd.Timedelta(hours=9)  # 09:00

mask = (df["datetime"] >= start) & (df["datetime"] <= end)
out = df.loc[mask].copy()

out.to_csv(OUT_CSV, index=False)
print(f"Trimmed to {start} → {end}  |  rows: {len(out)}  → {OUT_CSV}")


Trimmed to 2025-09-10 06:00:00 → 2025-09-10 09:00:00  |  rows: 648724  → portland_points_modes_6to9.csv


In [17]:
import pandas as pd

IN = "portland_points_modes_6to9.csv"
OUT = "portland_points_modegroups_6to9.csv"

df = pd.read_csv(IN, parse_dates=["datetime"])

def group_mode(s):
    s = str(s).lower()
    if s.startswith("max"): return "MAX"
    if "streetcar" in s: return "Streetcar"
    if "commuter" in s: return "Commuter Rail"
    if s == "bus": return "Bus"
    return None

df["mode_group"] = df["series"].map(group_mode)
df = df[df["mode_group"].notna()]

df.to_csv(OUT, index=False)
print("Saved →", OUT, "rows:", len(df))
print("Mode counts:", df["mode_group"].value_counts())


Saved → portland_points_modegroups_6to9.csv rows: 648724
Mode counts: mode_group
Bus              551416
MAX               74294
Streetcar         20414
Commuter Rail      2600
Name: count, dtype: int64


In [18]:
# make_active_modegroups_10s_6to9.py
import pandas as pd

IN  = "portland_points_modegroups_6to9.csv"
OUT = "portland_active_modegroups_10s_6to9.csv"

FRAME = "10S"
SMOOTH = "10min"
SERIES = ["MAX","Streetcar","Commuter Rail","Bus"]

df = pd.read_csv(IN, parse_dates=["datetime"])
df["frame_time"] = df["datetime"].dt.floor(FRAME)

g = (df.groupby(["frame_time","mode_group"])["trip_id"]
       .nunique()
       .unstack("mode_group")
       .reindex(columns=SERIES, fill_value=0)
       .fillna(0))

smooth = g.rolling(SMOOTH, min_periods=1).mean()
smooth.columns = [f"smooth_{c}" for c in smooth.columns]

out = pd.concat([g, smooth], axis=1).reset_index()
out.to_csv(OUT, index=False)
print("Wrote:", OUT, "frames:", len(out))


Wrote: portland_active_modegroups_10s_6to9.csv frames: 1081


  df["frame_time"] = df["datetime"].dt.floor(FRAME)


In [20]:
# render_chart_frames_modegroups.py
import os, numpy as np, pandas as pd, matplotlib.pyplot as plt
from tqdm import tqdm

CSV = "portland_active_modegroups_10s_6to9.csv"
OUT = "chart_frames_modegroups"

COLS = {
    "MAX": "#1E88E5",          # Blue
    "Bus": "#FF00FF",          # Magenta
    "Streetcar": "#FFA500",    # Orange
    "Commuter Rail": "#00FFFF" # Cyan
}

AX_C = "#FFFFFF"
FIG_W, FIG_H, DPI = 4, 2, 100

os.makedirs(OUT, exist_ok=True)
s = pd.read_csv(CSV, parse_dates=["frame_time"]).reset_index(drop=True)

series_order = [k for k in COLS.keys() if f"smooth_{k}" in s.columns]
x = np.arange(len(s))
ys = {name: s[f"smooth_{name}"].to_numpy() for name in series_order}
ymax = float(max((ys[name].max() for name in ys), default=1) * 1.1)

# Hour ticks
tick_idx = s.index[(s["frame_time"].dt.minute.eq(0)) & (s["frame_time"].dt.second.eq(0))].tolist()
tick_lbl = s.loc[tick_idx, "frame_time"].dt.strftime("%H:%M").tolist()

for i in tqdm(range(len(s))):
    fig, ax = plt.subplots(figsize=(FIG_W, FIG_H), dpi=DPI)
    fig.patch.set_alpha(0.0)
    ax.set_facecolor((0,0,0,0))

    for name in series_order:
        y = ys[name]; color = COLS[name]
        ax.fill_between(x[:i+1], 0, y[:i+1], color=color, alpha=0.12, linewidth=0, zorder=1)
        j0 = max(0, i-8)
        ax.fill_between(x[j0:i+1], 0, y[j0:i+1], color=color, alpha=0.28, linewidth=0, zorder=2)
        
        ax.plot(x[:i+1], y[:i+1], color=color, lw=2, zorder=3)
        ax.scatter(i, y[i], s=22, color=color, zorder=4)

    for sp in ax.spines.values(): sp.set_color(AX_C)
    ax.tick_params(colors=AX_C, labelsize=8)
    ax.set_ylabel("Transit Vehicles in Service", color=AX_C)
    ax.set_xlabel("Time", color=AX_C)
    ax.set_title("Transit Vehicles by Mode (06–09)", color=AX_C, fontsize=10, pad=8)
    ax.set_xticks(tick_idx); ax.set_xticklabels(tick_lbl, color=AX_C)
    ax.set_ylim(0, ymax)

    handles = [plt.Line2D([0],[0], color=COLS[k], lw=2, label=k) for k in series_order]
    ax.legend(handles=handles, loc="upper left", frameon=False, fontsize=8, labelcolor=AX_C)

    plt.savefig(os.path.join(OUT, f"chart_{i:05d}.png"), transparent=True)
    plt.close(fig)

print(f"Wrote {len(s)} frames → {OUT}")


100%|██████████| 1081/1081 [01:35<00:00, 11.26it/s]

Wrote 1081 frames → chart_frames_modegroups



