# Mobi Vancouver Bike Share Data

Download and process the complete trip history (2018-2025, ~7.6M trips).

Works on **Databricks** or locally.

In [None]:
%pip install requests pandas pyarrow beautifulsoup4 openpyxl

In [None]:
import re
from pathlib import Path
import pandas as pd
import requests
from bs4 import BeautifulSoup

## Download Trip Data

In [None]:
# Trip-history helpers come from the project's mobi module.
from mobi import download_all_trip_data, combine_trip_data, save_to_parquet

# Pick the output location automatically: the DBFS mount when it is present
# (Databricks), otherwise a folder relative to the working directory (local).
# Assign output_dir explicitly here to override the detection.
output_dir = Path("/dbfs/mobi_data") if Path("/dbfs").is_dir() else Path("./data")
# Later cells write files directly into output_dir, so make sure it exists.
output_dir.mkdir(parents=True, exist_ok=True)

# Download all files
print("Downloading trip data...")
files = download_all_trip_data(output_dir / "raw")
print(f"Downloaded {len(files)} files")

# Process and combine
print("\nProcessing...")
trips_df = combine_trip_data(files)
print(f"Total trips: {len(trips_df):,}")

# Save
save_to_parquet(trips_df, output_dir / "mobi_trips.parquet")

## Download Station Data

In [None]:
from mobi import fetch_station_info_from_gbfs, fetch_station_status_from_gbfs

# Pull station information and station status through the GBFS helpers.
print("Fetching stations...")
stations = fetch_station_info_from_gbfs()
status = fetch_station_status_from_gbfs()

# Left-join the status onto the station records by station_id, so every
# row of the information table is kept.
stations = stations.merge(right=status, how="left", on="station_id")
station_count = len(stations)
print(f"Total stations: {station_count}")

# Write the merged table in both formats, leaving the index out.
parquet_path = output_dir / "mobi_stations.parquet"
csv_path = output_dir / "mobi_stations.csv"
stations.to_parquet(parquet_path, index=False)
stations.to_csv(csv_path, index=False)

## Load and Explore

In [None]:
# Read both saved tables back from output_dir.
trips_path = output_dir / "mobi_trips.parquet"
stations_path = output_dir / "mobi_stations.parquet"
trips = pd.read_parquet(trips_path)
stations = pd.read_parquet(stations_path)

# Row counts first, then the earliest and latest departure_time values.
print(f"Trips: {len(trips):,}")
print(f"Stations: {len(stations)}")
first_departure = trips["departure_time"].min()
last_departure = trips["departure_time"].max()
print(f"Date range: {first_departure} to {last_departure}")