# Mobi Vancouver GBFS Tools (Unity Catalog)

Use these simple, copy-and-paste functions to explore Mobi Vancouver trip history (bronze tables)
and live station availability (GBFS, the General Bikeshare Feed Specification) from SQL. Perfect for a hackathon or tutorial.

Steps:
1) Setup
2) Create Tools (SQL and Python functions)
3) Try Queries

Prereq: Run `01_data.ipynb` first to create `bronze_trips` and `bronze_stations` in your configured catalog/schema.

In [0]:
# Setup: minimal deps + add src to sys.path
%pip install -q mlflow requests

import sys
from pathlib import Path
src_path = Path.cwd() / "src"
if str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path))


In [0]:
import mlflow

# Project settings are read from config.yaml in development mode
config = mlflow.models.ModelConfig(development_config='config.yaml')


def _config_value(key, default):
    """Return config[key], or default when the key is absent or its value is empty.

    ModelConfig.get() raises KeyError for a key that is not in the file, so a
    bare `config.get(key) or default` never reaches the default in that case.
    """
    try:
        return config.get(key) or default
    except KeyError:
        return default


# The %sql cells in this notebook reference these widgets as ${catalog} / ${schema}
dbutils.widgets.text('catalog', _config_value('catalog', 'hive_metastore'))
dbutils.widgets.text('schema', _config_value('schema', 'default'))

catalog = dbutils.widgets.get('catalog')
schema = dbutils.widgets.get('schema')
print(f"Using catalog.schema: {catalog}.{schema}")

In [0]:
%sql
-- Preview ten rows of the station table to confirm it exists and is readable
SELECT
  s.station_id,
  s.name,
  s.lat,
  s.lon
FROM `${catalog}`.`${schema}`.`bronze_stations` AS s
LIMIT 10


In [0]:
%sql
-- Table function: the 10 most recent trips that departed from one station, newest first.
-- Station ids are compared as strings, so pass the id exactly as stored (for example '0152').
-- The parameter is referenced as recent_trips_by_station.station_id so that a column
-- called station_id in bronze_trips could never shadow it (columns win over parameters).
CREATE OR REPLACE FUNCTION `${catalog}`.`${schema}`.recent_trips_by_station(
  station_id STRING
)
RETURNS TABLE (
  departure_time TIMESTAMP,
  departure_station_id STRING,
  return_time TIMESTAMP,
  return_station_id STRING,
  duration_sec INT
)
COMMENT 'Up to 10 most recent trips departing from the given station, newest first'
RETURN
SELECT
  t.departure_time,
  CAST(t.departure_station_id AS STRING) AS departure_station_id,
  t.return_time,
  CAST(t.return_station_id AS STRING) AS return_station_id,
  CAST(t.duration_sec AS INT) AS duration_sec
FROM `${catalog}`.`${schema}`.`bronze_trips` AS t
WHERE CAST(t.departure_station_id AS STRING) = recent_trips_by_station.station_id
ORDER BY t.departure_time DESC
LIMIT 10;

In [0]:
%sql
-- Example: most recent departures from station 0152
SELECT
  departure_time,
  departure_station_id,
  return_time,
  return_station_id,
  duration_sec
FROM `${catalog}`.`${schema}`.recent_trips_by_station('0152')

In [0]:
%sql
-- Table function: static details (name, coordinates, capacity) for one station.
-- Station ids are compared as strings, so pass the id exactly as stored (for example '0152').
-- The parameter shares its name with the bronze_stations column. Columns take precedence
-- over routine parameters, so an unqualified comparison would test the column against
-- itself and return every station. The table is aliased and the parameter is qualified
-- with the function name to keep the two apart.
CREATE OR REPLACE FUNCTION `${catalog}`.`${schema}`.station_info(
  station_id STRING
)
RETURNS TABLE (
  station_id STRING,
  name STRING,
  lat DOUBLE,
  lon DOUBLE,
  capacity BIGINT
)
COMMENT 'Name, coordinates and capacity of the station with the given id'
RETURN
SELECT
  CAST(s.station_id AS STRING) AS station_id,
  s.name,
  CAST(s.lat AS DOUBLE) AS lat,
  CAST(s.lon AS DOUBLE) AS lon,
  CAST(s.capacity AS BIGINT) AS capacity
FROM `${catalog}`.`${schema}`.`bronze_stations` AS s
WHERE CAST(s.station_id AS STRING) = station_info.station_id;

In [0]:
%sql
CREATE OR REPLACE FUNCTION `${catalog}`.`${schema}`.live_station_status(
  station_id STRING
)
RETURNS TABLE (
  station_id STRING,
  num_bikes_available INT,
  num_docks_available INT,
  is_renting BOOLEAN,
  is_returning BOOLEAN,
  last_reported BIGINT
)
LANGUAGE PYTHON
HANDLER 'LiveStationStatus'
AS $$
class LiveStationStatus:
    """Handler for the ``live_station_status`` table function.

    ``eval`` downloads the GBFS ``station_status`` feed and returns at most one
    row, as a dict keyed by output column name, for the requested station.
    """

    def ensure_station_fields(self, station):
        """Project one feed entry onto the output columns with stable types.

        Args:
            station: dict describing a single station, as found in the feed.

        Returns:
            A dict holding exactly the six output columns, in declaration
            order. A key that is absent falls back to its default. The integer
            columns also fall back when the feed carries an explicit null,
            because ``int(None)`` raises and ``eval`` would then drop the
            whole row.
        """
        defaults = {
            'station_id': None,
            'num_bikes_available': 0,
            'num_docks_available': 0,
            'is_renting': True,
            'is_returning': True,
            'last_reported': 0
        }
        int_fields = ('num_bikes_available', 'num_docks_available', 'last_reported')
        bool_fields = ('is_renting', 'is_returning')

        result = {name: station.get(name, fallback) for name, fallback in defaults.items()}

        # bool(None) is already well defined (False), so only the integer
        # columns need a guard against an explicit null.
        for name in int_fields:
            if result[name] is None:
                result[name] = defaults[name]

        # Type conversions
        if result['station_id'] is not None:
            result['station_id'] = str(result['station_id'])
        for name in int_fields:
            result[name] = int(result[name])
        for name in bool_fields:
            result[name] = bool(result[name])
        return result

    def eval(self, station_id: str):
        """Return the live status of ``station_id`` as a list of zero or one row.

        Ids are compared as strings. An empty list is returned when no station
        matches, and also when the request or the parsing fails.
        """
        import requests
        url = "https://gbfs.kappa.fifteen.eu/gbfs/2.2/mobi/en/station_status.json"
        wanted = str(station_id)
        try:
            r = requests.get(url, timeout=10)
            data = r.json()
            stations = data.get('data', {}).get('stations', [])
            # Stop at the first station whose id matches
            match = next((s for s in stations if str(s.get('station_id')) == wanted), None)
            if match is None:
                return []
            return [self.ensure_station_fields(match)]
        except Exception:
            # Deliberate best effort: a network, JSON or conversion failure
            # yields no rows instead of failing the calling SQL query.
            return []
$$

In [0]:
%sql
-- Example: live availability for station 0152
SELECT
  station_id,
  num_bikes_available,
  num_docks_available,
  is_renting,
  is_returning,
  last_reported
FROM `${catalog}`.`${schema}`.live_station_status('0152')

In [0]:
%sql
CREATE OR REPLACE FUNCTION `${catalog}`.`${schema}`.nearby_stations(
  target_lat DOUBLE,
  target_lon DOUBLE,
  radius_km DOUBLE
)
RETURNS TABLE (
  station_id STRING,
  name STRING,
  lat DOUBLE,
  lon DOUBLE,
  distance_km DOUBLE
)
LANGUAGE PYTHON
HANDLER 'Nearby'
AS $$
class Nearby:
    """Handler for the ``nearby_stations`` table function.

    ``eval`` downloads the GBFS ``station_information`` feed and returns one
    row, as a dict keyed by output column name, for every station whose
    great-circle distance from the target point is within the radius.
    """

    def haversine(self, lat1, lon1, lat2, lon2):
        """Return the great-circle distance in kilometres between two points.

        All four arguments are latitudes/longitudes in degrees.
        """
        import math
        R = 6371.0  # Earth radius in km
        dlat = math.radians(lat2 - lat1)
        dlon = math.radians(lon2 - lon1)
        a = math.sin(dlat/2)**2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon/2)**2
        # Rounding can leave a marginally above 1 for near-antipodal points,
        # which would make sqrt(1 - a) raise. Cap it. NaN is left untouched
        # because the comparison is False for NaN.
        if a > 1.0:
            a = 1.0
        c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
        return R * c

    def eval(self, target_lat, target_lon, radius_km):
        """Return the stations within ``radius_km`` of the target point.

        Rows keep the order of the feed. Stations whose coordinates cannot be
        read are skipped. An empty list is returned when the arguments are not
        numeric or when the request or the parsing fails.
        """
        import requests
        url = "https://gbfs.kappa.fifteen.eu/gbfs/2.2/mobi/en/station_information.json"
        try:
            # Convert the query arguments once instead of once per station. A
            # NULL or non-numeric argument lands in the handler below and
            # yields no rows.
            origin_lat = float(target_lat)
            origin_lon = float(target_lon)
            max_km = float(radius_km)

            r = requests.get(url, timeout=10)
            stations = r.json().get('data', {}).get('stations', [])
            res = []
            for s in stations:
                try:
                    lat = float(s.get('lat'))
                    lon = float(s.get('lon'))
                    dist = self.haversine(origin_lat, origin_lon, lat, lon)
                    if dist <= max_km:
                        sid = s.get('station_id')
                        res.append({
                            # Keep a missing id as NULL instead of the text 'None'
                            'station_id': None if sid is None else str(sid),
                            'name': s.get('name'),
                            'lat': lat,
                            'lon': lon,
                            'distance_km': dist
                        })
                except Exception:
                    # Skip a malformed station entry and keep the rest
                    continue
            return res
        except Exception:
            # Deliberate best effort: a network or JSON failure yields no rows
            # instead of failing the calling SQL query.
            return []
$$

In [0]:
%sql
-- Example: stations within 1 km of latitude 49.2827, longitude -123.1207
SELECT
  station_id,
  name,
  lat,
  lon,
  distance_km
FROM `${catalog}`.`${schema}`.nearby_stations(49.2827, -123.1207, 1.0)