## Testing Access of CTA Bus Tracker

In [1]:
pip install requests




### Pulls
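Limit of 100,000 pulls/day/key. `getroutes` returned 124 bus routes on 2026-02-18 (an earlier count was 107). We can query up to 10 routes at once, so one full sweep of all routes takes 13 requests.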

### Features

- vid: vehicle ID
- tmstmp: date and time of the vehicle's last position update
- ...

In [2]:
# imports

import os

import requests

In [3]:
# The API key is read from the CTA_API_KEY environment variable so it is never
# stored in the notebook. Any key that was previously written in this cell
# should be treated as exposed and rotated.
api_key = os.environ["CTA_API_KEY"]

url_bus = f'https://www.ctabustracker.com/bustime/api/v3/?key={api_key}'
url_predictions = f'https://www.ctabustracker.com/bustime/api/v3/getvehicles?key={api_key}&rt=201,55&format=json'

# A timeout keeps the cell from hanging indefinitely if the server never answers.
r = requests.get(url_predictions, timeout=30)


# print(r.text)



In [4]:
# Decode the JSON body of the response `r` fetched in the previous cell.
bus = r.json()

In [15]:
# Display the decoded response to inspect its structure.
bus

{'bustime-response': {'vehicle': [{'vid': '8064',
    'tmstmp': '20260218 15:07',
    'lat': '41.79307011195591',
    'lon': '-87.74134308951241',
    'hdg': '88',
    'pid': 5424,
    'rt': '55',
    'des': 'Museum of Science & Industry',
    'pdist': 4445,
    'dly': False,
    'tatripid': '204587',
    'origtatripno': '273985418',
    'tablockid': '55 -610',
    'zone': '',
    'mode': 1,
    'psgld': 'N/A',
    'stst': 53070,
    'stsd': '2026-02-18'},
   {'vid': '1570',
    'tmstmp': '20260218 15:07',
    'lat': '41.79307174682617',
    'lon': '-87.73733234405518',
    'hdg': '87',
    'pid': 5424,
    'rt': '55',
    'des': 'Museum of Science & Industry',
    'pdist': 5539,
    'dly': False,
    'tatripid': '204589',
    'origtatripno': '273985420',
    'tablockid': '55 -651',
    'zone': '',
    'mode': 1,
    'psgld': 'N/A',
    'stst': 54090,
    'stsd': '2026-02-18'},
   {'vid': '1498',
    'tmstmp': '20260218 15:07',
    'lat': '41.79331970214844',
    'lon': '-87.7159313625

In [None]:
# import requests
# import pandas as pd
# import time
# from datetime import datetime
# from zoneinfo import ZoneInfo

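# ####### Main Functions - Pedro's Version #######
# NOTE: kept for reference only; get_api and main are redefined in the next code cell.
# Known issues if revived: get_api ignores its `url` argument (it requests
# url_predictions), and main's loop compares elapsed time against an absolute
# end timestamp, so it would not stop after runtime_seconds.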

# ##############################
# # FUNCTION 1 - API EXTRACTING
# ##############################

# def get_api(url):
#   r = requests.get(url_predictions)

#   if r.ok:
#     data = r.json()
#     print('API Request complete')
#     return data

#   else:
#     raise ValueError('Could not complete API Request')

# ##############################
# # FUNCTION 2 - SAVING DATA
# ##############################

# def save_data(data):
#   try:
#     vehicles = data['bustime-response']['vehicle']
#     df = pd.DataFrame(vehicles)

#     chicago_tz = ZoneInfo("America/Chicago")
#     timestamp = datetime.now(chicago_tz).strftime("%Y-%m-%d_%H-%M-%S")
#     filename = f"{timestamp}_bus_data.csv"

#     df.to_csv(filename, index=False)
#     print(f'Data saved to {filename}')

#     return filename, len(df)

#   except:
#     raise ValueError('No active vehicle data')


# ##############################
# # FUNCTION 3 - MAIN RUN
# ##############################
# def main(url, sleep_time=30, runtime_seconds=3600):
#   n_calls = 0
#   try:
#     start_time = time.time()
#     end_time = start_time + runtime_seconds

#     while time.time() - start_time < end_time:
#       data = get_api(url)
#       filename, n_rows = save_data(data)
#       n_calls += 1
#       print(f'{n_rows} rows were saved to {filename}')

#       time.sleep(sleep_time)

#   except Exception as e:
#       print(f"[{n_calls+1}] Error: {e}")


In [16]:
import os
import requests
import pandas as pd
import time
from datetime import datetime
from zoneinfo import ZoneInfo

CHICAGO_TZ = ZoneInfo("America/Chicago")

def chunk_list(xs, n=10):
    """Split ``xs`` into consecutive slices holding at most ``n`` items each.

    The final slice is shorter when ``len(xs)`` is not a multiple of ``n``;
    an empty ``xs`` yields an empty list.
    """
    pieces = []
    for start in range(0, len(xs), n):
        stop = start + n
        pieces.append(xs[start:stop])
    return pieces

def get_routes(api_key):
    """Fetch the route identifiers known to the CTA Bus Tracker API.

    Args:
        api_key: Key inserted into the ``getroutes`` request URL.

    Returns:
        The ``rt`` value of every entry in the response's ``routes`` list
        (entries without an ``rt`` key are skipped).

    Raises:
        ValueError: If the response has no routes; the message carries the
            response's ``error`` field.
        Whatever ``raise_for_status`` raises for an HTTP error status.
    """
    print("Getting routes...")
    endpoint = f"https://www.ctabustracker.com/bustime/api/v3/getroutes?key={api_key}&format=json"
    response = requests.get(endpoint, timeout=30)
    response.raise_for_status()
    payload = response.json()

    route_entries = payload.get("bustime-response", {}).get("routes", [])
    if route_entries:
        return [entry["rt"] for entry in route_entries if "rt" in entry]

    # No routes: surface whatever error detail the API sent back.
    err = payload.get("bustime-response", {}).get("error", [])
    raise ValueError(f"No routes returned. Error: {err}")

def get_api(url):
    """GET ``url`` (30 second timeout) and return the decoded JSON body.

    Raises:
        ValueError: If the response is not OK; the message includes the
            HTTP status code.
    """
    response = requests.get(url, timeout=30)
    if not response.ok:
        raise ValueError(f"API request failed (status={response.status_code})")
    return response.json()

def append_vehicles_to_csv(data, outfile, pulled_at, rt_chunk):
    """Append the vehicles from one ``getvehicles`` response to a CSV file.

    Args:
        data: Decoded JSON response; vehicles are read from
            ``data["bustime-response"]["vehicle"]``.
        outfile: Path of the CSV to append to. A header row is written when
            the file does not exist yet or is empty.
        pulled_at: Value stored in the ``pulled_at`` column of every row.
        rt_chunk: Value stored in the ``rt_chunk`` column of every row.

    Returns:
        Number of rows appended; 0 when the response holds no vehicles
        (nothing is written in that case).

    When the file already has a header, the new rows are reordered to match
    it before appending. Columns missing from the new rows are left empty,
    and columns absent from the header are dropped with a printed warning,
    so values never land under the wrong column name.
    """
    vehicles = data.get("bustime-response", {}).get("vehicle", None)
    if not vehicles:
        # no vehicles is normal sometimes; don't crash the whole run
        return 0

    df = pd.DataFrame(vehicles)

    # add metadata columns so you can trace pulls later
    df["pulled_at"] = pulled_at
    df["rt_chunk"] = rt_chunk

    # A zero-byte file exists but has no header yet, so treat it as new.
    has_header = os.path.exists(outfile) and os.path.getsize(outfile) > 0
    if has_header:
        # nrows=0 reads only the header line, so this stays cheap as the file grows.
        header = pd.read_csv(outfile, nrows=0).columns.tolist()
        dropped = [col for col in df.columns if col not in header]
        if dropped:
            print(f"Warning: dropping columns not in {outfile} header: {dropped}")
        df = df.reindex(columns=header)

    df.to_csv(outfile, mode="a", header=not has_header, index=False)
    return len(df)

def main(api_key, per_chunk_sleep=5, per_sweep_sleep=30, runtime_hours=1200, break_hours=300, out_dir="."):
    """Repeatedly pull vehicle data for every route and append it to one CSV.

    The route list is fetched once with ``get_routes`` and split into groups
    of 10. A "sweep" requests each group once via ``get_api`` and appends the
    result with ``append_vehicles_to_csv``. Sweeps repeat until the runtime
    has elapsed. A failed request or write is printed and does not stop the run.

    Args:
        api_key: CTA Bus Tracker API key placed in each request URL.
        per_chunk_sleep: Seconds to sleep after each request.
        per_sweep_sleep: Seconds to sleep after each completed sweep.
        runtime_hours: Total wall-clock run time, in hours.
        break_hours: Currently unused by this function; kept so existing
            calls that pass it keep working.
        out_dir: Directory for the output CSV; created if it does not exist.

    Returns:
        Path of the CSV file that was written to.
    """
    routes = get_routes(api_key)
    chunks = chunk_list(routes, n=10)

    # Create the output directory up front. Otherwise every write would fail
    # inside the loop and only show up as a per-call error for the whole run.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    start_stamp = datetime.now(CHICAGO_TZ).strftime("%Y-%m-%d_%H-%M-%S")
    outfile = os.path.join(out_dir, f"bus_data_{start_stamp}_chicago.csv")

    print(f"Found {len(routes)} routes -> {len(chunks)} chunks")
    print(f"Writing EVERYTHING to one file:\n  {outfile}\n")
    print(f"Chunk sleep: {per_chunk_sleep}s | Sweep sleep: {per_sweep_sleep}s | Runtime: {runtime_hours} hours\n")

    # Exception text from the HTTP layer can contain the request URL, which
    # includes the key; it is masked before anything is printed.
    secret = str(api_key)

    start = time.time()
    end = start + runtime_hours*3600

    sweep_num = 0
    call_num = 0
    total_rows = 0

    while time.time() < end:
        sweep_num += 1
        print(f"--- Sweep {sweep_num} @ {datetime.now(CHICAGO_TZ).strftime('%Y-%m-%d %H:%M:%S %Z')} ---")

        for i, chunk in enumerate(chunks):
            if time.time() >= end:
                break

            rt_param = ",".join(chunk)
            url = (
                "https://www.ctabustracker.com/bustime/api/v3/getvehicles"
                f"?key={api_key}&rt={rt_param}&format=json"
            )

            pulled_at = datetime.now(CHICAGO_TZ).strftime("%Y-%m-%d %H:%M:%S %Z")

            # Every attempt counts as a call, whether it succeeds or fails.
            call_num += 1
            try:
                data = get_api(url)
                n_rows = append_vehicles_to_csv(data, outfile, pulled_at=pulled_at, rt_chunk=rt_param)
                total_rows += n_rows
                print(f"[Call {call_num}] chunk {i+1}/{len(chunks)}: appended {n_rows} rows (total {total_rows})")
            except Exception as e:
                message = str(e)
                if secret:
                    message = message.replace(secret, "***")
                print(f"[Call {call_num}] chunk {i+1}/{len(chunks)} ERROR: {message}")

            # Never sleep past the deadline.
            if time.time() < end:
                time.sleep(min(per_chunk_sleep, max(0, end - time.time())))

        if time.time() < end:
            sleep_now = min(per_sweep_sleep, max(0, end - time.time()))
            print(f"--- Sweep {sweep_num} complete. Sleeping {sleep_now:.0f}s ---\n")
            time.sleep(sleep_now)

    print(f"\nDone. Sweeps: {sweep_num}, calls: {call_num}, total rows written: {total_rows}")
    print(f"Output file: {outfile}")
    return outfile


In [None]:
# Read the API key from the CTA_API_KEY environment variable instead of
# hardcoding it. Any key that was previously written in this cell should be
# treated as exposed and rotated.
key = os.environ["CTA_API_KEY"]
main(key)

Getting routes...
Found 124 routes -> 13 chunks
Writing EVERYTHING to one file:
  ./bus_data_2026-02-18_15-25-57_chicago.csv

Chunk sleep: 5s | Sweep sleep: 30s | Runtime: 3600s

--- Sweep 1 @ 2026-02-18 15:25:57 CST ---
[Call 1] chunk 1/13: appended 112 rows (total 112)
[Call 2] chunk 2/13: appended 132 rows (total 244)
[Call 3] chunk 3/13: appended 93 rows (total 337)
[Call 4] chunk 4/13: appended 102 rows (total 439)
[Call 5] chunk 5/13: appended 99 rows (total 538)
[Call 6] chunk 6/13: appended 107 rows (total 645)
[Call 7] chunk 7/13: appended 144 rows (total 789)
[Call 8] chunk 8/13: appended 154 rows (total 943)
[Call 9] chunk 9/13: appended 92 rows (total 1035)
[Call 10] chunk 10/13: appended 42 rows (total 1077)
[Call 11] chunk 11/13: appended 32 rows (total 1109)
[Call 12] chunk 12/13: appended 86 rows (total 1195)
[Call 13] chunk 13/13: appended 12 rows (total 1207)
--- Sweep 1 complete. Sleeping 30s ---

--- Sweep 2 @ 2026-02-18 15:27:35 CST ---
[Call 14] chunk 1/13: append

In [None]:
def delay_percentage(file):
  '''
  Analyze the percentage of time each route is delayed.

  Args:
    file: Table that is indexed with a list of column names and must contain
      the 'rt' and 'dly' columns (presumably a pandas DataFrame of collected
      vehicle rows -- TODO confirm against the caller).

  NOTE(review): in the lines visible here the function only groups 'dly' by
  'rt'; no aggregation is applied and nothing is returned yet.
  '''

  # Keep only the route id and the delayed flag.
  important_cols = ['rt', 'dly']
  important_df = file[important_cols]

  # Group the delay flags by route; this is the grouping itself, not yet a percentage.
  percentage = important_df.groupby('rt')['dly']
