In [None]:
import requests
import pandas as pd
from datetime import datetime
import time

# JSON endpoint that the paging loop below sends its "$query" requests to.
URL = "https://data.ny.gov/resource/wujg-7c2s.json"

# Optional application token; while it is falsy no extra header is sent.
APP_TOKEN = None

# Bounds substituted into the BETWEEN filter on transit_timestamp, and the
# maximum number of aggregated rows requested per page.
start, end = "2020-07-01T00:00:00", "2020-12-31T23:59:59"
batch_size = 50000

# A single session is reused for every request so headers are set only once.
session = requests.Session()
if APP_TOKEN:
    session.headers["X-App-Token"] = APP_TOKEN

# Every page returned by the API is appended here.
all_rows = list()

# Keyset cursor: the (day, station_complex_id) pair of the last row received.
# Both stay None until the first page has been read.
last_day = last_station_id = None

# Download the per-day, per-station ridership totals page by page.
#
# Paging is keyset-based: every request orders rows by (day,
# station_complex_id) and, after the first page, asks only for rows strictly
# after the last pair already received.
#
# Transient failures (HTTP 429/5xx, connection errors, timeouts) are retried
# with exponential backoff, but only a bounded number of times per page so a
# persistently failing endpoint cannot make the loop spin forever.
max_retries = 5       # consecutive transient failures tolerated for one page
retry_delay = 1.5     # seconds before the first retry; doubled on each retry
failed_attempts = 0   # consecutive failures for the page being fetched

while True:
    # Always restrict the query to the configured time window.
    where_parts = [
        f"transit_timestamp BETWEEN '{start}' AND '{end}'"
    ]

    # From the second page on, resume right after the last (day, station)
    # pair that was received.
    if last_day is not None:
        where_parts.append(
            "("
            f"date_trunc_ymd(transit_timestamp) > '{last_day}' "
            f"OR (date_trunc_ymd(transit_timestamp) = '{last_day}' "
            f"AND station_complex_id > '{last_station_id}')"
            ")"
        )

    where_clause = " AND ".join(where_parts)

    query = f"""
        SELECT
            date_trunc_ymd(transit_timestamp) AS day,
            station_complex_id,
            SUM(ridership) AS total_ridership
        WHERE {where_clause}
        GROUP BY day, station_complex_id
        ORDER BY day, station_complex_id
        LIMIT {batch_size}
    """

    try:
        r = session.get(URL, params={"$query": query}, timeout=60)
        r.raise_for_status()
        page = r.json()
    except (requests.HTTPError, requests.ConnectionError, requests.Timeout) as e:
        if isinstance(e, requests.HTTPError):
            status = getattr(e.response, "status_code", None)
            # Any other HTTP status is not treated as transient: fail now.
            if status not in (429, 500, 502, 503, 504):
                raise

        failed_attempts += 1
        if failed_attempts > max_retries:
            # Give up and surface the last error instead of looping forever.
            raise

        # Exponential backoff: 1.5 s, 3 s, 6 s, ...
        time.sleep(retry_delay * 2 ** (failed_attempts - 1))
        continue

    # The request succeeded, so the next page starts with a clean counter.
    failed_attempts = 0

    # An empty page means there is nothing left to read.
    if not page:
        break

    all_rows.extend(page)

    # Advance the cursor to the last row of this page.
    last = page[-1]
    last_day = last["day"]
    last_station_id = last["station_complex_id"]

    print(f"Leidos {len(page):,} registros (hasta {last_day} / {last_station_id})")

    # A page shorter than the limit is the final one.
    if len(page) < batch_size:
        break


# Turn the accumulated rows into a DataFrame. When at least one row was
# downloaded, convert "day" to plain date objects and "total_ridership" to a
# numeric column (values that cannot be parsed become NaN).
df = pd.DataFrame(all_rows)
if not df.empty:
    df = df.assign(
        day=pd.to_datetime(df["day"]).dt.date,
        total_ridership=pd.to_numeric(df["total_ridership"], errors="coerce"),
    )

# Short summary: total row count followed by the first rows of the table.
print(f"\nTotal de filas: {len(df):,}")
print(df.head(5))

In [None]:
# Show the DataFrame as cell output.
# NOTE(review): `display` is not imported in this file; presumably it is the
# IPython/Jupyter builtin, so this line only works inside a notebook.
display(df)

In [None]:
# Write the table to a CSV file in the working directory, leaving out the
# index column, then print a confirmation message.
df.to_csv(
    path_or_buf="mta_metro_ridership_2020_2.csv",
    index=False,
)
print("Archivo guardado")