In [32]:
import requests
import pandas as pd
import folium
from branca.colormap import LinearColormap

# ───────────────────────────────────────────────────────────────
# PARAMETERS
FETCH_LIMIT      = 50000    # rows to fetch
SAMPLE_LOCATIONS = 200      # distinct spots to plot
BASE_RADIUS      = 2        # minimum circle radius
SCALE            = 1.2      # multiplier for √count → radius
# ───────────────────────────────────────────────────────────────

# 1. Fetch centroid data
url    = "https://data.cityofchicago.org/resource/ajtu-isnz.json"
params = {"$limit": FETCH_LIMIT}
resp   = requests.get(url, params=params)
resp.raise_for_status()
df     = pd.DataFrame(resp.json())

# 2. Coerce coords to numeric, drop invalid
df['pickup_lat']  = pd.to_numeric(df['pickup_centroid_latitude'],  errors='coerce')
df['pickup_lng']  = pd.to_numeric(df['pickup_centroid_longitude'], errors='coerce')
df['dropoff_lat'] = pd.to_numeric(df['dropoff_centroid_latitude'],  errors='coerce')
df['dropoff_lng'] = pd.to_numeric(df['dropoff_centroid_longitude'], errors='coerce')
df = df.dropna(subset=['pickup_lat','pickup_lng','dropoff_lat','dropoff_lng'])

# 3. Aggregate trip counts
pickup_counts = df.groupby(['pickup_lat','pickup_lng']).size().reset_index(name='count')
dropoff_counts = df.groupby(['dropoff_lat','dropoff_lng']).size().reset_index(name='count')

# 4. Sample for performance
pickup_counts  = pickup_counts.sample(n=min(SAMPLE_LOCATIONS, len(pickup_counts)), random_state=42)
dropoff_counts = dropoff_counts.sample(n=min(SAMPLE_LOCATIONS, len(dropoff_counts)), random_state=42)

# 5. Sort so smaller circles draw first, larger on top
pickup_counts  = pickup_counts.sort_values('count')
dropoff_counts = dropoff_counts.sort_values('count')

# ───────────── PICKUP MAP ──────────────────────────────────────
p_min, p_max = pickup_counts['count'].min(), pickup_counts['count'].max()
pickup_map = folium.Map(location=[pickup_counts['pickup_lat'].mean(),
                                  pickup_counts['pickup_lng'].mean()],
                        zoom_start=11)

# use yellow→orange→red for stronger midtones
pickup_cmap = LinearColormap(['yellow','orange','red'],
                             vmin=p_min, vmax=p_max,
                             caption='Pickups')
pickup_cmap.add_to(pickup_map)

for _, row in pickup_counts.iterrows():
    folium.CircleMarker(
        location=[row['pickup_lat'], row['pickup_lng']],
        radius= BASE_RADIUS + (row['count']**0.3) * SCALE,
        color=pickup_cmap(row['count']),
        fill=True,
        fill_color=pickup_cmap(row['count']),
        fill_opacity=0.7,
        popup=f"Pickups: {row['count']}"
    ).add_to(pickup_map)

#pickup_map.save("chicago_taxi_pickups_scaled.html")


# ──────────── DROPOFF MAP ───────────────────────────────────────
d_min, d_max = dropoff_counts['count'].min(), dropoff_counts['count'].max()
dropoff_map = folium.Map(location=[dropoff_counts['dropoff_lat'].mean(),
                                   dropoff_counts['dropoff_lng'].mean()],
                         zoom_start=11)

dropoff_cmap = LinearColormap(['yellow','orange','red'],
                              vmin=d_min, vmax=d_max,
                              caption='Drop-offs')
dropoff_cmap.add_to(dropoff_map)

for _, row in dropoff_counts.iterrows():
    folium.CircleMarker(
        location=[row['dropoff_lat'], row['dropoff_lng']],
        radius= BASE_RADIUS + (row['count']**0.3) * SCALE,
        color=dropoff_cmap(row['count']),
        fill=True,
        fill_color=dropoff_cmap(row['count']),
        fill_opacity=0.7,
        popup=f"Drop-offs: {row['count']}"
    ).add_to(dropoff_map)

#dropoff_map.save("chicago_taxi_dropoffs_scaled.html")

pickup_map



In [33]:
dropoff_map

In [34]:
import requests
import pandas as pd
import folium
import numpy as np
from branca.colormap import LinearColormap

# ───────────────────────────────────────────────────────────────
# PARAMETERS
CSV_PATH       = '/Users/markmatlin/Transit_Hacks_2025/CTA_-_System_Information_-_List_of__L__Stops_20250426.csv'
RIDERSHIP_URL  = 'https://data.cityofchicago.org/resource/5neh-572f.json'
FETCH_LIMIT    = 50000     # max rows per Socrata page
MIN_RADIUS     = 2         # px
MAX_RADIUS     = 20        # px
START_DATE     = '2024-01-01T00:00:00'
# ───────────────────────────────────────────────────────────────

# 1. Load station CSV & parse coords
stations = pd.read_csv(CSV_PATH)[['MAP_ID','STATION_NAME','Location']].drop_duplicates()
stations.columns = ['station_id','stationname','location']
stations['station_id'] = pd.to_numeric(stations['station_id'], errors='coerce').astype(int)
stations[['lat','lng']] = (
    stations['location']
            .str.extract(r'\(\s*([\d\.\-]+)\s*,\s*([\d\.\-]+)\s*\)')
            .astype(float)
)
stations_df = stations[['station_id','stationname','lat','lng']]

# 2. Fetch ridership JSON *since* START_DATE & sum by station_id
params = {
    '$limit':  FETCH_LIMIT,
    '$where': f"date >= '{START_DATE}'"
}
resp = requests.get(RIDERSHIP_URL, params=params)
resp.raise_for_status()
rides = pd.DataFrame(resp.json())

# parse and summarize date coverage
rides['date'] = pd.to_datetime(rides['date'])
unique_days = rides['date'].dt.normalize().nunique()
first_day   = rides['date'].min().date()
last_day    = rides['date'].max().date()
print(f"Dataset covers {unique_days} days, from {first_day} through {last_day}")

# coerce numeric fields
rides['station_id'] = pd.to_numeric(rides['station_id'], errors='coerce').astype(int)
rides['rides']      = pd.to_numeric(rides['rides'], errors='coerce').fillna(0)

totals = (
    rides.groupby('station_id', as_index=False)['rides']
         .sum()
         .rename(columns={'rides':'total_rides'})
)

# 3. Merge & sort
df = stations_df.merge(totals, on='station_id', how='left').fillna({'total_rides':0})
df['total_rides'] = df['total_rides'].astype(int)
df = df.sort_values('total_rides').reset_index(drop=True)

# 4. Compute max for normalization
max_rides = df['total_rides'].max()

# 5. Build Folium map centered on Chicago
m = folium.Map(
    location=[df['lat'].mean(), df['lng'].mean()],
    zoom_start=11
)

# 6. Create a normalized colormap (domain [0,1])
colormap = LinearColormap(
    ['yellow','orange','red'],
    vmin=0, vmax=1,
    caption="Relative 'L' Station Rides (2024+)"
)
colormap.add_to(m)

# 7. Plot each station with size & color relative to max_rides
for _, row in df.iterrows():
    rel    = row['total_rides'] / max_rides if max_rides>0 else 0
    radius = MIN_RADIUS + rel * (MAX_RADIUS - MIN_RADIUS)
    color  = colormap(rel)
    folium.CircleMarker(
        location=[row['lat'], row['lng']],
        radius=radius,
        color=color,
        fill=True, fill_color=color, fill_opacity=0.7,
        popup=f"{row['stationname']}: {row['total_rides']} rides"
    ).add_to(m)

# 8. Save or display
#m.save('cta_l_ridership_map_2024_onward.html')
# In Jupyter: put `m` on the last line to render inline
m

Dataset covers 349 days, from 2024-01-01 through 2024-12-14


In [35]:
# parse the date field
rides['date'] = pd.to_datetime(rides['date'])

# how many unique days are in our sample?
unique_days = rides['date'].dt.normalize().nunique()
first_day   = rides['date'].min().date()
last_day    = rides['date'].max().date()

print(f"Dataset covers {unique_days} days, from {first_day} through {last_day}")
# 2. Fetch ridership JSON & sum rides per station_id
resp = requests.get(RIDERSHIP_URL, params={'$limit': FETCH_LIMIT})
resp.raise_for_status()
rides = pd.DataFrame(resp.json())

# —– NEW: date parsing and summary —–
rides['date'] = pd.to_datetime(rides['date'])
unique_days   = rides['date'].dt.normalize().nunique()
first_day     = rides['date'].min().date()
last_day      = rides['date'].max().date()
print(f"Dataset covers {unique_days} days, from {first_day} through {last_day}")

rides['station_id'] = pd.to_numeric(rides['station_id'], errors='coerce').astype(int)
rides['rides']      = pd.to_numeric(rides['rides'], errors='coerce').fillna(0)


Dataset covers 349 days, from 2024-01-01 through 2024-12-14
Dataset covers 353 days, from 2001-01-01 through 2001-12-19
