# I-10 Broadway Curve Incidents - Exploratory Data Analysis

## Goals
- Preprocess data
1. Filter events (accidents and roadwork): keep only those on I-10 between Loop 202 and I-17.
2. Filter INRIX speed data to a manually defined time range and to the TMC segments on the I-10 Broadway Curve
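3. Filter TMC segments: keep I-10 segments inside the Broadway Curve bounding box and keep one row per TMC code (latest `active_start_date`)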
4. Visualize road segments and events on map




In [1]:
# Core imports
import sqlite3
from pathlib import Path
from datetime import datetime, timedelta, date
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import random
import re

# Notebook-wide display and plotting defaults
pd.options.display.max_columns = None  # never truncate wide frames
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

# SQLite database holding the az511 events; fail fast if it is missing
DB_PATH = Path('../database/az511.db')
assert DB_PATH.exists(), f"Database not found at {DB_PATH}"
DB_PATH

PosixPath('../database/az511.db')

In [2]:
# Read all events from the az511 SQLite database
def get_connection():
    """Open and return a new SQLite connection to the database at ``DB_PATH``."""
    return sqlite3.connect(DB_PATH)


# A sqlite3 connection used as a context manager only commits / rolls back the
# open transaction; it does NOT close the connection. Close it explicitly so
# the database file handle is released once the events are loaded.
conn = get_connection()
try:
    events_df = pd.read_sql("SELECT * FROM events", conn)
finally:
    conn.close()

print(f"Loaded events: {events_df.shape}")

Loaded events: (49822, 30)


In [3]:
# Filter events_df down to the I-10 Broadway Curve.
# Step 1: bounding box on the event's lat/lon (bounds are inclusive).
lat_min, lat_max = 33.296690, 33.428422
lon_min, lon_max = -112.039731, -111.962382
in_bbox = (
    events_df['Latitude'].between(lat_min, lat_max) &
    events_df['Longitude'].between(lon_min, lon_max)
)

# Step 2: keep only events whose "RoadwayName" mentions "I-10" or "I10"
# (case-insensitive; rows with a missing RoadwayName are dropped).
on_i10 = events_df['RoadwayName'].str.contains("I-10|I10", case=False, na=False)

# .copy() detaches df_bw from events_df so the in-place edits below (and in
# later cells) write to an independent frame instead of a slice of events_df.
df_bw = events_df.loc[in_bbox & on_i10].copy()

# Simple fill-in of DirectionOfTravel: where it is 'Unknown', take the first
# compass word found in "RoadwayName" (lowercased); keep 'Unknown' otherwise.
unknown_dir = df_bw['DirectionOfTravel'].eq('Unknown')
df_bw.loc[unknown_dir, 'DirectionOfTravel'] = (
    df_bw.loc[unknown_dir, 'RoadwayName']
    .str.extract(r'(west|east|south|north)', flags=re.I, expand=False)
    .str.lower()
    .fillna('Unknown')
)

print(f"Filtered events in I-10 Broadway Curve area: {df_bw.shape}")
print(df_bw.EventType.value_counts())


Filtered events in I-10 Broadway Curve area: (472, 30)
EventType
accidentsAndIncidents    395
roadwork                  61
closures                  16
Name: count, dtype: int64


In [4]:
# Frequency of each EventSubType among the Broadway Curve events, most common first
subtype_counts = df_bw['EventSubType'].value_counts().sort_values(ascending=False)
print("Top 15 EventSubType values:")
print(subtype_counts.head(15))

Top 15 EventSubType values:
EventSubType
C34Rshoulder               97
Crash on right shoulder    56
AccidentIncident           43
DebrisRshoulder            22
debrisLshoulder            20
leftlanes                  19
CrashLshoulder             17
rightlanes                 16
C34Lshoulder               14
exitclosed                 10
exitrestricted              9
T1018SR                     9
LeftLane                    6
ITS Equipment Damage        6
C34onramp                   6
Name: count, dtype: int64


In [5]:
# Load the INRIX TMC segment table, keeping only I-10 segments that touch the
# Broadway Curve bounding box. The CSV is streamed in chunks and each chunk is
# filtered immediately, so the full table is never held in memory.
tmc_path = Path('../database/inrix-traffic-speed/I10-and-I17-1year/TMC_Identification.csv')

usecols = [
    'tmc','road','direction','intersection',
    'start_latitude','start_longitude','end_latitude','end_longitude',
    'miles','road_order','active_start_date'
]
tmc_dtypes = {
    'tmc':'string','road':'string','direction':'string','intersection':'string',
    'miles':'float32','road_order':'float32'
}

chunks = []
tmc_reader = pd.read_csv(tmc_path, usecols=usecols, chunksize=50_000, dtype=tmc_dtypes)
for chunk in tmc_reader:
    # A segment qualifies when either of its endpoints lies inside the box
    start_inside = (
        chunk['start_latitude'].between(lat_min, lat_max) &
        chunk['start_longitude'].between(lon_min, lon_max)
    )
    end_inside = (
        chunk['end_latitude'].between(lat_min, lat_max) &
        chunk['end_longitude'].between(lon_min, lon_max)
    )
    m = (start_inside | end_inside) & chunk['road'].eq('I-10')
    if not m.any():
        continue
    chunks.append(chunk.loc[m])

if chunks:
    df_tmc = pd.concat(chunks, ignore_index=True)
else:
    # Nothing matched: keep an empty frame with the expected columns
    df_tmc = pd.DataFrame(columns=usecols)
print("TMC rows in bounds:", len(df_tmc))

TMC rows in bounds: 100


In [6]:
# Inspect which columns were kept in df_tmc
df_tmc.columns

Index(['tmc', 'road', 'direction', 'intersection', 'start_latitude',
       'start_longitude', 'end_latitude', 'end_longitude', 'miles',
       'road_order', 'active_start_date'],
      dtype='object')

In [None]:
# df_tmc contains repeated TMC codes. Keep a single row per code: the one with
# the latest active_start_date.
original_length = df_tmc.shape[0]
dup_counts = df_tmc['tmc'].value_counts()
print("Sample duplicated TMCs (count>1):")
print(dup_counts.loc[dup_counts.gt(1)].head())

if 'active_start_date' in df_tmc.columns:
    # Unparseable dates become NaT; sorting with NaT first means a row with a
    # real date always wins over one without.
    df_tmc['active_start_date'] = pd.to_datetime(df_tmc['active_start_date'], errors='coerce')
    tmc_sorted = df_tmc.sort_values(
        by=['tmc', 'active_start_date'],
        ascending=[True, True],
        na_position='first',
    )
    # Last row of each tmc group == latest active_start_date for that code
    latest_per_tmc = tmc_sorted.groupby('tmc', as_index=False).tail(1)
    df_tmc = latest_per_tmc.reset_index(drop=True)

print(f"Original TMC rows: {original_length} -> After cleaning: {len(df_tmc)}")


Sample duplicated TMCs (count>1):
tmc
115P04179    2
115-05165    2
115N04183    2
115N04182    2
115N04181    2
Name: count, dtype: Int64
Original TMC rows: 100 -> After cleaning: 50


In [8]:
# Peek at the INRIX CSV header only (nrows=0 parses column names, no data rows)
inrix_path = Path('../database/inrix-traffic-speed/I10-and-I17-1year/I10-and-I17-1year.csv')
header_only = pd.read_csv(inrix_path, nrows=0)
cols = list(header_only.columns)
print(f"Column count: {len(cols)}")
print(cols)

Column count: 10
['tmc_code', 'measurement_tstamp', 'speed', 'historical_average_speed', 'reference_speed', 'travel_time_seconds', 'confidence_score', 'cvalue', 'Inrix 2013', 'Inrix 2019']


In [9]:
# Read the INRIX speed records in chunks, keeping only rows that
#   (a) belong to a TMC segment present in df_tmc (I-10 Broadway Curve), and
#   (b) fall inside the manually defined time window below (bounds inclusive).

# Time window as pandas Timestamps so it compares directly against the parsed
# 'measurement_tstamp' column.
start_ts = pd.Timestamp('2025-06-16 00:00:00')
end_ts = pd.Timestamp('2025-09-23 00:00:00')
print("Manual time window:", start_ts, "->", end_ts)
tmc_set = set(df_tmc['tmc'])

usecols = ['tmc_code', 'measurement_tstamp', 'speed', 'reference_speed',
           'travel_time_seconds', 'confidence_score', 'cvalue', 'Inrix 2013', 'Inrix 2019']

filtered_chunks = []
for chunk in pd.read_csv(
        inrix_path,
        usecols=usecols,
        parse_dates=['measurement_tstamp'],
        chunksize=20_000):
    m = (
        (chunk['tmc_code'].isin(tmc_set)) &
        (chunk['measurement_tstamp'] >= start_ts) &
        (chunk['measurement_tstamp'] <= end_ts)
    )
    if m.any():
        filtered_chunks.append(chunk.loc[m])

df_inrix = (pd.concat(filtered_chunks, ignore_index=True)
            if filtered_chunks else pd.DataFrame(columns=usecols))
# The label names both filters: rows are restricted by TMC code AND time range.
print("Filtered INRIX rows (TMC + time range):", len(df_inrix))


Manual time window: 2025-06-16 00:00:00 -> 2025-09-23 00:00:00


  for chunk in pd.read_csv(


Filtered INRIX rows (time range only): 7125744


In [10]:
# add "near_tmc" to df_bw based on the projection distance to TMC line segments

def point_segment_distance(px, py, ax, ay, bx, by):
    """
    Distance from a single point P(px, py) to the segment A(ax, ay) -> B(bx, by).

    P is projected onto the line through A and B and the projection parameter
    is clamped to [0, 1], so the closest point always lies on the segment.

    Returns (dist, proj_x, proj_y, t):
      dist            distance from P to its closest point on the segment
      proj_x, proj_y  coordinates of that closest point
      t               clamped position along the segment (0 at A, 1 at B)
    A zero-length segment yields the distance to A, with A as the projection
    and t = 0.0.
    """
    dx = bx - ax
    dy = by - ay
    length_sq = dx*dx + dy*dy

    # Degenerate segment (A == B): the closest point is A itself
    if length_sq == 0:
        return np.hypot(px-ax, py-ay), ax, ay, 0.0

    # Projection parameter of AP onto AB, then restricted to the segment
    t_raw = ((px - ax)*dx + (py - ay)*dy) / length_sq
    t_on_segment = max(0.0, min(1.0, t_raw))

    closest_x = ax + t_on_segment * dx
    closest_y = ay + t_on_segment * dy
    return np.hypot(px - closest_x, py - closest_y), closest_x, closest_y, t_on_segment

def points_segments_nearest(events_xy, seg_a_xy, seg_b_xy, chunk_size=500):
    """
    Vectorized nearest-segment search for many points.

    Parameters
    ----------
    events_xy : array-like, shape (N, 2)
        Point coordinates.
    seg_a_xy : array-like, shape (M, 2)
        Segment start coordinates (M >= 1 when N > 0).
    seg_b_xy : array-like, shape (M, 2)
        Segment end coordinates, in the same units as the points.
    chunk_size : int, default 500
        Number of segments compared per batch. Bounds the size of the
        (N, chunk_size, 2) temporaries; it does not change the result.

    Returns
    -------
    nearest_seg_index : ndarray (N,)
        Row index into the segment arrays of the closest segment.
    dist : ndarray (N,)
        Distance from each point to its closest segment.
    proj_xy : ndarray (N, 2)
        Closest point on that segment.
    t : ndarray (N,)
        Clamped projection parameter in [0, 1] (0 at start, 1 at end).
    """
    # Work in float: with integer inputs the 1e-12 guard below would be
    # truncated to 0 and zero-length segments would produce 0/0.
    events_xy = np.asarray(events_xy, dtype=float)
    seg_a_xy = np.asarray(seg_a_xy, dtype=float)
    seg_b_xy = np.asarray(seg_b_xy, dtype=float)

    N = events_xy.shape[0]
    M = seg_a_xy.shape[0]
    # Segment vectors
    v = seg_b_xy - seg_a_xy          # (M,2)
    v_len2 = (v**2).sum(axis=1)      # (M,)
    # Zero-length segments: avoid division by zero (their t evaluates to 0)
    v_len2[v_len2 == 0] = 1e-12

    best_dist2 = np.full(N, np.inf)
    best_idx = np.zeros(N, dtype=int)
    best_t = np.zeros(N)
    rows = np.arange(N)              # hoisted: reused for fancy indexing per batch

    P = events_xy  # (N,2)
    # Process segments in batches to limit memory
    for s in range(0, M, chunk_size):
        e = min(s + chunk_size, M)
        A = seg_a_xy[s:e]           # (m,2)
        V = v[s:e]                  # (m,2)
        L2 = v_len2[s:e]            # (m,)

        # P - A (broadcast)
        PA = P[:, None, :] - A[None, :, :]        # (N,m,2)
        # Raw projection parameter, then clamped onto the segment
        t = (PA * V[None, :, :]).sum(axis=2) / L2[None, :]
        t_clamped = np.clip(t, 0, 1)

        proj = A[None, :, :] + t_clamped[..., None] * V[None, :, :]  # (N,m,2)
        diff = P[:, None, :] - proj
        dist2 = (diff**2).sum(axis=2)  # (N,m)

        # Best segment within this batch, merged into the running best
        local_min_idx = dist2.argmin(axis=1)
        local_min_val = dist2[rows, local_min_idx]
        improved = local_min_val < best_dist2
        if improved.any():
            best_dist2[improved] = local_min_val[improved]
            best_idx[improved] = s + local_min_idx[improved]
            best_t[improved] = t_clamped[rows, local_min_idx][improved]

    # Build projection points for winners
    proj_xy = seg_a_xy[best_idx] + best_t[:, None] * v[best_idx]
    return best_idx, np.sqrt(best_dist2), proj_xy, best_t

# Direction-aware nearest TMC assignment
def _lonlat_to_xy(lon_deg, lat_deg, m_per_deg_lon, m_per_deg_lat):
    """Scale longitude/latitude Series (degrees) into an (n, 2) array of x/y metres."""
    return np.column_stack([
        lon_deg.to_numpy() * m_per_deg_lon,
        lat_deg.to_numpy() * m_per_deg_lat
    ])


def _nearest_tmc_codes(events, segments, m_per_deg_lon, m_per_deg_lat):
    """Return, for each row of `events`, the 'tmc' code of the nearest row of `segments`.

    `events` needs 'Longitude'/'Latitude'; `segments` needs the start/end
    latitude/longitude columns and 'tmc'. Both must be non-empty.
    """
    events_xy = _lonlat_to_xy(events['Longitude'], events['Latitude'],
                              m_per_deg_lon, m_per_deg_lat)
    seg_a_xy = _lonlat_to_xy(segments['start_longitude'], segments['start_latitude'],
                             m_per_deg_lon, m_per_deg_lat)
    seg_b_xy = _lonlat_to_xy(segments['end_longitude'], segments['end_latitude'],
                             m_per_deg_lon, m_per_deg_lat)
    # Only the winning segment index is needed here
    nearest_idx = points_segments_nearest(events_xy, seg_a_xy, seg_b_xy)[0]
    return segments.iloc[nearest_idx]['tmc'].astype('string').to_numpy()


if not df_bw.empty and not df_tmc.empty:
    # Local planar approximation: scale degrees to metres around the mean event
    # latitude so that distances in x and y are comparable.
    lat0 = float(df_bw['Latitude'].mean())
    m_per_deg_lat = 110540.0
    m_per_deg_lon = 111320.0 * np.cos(np.deg2rad(lat0))

    # Normalize event direction (already lowercased earlier, but ensure)
    df_bw['DirectionOfTravel'] = df_bw['DirectionOfTravel'].str.lower()

    # Map event direction tokens to TMC direction field patterns
    dir_map = {
        'east': 'EASTBOUND',
        'west': 'WESTBOUND',
        'north': 'NORTHBOUND',
        'south': 'SOUTHBOUND'
    }

    # Result column starts all-missing (string dtype) and is filled incrementally
    df_bw['near_tmc'] = pd.Series(pd.NA, index=df_bw.index, dtype='string')
    # Only events with both coordinates can be matched
    ev_base_mask = df_bw[['Latitude','Longitude']].notna().all(axis=1)

    # Process each direction separately so only same-direction TMCs are considered
    for ev_dir, tmc_dir in dir_map.items():
        ev_mask_dir = ev_base_mask & df_bw['DirectionOfTravel'].eq(ev_dir)
        if not ev_mask_dir.any():
            continue
        seg_mask = df_tmc['direction'].str.contains(tmc_dir, case=False, na=False)
        seg_subset = df_tmc.loc[seg_mask]
        if seg_subset.empty:
            continue

        df_bw.loc[ev_mask_dir, 'near_tmc'] = _nearest_tmc_codes(
            df_bw.loc[ev_mask_dir], seg_subset, m_per_deg_lon, m_per_deg_lat
        )

    # Fallback: events with unknown or unmatched direction -> search all segments
    unmatched_mask = ev_base_mask & df_bw['near_tmc'].isna()
    if unmatched_mask.any():
        df_bw.loc[unmatched_mask, 'near_tmc'] = _nearest_tmc_codes(
            df_bw.loc[unmatched_mask], df_tmc, m_per_deg_lon, m_per_deg_lat
        )

    print("Updated nearest TMC with direction-aware matching.")
    print(df_bw[['DirectionOfTravel','near_tmc']].head())

Updated nearest TMC with direction-aware matching.
    DirectionOfTravel   near_tmc
123              west  115P04184
137              west  115P04184
165              west  115P04185
166              west  115P04185
222              east  115N04185


In [11]:
# Visualize INRIX road segments at one timestamp, with events overlaid on a map

# 1. Speed snapshot at a manually chosen timestamp (exact timestamp match only)
target_time_str = '2025-08-29 00:00:00'
target_ts = pd.Timestamp(target_time_str)
print("Target speed timestamp:", target_ts)

exact_speed = df_inrix[df_inrix['measurement_tstamp'] == target_ts]

# 2. Attach segment geometry (start/end coords) from df_tmc
seg_df = exact_speed.merge(
    df_tmc,
    left_on='tmc_code', right_on='tmc', how='left'
)

# Drop rows without geometry
seg_df = seg_df.dropna(subset=['start_latitude','start_longitude','end_latitude','end_longitude'])
if seg_df.empty:
    print(f"Warning: no INRIX rows with geometry at {target_ts}; only events will be drawn.")

# Dedicated seeded generator: segment colours are arbitrary but identical on
# every run of this cell, so the figure is reproducible.
_color_rng = random.Random(42)

def random_color():
    """Return a pseudo-random '#rrggbb' colour string from the seeded generator."""
    return "#{:06x}".format(_color_rng.randint(0, 0xFFFFFF))

fig = go.Figure()
color_map = {}  # tmc_code -> colour, shared by segment lines and event markers

# 3. Segment lines (TMC geometry), one trace per snapshot row
for r in seg_df.itertuples():
    color = color_map.setdefault(r.tmc_code, random_color())
    fig.add_trace(go.Scattermap(
        mode="lines",
        lon=[r.start_longitude, r.end_longitude],
        lat=[r.start_latitude, r.end_latitude],
        line=dict(color=color, width=4),
        name=r.tmc_code,
        hovertext=f"TMC: {r.tmc_code}, Speed: {r.speed}"
    ))

# 4. Event points, coloured like their nearest TMC segment (black if none).
# Resolve 'near_tmc' once so colours and hover text agree even when the
# column has not been created.
if 'near_tmc' in df_bw.columns:
    near_tmc = df_bw['near_tmc']
else:
    near_tmc = pd.Series(pd.NA, index=df_bw.index, dtype='string')
event_colors = near_tmc.map(color_map).fillna("#000000")

fig.add_trace(
    go.Scattermap(
        mode="markers",
        lon=df_bw['Longitude'],
        lat=df_bw['Latitude'],
        marker=dict(size=8, opacity=0.75, color=event_colors),
        name="Events",
        hovertext=(
            near_tmc.astype(str) + "<br>" +
            df_bw['DirectionOfTravel'].astype(str) + "<br>" +
            df_bw['Description'].fillna('').str.slice(0,80)
        ),
        hovertemplate="Event near TMC: %{hovertext}<extra></extra>",
    )
)

# Centre on the segments; fall back to the events when the snapshot is empty
# (the mean of an empty column would be NaN).
if seg_df.empty:
    map_center = {"lat": df_bw['Latitude'].mean(), "lon": df_bw['Longitude'].mean()}
else:
    map_center = {"lat": seg_df.start_latitude.mean(),
                  "lon": seg_df.start_longitude.mean()}

fig.update_layout(
    map_style="streets",
    map_zoom=11,
    map_center=map_center,
    height=800
)


fig.show()


Target speed timestamp: 2025-08-29 00:00:00


In [12]:
# manually add tags on road segments
# road_tags maps a TMC code (matching df_tmc['tmc']) to a dict with the keys
# 'onramp', 'offramp' and 'curve'.
# NOTE(review): the values look like ramp counts (0/1/2) and a 0/1 curve flag,
# entered by hand — confirm the meaning with the author.
road_tags = {
    # Westbound
    '115P04188': {'onramp': 1, 'offramp': 0, 'curve': 1},
    '115+04188': {'onramp': 0, 'offramp': 1, 'curve': 1},
    '115P04187': {'onramp': 0, 'offramp': 1, 'curve': 1},
    '115+04187': {'onramp': 0, 'offramp': 1, 'curve': 1},
    '115P04186': {'onramp': 1, 'offramp': 0, 'curve': 1},
    '115+04186': {'onramp': 0, 'offramp': 1, 'curve': 0},
    '115P04185': {'onramp': 1, 'offramp': 0, 'curve': 0},
    '115+04185': {'onramp': 0, 'offramp': 1, 'curve': 0},
    '115P04184': {'onramp': 2, 'offramp': 0, 'curve': 1},
    '115+04184': {'onramp': 0, 'offramp': 1, 'curve': 1},
    '115P04183': {'onramp': 0, 'offramp': 0, 'curve': 1},
    '115+04183': {'onramp': 0, 'offramp': 1, 'curve': 1},
    '115P04182': {'onramp': 2, 'offramp': 0, 'curve': 0},
    '115+04182': {'onramp': 0, 'offramp': 1, 'curve': 0},
    '115P04181': {'onramp': 0, 'offramp': 1, 'curve': 0},
    '115+04181': {'onramp': 0, 'offramp': 1, 'curve': 0},
    '115P04180': {'onramp': 1, 'offramp': 0, 'curve': 0},
    '115+04180': {'onramp': 0, 'offramp': 1, 'curve': 0},
    '115P04179': {'onramp': 1, 'offramp': 0, 'curve': 0},
    '115+04179': {'onramp': 0, 'offramp': 1, 'curve': 0},
    '115P04178': {'onramp': 1, 'offramp': 0, 'curve': 0},
    '115+04178': {'onramp': 0, 'offramp': 1, 'curve': 0},
    '115P04177': {'onramp': 2, 'offramp': 0, 'curve': 0},
    '115+04177': {'onramp': 0, 'offramp': 0, 'curve': 0},
    '115P05165': {'onramp': 0, 'offramp': 0, 'curve': 0},
    # Eastbound
    '115N04188': {'onramp': 1, 'offramp': 0, 'curve': 1},
    '115-04187': {'onramp': 0, 'offramp': 0, 'curve': 0},
    '115N04187': {'onramp': 1, 'offramp': 0, 'curve': 1},
    '115-04186': {'onramp': 1, 'offramp': 2, 'curve': 1},
    '115N04186': {'onramp': 1, 'offramp': 0, 'curve': 1},
    '115-04185': {'onramp': 0, 'offramp': 1, 'curve': 0},
    '115N04185': {'onramp': 1, 'offramp': 0, 'curve': 0},
    '115-04184': {'onramp': 0, 'offramp': 1, 'curve': 0},
    '115N04184': {'onramp': 2, 'offramp': 0, 'curve': 1},
    '115-04183': {'onramp': 0, 'offramp': 0, 'curve': 1},
    '115N04183': {'onramp': 0, 'offramp': 0, 'curve': 1},
    '115-04182': {'onramp': 0, 'offramp': 2, 'curve': 0},
    '115N04182': {'onramp': 1, 'offramp': 1, 'curve': 0},
    '115-04181': {'onramp': 0, 'offramp': 1, 'curve': 0},
    '115N04181': {'onramp': 2, 'offramp': 0, 'curve': 0},
    '115-04180': {'onramp': 0, 'offramp': 1, 'curve': 0},
    '115N04180': {'onramp': 1, 'offramp': 0, 'curve': 0},
    '115-04179': {'onramp': 0, 'offramp': 1, 'curve': 0},
    '115N04179': {'onramp': 1, 'offramp': 0, 'curve': 0},
    '115-04178': {'onramp': 0, 'offramp': 1, 'curve': 0},
    '115N04178': {'onramp': 1, 'offramp': 0, 'curve': 0},
    '115-04177': {'onramp': 0, 'offramp': 1, 'curve': 0},
    '115N04177': {'onramp': 1, 'offramp': 0, 'curve': 0},
    '115-05165': {'onramp': 0, 'offramp': 0, 'curve': 0},
    '115N05165': {'onramp': 1, 'offramp': 0, 'curve': 0},
}
# Attach the tag dict to each segment row; TMC codes that are not keys of
# road_tags get a missing value (NaN) in 'tmc_tags'.
df_tmc['tmc_tags'] = df_tmc['tmc'].map(road_tags)

In [13]:
# Column inventory of the three working frames: INRIX speeds, TMC segments, events
for _frame in (df_inrix, df_tmc, df_bw):
    print(_frame.columns.values)

['tmc_code' 'measurement_tstamp' 'speed' 'reference_speed'
 'travel_time_seconds' 'confidence_score' 'cvalue' 'Inrix 2013'
 'Inrix 2019']
['tmc' 'road' 'direction' 'intersection' 'start_latitude'
 'start_longitude' 'end_latitude' 'end_longitude' 'miles' 'road_order'
 'active_start_date' 'tmc_tags']
['ID' 'Organization' 'RoadwayName' 'DirectionOfTravel' 'Description'
 'Reported' 'LastUpdated' 'StartDate' 'PlannedEndDate' 'LanesAffected'
 'Latitude' 'Longitude' 'LatitudeSecondary' 'LongitudeSecondary'
 'EventType' 'EventSubType' 'IsFullClosure' 'Severity' 'EncodedPolyline'
 'Width' 'Height' 'Length' 'Weight' 'Speed' 'DetourPolyline'
 'DetourInstructions' 'Recurrence' 'RecurrenceSchedules' 'Details'
 'LaneCount' 'near_tmc']


In [14]:
# Drop event columns that are not needed downstream.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps the
# cell idempotent: re-running it after the columns are gone no longer raises
# a KeyError.
df_bw = df_bw.drop(
    columns=['EncodedPolyline', 'Width','Height','Length','Weight','Speed',
             'DetourPolyline', 'DetourInstructions'],
    errors='ignore',
)

In [15]:
# Save the I-10 Broadway Curve subsets as parquet files
out_dir = Path('../database/i10-broadway')
out_dir.mkdir(parents=True, exist_ok=True)

events_path = out_dir / 'events.parquet'
inrix_path_out = out_dir / 'inrix.parquet'
tmc_path_out = out_dir / 'tmc.parquet'

df_bw.to_parquet(events_path, index=False)
df_inrix.to_parquet(inrix_path_out, index=False)
df_tmc.to_parquet(tmc_path_out, index=False)

# Labels name the dataset rather than a file extension: the files written
# above are parquet, not CSV.
print("Saved:")
print(f"  events -> {events_path}  rows={len(df_bw)}")
print(f"  inrix  -> {inrix_path_out}  rows={len(df_inrix)}")
print(f"  tmc    -> {tmc_path_out}    rows={len(df_tmc)}")


Saved:
  events.csv -> ../database/i10-broadway/events.parquet  rows=472
  inrix.csv  -> ../database/i10-broadway/inrix.parquet  rows=7125744
  tmc.csv    -> ../database/i10-broadway/tmc.parquet    rows=50
