In [97]:
import os
from pathlib import Path
import sqlite3
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString
import numpy as np
import polyline as ppl
from pyproj import Geod
import folium
import matplotlib.pyplot as plt
import seaborn as sns
pd.set_option('display.max_columns', 100)

In [98]:
# Input locations, resolved relative to the directory the notebook is run from.

# Raw data lives in "database/raw" next to (one level above) the working directory.
data_dir = Path.cwd().parents[0].joinpath("database", "raw")
# SQLite database file that is handed to data_wzdx().
wzdx_data_file = data_dir.joinpath("az511.db")
# inrix_data_dir = data_dir/ "INRIX data"
# inrix_data_1 = inrix_data_dir / "I10-and-I17-1year" / "I10-and-I17-1year.csv"
# inrix_data_2 = inrix_data_dir / "Loop101-1year" / "Loop101-1year.csv"
# inrix_data_3 = inrix_data_dir / "SR60-1year" / "SR60-1year.csv"

In [99]:
# Helper functions for loading the data.
# The WZDx database keeps its records in a table named `events`. Because this table is small, it can be loaded into a pandas DataFrame directly without using too much memory.

def data_wzdx(data_file):
    """Load the whole ``events`` table of a SQLite database into a DataFrame.

    Parameters
    ----------
    data_file : str or path-like
        Location of the SQLite database; passed straight to ``sqlite3.connect``.

    Returns
    -------
    pandas.DataFrame
        One row per record of ``events``; columns are named after the
        table's columns, in the order SQLite reports them.

    Raises
    ------
    sqlite3.Error
        Propagated unchanged from SQLite, e.g. when the ``events`` table
        does not exist in the file.
    """
    con = sqlite3.connect(data_file)
    try:
        cur = con.cursor()
        cur.execute("SELECT * FROM events")
        # cursor.description has one entry per result column; item 0 is its name.
        columns = [desc[0] for desc in cur.description]
        return pd.DataFrame(cur.fetchall(), columns=columns)
    finally:
        # Always release the database handle, including when the query fails.
        con.close()

def data_inrix(data_file, n=1000):
    """Read the first ``n`` data rows of a CSV file into a DataFrame.

    Parameters
    ----------
    data_file : str, path-like or file-like
        CSV source; forwarded unchanged to ``pandas.read_csv``.
    n : int, default 1000
        Maximum number of rows to read (``nrows`` of ``pandas.read_csv``).

    Returns
    -------
    pandas.DataFrame
        The parsed rows.
    """
    return pd.read_csv(data_file, nrows=n)

In [100]:
# Load every row of the WZDx `events` table into memory as the working DataFrame.
df = data_wzdx(wzdx_data_file)

def line_from_encoded(enc):
    """Turn an encoded polyline string into a shapely ``LineString``.

    Parameters
    ----------
    enc : str or None
        Encoded polyline text. Missing values (``None``/``NaN``) and the
        empty string mean "no polyline".

    Returns
    -------
    shapely.geometry.LineString or None
        Line whose vertices are in (lon, lat) order, or ``None`` when there
        is no polyline or it decodes to fewer than two points.
    """
    if pd.isna(enc) or enc == '':
        return None
    # The decoder yields (lat, lon) pairs; swap them into (lon, lat) for shapely's (x, y).
    coords_lonlat = [(lon, lat) for (lat, lon) in ppl.decode(enc)]
    # A line needs at least two vertices. Returning None here lets the caller
    # fall back to a point geometry instead of LineString raising.
    if len(coords_lonlat) < 2:
        return None
    return LineString(coords_lonlat)

In [101]:
# Timestamp columns that get a human-readable companion column named "Local<column>".
time_columns = ['Reported', 'LastUpdated', 'StartDate', 'PlannedEndDate']

for col in time_columns:
    # Reduce the raw value modulo 10**10 and read the result as seconds since the epoch.
    # NOTE(review): the reason for the modulo is not visible in this notebook — confirm against the data source.
    epoch_seconds = df[col] % 10000000000
    # Interpret as UTC, then shift to Phoenix local time.
    local_time = pd.to_datetime(epoch_seconds, unit='s', utc=True).dt.tz_convert("America/Phoenix")
    # Store as plain text so the column holds formatted strings rather than timestamps.
    df['Local' + col] = local_time.dt.strftime("%Y-%m-%d %H:%M:%S")


In [102]:
# Bounding boxes used to select events, each as (lat_min, lat_max, lon_min, lon_max).
# A wider extent tried earlier: lat 33.05 .. 33.85, lon -112.72 .. -111.45.
i10_boxes = [
    (33.292405, 33.429615, -112.041268, -111.962923)
]

# Geometry list; it is filled by the geometry-building loop that follows this cell.
geom = []

# Start from an all-False boolean Series rather than the scalar False, so the
# row selection below still works (and selects nothing) if i10_boxes is empty.
geo_mask = pd.Series(False, index=df.index)

# An event is kept when its point location falls inside any of the boxes
# (bounds inclusive, as per Series.between).
for (lat_min, lat_max, lon_min, lon_max) in i10_boxes:
    geo_mask |= (df['Latitude'].between(lat_min, lat_max) &
                 df['Longitude'].between(lon_min, lon_max))

df_in = df.loc[geo_mask]

In [103]:
# Build one geometry per row of df_in: the decoded polyline when there is one,
# otherwise the event's point location.
# The list is reset here so that re-running this cell never appends a second
# batch, which would leave geom longer than df_in.
geom = []

for _, r in df_in.iterrows():
    ln = line_from_encoded(r.get('EncodedPolyline'))
    if ln is not None:
        geom.append(ln)
    elif pd.notna(r['Longitude']) and pd.notna(r['Latitude']):
        # Point takes (x, y), i.e. (lon, lat).
        geom.append(Point(r['Longitude'], r['Latitude']))
    else:
        # Placeholder keeps geom aligned with df_in row for row.
        geom.append(None)

In [107]:
# Attach the geometries built above to the filtered events (lon/lat coordinates, EPSG:4326).
gdf = gpd.GeoDataFrame(df_in, geometry=geom, crs='EPSG:4326')

# Reduced column set carried into the map layer and its tooltips.
light_props = [
    'ID','RoadwayName','DirectionOfTravel', 'Description', 'EventType','EventSubType',
    'IsFullClosure','Severity', 'LocalReported', 'LocalStartDate','LocalPlannedEndDate','LanesAffected','LaneCount',
    'geometry'
]
gdf_map = gdf[light_props]

# Preview up to 10 rows. The size is capped because sample() raises when asked
# for more rows than exist, and the seed is fixed so the preview is the same
# on every Restart & Run All.
gdf_map.sample(n=min(10, len(gdf_map)), random_state=0)

Unnamed: 0,ID,RoadwayName,DirectionOfTravel,Description,EventType,EventSubType,IsFullClosure,Severity,LocalReported,LocalStartDate,LocalPlannedEndDate,LanesAffected,LaneCount,geometry
21608,469588,BROADWAY RD,,BROADWAY RD - road construction - Broadway Ful...,roadwork,road construction,0,,2025-08-11 00:00:00,2025-08-11 00:00:00,2025-08-13 00:00:00,No Data,,POINT (-111.97825 33.40733)
29279,493268,36TH ST,,36TH ST - road construction - SE,roadwork,road construction,0,,2025-02-06 00:00:00,2025-02-06 00:00:00,2025-09-12 00:00:00,No Data,,POINT (-112.00399 33.40901)
7067,435412,UNIVERSITY DR,,UNIVERSITY DR - road construction - SE,roadwork,road construction,0,,2025-02-06 00:00:00,2025-02-06 00:00:00,2025-07-18 00:00:00,No Data,,POINT (-112.00427 33.41927)
31418,499114,BROADWAY RD,,BROADWAY RD - road construction - SE,roadwork,road construction,0,,2025-09-05 00:00:00,2025-09-05 00:00:00,2025-09-05 00:00:00,No Data,,POINT (-111.99294 33.40729)
15316,462001,48TH ST,,48TH ST - road construction - Pot Holing,roadwork,road construction,0,,2025-07-21 05:00:00,2025-07-21 05:00:00,2025-07-25 14:00:00,No Data,,POINT (-111.98035 33.30530)
26080,481325,RAY RD,,RAY RD - road construction - WATER/ SEWER,roadwork,road construction,0,,2025-08-06 08:30:00,2025-08-06 08:30:00,2025-08-22 16:00:00,No Data,,POINT (-111.97613 33.31987)
12019,446538,BASELINE RD,,BASELINE RD - road construction - ASPHALT REST...,roadwork,road construction,0,,2025-06-27 08:30:00,2025-06-27 08:30:00,2025-07-18 15:30:00,No Data,,POINT (-112.00993 33.37802)
23042,476520,42ND ST,,42ND ST - road construction - Concrete Valve,roadwork,road construction,0,,2025-08-07 08:30:00,2025-08-07 08:30:00,2025-08-15 16:00:00,No Data,,POINT (-111.99284 33.30522)
40113,506941,40TH ST,,40TH ST - road construction - PC,roadwork,road construction,0,,2025-09-16 00:00:00,2025-09-16 00:00:00,2025-09-26 00:00:00,No Data,,POINT (-111.99542 33.41641)
45470,533335,20TH ST,,20TH ST - road construction - Road reconstruct...,roadwork,road construction,0,,2025-06-23 07:00:00,2025-06-23 07:00:00,2025-10-13 16:00:00,No Data,,POINT (-112.04020 33.37444)


In [105]:
gdf_preview = gdf_map

# Columns shown in the hover tooltip of each feature.
tooltip_fields = ['RoadwayName','EventType', 'LocalReported', 'LocalStartDate', 'LocalPlannedEndDate']

# Base map with a fixed centre and zoom level.
m = folium.Map(location=[33.4484, -112.0740], zoom_start=11)

# Add the events as a GeoJSON layer with the tooltip fields above.
events_layer = folium.GeoJson(
    gdf_map,
    name="WZDx (preview)",
    tooltip=folium.GeoJsonTooltip(fields=tooltip_fields),
)
events_layer.add_to(m)

# Layer toggle, then write the map as a standalone HTML file in the working directory.
folium.LayerControl().add_to(m)
m.save("i-10events.html")