In [1]:
#Import libraries
import json
import polars as pl
from sklearn.preprocessing import LabelEncoder

In [2]:
pl.enable_string_cache()
pl.Config().set_tbl_cols(100)
pl.Config().set_tbl_rows(25)

polars.config.Config

In [3]:
%config InteractiveShell.ast_node_interactivity = 'last_expr_or_assign'

In [8]:
# Route names
routes = ["2L", "2R", "3", "10", "11", "12", "16", "17", "18", "19", "21", "21 Tripper"]

In [9]:
# Train LabelEncoder
encoder = LabelEncoder()
encoder.fit(routes)

In [10]:
# Save the mapping
label_mapping = {label: i for i, label in enumerate(encoder.classes_)}
with open("../data/label_encoder.json", "w") as f:
    json.dump(label_mapping, f)


print("LabelEncoder mapping saved!")

LabelEncoder mapping saved!


In [6]:
#Load stops data
file = open("../data/stops.json", "r")
stopsData = json.load(file)

stops = pl.DataFrame(stopsData['get_stops'])

rid,rsid,id,name,lat,lng,extID,shortName
i64,i64,i64,str,f64,f64,str,str
3,197057,449,"""Anderson Hall""",41.938465,-88.761086,"""449""","""449"""
3,197056,448,"""Barsema Hall""",41.939774,-88.761009,"""448""","""448"""
3,197054,446,"""Castle Dr. at College Ave.""",41.93412,-88.763115,"""446""","""446"""
3,197053,445,"""Castle Dr. at Montgomery""",41.9324,-88.762863,"""445""","""445"""
3,197044,436,"""Convocation Center Entrance 3""",41.936459,-88.785286,"""436""","""436"""
3,197428,820,"""DuSable Turnaround Rt 2L / 2R""",41.934803,-88.769974,"""""","""538"""
3,197055,447,"""Gilbert Way at Art Building""",41.935471,-88.762093,"""447""","""447"""
3,197074,466,"""Grant North""",41.938599,-88.777321,"""466""","""466"""
3,197058,450,"""Health Services on Lucinda Ave…",41.936481,-88.764244,"""450""","""450"""
3,197041,433,"""Lincoln Hall""",41.936333,-88.774391,"""433""","""433"""


In [5]:
#Load patterns data
pattern_mapping = {
    3: "2L",
    4: "2R",
    37: "3",
    17: "10",
    18: "11",
    23: "12",
    12: "16",
    13: "17",
    14: "18",
    33: "19",
    46: "21",
    45: "21 Tripper",
}

#Load patterns json
file = open("../data/patterns.json", "r")
patternsData = json.load(file)

patterns = pl.DataFrame(patternsData['get_patterns'])
patterns = patterns.with_columns(pl.col("id").replace_strict(pattern_mapping, default="None"))

id,name,extID,type,length,color,encLine,decLine,routes,routeNames,stations,stopIDs
str,str,str,i64,f64,str,str,list[null],list[i64],list[str],list[null],list[i64]
"""2L""","""Route 2L (Full Service)""","""2""",2,5.156097,"""#097138""","""eq}~Fxix|OM?a@@}A??fE?nEAbE?h@…",[],[3],"[""Route 2L""]",[],"[433, 465, … 820]"
"""2R""","""Route 2R Full Service""","""1""",1,5.083211,"""#FFD600""","""qk}~Fn{x|Ow@C{BhAoBp@Em@G}G@_I…",[],[4],"[""Route 2R""]",[],"[431, 451, … 432]"
"""3""","""Route 3 Full Service""","""3""",2,2.793036,"""#58F964""","""kq}~Fxix|Oa@??@sA?@?C?A}C???aC…",[],[33],"[""Route 3""]",[],"[431, 493, … 431]"
"""10""","""Route 10 Full Service""","""10""",3,5.564219,"""#782BC9""","""{q}~Fxix|OuBACfF@jH@xClFkBj@Sj…",[],[17],"[""Route 10""]",[],"[477, 432, … 431]"
"""11""","""Route 11 Full Service""","""11""",2,12.793737,"""#A07D5C""","""mq}~F~ix|O}A?QECKEaPAoACmQGGeR…",[],[18],"[""Route 11""]",[],"[492, 431, … 491]"
"""12""","""Route 12 Full Service""","""12""",2,39.210159,"""#3C8DBC""","""{~|~Fzex|O?dCrIA@qE@}Bx@u}@@mF…",[],[23],"[""Route 12""]",[],"[809, 453, … 575]"
"""16""","""Route 16 Full Service""","""16""",3,4.9,"""#A64598""","""y~|~Ftcx|OAfErI@@}H?W~@cbAFmAD…",[],[12],"[""Route 16""]",[],"[809, 536, … 811]"
"""17""","""Route 17 Full Service""","""17""",2,11.542563,"""#3E5BA6""","""ap}~Fjkx|OHOAYQGu@B_B?K}]?_M`B…",[],[13],"[""Route 17""]",[],"[805, 806, … 805]"
"""18""","""Route 18 Full Service""","""18""",4,14.597877,"""#FFAA00""","""}i`_G|no|OoA}Aa@]gAKCB[~BUp@c@…",[],[14],"[""Route 18""]",[],"[676, 835, … 685]"
"""19""","""Route 19 Full Service""","""19""",4,21.596067,"""#34ADE0""","""cm}~Fj_y|Ol@CRUFW?_@??ESOQ??WI…",[],[30],"[""Route 19""]",[],"[819, 528, … 819]"


In [None]:
exploded = patterns.select(["id", "stopIDs"]).explode("stopIDs")

id,stopIDs
str,i64
"""2L""",433
"""2L""",465
"""2L""",466
"""2L""",467
"""2L""",468
"""2L""",436
"""2L""",437
"""2L""",438
"""2L""",439
"""2L""",440


In [23]:
# Join with stops on stop ID
joined = exploded.join(
    stops.select(["id", "name"]),
    left_on="stopIDs",
    right_on="id",
    how="left"
)

# Rename columns for clarity
joined = joined.rename({
    "id": "routeID",
    "name": "stopName",
})

# Group back into per-route JSON-ready structure
routes_with_stops = (
    joined
    .group_by("routeID")
    .agg([
        pl.struct(["stopIDs", "stopName"]).alias("stops")
    ])
)

json_data = routes_with_stops.to_dicts()

[{'routeID': '19',
  'stops': [{'stopIDs': 819, 'stopName': 'DuSable Turnaround - Rt 18 / Rt 19'},
   {'stopIDs': 528, 'stopName': 'Annie Glidden Rd at Crane Dr'},
   {'stopIDs': 528, 'stopName': 'Annie Glidden Rd at Crane Dr'},
   {'stopIDs': 528, 'stopName': 'Annie Glidden Rd at Crane Dr'},
   {'stopIDs': 709, 'stopName': 'Annie Glidden Rd. & Loren Dr. (East Side)'},
   {'stopIDs': 616, 'stopName': 'Annie Glidden Rd. at Varsity Dr.'},
   {'stopIDs': 482, 'stopName': 'Blackhawk Rd. at Kimberly Dr.'},
   {'stopIDs': 482, 'stopName': 'Blackhawk Rd. at Kimberly Dr.'},
   {'stopIDs': 482, 'stopName': 'Blackhawk Rd. at Kimberly Dr.'},
   {'stopIDs': 483, 'stopName': 'Blackhawk Rd. at Edgebrook Dr.'},
   {'stopIDs': 483, 'stopName': 'Blackhawk Rd. at Edgebrook Dr.'},
   {'stopIDs': 483, 'stopName': 'Blackhawk Rd. at Edgebrook Dr.'},
   {'stopIDs': 577, 'stopName': 'Blackhawk Rd. at Ridge Dr.(10)'},
   {'stopIDs': 577, 'stopName': 'Blackhawk Rd. at Ridge Dr.(10)'},
   {'stopIDs': 577, 'stopN

In [24]:
with open("../data/route_stops.json", "w") as f:
    json.dump(json_data, f, indent=2)