In [2]:
import pandas as pd
from datetime import datetime

#make into function later
station_id = "080250"
path = "VDOT Capstone Files/"
volume_file = path + f"Station - {station_id} - Volume - 2021-2024.csv"
speed_file = path + f"Station - {station_id} - Speed - 2021-2024.csv"
output_file = f"output_flows/flows_{station_id}.rou.xml"

volume_df = pd.read_csv(volume_file, parse_dates=["STARTDATE1"])
volume_df.head()

Unnamed: 0,LINKID,STARTDATE1,DIRECTION,COUNTERNUMBER,LANE,VOL,AXLES,QUAL
0,80250,2021-01-01 00:00:00,N,1,1,15,,4
1,80250,2021-01-01 00:00:00,N,1,2,30,,4
2,80250,2021-01-01 00:00:00,N,1,3,10,,4
3,80250,2021-01-01 00:15:00,N,1,1,18,,4
4,80250,2021-01-01 00:15:00,N,1,2,20,,4


In [3]:
# speed files have all the info that volume does, so only speed ones are necessary
speed_df = pd.read_csv(speed_file, parse_dates=["STARTDATE1"])
speed_df.head()

Unnamed: 0,LINKID,STARTDATE1,DIRECTION,COUNTERNUMBER,LANE,MPH_0_15,MPH_15_25,MPH_25_30,MPH_30_35,MPH_35_40,...,MPH_50_55,MPH_55_60,MPH_60_65,MPH_65_70,MPH_70_75,MPH_75_80,MPH_80_85,MPH_85_UP,VOL1,QUAL
0,80250,2021-01-01 00:00:00,N,1,1,0,0,0,0,0,...,3,2,4,3,0,0,0,0,15,4
1,80250,2021-01-01 00:00:00,N,1,2,0,0,0,0,0,...,0,0,3,10,5,8,4,0,30,4
2,80250,2021-01-01 00:00:00,N,1,3,0,0,0,0,0,...,0,0,1,3,3,2,1,0,10,4
3,80250,2021-01-01 00:15:00,N,1,1,0,0,0,0,0,...,1,6,7,2,1,0,0,0,18,4
4,80250,2021-01-01 00:15:00,N,1,2,0,0,0,0,0,...,0,0,1,7,4,6,2,0,20,4


In [12]:
speed_df.columns

Index(['LINKID', 'STARTDATE1', 'DIRECTION', 'COUNTERNUMBER', 'LANE',
       'MPH_0_15', 'MPH_15_25', 'MPH_25_30', 'MPH_30_35', 'MPH_35_40',
       'MPH_40_45', 'MPH_45_50', 'MPH_50_55', 'MPH_55_60', 'MPH_60_65',
       'MPH_65_70', 'MPH_70_75', 'MPH_75_80', 'MPH_80_85', 'MPH_85_UP', 'VOL1',
       'QUAL'],
      dtype='object')

In [8]:
speed_bins = {
    "MPH_0_15": 7.5, "MPH_15_25": 20, "MPH_25_30": 27.5, "MPH_30_35": 32.5, "MPH_35_40": 37.5, "MPH_40_45": 42.5, 
    "MPH_45_50": 47.5, "MPH_50_55": 52.5, "MPH_55_60": 57.5, "MPH_60_65": 62.5, "MPH_65_70": 67.5, "MPH_70_75": 72.5,
    "MPH_75_80": 77.5, "MPH_80_85": 82.5, "MPH_85_UP": 87.5
}

speed_df["day"] = speed_df["STARTDATE1"].dt.day_name()
speed_df["time_block"] = speed_df["STARTDATE1"].dt.floor("15min").dt.time

grouped = speed_df.groupby(["day", "time_block", "LANE"]).agg({
    "VOL1": "mean",
    **{col: "mean" for col in speed_bins}  # average each speed bucket
}).reset_index()

grouped.head()

Unnamed: 0,day,time_block,LANE,VOL1,MPH_0_15,MPH_15_25,MPH_25_30,MPH_30_35,MPH_35_40,MPH_40_45,MPH_45_50,MPH_50_55,MPH_55_60,MPH_60_65,MPH_65_70,MPH_70_75,MPH_75_80,MPH_80_85,MPH_85_UP
0,Friday,00:00:00,1,24.62201,0.047847,0.076555,0.07177,0.047847,0.15311,0.511962,1.339713,4.038278,5.937799,7.248804,3.406699,1.272727,0.392344,0.066986,0.009569
1,Friday,00:00:00,2,65.870813,0.014354,0.028708,0.086124,0.066986,0.095694,0.186603,0.392344,1.229665,3.315789,9.837321,20.023923,19.425837,9.110048,1.827751,0.229665
2,Friday,00:00:00,3,30.990431,0.0,0.0,0.004785,0.009569,0.028708,0.129187,0.301435,0.368421,0.77512,2.899522,6.655502,9.665072,7.559809,2.086124,0.507177
3,Friday,00:15:00,1,22.200957,0.023923,0.038278,0.052632,0.095694,0.215311,0.421053,1.449761,3.741627,5.555024,6.119617,3.057416,1.062201,0.277512,0.076555,0.014354
4,Friday,00:15:00,2,65.100478,0.023923,0.057416,0.019139,0.07177,0.114833,0.172249,0.315789,0.985646,3.210526,10.45933,20.373206,18.875598,8.641148,1.497608,0.282297


In [9]:
# https://sumo.dlr.de/docs/Definition_of_Vehicles%2C_Vehicle_Types%2C_and_Routes.html#repeated_vehicles_flows
flows = []
for _, row in grouped.iterrows():
    day = row["day"]
    time_block = row["time_block"]
    lane = int(row["LANE"])
    vol = row["VOL1"]
    vehsPerHour = round(vol * 4, 2)

    # Compute average speed
    total_cars = 0
    weighted_speed = 0
    for col, mph in speed_bins.items():
        count = row[col]
        total_cars += count
        weighted_speed += count * mph
    if total_cars == 0:
        continue
    avg_speed = round(weighted_speed / total_cars, 2)

    # Convert time to seconds since midnight
    begin = time_block.hour * 3600 + time_block.minute * 60
    end = begin + 900  # 15-minute interval

    flow_id = f"{station_id}_{day}_{time_block.strftime('%H%M')}_LANE{lane}"
    flow = (
        f'<flow id="{flow_id}" begin="{begin}" end="{end}" from="edge_in" to="edge_out" '
        f'vehsPerHour="{vehsPerHour}" speed="{avg_speed}" departLane="free" />'
    )
    flows.append(flow)

print(flows[0])

<flow id="080250_Friday_0000_LANE1" begin="0" end="900" from="edge_in" to="edge_out" vehsPerHour="98.49" speed="59.38" departLane="free" />


In [22]:
with open(output_file, "w") as f:
    f.write('<routes>\n')
    for flow in flows:
        f.write(f"  {flow}\n")
    f.write('</routes>\n')