# Ridership Calculation

In [None]:
import yaml
import pandas as pd
import numpy as np
from lxml import etree

import os

# Set up event and vehicle types

In [None]:
# Input/output locations for the ridership calculation.
#
# Paths are assembled with os.path.join instead of hard-coded "\\" separators:
# on Windows the resulting strings are unchanged, while on other platforms a
# backslash is not a separator and the whole literal would be one file name.

# Alternative inputs (simple scenario); swap with the active block below to use them.
# TRANSIT_VEHICLE_PATH = os.path.join("data", "simple_scenario", "transitVehicles.xml")
# EVENT_PATH = os.path.join("data", "simple_scenario", "output", "output_events.xml")
# PLAN_PATH = os.path.join("data", "simple_scenario", "plans.xml")
# OUTPUT_EVENT_AFTER_PROCESSOR = os.path.join("data", "simple_scenario", "scoring", "ridership", "bus_passager.csv")

TRANSIT_VEHICLE_PATH = os.path.join("data", "real", "transitVehicles.xml")
EVENT_PATH = os.path.join("data", "real", "output_events.xml")
PLAN_PATH = os.path.join("data", "real", "plans_scale0.375true.xml")
OUTPUT_EVENT_AFTER_PROCESSOR = os.path.join("data", "real", "scoring", "bus_passager.csv")

# Remove any existing output file.
if os.path.exists(OUTPUT_EVENT_AFTER_PROCESSOR):
    os.remove(OUTPUT_EVENT_AFTER_PROCESSOR)

# Make sure the output directory exists before anything is written into it.
folder_path = os.path.dirname(OUTPUT_EVENT_AFTER_PROCESSOR)
if folder_path:  # dirname is "" for a bare file name, and os.makedirs("") raises
    os.makedirs(folder_path, exist_ok=True)

# Xử lý file transit vehicle

In [None]:
# Preview the first 20 lines of the transit vehicle file, each prefixed with its line number
# (IPython shell escape that invokes Windows PowerShell).
# NOTE(review): this previews the simple_scenario file, not TRANSIT_VEHICLE_PATH — confirm that is intended.
!powershell -Command "Get-Content 'data\\simple_scenario\\transitVehicles.xml' -TotalCount 20 | ForEach-Object { '{0:5}: {1}' -f $_.ReadCount, $_ }"

### Xử lý ra dict chứa id và type

In [None]:
# Parse the transit vehicle XML file and keep its root element for XPath queries.
tree = etree.parse(TRANSIT_VEHICLE_PATH)
root = tree.getroot()

#### xmlns trong thẻ vehicleDefinitions là namespace cho tất cả các tag trong tag này. Nghĩa là tên đầy đủ của tag là {http://www.matsim.org/files/dtd}vehicleDefinitions, {http://www.matsim.org/files/dtd}vehicle.

#### Khi dùng lxml phải dùng namespace: root.xpath("//m:vehicleDefinitions/m:vehicle", namespaces=ns)

In [None]:
# Namespace map for XPath: every tag in the vehicle file lives in this namespace.
ns = {'m': 'http://www.matsim.org/files/dtd'}

# Maps vehicle id -> vehicle type id, for every <vehicle> under <vehicleDefinitions>.
vehtype_dict = {}

# Loop variables are deliberately NOT called `id` / `type`: at notebook scope
# those names would shadow the builtins for every later cell.
for node in root.xpath("//m:vehicleDefinitions/m:vehicle", namespaces=ns):
    # node.attrib[...] returns plain strings and raises KeyError if the
    # attribute is missing, so malformed entries are not silently accepted.
    vehicle_id = node.attrib["id"]
    vehicle_type = node.attrib["type"]
    print([vehicle_id, vehicle_type])
    vehtype_dict[vehicle_id] = vehicle_type



## Xử lý event

In [None]:
# Preview the first 20 lines of the events file, each prefixed with its line number
# (IPython shell escape that invokes Windows PowerShell).
# NOTE(review): this previews the simple_scenario output, not EVENT_PATH — confirm that is intended.
!powershell -Command "Get-Content 'data\\simple_scenario\\output\\output_events.xml' -TotalCount 20 | ForEach-Object { '{0:5}: {1}' -f $_.ReadCount, $_ }"

#### Chỉ đếm những người lên xe bus khác lái xe

In [None]:
# Stream the events file and write one CSV row (person_id, vehicle_id) for each
# PersonEntersVehicle event where a non-driver person boards a bus.

# Substring that identifies a bus in the (lower-cased) vehicle type id.
hint_bus_type = "bus"

# Column names of the output CSV.
schema = ['person_id', 'vehicle_id']

# Not used in this cell; kept in case other cells reference it.
driver_veh_bus_dict = {}
context = etree.iterparse(EVENT_PATH, events=('end',))

# The output is opened once, in write mode, for the whole pass:
#  - 'w' truncates, so re-running this cell cannot append a second header or
#    duplicate rows to an existing file;
#  - a single handle avoids reopening the file for every matching event;
#  - UTF-8 matches the default encoding pandas uses when reading the CSV back.
with open(OUTPUT_EVENT_AFTER_PROCESSOR, 'w', encoding='utf-8') as f:
    f.write(",".join(schema) + "\n")

    for event, elem in context:
        if elem.tag == "event" and elem.get("type") == "PersonEntersVehicle":
            veh_id = elem.get("vehicle")
            person_id = elem.get("person")

            # Only vehicles listed in vehtype_dict whose type mentions "bus" count.
            veh_type = vehtype_dict.get(veh_id)
            is_bus = veh_type is not None and hint_bus_type in veh_type.lower()

            # Skip events without a person id and bus drivers (ids starting with "pt_").
            is_passenger = person_id is not None and not person_id.startswith("pt_")

            if is_bus and is_passenger:
                f.write(f"{person_id},{veh_id}\n")

        # Release the element's content, then detach siblings that were already
        # processed; clear() alone leaves the emptied elements attached to the
        # root, so memory would keep growing on large event files.
        elem.clear()
        parent = elem.getparent()
        if parent is not None:
            while elem.getprevious() is not None:
                del parent[0]

print("Finished processing ridership data.")


# Tính Ridership

In [None]:
# Unique vehicle ids seen in the most recently processed boarding CSV;
# rebound by calculte_ridership().
uni_bus = []


def calculte_ridership(bus_passenger_csv_path: str):
    """Return the number of distinct passengers in a boarding CSV.

    Reads the CSV at ``bus_passenger_csv_path`` and counts the unique values
    of its ``person_id`` column. As a side effect, the module-level
    ``uni_bus`` is rebound to the array of unique ``vehicle_id`` values.
    """
    global uni_bus

    boardings = pd.read_csv(bus_passenger_csv_path)
    passenger_count = boardings['person_id'].nunique()
    uni_bus = boardings["vehicle_id"].unique()
    return passenger_count


# Count the <person> elements in the plans file; this is the denominator of
# the ridership percentage printed below.
parser = etree.XMLParser(remove_blank_text=True)
tree = etree.parse(PLAN_PATH, parser)
root = tree.getroot()
residence_count = len(root.xpath('//population/person'))
print("Residence count: ", residence_count)


# Number of distinct persons recorded in the boarding CSV.
ridership = calculte_ridership(OUTPUT_EVENT_AFTER_PROCESSOR)
if residence_count:
    # The value is a head count, so it is printed without the stray "$" prefix.
    print(f"Ridership : {ridership};  percent: {ridership / residence_count * 100:.3f}%")
else:
    # No persons found: report the count without dividing by zero.
    print(f"Ridership : {ridership};  percent: n/a (no persons found)")
