# On-Time Performance (OTP) Calculator

In [None]:
import yaml
import pandas as pd
import numpy as np
from lxml import etree

import os

# Set up event and transit vehicle file paths

In [None]:
# Input and output locations for the simple scenario.
# The paths are assembled with os.path.join so the platform's own separator is
# used: on Windows the resulting strings are identical to the previous
# hard-coded "data\\simple_scenario\\..." literals, while on POSIX systems the
# directory part is now recognised (previously os.path.dirname returned "" and
# os.makedirs("") raised).
SCENARIO_DIR = os.path.join("data", "simple_scenario")
TRANSIT_VEHICLE_PATH = os.path.join(SCENARIO_DIR, "transitVehicles.xml")
EVENT_PATH = os.path.join(SCENARIO_DIR, "output", "output_events.xml")

OUTPUT_EVENT_AFTER_PROCESSOR = os.path.join(
    SCENARIO_DIR, "scoring", "otp", "bus_delay_at_stop.csv"
)

# Drop the result of a previous run so later cells start from an empty file.
try:
    os.remove(OUTPUT_EVENT_AFTER_PROCESSOR)
except FileNotFoundError:
    # Nothing to clean up on a first run.
    pass

# Make sure the output directory exists before anything is written to it.
folder_path = os.path.dirname(OUTPUT_EVENT_AFTER_PROCESSOR)
os.makedirs(folder_path, exist_ok=True)

# Xử lý file transit vehicle

In [None]:
# Preview the first 20 lines of the transit-vehicle file, each prefixed with its line number (runs through the `powershell` command).
!powershell -Command "Get-Content 'data\\simple_scenario\\transitVehicles.xml' -TotalCount 20 | ForEach-Object { '{0:5}: {1}' -f $_.ReadCount, $_ }"

### Xử lý ra dict chứa id và type

In [None]:
# Parse the transit-vehicle XML file; `root` is the document's top-level
# element and is the starting point for the XPath query over the vehicles.
tree = etree.parse(TRANSIT_VEHICLE_PATH)
root = tree.getroot()

#### Thuộc tính xmlns trong thẻ vehicleDefinitions là namespace cho tất cả các tag nằm trong thẻ này. Nghĩa là tên đầy đủ của các tag là {http://www.matsim.org/files/dtd}vehicleDefinitions, {http://www.matsim.org/files/dtd}vehicle.

#### Khi dùng lxml phải dùng namespace: root.xpath("//m:vehicleDefinitions/m:vehicle", namespaces=ns)

In [None]:
# The vehicle file puts its elements in a namespace, so every XPath step has
# to be qualified with a prefix bound to that namespace URI.
ns = {'m': 'http://www.matsim.org/files/dtd'}

# Maps each vehicle id to the value of its `type` attribute.
vehtype_dict = {}

# The loop variables are deliberately not called `id` / `type`: binding those
# names at notebook scope would hide the builtins id() and type() in every
# later cell.
for node in root.xpath("//m:vehicleDefinitions/m:vehicle", namespaces=ns):
    # Direct attribute lookup; a vehicle without `id` or `type` still fails
    # loudly (KeyError) instead of being silently recorded.
    vehicle_id = node.attrib["id"]
    vehicle_type = node.attrib["type"]
    print([vehicle_id, vehicle_type])
    vehtype_dict[vehicle_id] = vehicle_type



## Xử lý event

In [None]:
# Preview the first 20 lines of the events file, each prefixed with its line number (runs through the `powershell` command).
!powershell -Command "Get-Content 'data\\simple_scenario\\output\\output_events.xml' -TotalCount 20 | ForEach-Object { '{0:5}: {1}' -f $_.ReadCount, $_ }"

#### Chỉ đếm những người lên xe bus khác lái xe

In [None]:
# A vehicle counts as a bus when its type string contains this hint
# (compared case-insensitively).
hint_bus_type = "bus"

# NOTE(review): `schema` names delay columns, but the rows written below are
# (person, vehicle) pairs. It is left untouched in case later cells rely on it;
# the CSV header is built from the columns that are actually written.
schema = ['vehicle', 'ar_delay', 'arr_time']
row_columns = ("person", "vehicle")

context = etree.iterparse(EVENT_PATH, events=('end',))

# The output file is opened once for the whole pass instead of once per row.
# Mode 'w' makes re-running this cell replace the file rather than append a
# second header and duplicate rows.
with open(OUTPUT_EVENT_AFTER_PROCESSOR, 'w', encoding="utf-8") as f:
    # One complete header line, terminated by a newline so the first data row
    # starts on its own line.
    f.write(",".join(row_columns) + "\n")

    for _, elem in context:
        if elem.tag == "event" and elem.get("type") == "PersonEntersVehicle":
            veh_id = elem.get("vehicle")
            person_id = elem.get("person")

            # Skip vehicles that are not transit vehicles or whose type does
            # not look like a bus.
            is_bus = hint_bus_type in vehtype_dict.get(veh_id, "").lower()
            # Skip bus drivers (ids starting with "pt_") and events that carry
            # no person id at all.
            skip_person = person_id is None or person_id.startswith("pt_")

            if is_bus and not skip_person:
                f.write(f"{person_id},{veh_id}\n")

        # Release every element once it has been seen, including the ones that
        # were filtered out above, and detach already-processed siblings so the
        # partially built tree does not grow with the size of the events file.
        elem.clear()
        while elem.getprevious() is not None:
            del elem.getparent()[0]

print("Finished processing ridership data.")


def _process_event(self, elem):
        """Fold one vehicle arrival or departure event into the OTP records.

        An arrival of a vehicle contained in ``self.bus_vehicles`` opens a
        pending record in ``self._temp_bus_map`` (keyed by vehicle id). A later
        departure of that vehicle completes the record, appends it to
        ``self.otp_data`` and removes the pending entry. Events of any other
        type, arrivals of other vehicles and departures without a pending
        arrival are ignored.

        A missing ``delay`` attribute is read as ``"0.0"``.
        """
        kind = elem.get("type")

        # --- Arrival: remember where and how late the bus arrived -----------
        if kind == "VehicleArrivesAtFacility":
            vehicle = elem.get("vehicle")
            if vehicle in self.bus_vehicles:
                raw_delay = elem.get("delay")
                stop = elem.get("facility")
                raw_time = elem.get("time")
                self._temp_bus_map[vehicle] = {
                    "stopId": stop,
                    "arrDelay": float("0.0" if raw_delay is None else raw_delay),
                    "arrivalTime": float(raw_time),
                    # Placeholder until the matching departure is seen.
                    "depDelay": 0.0,
                }
            return

        # --- Anything that is not a departure is of no interest -------------
        if kind != "VehicleDepartsAtFacility":
            return

        vehicle = elem.get("vehicle")
        if vehicle not in self._temp_bus_map:
            return

        raw_delay = elem.get("delay")
        record = self._temp_bus_map[vehicle]

        # The departure's facility is compared with the stop stored on
        # arrival, but a mismatch is intentionally not acted upon.
        if elem.get("facility") != record["stopId"]:
            pass

        record["depDelay"] = float("0.0" if raw_delay is None else raw_delay)
        record["departureTime"] = float(elem.get("time"))
        record["vehicleId"] = vehicle

        # The record is complete: store it and forget the pending arrival.
        self.otp_data.append(record)
        del self._temp_bus_map[vehicle]

    def save_otp_data_to_csv(self, output_path: str) -> None:
        """Announce the destination, then pass the collected OTP records on for saving.

        Prints ``output_path`` and hands ``self.otp_data`` together with
        ``output_path`` to ``save_csv_from_list``.

        NOTE(review): ``save_csv_from_list`` is defined outside this section;
        presumably it writes the list of record dicts as a CSV file at
        ``output_path`` - confirm against its definition.
        """
        print(f"Saving OTP data to: {output_path}")
        save_csv_from_list(self.otp_data, output_path)
    
    def get_dataframe(self) -> pd.DataFrame:
        """Return the records collected in ``self.otp_data`` as a new pandas DataFrame."""
        return pd.DataFrame(self.otp_data)



# Tính OTP