# Imports

In [1]:
import pandas as pd

In [2]:
import xml.etree.ElementTree as et

In [3]:
import os

In [4]:
import matplotlib.pyplot as plt

# Functions

In [5]:
def get_last_folder(path):
    """Return the last component of ``path``.

    The path is normalised with ``os.path.normpath`` first, so a trailing
    separator or doubled separators do not yield an empty component.
    """
    # Everything after the final separator of the normalised path.
    return os.path.normpath(path).rsplit(os.sep, 1)[-1]

In [6]:
def transform_xml(xml_doc, tag='vehicle'):
    """Yield one flat attribute dict per ``tag`` element at or below ``xml_doc``.

    Every yielded dict starts as a copy of the attributes of ``xml_doc``
    itself and is then updated with the attributes of the matched element,
    so on a name clash the matched element's value wins.

    Parameters
    ----------
    xml_doc : xml.etree.ElementTree.Element
        Element whose subtree is searched.
    tag : str, default 'vehicle'
        Tag name of the elements to export.

    Yields
    ------
    dict
        A new dict per matched element, in the order ``Element.iter`` visits
        them.
    """
    base_attrs = xml_doc.attrib
    for element in xml_doc.iter(tag):
        # Copy so that rows never share state with each other or the tree.
        row = base_attrs.copy()
        row.update(element.attrib)

        yield row


In [7]:
def calc_elevation_up(group):
    """Return the summed positive row-to-row change of ``group['z']``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows in travel order with a ``'z'`` column convertible by
        ``pd.to_numeric``.

    Returns
    -------
    float
        Sum of all increases in ``z`` between consecutive rows (>= 0).
        A group with fewer than two rows yields 0.
    """
    z_diff = pd.to_numeric(group['z']).diff()

    # Keep only the climbs: decreases are clipped to 0. The first row has no
    # predecessor, so its diff is NaN, which sum() skips.
    return z_diff.clip(lower=0).sum()

In [8]:
def calc_elevation_down(group):
    """Return the summed negative row-to-row change of ``group['z']``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows in travel order with a ``'z'`` column convertible by
        ``pd.to_numeric``.

    Returns
    -------
    float
        Sum of all decreases in ``z`` between consecutive rows (<= 0, i.e.
        the result keeps its negative sign). A group with fewer than two
        rows yields 0.
    """
    z_diff = pd.to_numeric(group['z']).diff()

    # Keep only the descents: increases are clipped to 0. The first row has
    # no predecessor, so its diff is NaN, which sum() skips.
    return z_diff.clip(upper=0).sum()

In [9]:
def transform_xml_tripinfo(xml_doc):
    """Yield one flat attribute dict per ``tripinfo`` element of ``xml_doc``.

    Each dict holds the attributes of ``xml_doc`` itself, overlaid with the
    attributes of the ``tripinfo`` element (the element wins on a clash).
    """
    shared = xml_doc.attrib
    for trip in xml_doc.iter('tripinfo'):
        # A new dict per element: shared attributes first, then the trip's own.
        yield {**shared, **trip.attrib}

In [10]:
def transform_xml_stops(xml_doc):
    """Yield one dict per ``route`` element, including its stops.

    Each dict is a copy of the route's attributes plus a ``'stops'`` entry:
    a list with one attribute dict per direct ``stop`` child of that route
    (an empty list when the route has none).
    """
    for route in xml_doc.iter('route'):
        record = dict(route.attrib)
        # Only direct <stop> children belong to this route.
        record['stops'] = [dict(stop.attrib) for stop in route.findall('stop')]
        yield record

In [11]:
def get_group_by_id(list_of_dfs, desired_id):
    """Return the rows carrying ``desired_id`` from the first frame that has it.

    The frames are scanned in order; from the first one whose ``'id'`` column
    contains ``desired_id`` the matching rows are returned.

    Raises
    ------
    ValueError
        If no frame contains ``desired_id``.
    """
    for frame in list_of_dfs:
        ids = frame['id']
        if desired_id not in ids.values:
            continue
        return frame[ids == desired_id]
    raise ValueError(f"ID '{desired_id}' not found in any dataframe.")

# XML to DataFrame

In [12]:
# Folder that holds the XML files read below. Defaults to the original
# location; set the SUMO_SCENARIO_DIR environment variable to point the
# notebook at another folder or machine without editing this cell.
base_folder = os.environ.get("SUMO_SCENARIO_DIR", "C:\\Users\\Admin\\Sumo\\nap_gellert_b")

In [13]:
# Load the emission output: one row per <vehicle> element found in the file.
# All values come from XML attributes, so every column holds strings.
file_path = os.path.join(base_folder, "emission.out.xml")
emission_output = et.parse(file_path)

# transform_xml is a generator; list() drains it into one dict per element.
transform = transform_xml(emission_output.getroot())
emission_output_list = list(transform)

emission_output_df = pd.DataFrame(emission_output_list)
# Drop the first column by position. transform_xml puts the root element's
# attributes first in every row, so this is presumably a root-level attribute
# rather than vehicle data -- TODO confirm against the XML header.
emission_output_df = emission_output_df.drop(emission_output_df.columns[0], axis=1)
emission_output_df

Unnamed: 0,id,eclass,CO2,CO,HC,NOx,PMx,fuel,electricity,noise,route,type,waiting,lane,pos,speed,angle,x,y,z
0,C6305319.0,HBEFA3/Bus,5286.11,20.17,4.85,60.75,2.01,1671.11,0.00,67.11,C6305319,bus,0.00,34572881#1_0,12.10,0.00,281.98,4016.83,1694.47,113.74
1,C6305319.0,HBEFA3/Bus,7546.30,22.31,5.14,75.92,2.25,2386.51,0.00,72.94,C6305319,bus,0.00,34572881#1_0,13.07,0.97,281.98,4015.88,1694.67,113.80
2,C6305319.0,HBEFA3/Bus,8846.44,23.42,5.31,84.41,2.38,2798.07,0.00,72.31,C6305319,bus,0.00,34572881#1_0,14.84,1.77,281.98,4014.15,1695.04,113.91
3,C6305319.0,HBEFA3/Bus,11856.72,26.41,5.70,104.90,2.72,3750.83,0.00,73.88,C6305319,bus,0.00,34572881#1_0,17.61,2.77,281.98,4011.44,1695.61,114.09
4,C6305319.0,HBEFA3/Bus,13881.32,28.28,5.96,118.40,2.93,4391.67,0.00,74.13,C6305319,bus,0.00,34572881#1_0,21.34,3.73,281.98,4007.80,1696.38,114.32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2436,C69789682.0,HBEFA3/Bus,22987.83,37.34,7.13,180.41,3.94,7273.91,0.00,76.67,C6978911,bus,0.00,53864729#4_0,405.77,6.72,357.79,2386.49,3593.90,147.52
2437,C69789682.0,HBEFA3/Bus,23550.87,37.50,7.20,183.46,3.98,7452.25,0.00,76.42,C6978911,bus,0.00,53864729#4_0,413.49,7.72,357.79,2386.19,3601.61,147.36
2438,C69789682.0,HBEFA3/Bus,21537.34,34.81,6.94,168.37,3.70,6815.20,0.00,75.56,C6978911,bus,0.00,53864729#4_0,421.96,8.47,357.79,2385.87,3610.06,147.19
2439,C69789682.0,HBEFA3/Bus,25566.93,39.03,7.46,196.24,4.16,8090.50,0.00,76.79,C6978911,bus,0.00,53864729#4_0,431.31,9.35,357.80,2385.51,3619.40,146.88


In [14]:
# Load the battery output: one row per <vehicle> element found in the file.
# All values come from XML attributes, so every column holds strings.
file_path = os.path.join(base_folder, "Battery.out.xml")

battery_output = et.parse(file_path)
battery_output_root = battery_output.getroot()

# transform_xml is a generator; list() drains it into one dict per element.
transform = transform_xml(battery_output_root)
battery_output_list = list(transform)

battery_output_pd = pd.DataFrame(battery_output_list)

# Drop the first column by position. transform_xml puts the root element's
# attributes first in every row, so this is presumably a root-level attribute
# rather than vehicle data -- TODO confirm against the XML header.
battery_output_pd = battery_output_pd.drop(battery_output_pd.columns[0], axis=1)
battery_output_pd

Unnamed: 0,id,energyConsumed,totalEnergyConsumed,totalEnergyRegenerated,actualBatteryCapacity,maximumBatteryCapacity,chargingStationId,energyCharged,energyChargedInTransit,energyChargedStopped,speed,acceleration,x,y,lane,posOnLane,timeStopped
0,C6305319.0,0.0000,0.0000,0.0000,17500.0000,35000.0000,,0.0000,0.0000,0.0000,0.0000,0.0000,4016.8263,1694.4668,34572881#1_0,12.1000,0
1,C6305319.0,0.6337,0.6337,0.0000,17499.3663,35000.0000,,0.0000,0.0000,0.0000,0.9723,0.9723,4015.8770,1694.6683,34572881#1_0,13.0723,0
2,C6305319.0,1.2525,1.8862,0.0000,17498.1138,35000.0000,,0.0000,0.0000,0.0000,1.7703,0.7980,4014.1485,1695.0350,34572881#1_0,14.8426,0
3,C6305319.0,2.2335,4.1197,0.0000,17495.8803,35000.0000,,0.0000,0.0000,0.0000,2.7693,0.9990,4011.4448,1695.6088,34572881#1_0,17.6119,0
4,C6305319.0,3.0379,7.1576,0.0000,17492.8424,35000.0000,,0.0000,0.0000,0.0000,3.7302,0.9609,4007.8028,1696.3816,34572881#1_0,21.3421,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2436,C69789682.0,3.3596,890.1439,460.9966,17070.8527,35000.0000,,0.0000,0.0000,0.0000,6.7236,1.1436,2386.4906,3593.9043,53864729#4_0,405.7710,0
2437,C69789682.0,3.4096,893.5535,460.9966,17067.4431,35000.0000,,0.0000,0.0000,0.0000,7.7190,0.9954,2386.1932,3601.6116,53864729#4_0,413.4900,0
2438,C69789682.0,2.7999,896.3534,460.9966,17064.6432,35000.0000,,0.0000,0.0000,0.0000,8.4661,0.7471,2385.8670,3610.0648,53864729#4_0,421.9561,0
2439,C69789682.0,3.2976,899.6510,460.9966,17061.3456,35000.0000,,0.0000,0.0000,0.0000,9.3517,0.8856,2385.5085,3619.3991,53864729#4_0,431.3078,0


In [15]:
# Load the trip summaries: one row per <tripinfo> element found in the file.
# All values come from XML attributes, so every column holds strings.
file_path = os.path.join(base_folder, "tripinfo.xml")

b_tripinfo_output = et.parse(file_path)
b_tripinfo_output_root = b_tripinfo_output.getroot()

# transform_xml_tripinfo is a generator; list() drains it into one dict per element.
transform = transform_xml_tripinfo(b_tripinfo_output_root)
b_tripinfo_output_list = list(transform)

b_tripinfo_output_pd = pd.DataFrame(b_tripinfo_output_list)

# Drop the first column by position. The root element's attributes come first
# in every row, so this is presumably a root-level attribute rather than trip
# data -- TODO confirm against the XML header.
b_tripinfo_output_pd = b_tripinfo_output_pd.drop(b_tripinfo_output_pd.columns[0], axis=1)
#b_tripinfo_output_pd

In [16]:
# Load the routes: one row per <route> element, with the route's attributes
# as columns plus a 'stops' column holding a list of stop-attribute dicts.
file_path = os.path.join(base_folder, "gtfs_pt_vehicles.add.xml")
stops = et.parse(file_path)

# transform_xml_stops is a generator; list() drains it into one dict per route.
transform = transform_xml_stops(stops.getroot())
stops_list = list(transform)

stops_pd = pd.DataFrame(stops_list)

In [17]:
# Load the vehicle definitions from the same file the routes were read from:
# one row per <vehicle> element.
file_path = os.path.join(base_folder, "gtfs_pt_vehicles.add.xml")
vehicles = et.parse(file_path)

# transform_xml is a generator; list() drains it into one dict per element.
transform = transform_xml(vehicles.getroot())
vehicles_list = list(transform)

vehicles_pd = pd.DataFrame(vehicles_list)
# Drop the first column by position. transform_xml puts the root element's
# attributes first in every row, so this is presumably a root-level attribute
# rather than vehicle data -- TODO confirm against the XML header.
vehicles_pd = vehicles_pd.drop(vehicles_pd.columns[0], axis=1)
#vehicles_pd

## Grouping by id

In [18]:
# Split the battery rows by their 'id' column: one group per distinct id.
grouped_df = battery_output_pd.groupby('id')

# The same groups as a plain list of DataFrames, in groupby order.
list_of_dfs = [group_data for _, group_data in grouped_df]
#C74509106 = list_of_dfs[0]
#C74509124 = list_of_dfs[1]
#C74509135 = list_of_dfs[2]

In [19]:
# Split the emission rows by their 'id' column: one group per distinct id.
grouped_emission_df = emission_output_df.groupby('id')

# One DataFrame per id; searched later with get_group_by_id to reach the
# 'z' column of a given id.
list_of_emission_dfs = [group_data for _, group_data in grouped_emission_df]
#list_of_emission_dfs



In [20]:

# One summary record per id found in the battery output.
results = []
for group_id, group_data in grouped_df:
    avg_speed = group_data['speed'].astype(float).mean()

    # Net energy: last 'totalEnergyConsumed' value minus last
    # 'totalEnergyRegenerated' value of this id.
    consumed = float(group_data['totalEnergyConsumed'].iloc[-1])
    regenerated = float(group_data['totalEnergyRegenerated'].iloc[-1])
    energy = consumed - regenerated

    # Tripinfo rows of this id; one mask serves both lookups.
    trip_mask = b_tripinfo_output_pd['id'] == group_id
    time_loss = b_tripinfo_output_pd.loc[trip_mask, 'timeLoss'].values[0]
    route_length = b_tripinfo_output_pd.loc[trip_mask, 'routeLength'].values[0]

    # Count the stops of the route this id is assigned to.
    route = vehicles_pd.loc[vehicles_pd['id'] == group_id, 'route'].values[0]
    count_stops = stops_pd.loc[stops_pd['id'] == route, 'stops'].apply(len).sum()

    # The emission rows of the same id carry the 'z' column.
    emission_group = get_group_by_id(list_of_emission_dfs, group_id)
    z_up = calc_elevation_up(emission_group)
    z_down = calc_elevation_down(emission_group)

    results.append({
        'id': group_id,
        'avgSpeed': avg_speed,
        'battery': energy,
        'timeloss': time_loss,
        'routeLength': route_length,
        'numOfStops': count_stops,
        'up': z_up,
        'down': z_down,
    })

# Turn the collected records into a DataFrame
result_df = pd.DataFrame(results)
print(result_df)


             id  avgSpeed   battery timeloss routeLength  numOfStops     up  \
0    C6305319.0  3.033229  192.4163   430.12     1866.71           4  17.51   
1   C69789682.0  2.140330  442.8697  1024.37     2861.73           6  46.39   
2  C782572610.0  5.427416  335.3405   118.11     2724.58           4  88.58   

    down  
0 -19.52  
1 -15.76  
2 -85.17  


In [21]:
#avg_seed_b = C74509135['speed'].astype(float).mean()
#avg_seed_b

In [22]:
#float(C74509106['totalEnergyConsumed'].iloc[-1])-float(C74509106['totalEnergyRegenerated'].iloc[-1])
#24 2214.5173999999997  2219.5933
#06 2218.0583           2222.9716
#35 -615.606            -624.5073000000002

In [23]:
#plt.plot(C74509135['energyConsumed'].astype(float).cumsum())
#plt.xlabel('Time')
#plt.ylabel('Total Energy Regenerated')
#plt.title('Total Energy Regenerated Over Time')
#plt.xticks(rotation=45)
#plt.tight_layout()
#plt.show()

# CSV

In [24]:
# Read the ';'-separated results table. The rows built later in this
# notebook are appended to it and the file is written back.
# NOTE(review): because the same file is read and rewritten, re-running the
# notebook appends the same rows again -- confirm this is intended.
tableBattery = pd.read_csv('batteryData.csv', delimiter=';')

tableBattery.shape

(0, 11)

## Settings

In [25]:
# Values written unchanged into every CSV row produced by this run.
# 'loc' is the name of the last folder of base_folder.
locSetting = get_last_folder(base_folder)
# NOTE(review): 'dd' and 'wh' look like placeholders for the seed and the
# traffic scale of the run -- confirm the intended values before exporting.
seedSetting = 'dd'
trafficScaleSetting = 'wh'

# Remaining CSV fields; presumably ":)" marks the ones already derived from
# result_df -- TODO confirm.
#routeLengthSetting = :)
#numOfStopsSetting = :)
#tripIdSetting = :)
#avgSpeedSetting = :)
#timelossSetting = :)
#eleupSetting = 
#eledownSetting = 
#emissionSetting = :)

## DataFrame to CSV

In [26]:
# Build every new CSV row first and append them with a single concat.
# Concatenating inside the loop copies the whole table once per row.
new_rows = [
    {
        'loc': locSetting,
        'tripId': row['id'],
        'seed': seedSetting,
        'avgSpeed': row['avgSpeed'],
        'timeloss': row['timeloss'],
        'route_length': row['routeLength'],
        'elevation_up': row['up'],
        'elevation_down': row['down'],
        'trafficScale': trafficScaleSetting,
        'numOfStops': row['numOfStops'],
        'emission': row['battery']
    }
    for _, row in result_df.iterrows()
]

# Leave the table untouched when there is nothing to append.
if new_rows:
    #any empty or all-NA columns in tableBattery are excluded before concatenating the DataFrames
    tableBattery = tableBattery.dropna(axis=1, how='all')

    tableBattery = pd.concat([tableBattery, pd.DataFrame(new_rows)], ignore_index=True)


'id': group_id,
        'avgSpeed': avg_speed,
        'batteryCapacity': energy,
        'timeloss': time_loss,
        'routeLength': route_length,
        'stops': count_stops

In [27]:
# Write the extended table back first, then leave it as the cell's last
# expression so the notebook renders it (a bare name on an earlier line of
# the cell displays nothing).
tableBattery.to_csv('batteryData.csv', index=False, sep=';')
tableBattery