# Imports

In [82]:
import pandas as pd

In [83]:
import xml.etree.ElementTree as et

In [84]:
import os

In [85]:
import matplotlib.pyplot as plt

# Functions

In [86]:
def get_last_folder(path):
    """Return the final component of ``path`` after normalising it.

    ``os.path.normpath`` collapses redundant separators and strips a trailing
    one, so whatever follows the last ``os.sep`` is the last folder name.
    """
    return os.path.normpath(path).rsplit(os.sep, 1)[-1]

In [87]:
def transform_xml(xml_doc, tag='vehicle'):
    """Yield one flat record per ``tag`` element found under ``xml_doc``.

    Every record is a fresh dict that starts as a copy of ``xml_doc``'s own
    attributes and is then overlaid with the attributes of the matching
    element, so on a key clash the element's value wins.

    Args:
        xml_doc: An ``xml.etree.ElementTree.Element`` (typically the root).
        tag: Tag name of the elements to extract. Defaults to ``'vehicle'``,
            which keeps existing single-argument calls working unchanged.

    Yields:
        dict: Merged attribute mapping for each matching element, in
        document order. ``Element.iter`` also visits ``xml_doc`` itself, so
        it is included when its own tag equals ``tag``.
    """
    root_attributes = xml_doc.attrib
    for element in xml_doc.iter(tag):
        # Copy first so neither the root's nor the element's attrib is mutated.
        record = root_attributes.copy()
        record.update(element.attrib)

        yield record


In [88]:
def calc_elevation_up(group):
    """Return the total elevation gained along the ``z`` samples of ``group``.

    Args:
        group: DataFrame with a ``'z'`` column whose values can be converted
            by ``pd.to_numeric``; rows are used in their existing order.

    Returns:
        The sum of all positive row-to-row differences of ``z`` (``0.0`` when
        ``z`` never rises or there is only a single row).
    """
    z_diff = pd.to_numeric(group['z']).diff()

    # Keep only the rises: negative steps are clipped to zero. The leading
    # NaN produced by diff() stays NaN and is skipped by sum().
    return z_diff.clip(lower=0).sum()

In [89]:
def calc_elevation_down(group):
    """Return the total elevation lost along the ``z`` samples of ``group``.

    Args:
        group: DataFrame with a ``'z'`` column whose values can be converted
            by ``pd.to_numeric``; rows are used in their existing order.

    Returns:
        The sum of all negative row-to-row differences of ``z``, i.e. a value
        that is zero or negative (``0.0`` when ``z`` never falls or there is
        only a single row).
    """
    z_diff = pd.to_numeric(group['z']).diff()

    # Keep only the descents: positive steps are clipped to zero. The leading
    # NaN produced by diff() stays NaN and is skipped by sum().
    return z_diff.clip(upper=0).sum()

In [90]:
def transform_xml_tripinfo(xml_doc):
    """Yield one flat dict per ``<tripinfo>`` element under ``xml_doc``.

    Each dict holds the attributes of ``xml_doc`` overlaid with those of the
    ``<tripinfo>`` element; the element's value wins on a key clash.
    """
    shared = xml_doc.attrib
    for trip in xml_doc.iter('tripinfo'):
        # Unpacking builds a new dict, leaving both attribute maps untouched.
        yield {**shared, **trip.attrib}

In [91]:
def transform_xml_stops(xml_doc):
    """Yield one dict per ``<route>`` element, with its stops nested inside.

    The dict is a copy of the route's attributes plus a ``'stops'`` entry: a
    list with one attribute-dict copy for every direct ``<stop>`` child of
    that route (an empty list when the route has none).
    """
    for route in xml_doc.iter('route'):
        yield {
            **route.attrib,
            'stops': [stop.attrib.copy() for stop in route.findall('stop')],
        }

In [92]:
def get_group_by_id(list_of_dfs, desired_id):
    """Return the rows with ``id == desired_id`` from the first frame holding it.

    Frames are checked in list order; only the first frame whose ``'id'``
    column contains ``desired_id`` is used.

    Raises:
        ValueError: If no frame in ``list_of_dfs`` contains ``desired_id``.
    """
    for frame in list_of_dfs:
        ids = frame['id']
        if desired_id not in ids.values:
            continue
        return frame[ids == desired_id]
    raise ValueError(f"ID '{desired_id}' not found in any dataframe.")

# XML to DataFrame

In [93]:
# Folder that holds the simulation output files loaded below; its last path
# component is also used later as the 'loc' label of the exported rows.
# NOTE(review): absolute, machine-specific Windows path — consider making it
# configurable (e.g. relative path or environment variable) before sharing.
base_folder = "C:\\Users\\Admin\\Sumo\\nap_gellert_b"

In [94]:
# Load the emission output: one row per <vehicle> element in the file.
file_path = os.path.join(base_folder, "emission.out.xml")
emission_output = et.parse(file_path)
emission_output_root = emission_output.getroot()

transform = transform_xml(emission_output_root)
emission_output_list = list(transform)

emission_output_df = pd.DataFrame(emission_output_list)
# transform_xml merges the root element's attributes into every record.
# Drop those columns by name instead of by position, so a root with no
# attributes cannot cost us the 'id' column and a root with several
# attributes does not leave extras behind.
emission_output_df = emission_output_df.drop(
    columns=list(emission_output_root.attrib), errors='ignore'
)
emission_output_df

Unnamed: 0,id,eclass,CO2,CO,HC,NOx,PMx,fuel,electricity,noise,route,type,waiting,lane,pos,speed,angle,x,y,z
0,C6305319.0,HBEFA3/Bus,5286.11,20.17,4.85,60.75,2.01,1671.11,0.00,67.11,C6305319,bus,0.00,34572881#1_0,12.10,0.00,281.98,4016.83,1694.47,113.74
1,C6305319.0,HBEFA3/Bus,6458.68,21.20,5.00,68.46,2.13,2042.28,0.00,71.09,C6305319,bus,0.00,34572881#1_0,12.76,0.66,281.98,4016.18,1694.60,113.78
2,C6305319.0,HBEFA3/Bus,7430.51,22.02,5.12,74.80,2.22,2349.92,0.00,71.07,C6305319,bus,0.00,34572881#1_0,14.04,1.28,281.98,4014.93,1694.87,113.86
3,C6305319.0,HBEFA3/Bus,10398.65,24.99,5.51,95.03,2.55,3289.34,0.00,73.39,C6305319,bus,0.00,34572881#1_0,16.27,2.23,281.98,4012.75,1695.33,114.00
4,C6305319.0,HBEFA3/Bus,12154.81,26.58,5.73,106.67,2.74,3845.22,0.00,73.50,C6305319,bus,0.00,34572881#1_0,19.40,3.13,281.98,4009.70,1695.98,114.20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
558,C6305319.0,HBEFA3/Bus,13889.51,28.02,5.96,117.92,2.91,4394.36,0.00,73.50,C6305319,bus,0.00,-50219636#1_0,26.09,4.28,292.42,2434.74,1254.35,111.26
559,C6305319.0,HBEFA3/Bus,15979.87,30.02,6.22,132.00,3.14,5055.99,0.00,74.18,C6305319,bus,0.00,-50219636#1_0,31.21,5.12,292.42,2430.00,1256.30,111.37
560,C6305319.0,HBEFA3/Bus,21257.04,35.58,6.91,168.55,3.75,6726.12,0.00,76.20,C6305319,bus,0.00,-50219636#1_0,37.44,6.23,292.42,2424.25,1258.68,111.49
561,C6305319.0,HBEFA3/Bus,21730.24,35.66,6.97,171.00,3.77,6876.03,0.00,75.90,C6305319,bus,0.00,-50219636#1_0,44.62,7.18,292.42,2417.61,1261.42,111.64


In [95]:
# Load the battery output: one row per <vehicle> element in the file.
file_path = os.path.join(base_folder, "Battery.out.xml")

battery_output = et.parse(file_path)
battery_output_root = battery_output.getroot()

transform = transform_xml(battery_output_root)
battery_output_list = list(transform)

battery_output_pd = pd.DataFrame(battery_output_list)

# transform_xml merges the root element's attributes into every record.
# Drop those columns by name instead of by position, so a root with no
# attributes cannot cost us the 'id' column and a root with several
# attributes does not leave extras behind.
battery_output_pd = battery_output_pd.drop(
    columns=list(battery_output_root.attrib), errors='ignore'
)
battery_output_pd

Unnamed: 0,id,energyConsumed,totalEnergyConsumed,totalEnergyRegenerated,actualBatteryCapacity,maximumBatteryCapacity,chargingStationId,energyCharged,energyChargedInTransit,energyChargedStopped,speed,acceleration,x,y,lane,posOnLane,timeStopped
0,C6305319.0,0.0000,0.0000,0.0000,17500.0000,35000.0000,,0.0000,0.0000,0.0000,0.0000,0.0000,4016.8263,1694.4668,34572881#1_0,12.1000,0
1,C6305319.0,0.3880,0.3880,0.0000,17499.6120,35000.0000,,0.0000,0.0000,0.0000,0.6631,0.6631,4016.1789,1694.6042,34572881#1_0,12.7631,0
2,C6305319.0,0.8102,1.1982,0.0000,17498.8018,35000.0000,,0.0000,0.0000,0.0000,1.2767,0.6137,4014.9323,1694.8687,34572881#1_0,14.0398,0
3,C6305319.0,1.7241,2.9223,0.0000,17497.0777,35000.0000,,0.0000,0.0000,0.0000,2.2308,0.9541,4012.7543,1695.3309,34572881#1_0,16.2706,0
4,C6305319.0,2.4437,5.3659,0.0000,17494.6341,35000.0000,,0.0000,0.0000,0.0000,3.1305,0.8997,4009.6979,1695.9795,34572881#1_0,19.4011,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
558,C6305319.0,2.3016,539.8838,360.6010,17320.7172,35000.0000,,0.0000,0.0000,0.0000,4.2848,0.7962,2434.7406,1254.3491,-50219636#1_0,26.0864,0
559,C6305319.0,2.8843,542.7682,360.6010,17317.8329,35000.0000,,0.0000,0.0000,0.0000,5.1248,0.8400,2430.0042,1256.3036,-50219636#1_0,31.2111,0
560,C6305319.0,4.2753,547.0435,360.6010,17313.5576,35000.0000,,0.0000,0.0000,0.0000,6.2299,1.1052,2424.2464,1258.6796,-50219636#1_0,37.4411,0
561,C6305319.0,4.4995,551.5430,360.6010,17309.0581,35000.0000,,0.0000,0.0000,0.0000,7.1827,0.9527,2417.6081,1261.4190,-50219636#1_0,44.6237,0


In [96]:
# Load the trip summary output: one row per <tripinfo> element in the file.
file_path = os.path.join(base_folder, "tripinfo.xml")

b_tripinfo_output = et.parse(file_path)
b_tripinfo_output_root = b_tripinfo_output.getroot()

transform = transform_xml_tripinfo(b_tripinfo_output_root)
b_tripinfo_output_list = list(transform)

b_tripinfo_output_pd = pd.DataFrame(b_tripinfo_output_list)

# transform_xml_tripinfo merges the root element's attributes into every
# record. Drop those columns by name instead of by position, so a root with
# no attributes cannot cost us the 'id' column and a root with several
# attributes does not leave extras behind.
b_tripinfo_output_pd = b_tripinfo_output_pd.drop(
    columns=list(b_tripinfo_output_root.attrib), errors='ignore'
)

In [97]:
# Build one row per <route> element, each carrying its list of stops.
file_path = os.path.join(base_folder, "gtfs_pt_vehicles.add.xml")
stops = et.parse(file_path)

transform = transform_xml_stops(stops.getroot())
stops_list = [*transform]

stops_pd = pd.DataFrame(data=stops_list)

In [98]:
# Load the vehicle definitions: one row per <vehicle> element in the file.
file_path = os.path.join(base_folder, "gtfs_pt_vehicles.add.xml")
vehicles = et.parse(file_path)
vehicles_root = vehicles.getroot()

transform = transform_xml(vehicles_root)
vehicles_list = list(transform)

vehicles_pd = pd.DataFrame(vehicles_list)
# transform_xml merges the root element's attributes into every record.
# Drop those columns by name instead of by position, so a root with no
# attributes cannot cost us the 'id' column and a root with several
# attributes does not leave extras behind.
vehicles_pd = vehicles_pd.drop(columns=list(vehicles_root.attrib), errors='ignore')

## Grouping by id

In [99]:
# Split the battery samples by vehicle id; list_of_dfs holds one DataFrame
# per id, in the order the groupby yields them.
grouped_df = battery_output_pd.groupby(by='id')

list_of_dfs = [vehicle_frame for _vehicle_id, vehicle_frame in grouped_df]

In [100]:
# Split the emission samples by vehicle id; list_of_emission_dfs holds one
# DataFrame per id, in the order the groupby yields them.
grouped_emission_df = emission_output_df.groupby(by='id')

list_of_emission_dfs = [vehicle_frame for _vehicle_id, vehicle_frame in grouped_emission_df]



In [101]:

# Build one summary record per vehicle id found in the battery output.
results = []
for group_id, group_data in grouped_df:
    # The last battery sample of the vehicle carries its running totals.
    last_sample = group_data.iloc[-1]

    # Rows of the other tables that belong to this vehicle / its route.
    trip_rows = b_tripinfo_output_pd[b_tripinfo_output_pd['id'] == group_id]
    route = vehicles_pd.loc[vehicles_pd['id'] == group_id, 'route'].values[0]
    route_stops = stops_pd.loc[stops_pd['id'] == route, 'stops']
    emission_rows = get_group_by_id(list_of_emission_dfs, group_id)

    results.append({
        'id': group_id,
        'avgSpeed': group_data['speed'].astype(float).mean(),
        # Net energy: total consumed minus total regenerated.
        'battery': float(last_sample['totalEnergyConsumed']) - float(last_sample['totalEnergyRegenerated']),
        'timeloss': trip_rows['timeLoss'].values[0],
        'routeLength': trip_rows['routeLength'].values[0],
        'numOfStops': route_stops.apply(len).sum(),
        'up': calc_elevation_up(emission_rows),
        'down': calc_elevation_down(emission_rows),
    })

# One row per vehicle.
result_df = pd.DataFrame(data=results)
print(result_df)


           id  avgSpeed   battery timeloss routeLength  numOfStops     up  \
0  C6305319.0  3.308469  195.8197   379.78     1866.71           4  17.54   

    down  
0 -19.48  


In [102]:
#avg_seed_b = C74509135['speed'].astype(float).mean()
#avg_seed_b

In [103]:
#float(C74509106['totalEnergyConsumed'].iloc[-1])-float(C74509106['totalEnergyRegenerated'].iloc[-1])
#24 2214.5173999999997  2219.5933
#06 2218.0583           2222.9716
#35 -615.606            -624.5073000000002

In [104]:
#plt.plot(C74509135['energyConsumed'].astype(float).cumsum())
#plt.xlabel('Time')
#plt.ylabel('Total Energy Regenerated')
#plt.title('Total Energy Regenerated Over Time')
#plt.xticks(rotation=45)
#plt.tight_layout()
#plt.show()

# CSV

In [105]:
# Load the accumulated results table (';'-separated) that new rows are added to.
tableBattery = pd.read_csv('batteryData.csv', sep=';')

tableBattery.shape

(44, 11)

## Settings

In [106]:
# Run-level labels written into the 'loc', 'seed' and 'trafficScale' columns
# of every row appended to the battery table.
# 'loc' is the last path component of the scenario folder.
locSetting = get_last_folder(base_folder)
# NOTE(review): 'dd' and 'wh' look like placeholders — the batteryData.csv rows
# displayed in this notebook carry values such as 1 and 0.3. Confirm the real
# seed and traffic scale before appending.
seedSetting = 'dd'
trafficScaleSetting = 'wh'

# Checklist of the per-vehicle columns; presumably ':)' marks the ones that
# are already computed — TODO confirm the status of the two elevation entries.
#routeLengthSetting = :)
#numOfStopsSetting = :)
#tripIdSetting = :)
#avgSpeedSetting = :)
#timelossSetting = :)
#eleupSetting = 
#eledownSetting = 
#emissionSetting = :)

## DataFrame to CSV

In [107]:
# Map the per-vehicle summary onto the column layout of the battery table.
# The scalar settings are broadcast to every row; building the frame once
# avoids growing tableBattery with a concat per row inside iterrows().
new_rows = pd.DataFrame({
    'loc': locSetting,
    'tripId': result_df['id'],
    'seed': seedSetting,
    'avgSpeed': result_df['avgSpeed'],
    'timeloss': result_df['timeloss'],
    'route_length': result_df['routeLength'],
    'elevation_up': result_df['up'],
    'elevation_down': result_df['down'],
    'trafficScale': trafficScaleSetting,
    'numOfStops': result_df['numOfStops'],
    'emission': result_df['battery']
})

# Nothing to append leaves the table untouched, as the original loop did.
if not new_rows.empty:
    # Any empty or all-NA columns in tableBattery are excluded before
    # concatenating the DataFrames.
    tableBattery = pd.concat(
        [tableBattery.dropna(axis=1, how='all'), new_rows],
        ignore_index=True
    )
# NOTE: this cell reassigns tableBattery from itself, so running it twice
# appends the same rows twice; re-run the read_csv cell first when re-executing.


'id': group_id,
        'avgSpeed': avg_speed,
        'batteryCapacity': energy,
        'timeloss': time_loss,
        'routeLength': route_length,
        'stops': count_stops

In [108]:
# Write the extended table back to the ';'-separated CSV it was loaded from,
# without the index column.
tableBattery.to_csv('batteryData.csv', index=False, sep=';')
# A bare name only renders when it is the cell's last expression, so the
# table is shown after saving rather than as a no-op on the first line.
tableBattery

Unnamed: 0,loc,seed,trafficScale,tripId,avgSpeed,timeloss,route_length,elevation_up,elevation_down,numOfStops,emission
0,normafa_b,1,0.3,C74509106.0,4.541014,454.73,6841.14,0.0,0.0,19.0,2222.9716
1,normafa_b,1,0.3,C74509124.0,4.541174,564.58,6841.14,0.0,0.0,19.0,2219.5933
2,normafa_b,1,0.3,C74509135.0,5.296114,433.93,6896.22,0.0,0.0,18.0,-624.5073
3,normafa_b,1,0.3,C746662.0,5.378957,402.96,7041.3,0.0,0.0,16.0,-872.989
4,normafa_b,1,0.3,C76142196.0,3.737428,133.48,837.41,0.0,0.0,3.0,15.7205
5,normafa_b,1,0.3,C76796102.0,3.682312,182.35,2664.71,0.0,0.0,9.0,451.0965
6,normafa_b,1,0.3,C76796110.0,4.903904,190.17,5331.22,0.0,0.0,15.0,1918.236
7,normafa_b,1,0.3,C76796171.0,4.796271,242.65,2604.73,0.0,0.0,9.0,147.4624
8,normafa_b,1,0.3,C76796174.0,3.679002,140.71,2664.71,0.0,0.0,9.0,452.5743
9,normafa_b,1,0.3,C76796178.0,4.907201,271.69,5331.22,0.0,0.0,15.0,1900.9944
