In [8]:
import numpy as np
import matplotlib.pyplot as plt
import xml.etree.ElementTree as et
import pandas as pd
import gzip

In [9]:
def parse_XML(xml_file, df_cols):
    """Parse an XML file into a pandas DataFrame with the given columns.

    Every direct child of the XML root element becomes one row. Each
    name in ``df_cols`` is looked up as an XML *attribute* of that child
    element (not as sub-element text); an attribute that is absent
    yields ``None`` in the corresponding cell. No type conversion is
    applied to the attribute values.

    Parameters
    ----------
    xml_file : str or path-like
        Path of the XML file. A path ending in ``gz`` is opened through
        ``gzip`` and decompressed while parsing.
    df_cols : sequence of str
        Attribute names to extract; they become the DataFrame columns,
        in this order.

    Returns
    -------
    pandas.DataFrame
        One row per child element of the root, one column per entry of
        ``df_cols``.
    """
    # str() lets pathlib.Path objects pass the suffix check as well.
    if str(xml_file).endswith('gz'):
        # The context manager closes the gzip handle once parsing is done;
        # ElementTree does not close file objects handed to it.
        with gzip.open(xml_file, 'rb') as file_uncompressed:
            xroot = et.parse(file_uncompressed).getroot()
    else:
        xroot = et.parse(xml_file).getroot()

    # Element.get returns None for a missing attribute, so no explicit
    # presence check is needed.
    rows = [{col: node.get(col) for col in df_cols} for node in xroot]

    return pd.DataFrame(rows, columns=df_cols)

In [10]:
# Gzipped event log to analyse and the event attributes to extract from it.
xml_file = "../scenarios/grid_model_bimodal/output/output_events.xml.gz"
event_columns = ["time", "type", "person", "link", "actType", "legMode"]
events = parse_XML(xml_file, event_columns)
# display(events)

In [11]:
# Split the event log by agent type, using a substring match on the person id.
# na=False maps events without a person attribute (None in the column)
# straight to False instead of relying on pandas coercing `NaN & bool`.
person_ids = events['person']
events_drt = events[person_ids.str.contains('drt', regex=False, na=False)]
events_passengers = events[person_ids.str.contains('pt', regex=False, na=False)]
# used_modes = pd.DataFrame()
# events_passengers['is_walk'] = events_passengers['legMode'] == 'walk'
# events_passengers['is_pt'] = events_passengers['legMode'] == 'pt'
# events_passengers['is_drt'] = events_passengers['legMode'] == 'drt'

# events_passengers.groupby('person').any()['is_walk'])#[['is_walk','is_pt','is_drt']])
# with pd.option_context('display.max_rows', None):
#     display(events_drt)
# display(used_modes)

In [14]:
# Number of distinct leg modes per person, highest count first.
# Selecting the column before aggregating avoids computing nunique for
# every other column only to discard the result.
person_uniqueLegModes = (
    events_passengers.groupby('person')['legMode']
    .nunique()
    .sort_values(ascending=False)
)

# Number of distinct persons with at least one event whose legMode is 'walk'.
walk_events = events_passengers[events_passengers['legMode'] == 'walk']
person_w_walk = walk_events['person'].nunique()
display(person_w_walk)

100

In [15]:
display(person_uniqueLegModes)
# `person_wo_walk` is not defined anywhere in this notebook, so it would
# raise NameError on a fresh kernel; `person_w_walk` is the defined counter.
display(person_w_walk)

person
pt_41_18_44    2
pt_27_43_77    2
pt_58_91_65    2
pt_56_97_98    2
pt_54_1_23     2
              ..
pt_55_0_37     1
pt_52_85_25    1
pt_4_93_97     1
pt_45_44_14    1
pt_tr_9_1      1
Name: legMode, Length: 140, dtype: int64

100