In [None]:
import numpy as np
import matplotlib.pyplot as plt
import xml.etree.ElementTree as et
import pandas as pd
import gzip

In [None]:
def find_rec(node, element):
    """Yield, in document order, the elements whose ``name`` attribute equals ``element``.

    The search starts at ``node`` itself and walks its descendants depth
    first. A matching element is yielded and its own children are not
    searched any further.
    """
    pending = [node]
    while pending:
        current = pending.pop()
        if current.get('name') == element:
            yield current
            continue
        # Push children right-to-left so the leftmost child is popped first,
        # which keeps the traversal in pre-order.
        pending.extend(reversed(list(current)))

# def find_rec(node, element):
#     for subnode in list(node):
#         if subnode.get == element:
#             yield subnode
#         for child in find_rec(item, element):
#             yield child

def readEventsXml(xml_file, df_cols): 
    """Parse an XML file into a pandas DataFrame of element attributes.

    One row is produced for every direct child element of the XML root.
    Each column named in ``df_cols`` is filled from the XML *attribute* of
    the same name on that element; an element without that attribute gets
    ``None``. Values are kept as the raw attribute strings.

    Parameters
    ----------
    xml_file : str or path-like
        Path of the XML file. A name ending in ``gz`` is read through gzip.
    df_cols : sequence of str
        Attribute names to extract; also the column order of the result.

    Returns
    -------
    pandas.DataFrame
        One row per child element of the root, columns as in ``df_cols``.
    """
    path = str(xml_file)
    if path.endswith('gz'):
        # The context manager closes the gzip handle as soon as the tree
        # has been read into memory.
        with gzip.open(path, 'rb') as compressed:
            xroot = et.parse(compressed).getroot()
    else:
        xroot = et.parse(path).getroot()

    # Element.get() already returns None for a missing attribute, so no
    # explicit presence check is needed.
    rows = [{col: node.get(col) for col in df_cols} for node in xroot]

    return pd.DataFrame(rows, columns=df_cols)

def readPlansXml(xml_file, df_cols): 
    """Collect person attributes from a plans XML file into a DataFrame.

    For every ``person`` element directly under the root, and for every
    ``plan`` child of that person, the person's subtree is searched with
    ``find_rec`` for elements whose ``name`` attribute is ``'routingMode'``.
    Each match appends one row. The row's columns are the *person*
    element's XML attributes named in ``df_cols`` (``None`` when the
    attribute is absent).

    Parameters
    ----------
    xml_file : str or path-like
        Path of the XML file. A name ending in ``gz`` is read through gzip.
    df_cols : sequence of str
        Attribute names to read from each person element; also the column
        order of the result.

    Returns
    -------
    pandas.DataFrame
        One row per (plan, matching element) pair, columns as in ``df_cols``.
    """
    path = str(xml_file)
    if path.endswith('gz'):
        # The context manager closes the gzip handle as soon as the tree
        # has been read into memory.
        with gzip.open(path, 'rb') as compressed:
            xroot = et.parse(compressed).getroot()
    else:
        xroot = et.parse(path).getroot()

    rows = []

    for node in xroot:
        if node.tag != 'person':
            continue
        for node_ in list(node):
            if node_.tag != 'plan':
                continue
            # NOTE(review): the search is rooted at the person (`node`), not
            # at the current plan (`node_`), so a person with several plans
            # yields every match once per plan. Confirm whether `node_` was
            # intended.
            for mn in find_rec(node, 'routingMode'):
                # Debug output of the matched element's attributes; this is
                # the only place the match itself is used.
                print(mn.items())
                # NOTE(review): values are read from the person element, not
                # from `mn` -- confirm this is the intended source.
                # A fresh dict per row replaces the shared list that used to
                # grow with every match.
                rows.append({col: node.get(col) for col in df_cols})

    return pd.DataFrame(rows, columns=df_cols)

In [None]:
# xml_file = "../scenarios/grid_model_bimodal/output/output_plans.xml.gz"
# plans = readPlansXml(xml_file, [])

In [None]:
# Load the events file; each listed name is read as an XML attribute and
# becomes one DataFrame column.
xml_file = "../scenarios/grid_model_bimodal/output/output_events.xml.gz"
events = readEventsXml(
    xml_file,
    ["time", "type", "person", "link", "actType", "legMode"],
)
# display(events)

In [None]:
# Split the events by the pattern found in the 'person' id.
# - The raw string keeps `\d` a regex digit class instead of an invalid
#   string escape.
# - `na=False` makes rows without a person id count as "no match", so no
#   separate notna() mask is needed.
# - `.copy()` gives events_passengers its own data, so the flag columns added
#   below do not trigger SettingWithCopyWarning and the global
#   'chained_assignment' option never has to be switched off and on.
events_passengers = events[events['person'].str.contains(r'pt_\d', na=False)].copy()
events_drt = events[events['person'].str.contains('drt', regex=False, na=False)]
events_pt = events[events['person'].str.contains('pt_tr', regex=False, na=False)]
passengers = pd.Series(events_passengers['person'].unique())

# One boolean flag per leg mode for every passenger event.
events_passengers['is_walk'] = events_passengers['legMode'] == 'walk'
events_passengers['is_pt'] = events_passengers['legMode'] == 'pt'
events_passengers['is_drt'] = events_passengers['legMode'] == 'drt'

# Per person: True if any of their events used the mode. The flag columns are
# selected before aggregating so any() is not evaluated on unrelated columns.
used_modes = events_passengers.groupby('person')[['is_walk', 'is_pt', 'is_drt']].any()

In [None]:
# Number of distinct leg modes per person, largest count first.
# 'legMode' is selected before aggregating so nunique() is not computed for
# every other column only to be thrown away.
person_uniqueLegModes = (
    events_passengers
    .groupby('person')['legMode']
    .nunique()
    .sort_values(ascending=False)
)

In [None]:
# Show the per-person distinct-mode counts, then the headline totals.
display(person_uniqueLegModes)
print('#People: ', events_passengers.groupby('person').ngroups)
for _label, _flag in (
    ('#People who use walking', 'is_walk'),
    ('#People who use pt', 'is_pt'),
    ('#People who use drt', 'is_drt'),
):
    # Summing a boolean column counts the persons flagged True.
    print(_label, used_modes[_flag].sum())

In [None]:
# with pd.option_context('display.max_rows', None):
#     display(person_uniqueLegModes)