## Load in packages and data

In [1]:
import numpy as np
import pandas as pd
# import geopandas as gpd
import matplotlib.pyplot as plt
import matsim_output_analysis as moa
import operator

In [2]:
df = pd.read_csv(r"2000_sample_routes.csv") #using look3, ignoring some fields such as carCO2
# df_spare = df #for resetting if needed

In [3]:
# df = df_spare 
df = df[~df["subpopulation"].isin(["hgv","lgv","airport", "lgv_ev"])] #choose only the actual people
df["negative_utility_somewhere"] = df.apply(lambda row: moa.neg_utility_somewhere(row), axis=1)
x = (sum(df["negative_utility_somewhere"])/len(df))*100
print(str(round(x,2))+" pc of people have a negative utility")
# df = df[df["negative_utility_somewhere"]==0] # keep only positive utility people
df=df[df["region"]=="East of England"] #temporarily remove Londoners while I sort the road pricing out
df = df.drop("negative_utility_somewhere", axis=1)

#TODO: decide how to handle negatives properly
#TODO: add London back in once road pricing has been sorted

6.33 pc of people have a negative utility


In [4]:
#turn plan summaries into lists instead of long strings
df['selected plan activity_type_or_mode'] = df['selected plan activity_type_or_mode'].str.split('; ')
df['selected plan distance_travelled'] = df['selected plan distance_travelled'].str.split('; ')
df['selected plan duration'] = df['selected plan duration'].str.split('; ')
df['selected plan location'] = df['selected plan location'].str.split('; ')
df['selected plan routes'] = df['selected plan routes'].str.split('; ')

df['unselected plan (1) activity_type_or_mode'] = df['unselected plan (1) activity_type_or_mode'].str.split('; ')
df['unselected plan (1) distance_travelled'] = df['unselected plan (1) distance_travelled'].str.split('; ')
df['unselected plan (1) duration'] = df['unselected plan (1) duration'].str.split('; ')
df['unselected plan (1) location'] = df['unselected plan (1) location'].str.split('; ')
df['unselected plan (1) routes'] = df['unselected plan (1) routes'].str.split('; ')

df['unselected plan (2) activity_type_or_mode'] = df['unselected plan (2) activity_type_or_mode'].str.split('; ')
df['unselected plan (2) distance_travelled'] = df['unselected plan (2) distance_travelled'].str.split('; ')
df['unselected plan (2) duration'] = df['unselected plan (2) duration'].str.split('; ')
df['unselected plan (2) location'] = df['unselected plan (2) location'].str.split('; ')
df['unselected plan (2) routes'] = df['unselected plan (2) routes'].str.split('; ')

df['unselected plan (3) activity_type_or_mode'] = df['unselected plan (3) activity_type_or_mode'].str.split('; ')
df['unselected plan (3) distance_travelled'] = df['unselected plan (3) distance_travelled'].str.split('; ')
df['unselected plan (3) duration'] = df['unselected plan (3) duration'].str.split('; ')
df['unselected plan (3) location'] = df['unselected plan (3) location'].str.split('; ')
df['unselected plan (3) routes'] = df['unselected plan (3) routes'].str.split('; ')

df['unselected plan (4) activity_type_or_mode'] = df['unselected plan (4) activity_type_or_mode'].str.split('; ')
df['unselected plan (4) distance_travelled'] = df['unselected plan (4) distance_travelled'].str.split('; ')
df['unselected plan (4) duration'] = df['unselected plan (4) duration'].str.split('; ')
df['unselected plan (4) location'] = df['unselected plan (4) location'].str.split('; ')
df['unselected plan (4) routes'] = df['unselected plan (4) routes'].str.split('; ')

## Remove "pt interaction" terms and all corresponding columns

In [7]:
#extract the indices of where pt interaction happens
df['selected plan ptinteraction_indices'] = df['selected plan activity_type_or_mode'].apply(moa.get_ptinteraction_index)
df['unselected plan (1) ptinteraction_indices'] = df['unselected plan (1) activity_type_or_mode'].apply(moa.get_ptinteraction_index)
df['unselected plan (2) ptinteraction_indices'] = df['unselected plan (2) activity_type_or_mode'].apply(moa.get_ptinteraction_index)
df['unselected plan (3) ptinteraction_indices'] = df['unselected plan (3) activity_type_or_mode'].apply(moa.get_ptinteraction_index)
df['unselected plan (4) ptinteraction_indices'] = df['unselected plan (4) activity_type_or_mode'].apply(moa.get_ptinteraction_index)

In [8]:
#remove the pt interaction activities
df['selected plan activity_type_or_mode'] = df.apply(lambda row: moa.remove_ptinteraction(row['selected plan ptinteraction_indices'], row['selected plan activity_type_or_mode']), axis=1)
df['unselected plan (1) activity_type_or_mode'] = df.apply(lambda row: moa.remove_ptinteraction(row['unselected plan (1) ptinteraction_indices'], row['unselected plan (1) activity_type_or_mode']), axis=1)
df['unselected plan (2) activity_type_or_mode'] = df.apply(lambda row: moa.remove_ptinteraction(row['unselected plan (2) ptinteraction_indices'], row['unselected plan (2) activity_type_or_mode']), axis=1)
df['unselected plan (3) activity_type_or_mode'] = df.apply(lambda row: moa.remove_ptinteraction(row['unselected plan (3) ptinteraction_indices'], row['unselected plan (3) activity_type_or_mode']), axis=1)
df['unselected plan (4) activity_type_or_mode'] = df.apply(lambda row: moa.remove_ptinteraction(row['unselected plan (4) ptinteraction_indices'], row['unselected plan (4) activity_type_or_mode']), axis=1)

In [9]:
#remove the correspinding pt interaction durations
df['selected plan duration'] = df.apply(lambda row: moa.remove_ptinteraction(row['selected plan ptinteraction_indices'], row['selected plan duration']), axis=1)
df['unselected plan (1) duration'] = df.apply(lambda row: moa.remove_ptinteraction(row['unselected plan (1) ptinteraction_indices'], row['unselected plan (1) duration']), axis=1)
df['unselected plan (2) duration'] = df.apply(lambda row: moa.remove_ptinteraction(row['unselected plan (2) ptinteraction_indices'], row['unselected plan (2) duration']), axis=1)
df['unselected plan (3) duration'] = df.apply(lambda row: moa.remove_ptinteraction(row['unselected plan (3) ptinteraction_indices'], row['unselected plan (3) duration']), axis=1)
df['unselected plan (4) duration'] = df.apply(lambda row: moa.remove_ptinteraction(row['unselected plan (4) ptinteraction_indices'], row['unselected plan (4) duration']), axis=1)

In [10]:
#remove the correspinding pt interaction locations
df['selected plan location'] = df.apply(lambda row: moa.remove_ptinteraction(row['selected plan ptinteraction_indices'], row['selected plan location']), axis=1)
df['unselected plan (1) location'] = df.apply(lambda row: moa.remove_ptinteraction(row['unselected plan (1) ptinteraction_indices'], row['unselected plan (1) location']), axis=1)
df['unselected plan (2) location'] = df.apply(lambda row: moa.remove_ptinteraction(row['unselected plan (2) ptinteraction_indices'], row['unselected plan (2) location']), axis=1)
df['unselected plan (3) location'] = df.apply(lambda row: moa.remove_ptinteraction(row['unselected plan (3) ptinteraction_indices'], row['unselected plan (3) location']), axis=1)
df['unselected plan (4) location'] = df.apply(lambda row: moa.remove_ptinteraction(row['unselected plan (4) ptinteraction_indices'], row['unselected plan (4) location']), axis=1)

In [11]:
#remove the correspinding pt interaction distance travelled
df['selected plan distance_travelled'] = df.apply(lambda row: moa.remove_ptinteraction(row['selected plan ptinteraction_indices'], row['selected plan distance_travelled']), axis=1)
df['unselected plan (1) distance_travelled'] = df.apply(lambda row: moa.remove_ptinteraction(row['unselected plan (1) ptinteraction_indices'], row['unselected plan (1) distance_travelled']), axis=1)
df['unselected plan (2) distance_travelled'] = df.apply(lambda row: moa.remove_ptinteraction(row['unselected plan (2) ptinteraction_indices'], row['unselected plan (2) distance_travelled']), axis=1)
df['unselected plan (3) distance_travelled'] = df.apply(lambda row: moa.remove_ptinteraction(row['unselected plan (3) ptinteraction_indices'], row['unselected plan (3) distance_travelled']), axis=1)
df['unselected plan (4) distance_travelled'] = df.apply(lambda row: moa.remove_ptinteraction(row['unselected plan (4) ptinteraction_indices'], row['unselected plan (4) distance_travelled']), axis=1)

In [12]:
#clean  up table and remove indices
df = df.drop(["selected plan ptinteraction_indices","unselected plan (1) ptinteraction_indices","unselected plan (2) ptinteraction_indices","unselected plan (3) ptinteraction_indices","unselected plan (4) ptinteraction_indices"], axis=1)

## Check basic stuff of plans e.g., wraparound, end of day, longest legs

In [13]:
#check if plans wraparound (just check for selected plan)
df["wraparound"] = df["selected plan activity_type_or_mode"].apply(lambda x: 1 if x[0]==x[-1] else 0)
x = sum(df["wraparound"])/len(df) * 100
print(str(round(x, 2))+" pc of people have a wraparound plan")
# df = df.drop("wraparound", axis=1)

#TODO: should we only keep wraparound plans??

91.03 pc of people have a wraparound plan


In [14]:
#check what time the last activity ends (just check for selected plan). should be less than 32 hours
df["eod_time"] = df["selected plan duration"].apply(lambda x: x[-1])
print(str(max(df["eod_time"]))+" is the latest end of final activity")
print(str(min(df["eod_time"]))+" is the earliest end of final activity")
df = df.drop("eod_time", axis=1)

25:29:22 is the latest end of final activity
22:47:47 is the earliest end of final activity


In [15]:
#check how long the longest leg of a trip is
df["legs_indices"] = df["selected plan activity_type_or_mode"].apply(moa.get_trips_indices_only)
df["activities_indices"] = df["selected plan activity_type_or_mode"].apply(moa.get_activities_indices_only)
df["selected plan leg_durations"] = df.apply(lambda row: moa.get_trips_duration_or_mode(row["legs_indices"], row["selected plan duration"]), axis=1)
df["selected plan activity_durations"] = df.apply(lambda row: moa.get_trips_duration_or_mode(row["activities_indices"], row["selected plan duration"]), axis=1)

print(str(max(df["selected plan leg_durations"]))+" is the longest leg recorded")

['77:21:23'] is the longest leg recorded


In [None]:
#check how utility changes over time - mean
df2 = df[["surveyyear", "selected plan utility"]]
df2 = df2.groupby('surveyyear', as_index=False).mean()
plt.plot(df2["surveyyear"], df2["selected plan utility"])
plt.title("mean utility per year")

In [None]:
#check how utility changes over time - median
df2 = df[["surveyyear", "selected plan utility"]]
df2 = df2.groupby('surveyyear', as_index=False).median()
plt.plot(df2["surveyyear"], df2["selected plan utility"])
plt.title("median utility per year (including negative)")

## Calculating utility components

Travel utility

In [16]:
# 1. Get the trips out 
df["selected plan trips"] = df.apply(lambda row: moa.group_legs_into_trips(row["selected plan activity_type_or_mode"], row["activities_indices"]), axis=1)
df["unselected plan (1) trips"] = df.apply(lambda row: moa.group_legs_into_trips(row["unselected plan (1) activity_type_or_mode"], row["activities_indices"]), axis=1)
df["unselected plan (2) trips"] = df.apply(lambda row: moa.group_legs_into_trips(row["unselected plan (2) activity_type_or_mode"], row["activities_indices"]), axis=1)
df["unselected plan (3) trips"] = df.apply(lambda row: moa.group_legs_into_trips(row["unselected plan (3) activity_type_or_mode"], row["activities_indices"]), axis=1)
df["unselected plan (4) trips"] = df.apply(lambda row: moa.group_legs_into_trips(row["unselected plan (4) activity_type_or_mode"], row["activities_indices"]), axis=1)

In [17]:
# 1. Keep corresponding trip durations and distances
df["selected plan trips_duration"] = df.apply(lambda row: moa.group_legs_into_trips_d(row["selected plan duration"], row["activities_indices"], "duration"), axis=1)
df["unselected plan (1) trips_duration"] = df.apply(lambda row: moa.group_legs_into_trips_d(row["unselected plan (1) duration"], row["activities_indices"], "duration"), axis=1)
df["unselected plan (2) trips_duration"] = df.apply(lambda row: moa.group_legs_into_trips_d(row["unselected plan (2) duration"], row["activities_indices"], "duration"), axis=1)
df["unselected plan (3) trips_duration"] = df.apply(lambda row: moa.group_legs_into_trips_d(row["unselected plan (3) duration"], row["activities_indices"], "duration"), axis=1)
df["unselected plan (4) trips_duration"] = df.apply(lambda row: moa.group_legs_into_trips_d(row["unselected plan (4) duration"], row["activities_indices"], "duration"), axis=1)

df["selected plan trips_distance"] = df.apply(lambda row: moa.group_legs_into_trips_d(row["selected plan distance_travelled"], row["activities_indices"], "distance"), axis=1)
df["unselected plan (1) trips_distance"] = df.apply(lambda row: moa.group_legs_into_trips_d(row["unselected plan (1) distance_travelled"], row["activities_indices"], "distance"), axis=1)
df["unselected plan (2) trips_distance"] = df.apply(lambda row: moa.group_legs_into_trips_d(row["unselected plan (2) distance_travelled"], row["activities_indices"], "distance"), axis=1)
df["unselected plan (3) trips_distance"] = df.apply(lambda row: moa.group_legs_into_trips_d(row["unselected plan (3) distance_travelled"], row["activities_indices"], "distance"), axis=1)
df["unselected plan (4) trips_distance"] = df.apply(lambda row: moa.group_legs_into_trips_d(row["unselected plan (4) distance_travelled"], row["activities_indices"], "distance"), axis=1)

In [None]:
def count_transfers(modes_in_leg):
    publicTrans = ["subway","bus","rail", "tram","ferry", "pt"]
    transfers = 0
    for i in range(len(modes_in_leg)-1):
        if modes_in_leg[i] in publicTrans and modes_in_leg[i + 1] in publicTrans:
            transfers += 1
    return transfers
# TODO: fix this to overlook short walks!
#calculate the utility lost across trips in a plan. returns a list of the utility from each trip


In [51]:
#do the tolls
import xmltodict
import pprint

with open('data/2040_do_minimum_40wfh_rerun_10pc_20250306/output_toll.xml', 'r', encoding='utf-8') as file:
    my_xml = file.read()
output_tolls = xmltodict.parse(my_xml)
tolls = output_tolls["roadpricing"]["links"]["link"]
tolls2  = {item['@id']: item['cost']['@amount'] for item in tolls}

In [62]:
df["calculated_travel_utility"] = df.apply(lambda row: moa.calculate_travel_utility(row["selected plan trips"], row["selected plan trips_duration"], row["selected plan trips_distance"], row["subpopulation"], row["selected plan routes"], tolls2), axis=1)

TypeError: calculate_travel_utility() takes 4 positional arguments but 6 were given

Activity utility

In [None]:
# just for initial sense checking, see if the calcs work for "well behaved" agents, ie those who DO wraparound and have positive utility
df_wellbehaved = df[df["selected plan utility"]>0]
df_wellbehaved = df_wellbehaved[df_wellbehaved["wraparound"]==1]

In [None]:
df_wellbehaved["activity_durations"] = df_wellbehaved.apply(lambda row: moa.get_activity_durations(row["activities_indices"], row["selected plan duration"], row["selected plan activity_type_or_mode"]), axis=1)

In [None]:
df_wellbehaved["selected plan activities"] = df_wellbehaved.apply(lambda row: moa.get_activities(row["selected plan activity_type_or_mode"], row["activities_indices"]), axis=1)

In [None]:
df_wellbehaved["calculated_activity_utility"] = df_wellbehaved.apply(lambda row: moa.calculate_activity_utility(row["selected plan activities"], row["activity_durations"]), axis=1)

In [None]:
df_wellbehaved["total_travel_u"] = df_wellbehaved.apply(lambda row: sum(row["calculated_travel_utility"]), axis=1)
df_wellbehaved.iloc[119]["total_travel_u"]

In [None]:
df_wellbehaved["total_activity_u"] = df_wellbehaved.apply(lambda row: sum(row["calculated_activity_utility"]), axis=1)
df_wellbehaved.iloc[119]["total_activity_u"]

In [None]:
df_wellbehaved["calculated_total_u"] = df_wellbehaved["total_travel_u"]+df_wellbehaved["total_activity_u"]
df_wellbehaved.iloc[119]["calculated_total_u"]

In [None]:
df_wellbehaved["travel_tot"] = df_wellbehaved["calculated_travel_utility"].apply(lambda x: sum(x))
df_wellbehaved["activity_tot"] = df_wellbehaved["calculated_activity_utility"].apply(lambda x: sum(x))

In [None]:
print(max(df_wellbehaved["travel_tot"]))
print(min(df_wellbehaved["travel_tot"]))
print(max(df_wellbehaved["activity_tot"]))
print(min(df_wellbehaved["activity_tot"]))

In [None]:
plt.hist(df_wellbehaved["calculated_total_u"])

In [None]:
plt.plot(df_wellbehaved["selected plan utility"], df_wellbehaved["calculated_total_u"], "x")

In [None]:
us = (list(df_wellbehaved["calculated_total_u"]))

In [None]:
for i in range(len(us)):
    if  us[i] < -400:
        print(i)

In [None]:
df_wellbehaved.iloc[31]["calculated_total_u"]

In [None]:
df_wellbehaved.iloc[210]["calculated_travel_utility"]

In [None]:
df_wellbehaved.iloc[626]["selected plan utility"]

In [None]:
df_wellbehaved.iloc[626]["selected plan duration"]

In [None]:
df_wellbehaved.iloc[626]["selected plan distance_travelled"]

In [None]:
df_wellbehaved.iloc[626]["selected plan activity_type_or_mode"]

In [None]:
df_wellbehaved.iloc[626]["activity_durations"]

In [None]:
df_wellbehaved.iloc[210]["calculated_activity_utility"]

In [None]:
df_wellbehaved.iloc[31]["selected plan trips"]

In [None]:
list(df_wellbehaved.columns.values)

In [None]:
a = df_wellbehaved.iloc[31]["selected plan trips"]
b = df_wellbehaved.iloc[31]["selected plan trips_distance"]
c = df_wellbehaved.iloc[31]["selected plan trips_duration"]
d = "high"

In [None]:
travtest = moa.calculate_travel_utility(a,b,c,d)
print(travtest)

## Whole plan modal flexibility

This requires the function to choose only the longest leg per trip and use that as a proxy for simplicity. Then compare the plan's set of main modes across plans and compare this to utility change across plans. Completed in old code setup. Skipped for now here. 

## Trip-based modal flexibility

Here we can unpick the utility function and compare utility for a person for a given trip. We can look at either a given trip number (e.g., first trip of the day), the longest (distance) trip, a given trip purpose (e.g., travel to work), or get a composite across the whole day (complex!!). We can also look at just the longest leg of the trip for simplicity or look at the total set of legs together. To do this well, we need to understand the config files properly though - **AGH**.

### First trip of the day, combining all legs

1. Separate out the trips and choose the first (for each plan)
2. Find the change in mode by comparing the whole combination of modes -> check, this might be problematic and we may need to revert to looking at main mode (by distance) for this step
3. Calculate the utility lost for each leg and sum to find utility of trip. For each plan
4. Find the change in utility across plans
5. Find the flexibility by comparing change in mode to change in plans