In [1]:
#Import relevant packages
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

# Load both semicolon-separated source files
_csv_kwargs = {'sep': ';'}
df_pres = pd.read_csv('data/transport_performance.csv', **_csv_kwargs)
df_mob = pd.read_csv('data/mobiliteit.csv', **_csv_kwargs)

In [2]:
# Prepare the transport performance data.
# Written as one reassigning chain (no inplace=True) that tolerates columns
# which are already renamed or dropped, so re-running this cell does not raise.
df_pres = (
    df_pres
    # Rename the long source column to a short working name
    # (rename skips labels that are no longer present)
    .rename(columns={'Transport performance (billion passenger kilometres )': 'Total travel'})
    # Drop unnecessary columns; errors='ignore' skips ones already removed
    .drop(columns=['Margins', 'Population'], errors='ignore')
    # Coerce to numeric; values that cannot be parsed become NaN
    .assign(**{
        'Periods': lambda d: pd.to_numeric(d['Periods'], errors='coerce'),
        'Total travel': lambda d: pd.to_numeric(d['Total travel'], errors='coerce'),
    })
)

### Graph: How did the transport performance of different modes of transport change during and after the corona period (2020-2022) compared to 2019?



In [3]:
# Keep only the rows whose region is 'The Netherlands'
is_national = df_pres['Region characteristics'].eq('The Netherlands')
df = df_pres[is_national].copy()

# Line plot of total travel against period, one line per travel mode
fig = px.line(data_frame=df, x='Periods', y='Total travel', color='Modes of travel')
fig.show()

### Graph: Which transport modes show the largest recovery compared to pre-COVID levels?

In [4]:
# Select only the Netherlands as region. Rows are ordered by period so that
# 'first' below means each mode's earliest period regardless of the row order
# in the file; the stable sort keeps the original order of rows within a period.
df = (
    df_pres.loc[df_pres['Region characteristics'] == 'The Netherlands']
           .sort_values('Periods', kind='stable')
           .copy()
)

# Base value for each mode: its first non-null 'Total travel' in period order
base = df.groupby('Modes of travel')['Total travel'].transform('first')

# Create an index relative to that base value (base = 100)
df['Travel (km) index'] = df['Total travel'] / base * 100

fig = px.line(df, x='Periods', y='Travel (km) index', color='Modes of travel',
              title='Relative change in travel kilometers split by travel mode',
              labels={'Periods': 'Year'})
# Dotted reference line at the base level
fig.add_hline(y=100, line_dash='dot')
fig.show()

### Graph: Are there significant differences in the degree of recovery between urban and rural regions in the Netherlands?

In [7]:
# Year whose value is scaled to 100 in the index
ref_year = 2018
# Urbanisation classes to compare
levels = [
    "Extremely urbanised",
    "Strongly urbanised",
    "Moderately urbanised",
    "Hardly urbanised",
    "Not urbanised",
]

# Rows for the "Total" travel mode within the selected urbanisation classes
is_total = df_pres["Modes of travel"].eq("Total")
in_levels = df_pres["Region characteristics"].isin(levels)
df = df_pres[is_total & in_levels].copy()
df["Periods"] = pd.to_numeric(df["Periods"], errors="coerce")

# Reference-year value per class, used as the denominator of the index
ref_rows = df[df["Periods"] == ref_year]
base_per_level = ref_rows.set_index("Region characteristics")["Total travel"]
denominator = df["Region characteristics"].map(base_per_level)
df["Index"] = df["Total travel"] / denominator * 100

# One line per urbanisation class, with a dotted reference line at 100
fig1 = px.line(
    df,
    x="Periods",
    y="Index",
    color="Region characteristics",
    markers=True,
    title=f"Recovery index ({ref_year} = 100) by degree of urbanisation - Netherlands",
    labels={"Periods": "Year", "Index": "Index"},
)
fig1.add_hline(y=100, line_dash="dot")
fig1.show()

In [8]:
# Map each urbanisation class to one of two groups
group_map = {
    **dict.fromkeys(
        ["Extremely urbanised", "Strongly urbanised", "Moderately urbanised"], "Urban"
    ),
    **dict.fromkeys(["Hardly urbanised", "Not urbanised"], "Rural"),
}

# Total travel summed per period and group
uvr = pd.DataFrame({
    "Periods": pd.to_numeric(df["Periods"], errors="coerce"),
    "Group": df["Region characteristics"].map(group_map),
    "Total travel": df["Total travel"],
})
uvr = uvr.groupby(["Periods", "Group"], as_index=False)["Total travel"].sum()

# Index each group against its own reference-year total
ref_totals = uvr[uvr["Periods"] == ref_year]
base_grp = ref_totals.set_index("Group")["Total travel"]
group_base = uvr["Group"].map(base_grp)
uvr["Index"] = uvr["Total travel"] / group_base * 100

# One line per group, with a dotted reference line at 100
fig2 = px.line(
    uvr,
    x="Periods",
    y="Index",
    color="Group",
    markers=True,
    title=f"Recovery index ({ref_year} = 100): Urban vs Rural - Netherlands",
    labels={"Periods": "Year", "Index": "Index"},
)
fig2.add_hline(y=100, line_dash="dot")
fig2.show()

### Graph: Are there significant differences in the degree of recovery between provinces in the Netherlands?

In [11]:
# Region labels to compare (twelve entries, each with a "(PV)" suffix)
regions = [
    'Fryslân (PV)',
    'Groningen (PV)',
    'Drenthe (PV)',
    'Overijssel (PV)',
    'Flevoland (PV)',
    'Gelderland (PV)',
    'Utrecht (PV)',
    'Noord-Holland (PV)',
    'Zuid-Holland (PV)',
    'Zeeland (PV)',
    'Noord-Brabant (PV)',
    'Limburg (PV)',
]

# Rows for the 'Total' travel mode within the listed regions
is_total = df_pres['Modes of travel'] == 'Total'
in_regions = df_pres['Region characteristics'].isin(regions)
data_selected = df_pres[is_total & in_regions].copy()
data_selected['Periods'] = pd.to_numeric(data_selected['Periods'], errors='coerce')

# Express each region's travel as a percentage of its own 2018 value
baseline_rows = data_selected[data_selected['Periods'] == 2018]
baseline = baseline_rows.set_index('Region characteristics')['Total travel']
ratio = data_selected['Total travel'] / data_selected['Region characteristics'].map(baseline)
data_selected['Relative travel (%)'] = ratio * 100

# One line per region
fig3 = px.line(
    data_selected,
    x='Periods',
    y='Relative travel (%)',
    color='Region characteristics',
    title='Fig3 — Total travel over time (2018 = 100%)',
)
fig3.update_layout(xaxis_title='Year', yaxis_title='Relative travel (%)')
fig3.show()

In [12]:
# Wide table: one row per region, one column per compared year (2019 and 2023)
in_compared_years = data_selected['Periods'].isin([2019, 2023])
diff_pivot = data_selected[in_compared_years].pivot_table(
    index='Region characteristics',
    columns='Periods',
    values='Relative travel (%)',
)

# Change between the two years, largest increase first
diff_pivot['Difference'] = diff_pivot[2023] - diff_pivot[2019]
diff_pivot = diff_pivot.reset_index().sort_values('Difference', ascending=False)

# Bar chart of the change per region, with x tick labels rotated 45 degrees
fig4 = px.bar(
    diff_pivot,
    x='Region characteristics',
    y='Difference',
    title='Fig4 — Change in relative total travel between 2019 and 2023 (percentage points)',
    labels={'Difference': 'Difference (2023 - 2019)'},
)
fig4.update_layout(xaxis_tickangle=45)
fig4.show()