In [11]:
#Import relevant packages
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

#Read the data
df_pres = pd.read_csv('data/transport_performance.csv', sep=';')
df_mob= pd.read_csv('data/mobiliteit.csv', sep=';')
df_pres.head()

Unnamed: 0,Modes of travel,Population,Margins,Region characteristics,Periods,Transport performance (billion passenger kilometres )
0,Total,Population 6 years or older,Value,The Netherlands,2018,218.1
1,Total,Population 6 years or older,Value,The Netherlands,2019,218.8
2,Total,Population 6 years or older,Value,The Netherlands,2020,152.0
3,Total,Population 6 years or older,Value,The Netherlands,2021,168.4
4,Total,Population 6 years or older,Value,The Netherlands,2022,186.9


In [12]:
#Initialise prestation data
df_pres['Periods'] = pd.to_numeric(df_pres['Periods'], errors='coerce')
df_pres['Transport performance (billion passenger kilometres )'] = pd.to_numeric(df_pres['Transport performance (billion passenger kilometres )'], errors='coerce')

#Rename columns
df_pres.rename({'Transport performance (billion passenger kilometres )': 'Total travel'}, axis=1, inplace=True)

#Drop unnesecary columns
df_pres.drop('Margins', axis=1, inplace=True)
df_pres.drop('Population', axis=1, inplace=True)

df_pres.head()

Unnamed: 0,Modes of travel,Region characteristics,Periods,Total travel
0,Total,The Netherlands,2018,218.1
1,Total,The Netherlands,2019,218.8
2,Total,The Netherlands,2020,152.0
3,Total,The Netherlands,2021,168.4
4,Total,The Netherlands,2022,186.9


In [13]:
#Initialise mobility data
df_mob.head()

Unnamed: 0,Populatie,Geslacht,Persoonskenmerken,Reismotieven,Marges,Perioden,Regio's,Gemiddeld per persoon per jaar/Verplaatsingen (aantal),Gemiddeld per persoon per jaar/Afstand (reizigerskilometers),Gemiddeld per persoon per jaar/Reisduur (uren)
0,Populatie: 6 jaar of ouder,Totaal mannen en vrouwen,Totaal personen,Totaal,Waarde,2018,Nederland,1015,13200,453.8
1,Populatie: 6 jaar of ouder,Totaal mannen en vrouwen,Totaal personen,Totaal,Waarde,2019,Nederland,989,13140,452.6
2,Populatie: 6 jaar of ouder,Totaal mannen en vrouwen,Totaal personen,Totaal,Waarde,2020,Nederland,861,9105,378.0
3,Populatie: 6 jaar of ouder,Totaal mannen en vrouwen,Totaal personen,Totaal,Waarde,2021,Nederland,915,9942,424.9
4,Populatie: 6 jaar of ouder,Totaal mannen en vrouwen,Totaal personen,Totaal,Waarde,2022,Nederland,984,11027,442.2


In [None]:
import pandas as pd
import plotly.express as px
df = pd.read_csv('Data\\transport_performance.csv', sep=';')


# Define the column names
performance_col = 'Transport performance (billion passenger kilometres )'
region_col = 'Region characteristics'

# Convert the performance column to numeric and drop any rows that couldn't be converted
df['Performance'] = pd.to_numeric(df[performance_col], errors='coerce')
df.dropna(subset=['Performance'], inplace=True)

# Filter the data for 'Total' Modes of travel for an aggregate view
df_filtered = df[df['Modes of travel'] == 'Total'].copy()

# This is the dataframe for using only the regions with "urbanised" in them
df_urbanised = df_filtered[
    df_filtered[region_col].str.contains('urbanised', case=False, na=False)].copy()

# This is the dataframe for using only the regions with "PV" in them, the provinces
df_provinces = df_filtered[
    df_filtered[region_col].str.contains('PV', case=False, na=False)].copy()

#Create the Scatter/Line Plot
fig = px.line(
    df_provinces,
    x='Periods',
    y='Performance',
    color=region_col,
    markers=True,
    labels={
        'Periods': 'Year',
        'Performance': 'Transport Performance (Billion Passenger Kilometres)',
        region_col: 'Region Characteristic'
    },
    title='Transport Performance Over Time by Region Characteristics (Total Modes)'
)

# Customize the layout for better appearance (optional)
fig.update_layout(
    xaxis=dict(tickmode='linear', dtick=1), # Ensure integer ticks for years
    legend_title_text='Region Characteristic'
)

# Display the plot
fig.show()

# If you need to save it as an HTML file (for embedding) instead of showing it:
# fig.write_html("transport_performance_plotly.html")


In [26]:
df = pd.read_csv('C:\TIL6022 Environment files\Group project\TIL6022-Groupproject\Data\mobility_trends__modes_of_travel.csv', sep=';', header=5)
df.columns = df.columns.str.strip()
df.head()

# Rename the unnamed 'Periods' column to 'Year'
col_names = df.columns.tolist()
col_to_rename = col_names[0] if col_names[0] in ('', 'Unnamed: 0') or col_names[0].startswith('Unnamed') else 'Periods'

df.rename(columns={
    col_to_rename: 'Year',
    'Travel motives': 'Travel_Motives',
    'Modes of travel': 'Mode_Topic'
}, inplace=True)

# Drop the first row which is the unit row and remove the 'Source: CBS' row
df.drop(index=0, inplace=True)
df = df[~df['Year'].astype(str).str.contains('Source: CBS', na=False, case=False)]
df.reset_index(drop=True, inplace=True)

# Clean and convert 'Year' to integer
df['Year'] = df['Year'].str.replace('*', '', regex=False).astype(int)

# Select only the 'Distance travelled per stage (average)' columns
distance_cols = [col for col in df.columns if 'Distance travelled per stage (average)' in col]
id_vars = ['Year']
df_plot = df[id_vars + distance_cols].copy()

# --- 2. Melt and Finalize Data for Plotting ---

# Melt the data to long format
df_melted = df_plot.melt(
    id_vars=id_vars,
    value_vars=distance_cols,
    var_name='Metric',
    value_name='Distance'
)

# Extract the Mode of Travel from the column name
df_melted['Mode'] = df_melted['Metric'].str.split(' - ').str[0]

# Convert Distance to float and filter out the 'Total' mode
df_melted['Distance'] = df_melted['Distance'].str.replace(',', '.', regex=False).astype(float)
df_final = df_melted[df_melted['Mode'] != 'Total'].copy()

# --- 3. Create the Plotly Chart ---

fig = px.line(
    df_final,
    x='Year',
    y='Distance',
    # 'Mode' is used here to assign colors and generate the legend labels
    color='Mode', 
    markers=True,
    labels={
        'Year': 'Year',
        'Distance': 'Average Distance Travelled per Stage (km)',
        'Mode': 'Mode of Travel' 
    },
    title='Mobility Trends: Average Distance Travelled per Stage by Mode (1999-2023)'
)

# Ensure Year is treated as discrete steps for clarity
fig.update_xaxes(dtick=1)

fig.show()

# If you want to save it as an interactive file:
# fig.write_html("mobility_trends_distance_by_mode_plotly.html")


invalid escape sequence '\T'


invalid escape sequence '\T'


invalid escape sequence '\T'

