In [49]:
import pandas as pd
import os

In [50]:
# Load the original travel data (semicolon-separated export kept under data/).
filename = os.path.join('data', 'per_person__travel_modes__travel_purpose_08102025_094303.csv')
df = pd.read_csv(filename, sep=';')

# Clean numeric columns (comma to dot conversion).
# Every column is attempted: the values are stringified, ',' is swapped for '.'
# and the result is cast to float. If any value in a column is not parseable
# as a float, the cast raises ValueError *before* the assignment, so that
# column is intentionally left exactly as it was loaded.
for col in df.columns:
    try:
        # regex=False: ',' is a literal character, not a pattern; being explicit
        # keeps the call independent of the pandas-version default for `regex`.
        df[col] = df[col].astype(str).str.replace(',', '.', regex=False).astype(float)
    except ValueError:
        # Not a numeric column -- keep the original values.
        continue

# Rename columns to shorter, readable names.
# NOTE: the whitespace runs inside the source header names are copied verbatim
# and must match the file's header exactly.
rename_map = {
    "Region characteristics": "Regions", "Periods": "Year",
    "Average per person per year/Trips (number)": "Average number of trips per person per year",
    "Average per person per year/Distance travelled    (passenger kilometres )": "Average kilometers travelled per person per year",
    "Average per person per year/Time travelled     (hours)": "Average hours travelled per person per year"
}
# errors='raise' turns a header mismatch into an immediate KeyError naming the
# missing labels, instead of silently exporting a frame with un-renamed columns.
df = df.rename(columns=rename_map, errors='raise')

# Convert Year to integer (the loop above turned it into float) and export to CSV.
# astype(int) uses the platform default integer width.
df['Year'] = df['Year'].astype(int)
# Build the output path the same way as the input path.
clean_path = os.path.join('data', 'clean_data.csv')
df.to_csv(clean_path, index=False)
print(f"✓ Clean data saved. Shape: {df.shape}")

# Display DataFrame info
print(f"\nColumns: {', '.join(df.columns)}")
print("\nFirst 3 rows:")
display(df.head(3))
print(f"\nData types:\n{df.dtypes}")
# Last expression of the cell: rendered as the cell's rich output.
df.tail(3)

✓ Clean data saved. Shape: (6240, 9)

Columns: Travel motives, Population, Travel modes, Margins, Regions, Year, Average number of trips per person per year, Average kilometers travelled per person per year, Average hours travelled per person per year

First 3 rows:


Unnamed: 0,Travel motives,Population,Travel modes,Margins,Regions,Year,Average number of trips per person per year,Average kilometers travelled per person per year,Average hours travelled per person per year
0,Total,Population 6 years or older,Total,Value,The Netherlands,2018,1015.0,13200.0,453.8
1,Total,Population 6 years or older,Total,Value,The Netherlands,2019,989.0,13140.0,452.6
2,Total,Population 6 years or older,Total,Value,The Netherlands,2020,861.0,9105.0,378.0



Data types:
Travel motives                                       object
Population                                           object
Travel modes                                         object
Margins                                              object
Regions                                              object
Year                                                  int32
Average number of trips per person per year         float64
Average kilometers travelled per person per year    float64
Average hours travelled per person per year         float64
dtype: object


Unnamed: 0,Travel motives,Population,Travel modes,Margins,Regions,Year,Average number of trips per person per year,Average kilometers travelled per person per year,Average hours travelled per person per year
6237,Other,Population 6 years or older,Other,Value,Limburg (PV),2021,,,
6238,Other,Population 6 years or older,Other,Value,Limburg (PV),2022,,,
6239,Other,Population 6 years or older,Other,Value,Limburg (PV),2023,,,
