In [2]:
import requests
import json
import pandas as pd
import matplotlib.pyplot as plt
import streamlit as st
import plotly.express as px
import numpy as n

In [3]:
def get_passenger_data():
    # building url
    url = f'https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/road_eqs_carmot?format=JSON&unit=NR&mot_nrg=TOTAL&mot_nrg=PET&mot_nrg=DIE&engine=TOTAL&engine=CC_LT1400&engine=CC1400-1999&engine=CC_GE2000&lang=en'
    url_text = requests.get(url)

    passenger_dict = json.loads(url_text.text)

    return passenger_dict


In [4]:
data = get_passenger_data()
data

{'version': '2.0',
 'class': 'dataset',
 'label': 'Passenger cars, by type of motor energy and size of engine',
 'source': 'ESTAT',
 'updated': '2025-05-15T23:00:00+0200',
 'value': {'24783': 233368,
  '24784': 252775,
  '24785': 277611,
  '23789': 1088000,
  '23790': 1211000,
  '23791': 1325000,
  '23792': 1413000,
  '23793': 1528000,
  '23794': 1642000,
  '23795': 1731000,
  '23796': 1737000,
  '23797': 1780000,
  '23798': 1808000,
  '23799': 1847000,
  '23800': 1895000,
  '23801': 1946000,
  '23802': 1999000,
  '23803': 2040083,
  '23804': 2075423,
  '23805': 2114381,
  '23806': 2161479,
  '23807': 2189040,
  '23808': 2205012,
  '23809': 2218877,
  '23810': 2232277,
  '23811': 2220991,
  '23812': 2189120,
  '23813': 2157405,
  '24505': 411265,
  '24506': 443778,
  '24507': 475081,
  '24508': 502914,
  '24509': 520360,
  '24510': 548988,
  '24511': 573425,
  '24512': 580679,
  '24513': 619808,
  '24514': 641894,
  '24515': 674227,
  '22769': 1865000,
  '22770': 1962000,
  '22771': 20

In [5]:
print(len(data["value"]))
data["dimension"]

6765


{'freq': {'label': 'Time frequency',
  'category': {'index': {'A': 0}, 'label': {'A': 'Annual'}}},
 'unit': {'label': 'Unit of measure',
  'category': {'index': {'NR': 0}, 'label': {'NR': 'Number'}}},
 'mot_nrg': {'label': 'Motor energy',
  'category': {'index': {'TOTAL': 0, 'PET': 1, 'DIE': 2},
   'label': {'TOTAL': 'Total', 'PET': 'Petroleum products', 'DIE': 'Diesel'}}},
 'engine': {'label': 'Engine capacity of vehicle',
  'category': {'index': {'TOTAL': 0,
    'CC_LT1400': 1,
    'CC1400-1999': 2,
    'CC_GE2000': 3},
   'label': {'TOTAL': 'Total',
    'CC_LT1400': 'Less than 1 400 cm³',
    'CC1400-1999': 'From 1 400 to 1 999 cm³',
    'CC_GE2000': '2 000 cm³ or over'}}},
 'geo': {'label': 'Geopolitical entity (reporting)',
  'category': {'index': {'EU27_2020': 0,
    'BE': 1,
    'BG': 2,
    'CZ': 3,
    'DK': 4,
    'DE': 5,
    'EE': 6,
    'IE': 7,
    'EL': 8,
    'ES': 9,
    'FR': 10,
    'HR': 11,
    'IT': 12,
    'CY': 13,
    'LV': 14,
    'LT': 15,
    'LU': 16,
    '

In [6]:
def decode_dataset_index(flat_index):
    sizes = data["size"]
    dimension_ids = data["id"]
    dimension_data = data["dimension"]


    # Convert to coordinates
    coords = [0] * len(sizes)
    remaining = flat_index
    
    for i in range(len(sizes) - 1, -1, -1):
        coords[i] = remaining % sizes[i]
        remaining = remaining // sizes[i]
    
    # Map to codes
    result = {}
    for i, dim_id in enumerate(dimension_ids):
        coord = coords[i]
        categories = dimension_data[dim_id]['category']
        
        # Find code for this coordinate
        code = next(k for k, v in categories['index'].items() if v == coord)
        
        result[dim_id] = {
            'code': code,
            'label': categories.get('label', {}).get(code, code),
            'index': coord
        }
    
    return result

decode_dataset_index(208)

{'freq': {'code': 'A', 'label': 'Annual', 'index': 0},
 'unit': {'code': 'NR', 'label': 'Number', 'index': 0},
 'mot_nrg': {'code': 'TOTAL', 'label': 'Total', 'index': 0},
 'engine': {'code': 'TOTAL', 'label': 'Total', 'index': 0},
 'geo': {'code': 'CZ', 'label': 'Czechia', 'index': 3},
 'time': {'code': '2016', 'label': '2016', 'index': 46}}

In [7]:
new_passengers_dict = {"MotorEnergy": [], "Engine": [], "Country": [], "Year": [], "NumCars": []}

for key in data["value"].keys():
    key_info = decode_dataset_index(int(key))

    new_passengers_dict["MotorEnergy"].append(key_info["mot_nrg"]["label"])
    new_passengers_dict["Engine"].append(key_info["engine"]["label"])
    new_passengers_dict["Country"].append(key_info["geo"]["label"])
    new_passengers_dict["Year"].append(key_info["time"]["label"])
    new_passengers_dict["NumCars"].append(data["value"][key])

new_passengers_df = pd.DataFrame(new_passengers_dict)
new_passengers_df

Unnamed: 0,MotorEnergy,Engine,Country,Year,NumCars
0,Diesel,From 1 400 to 1 999 cm³,Albania,2021,233368
1,Diesel,From 1 400 to 1 999 cm³,Albania,2022,252775
2,Diesel,From 1 400 to 1 999 cm³,Albania,2023,277611
3,Diesel,From 1 400 to 1 999 cm³,Austria,1999,1088000
4,Diesel,From 1 400 to 1 999 cm³,Austria,2000,1211000
...,...,...,...,...,...
6760,Total,Total,Kosovo*,2019,309509
6761,Total,Total,Kosovo*,2020,292902
6762,Total,Total,Kosovo*,2021,322701
6763,Total,Total,Kosovo*,2022,339131


In [8]:
total_passengers_df = new_passengers_df[
    (new_passengers_df['Engine'] == 'Total') &
    (new_passengers_df['MotorEnergy'] == 'Total')
]

total_passengers_df

Unnamed: 0,MotorEnergy,Engine,Country,Year,NumCars
5354,Total,Total,Albania,2013,341691
5355,Total,Total,Albania,2014,378053
5356,Total,Total,Albania,2015,403630
5357,Total,Total,Albania,2016,435613
5358,Total,Total,Albania,2017,417426
...,...,...,...,...,...
6760,Total,Total,Kosovo*,2019,309509
6761,Total,Total,Kosovo*,2020,292902
6762,Total,Total,Kosovo*,2021,322701
6763,Total,Total,Kosovo*,2022,339131


In [20]:
exclude_countries = [
    'European Union - 27 countries (from 2020)',
    'Kosovo*',
    'Iceland',
    'Türkiye',
    'United Kingdom'
]

total_passengers_df = total_passengers_df[~total_passengers_df['Country'].isin(exclude_countries)]
total_passengers_df

Unnamed: 0,MotorEnergy,Engine,Country,Year,NumCars
5354,Total,Total,Albania,2013,341691
5355,Total,Total,Albania,2014,378053
5356,Total,Total,Albania,2015,403630
5357,Total,Total,Albania,2016,435613
5358,Total,Total,Albania,2017,417426
...,...,...,...,...,...
6674,Total,Total,Slovakia,2019,2393577
6675,Total,Total,Slovakia,2020,2439986
6676,Total,Total,Slovakia,2021,2493183
6677,Total,Total,Slovakia,2022,2555491


In [21]:
fig = px.line(total_passengers_df, x = 'Year', y = 'NumCars', title = 'Number of Cars by Year', color = 'Country', 
              color_discrete_sequence = px.colors.qualitative.Bold)
fig.show()

In [10]:
fig = px.bar(total_passengers_df, x = 'Year', y = 'NumCars', title = 'Increase of Passenger Cars Over Time', 
             color_discrete_sequence = px.colors.qualitative.Bold)
fig.show()

fig.write_image("passenger_cars_bar.png")


In [12]:
fig = px.scatter(total_passengers_df, x = 'Year', y = 'NumCars', title = 'Number of Cars over Years',
                 color_discrete_sequence = px.colors.qualitative.Bold,
                 log_x = True, size = 'NumCars')
fig.show()

In [12]:
pip install -U kaleido

Note: you may need to restart the kernel to use updated packages.


In [13]:
%run roadspending_gdp.ipynb

final_gdp_roadspending_df

Note: you may need to restart the kernel to use updated packages.


KeyboardInterrupt: 

KeyboardInterrupt: 

In [None]:
total_passengers_df['Year'] = total_passengers_df['Year'].astype(int)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
cars_gdp_df = pd.merge(total_passengers_df, final_gdp_roadspending_df, on = ['Year', 'Country'])

cars_gdp_df

Unnamed: 0,MotorEnergy,Engine,Country,Year,NumCars,RoadSpending,GDP,Spending/GDP%
0,Total,Total,Austria,1995,3594000,457000000.0,2.146983e+11,0.212857
1,Total,Total,Austria,1996,3690000,426000000.0,2.112353e+11,0.201671
2,Total,Total,Austria,1997,3783000,365000000.0,1.896100e+11,0.192500
3,Total,Total,Austria,1998,3887000,430000000.0,1.945157e+11,0.221062
4,Total,Total,Austria,1999,4010000,391000000.0,1.935528e+11,0.202012
...,...,...,...,...,...,...,...,...
702,Total,Total,Slovenia,2019,1165371,306000000.0,4.798524e+10,0.637696
703,Total,Total,Slovenia,2020,1170690,235000000.0,4.747438e+10,0.495004
704,Total,Total,Slovenia,2021,1189457,298000000.0,5.479730e+10,0.543822
705,Total,Total,Slovenia,2022,1207755,422000000.0,5.337775e+10,0.790592


In [None]:
cars_gdp_df['CarsPercap'] = (cars_gdp_df['NumCars'] / cars_gdp_df['GDP']) * 100000

cars_gdp_df.head()

# Number of Cars per 100,000 GDP

Unnamed: 0,MotorEnergy,Engine,Country,Year,NumCars,RoadSpending,GDP,Spending/GDP%,CarsPercap
0,Total,Total,Austria,1995,3594000,457000000.0,214698300000.0,0.212857,1.673977
1,Total,Total,Austria,1996,3690000,426000000.0,211235300000.0,0.201671,1.746867
2,Total,Total,Austria,1997,3783000,365000000.0,189610000000.0,0.1925,1.995147
3,Total,Total,Austria,1998,3887000,430000000.0,194515700000.0,0.221062,1.998296
4,Total,Total,Austria,1999,4010000,391000000.0,193552800000.0,0.202012,2.071787


In [None]:
cars_gdp_df = cars_gdp_df[cars_gdp_df['Year'] >= 1979]
cars_gdp_df

Unnamed: 0,MotorEnergy,Engine,Country,Year,NumCars,RoadSpending,GDP,Spending/GDP%,CarsPercap
0,Total,Total,Austria,1995,3594000,457000000.0,2.146983e+11,0.212857,1.673977
1,Total,Total,Austria,1996,3690000,426000000.0,2.112353e+11,0.201671,1.746867
2,Total,Total,Austria,1997,3783000,365000000.0,1.896100e+11,0.192500,1.995147
3,Total,Total,Austria,1998,3887000,430000000.0,1.945157e+11,0.221062,1.998296
4,Total,Total,Austria,1999,4010000,391000000.0,1.935528e+11,0.202012,2.071787
...,...,...,...,...,...,...,...,...,...
702,Total,Total,Slovenia,2019,1165371,306000000.0,4.798524e+10,0.637696,2.428603
703,Total,Total,Slovenia,2020,1170690,235000000.0,4.747438e+10,0.495004,2.465941
704,Total,Total,Slovenia,2021,1189457,298000000.0,5.479730e+10,0.543822,2.170649
705,Total,Total,Slovenia,2022,1207755,422000000.0,5.337775e+10,0.790592,2.262656


In [None]:
fig = px.scatter(cars_gdp_df, x = 'CarsPercap', y = 'NumCars', color = 'Country', hover_name = 'Country', size_max = 40, color_discrete_sequence = px.colors.qualitative.Alphabet, 
                 labels = {
                     'CarsPercap': 'Number of Cars per Capita (per 100,000 in GDP)',
                     'NumCars': 'Number of Cars'
                 }
)

fig.update_layout(title = 'Number of Cars Per Capita vs. Number of Cars')
fig.show()

In [None]:
# without germany, spain, france, italy, and poland

In [None]:
fig = px.bar(cars_gdp_df, x = 'Country', y = 'NumCars', color = 'RoadSpending', title = 'Number of Cars and Road Spending per Country')
fig.show()

NameError: name 'px' is not defined