In [14]:
import requests
import json
import pandas as pd
import matplotlib.pyplot as plt
import streamlit as st
import plotly.express as px
import numpy as n

In [106]:
def get_passenger_data():
    """Download the Eurostat ``road_eqs_carmot`` dataset and return it parsed.

    The query asks for the JSON format, unit ``NR``, the motor-energy codes
    TOTAL/PET/DIE and the engine codes TOTAL/CC_LT1400/CC1400-1999/CC_GE2000,
    with English labels.

    Returns:
        dict: the decoded JSON body of the response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the server does not answer within the timeout.
    """
    # Same URL as before, split by query group so each filter is readable.
    url = (
        'https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/road_eqs_carmot'
        '?format=JSON&unit=NR'
        '&mot_nrg=TOTAL&mot_nrg=PET&mot_nrg=DIE'
        '&engine=TOTAL&engine=CC_LT1400&engine=CC1400-1999&engine=CC_GE2000'
        '&lang=en'
    )

    # Without a timeout a stalled connection would hang the notebook forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()

    return response.json()


In [107]:
# Fetch the dataset and keep the parsed response in `data`; the cells below
# (including decode_dataset_index) read this variable directly.
data = get_passenger_data()
# Bare last expression: display the raw response to inspect its structure.
data

{'version': '2.0',
 'class': 'dataset',
 'label': 'Passenger cars, by type of motor energy and size of engine',
 'source': 'ESTAT',
 'updated': '2025-05-15T23:00:00+0200',
 'value': {'24783': 233368,
  '24784': 252775,
  '24785': 277611,
  '23789': 1088000,
  '23790': 1211000,
  '23791': 1325000,
  '23792': 1413000,
  '23793': 1528000,
  '23794': 1642000,
  '23795': 1731000,
  '23796': 1737000,
  '23797': 1780000,
  '23798': 1808000,
  '23799': 1847000,
  '23800': 1895000,
  '23801': 1946000,
  '23802': 1999000,
  '23803': 2040083,
  '23804': 2075423,
  '23805': 2114381,
  '23806': 2161479,
  '23807': 2189040,
  '23808': 2205012,
  '23809': 2218877,
  '23810': 2232277,
  '23811': 2220991,
  '23812': 2189120,
  '23813': 2157405,
  '24505': 411265,
  '24506': 443778,
  '24507': 475081,
  '24508': 502914,
  '24509': 520360,
  '24510': 548988,
  '24511': 573425,
  '24512': 580679,
  '24513': 619808,
  '24514': 641894,
  '24515': 674227,
  '22769': 1865000,
  '22770': 1962000,
  '22771': 20

In [108]:
# Number of entries present in the response's "value" mapping.
print(len(data["value"]))
# Display the "dimension" section of the response (category indexes and labels).
data["dimension"]

6765


{'freq': {'label': 'Time frequency',
  'category': {'index': {'A': 0}, 'label': {'A': 'Annual'}}},
 'unit': {'label': 'Unit of measure',
  'category': {'index': {'NR': 0}, 'label': {'NR': 'Number'}}},
 'mot_nrg': {'label': 'Motor energy',
  'category': {'index': {'TOTAL': 0, 'PET': 1, 'DIE': 2},
   'label': {'TOTAL': 'Total', 'PET': 'Petroleum products', 'DIE': 'Diesel'}}},
 'engine': {'label': 'Engine capacity of vehicle',
  'category': {'index': {'TOTAL': 0,
    'CC_LT1400': 1,
    'CC1400-1999': 2,
    'CC_GE2000': 3},
   'label': {'TOTAL': 'Total',
    'CC_LT1400': 'Less than 1 400 cm³',
    'CC1400-1999': 'From 1 400 to 1 999 cm³',
    'CC_GE2000': '2 000 cm³ or over'}}},
 'geo': {'label': 'Geopolitical entity (reporting)',
  'category': {'index': {'EU27_2020': 0,
    'BE': 1,
    'BG': 2,
    'CZ': 3,
    'DK': 4,
    'DE': 5,
    'EE': 6,
    'IE': 7,
    'EL': 8,
    'ES': 9,
    'FR': 10,
    'HR': 11,
    'IT': 12,
    'CY': 13,
    'LV': 14,
    'LT': 15,
    'LU': 16,
    '

In [109]:
def decode_dataset_index(flat_index, dataset=None):
    """Translate a flat value index into per-dimension category information.

    The flat index is unravelled against ``dataset["size"]`` with the last
    dimension varying fastest, and each resulting coordinate is mapped back
    to the category code stored at that position.

    Args:
        flat_index: integer position of a cell in the flattened value array.
        dataset: dict providing the ``"size"``, ``"id"`` and ``"dimension"``
            entries. Defaults to the notebook-level ``data`` variable, which
            keeps existing one-argument calls working.

    Returns:
        dict: ``{dimension_id: {"code", "label", "index"}}`` for every
        dimension listed in ``dataset["id"]``. The label falls back to the
        code when the dimension has no label for it.

    Raises:
        IndexError: if ``flat_index`` is outside ``0 .. prod(sizes) - 1``.
        ValueError: if a dimension has no category at a computed coordinate.
    """
    if dataset is None:
        dataset = data

    sizes = dataset["size"]
    dimension_ids = dataset["id"]
    dimension_data = dataset["dimension"]

    # Reject indexes that would otherwise silently wrap around via the
    # modulo arithmetic below and decode to an unrelated cell.
    total_cells = 1
    for size in sizes:
        total_cells *= size
    if not 0 <= flat_index < total_cells:
        raise IndexError(
            f"flat index {flat_index} is outside the dataset (0..{total_cells - 1})"
        )

    # Convert to coordinates, peeling off the fastest-varying (last) dimension first
    coords = [0] * len(sizes)
    remaining = flat_index
    for i in range(len(sizes) - 1, -1, -1):
        remaining, coords[i] = divmod(remaining, sizes[i])

    # Map to codes
    result = {}
    for dim_id, coord in zip(dimension_ids, coords):
        categories = dimension_data[dim_id]['category']
        labels = categories.get('label', {})
        index = categories.get('index')

        # The category index may be an object (code -> position), an array of
        # codes in position order, or absent; in the last case the label
        # keys are taken as the codes.
        if isinstance(index, dict):
            position_to_code = {position: code for code, position in index.items()}
        else:
            position_to_code = dict(enumerate(labels if index is None else index))

        if coord not in position_to_code:
            raise ValueError(f"dimension {dim_id!r} has no category at position {coord}")
        code = position_to_code[coord]

        result[dim_id] = {
            'code': code,
            'label': labels.get(code, code),
            'index': coord
        }

    return result

# Example: decode flat index 208 into its per-dimension code, label and position.
decode_dataset_index(208)

{'freq': {'code': 'A', 'label': 'Annual', 'index': 0},
 'unit': {'code': 'NR', 'label': 'Number', 'index': 0},
 'mot_nrg': {'code': 'TOTAL', 'label': 'Total', 'index': 0},
 'engine': {'code': 'TOTAL', 'label': 'Total', 'index': 0},
 'geo': {'code': 'CZ', 'label': 'Czechia', 'index': 3},
 'time': {'code': '2016', 'label': '2016', 'index': 46}}

In [110]:
# Output column -> dimension id whose decoded label fills that column.
column_sources = {
    "MotorEnergy": "mot_nrg",
    "Engine": "engine",
    "Country": "geo",
    "Year": "time",
}
new_passengers_dict = {column: [] for column in [*column_sources, "NumCars"]}

# Decode every reported cell and append one entry to each column list.
for key, num_cars in data["value"].items():
    key_info = decode_dataset_index(int(key))

    for column, dim_id in column_sources.items():
        new_passengers_dict[column].append(key_info[dim_id]["label"])
    new_passengers_dict["NumCars"].append(num_cars)

new_passengers_df = pd.DataFrame(new_passengers_dict)
new_passengers_df

Unnamed: 0,MotorEnergy,Engine,Country,Year,NumCars
0,Diesel,From 1 400 to 1 999 cm³,Albania,2021,233368
1,Diesel,From 1 400 to 1 999 cm³,Albania,2022,252775
2,Diesel,From 1 400 to 1 999 cm³,Albania,2023,277611
3,Diesel,From 1 400 to 1 999 cm³,Austria,1999,1088000
4,Diesel,From 1 400 to 1 999 cm³,Austria,2000,1211000
...,...,...,...,...,...
6760,Total,Total,Kosovo*,2019,309509
6761,Total,Total,Kosovo*,2020,292902
6762,Total,Total,Kosovo*,2021,322701
6763,Total,Total,Kosovo*,2022,339131


In [111]:
def transform_index(index, sizes):
    """Unravel a flat index into one coordinate per dimension.

    The last entry of ``sizes`` varies fastest: it takes ``index % size``,
    and the quotient is carried over to the dimension before it.

    Args:
        index: flat position to convert.
        sizes: number of categories in each dimension, in dimension order.

    Returns:
        list: the coordinates, in the same order as ``sizes``.
    """
    coords = [0] * len(sizes)
    # Walk the dimensions from last to first, writing each remainder in place.
    for position in range(len(sizes) - 1, -1, -1):
        index, coords[position] = divmod(index, sizes[position])
    return coords

In [112]:
def get_passenger_df(passenger_dict):
    """Flatten a parsed dataset response into a DataFrame of labelled rows.

    Args:
        passenger_dict: dict providing ``"id"`` (dimension ids), ``"size"``
            (categories per dimension), ``"dimension"`` (category index and
            labels per dimension) and ``"value"`` (flat index as string ->
            value).

    Returns:
        pandas.DataFrame: one row per entry of ``passenger_dict["value"]``,
        in ascending flat-index order, with one column per dimension id
        holding the category label, plus a ``value`` column.
    """
    dimensions = passenger_dict['dimension']
    dimension_ids = passenger_dict['id']
    dimension_sizes = passenger_dict['size']
    values = passenger_dict['value']

    # Build, once per dimension, the list of labels ordered by category
    # position. The position comes from the category "index" rather than from
    # the insertion order of the "label" mapping, which need not match it.
    ordered_labels = {}
    for dim in dimension_ids:
        category = dimensions[dim]['category']
        labels = category.get('label', {})
        index = category.get('index')
        if index is None:
            # No explicit index: fall back to the order of the label keys.
            codes = list(labels)
        elif isinstance(index, dict):
            codes = sorted(index, key=index.get)
        else:
            codes = list(index)
        ordered_labels[dim] = [labels.get(code, code) for code in codes]

    # Total number of addressable cells = product of the dimension sizes.
    total_items = 1
    for size in dimension_sizes:
        total_items *= size

    records = []
    # Visit only the cells that actually carry a value, in flat-index order.
    for key in sorted(values, key=int):
        idx = int(key)
        if not 0 <= idx < total_items:
            # Keys outside the declared cube cannot be decoded; skip them.
            continue
        # get coordinates for each datapoint
        coords = transform_index(idx, dimension_sizes)
        record = {
            dim: ordered_labels[dim][coord]
            for dim, coord in zip(dimension_ids, coords)
        }
        record['value'] = values[key]
        records.append(record)

    return pd.DataFrame(records)

In [113]:
# Download the dataset and flatten it into one labelled row per reported value.
passenger_dict = get_passenger_data()
passenger_df = get_passenger_df(passenger_dict)
# Bare last expression: display the resulting frame.
passenger_df

Unnamed: 0,freq,unit,mot_nrg,engine,geo,time,value
0,Annual,Number,Total,Total,European Union - 27 countries (from 2020),2000,182033003
1,Annual,Number,Total,Total,European Union - 27 countries (from 2020),2010,211348234
2,Annual,Number,Total,Total,European Union - 27 countries (from 2020),2011,217805044
3,Annual,Number,Total,Total,European Union - 27 countries (from 2020),2012,219461115
4,Annual,Number,Total,Total,European Union - 27 countries (from 2020),2013,221705164
...,...,...,...,...,...,...,...
6760,Annual,Number,Diesel,2 000 cm³ or over,Türkiye,2023,201971
6761,Annual,Number,Diesel,2 000 cm³ or over,Kosovo*,2017,25597
6762,Annual,Number,Diesel,2 000 cm³ or over,Kosovo*,2018,39970
6763,Annual,Number,Diesel,2 000 cm³ or over,Kosovo*,2019,51071


In [114]:
# Give the analysis columns readable names and drop the rest (freq, unit).
# rename() without inplace returns a new frame, so the frame shown by the
# previous cell is not mutated behind the reader's back.
passenger_df = passenger_df.rename(
    columns={
        'mot_nrg': 'Motor_Type',
        'engine': 'Engine_Size',
        'geo': 'Country',
        'time': 'Year',
        'value': 'Num_Passengers',
    }
)[['Motor_Type', 'Engine_Size', 'Country', 'Year', 'Num_Passengers']]

# Geo labels removed from the analysis: the EU-27 aggregate plus the
# countries listed below.
exclude_countries = [
    'European Union - 27 countries (from 2020)',
    'Türkiye',
    'Serbia',
    'Georgia',
    'Kosovo*',
    'Montenegro',
    'Moldova',
    'Bosnia and Herzegovina',
    'Liechtenstein',
    'Croatia',
    'Norway',
    'Iceland'
]
# .copy() makes the filtered result an independent frame, so later column
# assignments do not trigger pandas' SettingWithCopyWarning.
passenger_df = passenger_df[~passenger_df['Country'].isin(exclude_countries)].copy()
passenger_df


Unnamed: 0,Motor_Type,Engine_Size,Country,Year,Num_Passengers
15,Total,Total,Belgium,1970,2060000
16,Total,Total,Belgium,1975,2614000
17,Total,Total,Belgium,1979,3077000
18,Total,Total,Belgium,1980,3159000
19,Total,Total,Belgium,1981,3206000
...,...,...,...,...,...
6702,Diesel,2 000 cm³ or over,United Kingdom,2022,2475624
6703,Diesel,2 000 cm³ or over,United Kingdom,2023,2362252
6740,Diesel,2 000 cm³ or over,Albania,2021,173706
6741,Diesel,2 000 cm³ or over,Albania,2022,184887


In [115]:
# Make sure country labels are plain strings. assign() returns a new frame,
# which avoids writing into the filtered slice produced by the previous cell.
passenger_df = passenger_df.assign(Country=passenger_df['Country'].astype(str))

# One bar per country. Plotting every row would stack all years and all
# fuel/engine breakdowns (including their "Total" rows) on top of each other,
# so keep only the overall total and, per country, its most recent year.
# Year labels are four-digit strings here, so a plain sort orders them
# chronologically; sort_index() then restores the dataset's country order.
latest_totals = (
    passenger_df[
        (passenger_df['Motor_Type'] == 'Total')
        & (passenger_df['Engine_Size'] == 'Total')
    ]
    .sort_values('Year')
    .drop_duplicates(subset='Country', keep='last')
    .sort_index()
)

# The dataset counts passenger cars, so the title and axis label say so.
fig = px.bar(
    latest_totals,
    x='Country',
    y='Num_Passengers',
    hover_data=['Year'],
    labels={'Num_Passengers': 'Passenger cars'},
    title='Number of Passenger Cars by Country (latest available year)',
    color_discrete_sequence=["#7D092A"],
)
fig.show()

In [116]:

# One line per country over time. Without a grouping column plotly joins all
# rows of every country and fuel/engine breakdown into a single zig-zag
# line, so keep only the overall totals and colour by country.
totals_by_year = (
    passenger_df[
        (passenger_df['Motor_Type'] == 'Total')
        & (passenger_df['Engine_Size'] == 'Total')
    ]
    # Year labels are strings; convert them so the x axis is a real,
    # chronologically ordered numeric axis (assumes every label is a year).
    .assign(Year=lambda frame: frame['Year'].astype(int))
    .sort_values(['Country', 'Year'])
)

# The dataset counts passenger cars, so the title and axis label say so.
# A single-colour sequence would paint every country identically, so the
# default colour cycle is used instead.
fig = px.line(
    totals_by_year,
    x='Year',
    y='Num_Passengers',
    color='Country',
    labels={'Num_Passengers': 'Passenger cars'},
    title='Number of Passenger Cars over Time',
)
fig.show()