In [1]:
import requests
import json
import pandas as pd
import plotly.express as px
import numpy as np

In [2]:
def get_trip_data():
    # build url
    url = f'https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/tour_dem_tttot?format=JSON&unit=NR&unit=NR&c_dest=EU27_2020_FOR&c_dest=EU27_2020_FOR&purpose=TOTAL&purpose=PER&purpose=PROF&purpose=TOTAL&purpose=PER&purpose=PROF&duration=N_GE1&duration=N1-3&duration=N_GE4&duration=N_GE1&duration=N1-3&duration=N_GE4&lang=en'
    url_text = requests.get(url)

    trip_dict = json.loads(url_text.text)

    return trip_dict

In [3]:
data = get_trip_data()
data

{'version': '2.0',
 'class': 'dataset',
 'label': 'Trips by duration, purpose and main destination',
 'source': 'ESTAT',
 'updated': '2025-02-27T23:00:00+0100',
 'value': {'2144': 31962,
  '1988': 1236012,
  '1989': 1897332,
  '1990': 4102510,
  '1991': 4675543,
  '1760': 1390863,
  '1761': 1680574,
  '1762': 3253301,
  '1763': 3678589,
  '1772': 37953,
  '1774': 102076,
  '1775': 193702,
  '2096': 1532374,
  '2097': 1259831,
  '2098': 2685401,
  '1904': 71908,
  '1905': 56478,
  '1906': 216308,
  '1907': 273609,
  '1784': 575544,
  '1785': 490169,
  '1786': 1301715,
  '1787': 1701345,
  '1808': 6360670,
  '1809': 8497900,
  '1810': 12972090,
  '1811': 14296783,
  '1796': 711631,
  '1797': 763901,
  '1798': 1143824,
  '1799': 1295918,
  '1748': 16914983,
  '1749': 21784674,
  '1750': 41682870,
  '1751': 49432386,
  '1820': 141893,
  '1821': 109548,
  '1822': 328393,
  '1823': 408300,
  '1844': 24894,
  '1845': 39806,
  '1846': 87513,
  '1847': 85435,
  '1856': 1060415,
  '1857': 126291

In [4]:
def decode_tourism_index(flat_index):
    """
    Decode a flat index from tourism dataset to dimension codes and labels
    
    Args:
        flat_index (int): The flattened index to decode
        sizes (list): List of dimension sizes [1, 1, 3, 3, 1, 36, 12]
        dimension_data (dict): Dictionary containing dimension metadata
        dimension_ids (list): List of dimension IDs ['freq', 'c_dest', 'purpose', 'duration', 'unit', 'geo', 'time']
    
    Returns:
        dict: Decoded dimension information
    """
    sizes = data["size"]
    dimension_ids = data["id"]
    dimension_data = data["dimension"]

    # Step 1: Convert flat index to coordinates
    coords = [0] * len(sizes)
    remaining = flat_index
    
    # Work from rightmost (time) to leftmost (freq) dimension
    for i in range(len(sizes) - 1, -1, -1):
        coords[i] = remaining % sizes[i]
        remaining = remaining // sizes[i]
    
    # Step 2: Map coordinates to actual codes and labels
    result = {}
    for i, dim_id in enumerate(dimension_ids):
        coord = coords[i]
        categories = dimension_data[dim_id]['category']
        
        # Find the code that corresponds to this coordinate
        code = next((k for k, v in categories['index'].items() if v == coord), None)
        
        result[dim_id] = {
            'code': code,
            'label': categories.get('label', {}).get(code, code) if code else None,
            'index': coord
        }
    
    return result

decode_tourism_index(272)


{'freq': {'code': 'A', 'label': 'Annual', 'index': 0},
 'c_dest': {'code': 'EU27_2020_FOR',
  'label': 'EU27 countries (from 2020) except reporting country',
  'index': 0},
 'purpose': {'code': 'TOTAL', 'label': 'Total', 'index': 0},
 'duration': {'code': 'N_GE1', 'label': '1 night or over', 'index': 0},
 'unit': {'code': 'NR', 'label': 'Number', 'index': 0},
 'geo': {'code': 'PL', 'label': 'Poland', 'index': 22},
 'time': {'code': '2020', 'label': '2020', 'index': 8}}

In [6]:
new_trips_dict = {"Purpose": [], "Duration": [], "Country": [], "Year": [], "NumTrips": []}

for key in data["value"].keys():
    key_info = decode_tourism_index(int(key))

    new_trips_dict["Purpose"].append(key_info["purpose"]["label"])
    new_trips_dict["Duration"].append(key_info["duration"]["label"])
    new_trips_dict["Country"].append(key_info["geo"]["label"])
    new_trips_dict["Year"].append(key_info["time"]["label"])
    new_trips_dict["NumTrips"].append(data["value"][key])

new_trips_df = pd.DataFrame(new_trips_dict)
new_trips_df

Unnamed: 0,Purpose,Duration,Country,Year,NumTrips
0,Personal reasons,From 1 to 3 nights,Albania,2020,31962
1,Personal reasons,From 1 to 3 nights,Austria,2020,1236012
2,Personal reasons,From 1 to 3 nights,Austria,2021,1897332
3,Personal reasons,From 1 to 3 nights,Austria,2022,4102510
4,Personal reasons,From 1 to 3 nights,Austria,2023,4675543
...,...,...,...,...,...
1041,Total,4 nights or over,Slovenia,2023,1673975
1042,Total,4 nights or over,Slovakia,2020,421183
1043,Total,4 nights or over,Slovakia,2021,915394
1044,Total,4 nights or over,Slovakia,2022,1625701


In [None]:
trip_dict = get_trip_data()
trip_dict['dimension']['time']['category']['label'].keys()


dict_keys(['2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023'])

In [None]:
# improve modularity of function below
def transform_index(index, sizes):
    coords = []
    for size in reversed(sizes):
        coords.append(index % size)
        index //= size
    return list(reversed(coords))


In [None]:
def get_trip_df(trip_dict):
    # access dimensions & ids, size, category, label
    dimensions = trip_dict['dimension']
    dimension_ids = trip_dict['id']
    dimension_sizes = trip_dict['size']
    dimension_labels = {
        dim: dimensions[dim]['category']['label']
        for dim in dimension_ids
    }

    # set total_items to 1 so no multiply to 0
    total_items = 1
    for size in dimension_sizes:
        total_items *= size

    print(total_items)
    
    records = []
    for idx in range(total_items):
         # convert vals to strings
         key = str(idx)
         if key in trip_dict['value']:
             # transform datapoints into coordinates to access each oen
             coords = transform_index(idx, dimension_sizes)
             record = {}
             for i, dim in enumerate(dimension_ids):
                 label_keys = list(dimension_labels[dim].keys())
                 record[dim] = dimension_labels[dim][label_keys[coords[i]]]
             record['value'] = trip_dict['value'][key]
             records.append(record)
    df = pd.DataFrame(records)
    return df
    

In [None]:
trip_dict = get_trip_data()
trip_df = get_trip_df(trip_dict)

trip_df

3888


Unnamed: 0,freq,c_dest,purpose,duration,unit,geo,time,value
0,Annual,EU27 countries (from 2020) except reporting co...,Total,1 night or over,Number,European Union - 27 countries (from 2020),2020,80397451
1,Annual,EU27 countries (from 2020) except reporting co...,Total,1 night or over,Number,European Union - 27 countries (from 2020),2021,109539505
2,Annual,EU27 countries (from 2020) except reporting co...,Total,1 night or over,Number,European Union - 27 countries (from 2020),2022,205080131
3,Annual,EU27 countries (from 2020) except reporting co...,Total,1 night or over,Number,European Union - 27 countries (from 2020),2023,228685148
4,Annual,EU27 countries (from 2020) except reporting co...,Total,1 night or over,Number,Euro area – 20 countries (from 2023),2020,65271156
...,...,...,...,...,...,...,...,...
1041,Annual,EU27 countries (from 2020) except reporting co...,"Professional, business",4 nights or over,Number,Sweden,2021,312962
1042,Annual,EU27 countries (from 2020) except reporting co...,"Professional, business",4 nights or over,Number,Sweden,2022,571680
1043,Annual,EU27 countries (from 2020) except reporting co...,"Professional, business",4 nights or over,Number,Norway,2021,160509
1044,Annual,EU27 countries (from 2020) except reporting co...,"Professional, business",4 nights or over,Number,Norway,2022,323550


In [None]:
trip_df['time'].unique()

array(['2020', '2021', '2022', '2023'], dtype=object)

In [None]:
trip_df.rename(columns = {'purpose': 'Purpose', 'duration': 'Duration', 'geo': 'Country', 'time': 'Year', 'value': 'Num_Trips'}, inplace = True)
trip_df_filtered = trip_df[['Purpose', 'Duration', 'Country', 'Year', 'Num_Trips']]

exclude_countries = [
    'European Union - 27 countries (from 2020)',
    'Euro area – 20 countries (from 2023)'
]
trip_df_final = trip_df_filtered[~trip_df_filtered['Country'].isin(exclude_countries)]
trip_df_final

Unnamed: 0,Purpose,Duration,Country,Year,Num_Trips
8,Total,1 night or over,Belgium,2020,5031045
9,Total,1 night or over,Belgium,2021,6385433
10,Total,1 night or over,Belgium,2022,11361386
11,Total,1 night or over,Belgium,2023,12030346
12,Total,1 night or over,Bulgaria,2020,115995
...,...,...,...,...,...
1041,"Professional, business",4 nights or over,Sweden,2021,312962
1042,"Professional, business",4 nights or over,Sweden,2022,571680
1043,"Professional, business",4 nights or over,Norway,2021,160509
1044,"Professional, business",4 nights or over,Norway,2022,323550


In [None]:
fig = px.scatter(trip_df_final, x = 'Num_Trips', y = 'Country', title = 'Number of Trips by Country', color = 'Num_Trips',
                 color_continuous_scale = ['red', 'green', 'blue'])
fig.show()