In [2]:
import requests
import json
import pandas as pd
import plotly.express as px
import numpy as np

In [3]:
def get_trip_data(timeout=30):
    """
    Download the Eurostat `tour_dem_tttot` dataset and return it as a dict.

    Args:
        timeout (float): Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        dict: The parsed JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx status.
        requests.exceptions.Timeout: If no response arrives within `timeout`.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # build url
    url = 'https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/tour_dem_tttot?lang=EN'
    response = requests.get(url, timeout=timeout)

    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()

    trip_dict = json.loads(response.text)

    return trip_dict

In [4]:
# Fetch the dataset once; later cells read this notebook-level `data` dict.
data = get_trip_data()
# Echo the parsed response so its structure can be inspected.
data

{'version': '2.0',
 'class': 'dataset',
 'label': 'Trips by duration, purpose and main destination',
 'source': 'ESTAT',
 'updated': '2025-02-27T23:00:00+0100',
 'value': {'9917': 2447366,
  '9919': 2816246,
  '9920': 1856588,
  '9756': 5777112,
  '9757': 6314809,
  '9758': 6709183,
  '9759': 6351613,
  '9760': 6882323,
  '9761': 6764655,
  '9762': 7361385,
  '9763': 7909105,
  '9764': 6288046,
  '9765': 6677951,
  '9766': 9499556,
  '9767': 9211332,
  '9528': 1489881,
  '9529': 1670786,
  '9530': 1435782,
  '9531': 2235671,
  '9532': 1698476,
  '9533': 2175530,
  '9534': 2469676,
  '9535': 2782431,
  '9536': 2219809,
  '9537': 3606225,
  '9538': 3316240,
  '9539': 3403263,
  '9540': 1941286,
  '9541': 1877711,
  '9542': 1855864,
  '9543': 1424295,
  '9544': 1596173,
  '9545': 2736847,
  '9546': 2335183,
  '9547': 2204807,
  '9548': 1208155,
  '9549': 1677700,
  '9550': 1967127,
  '9551': 2030006,
  '9864': 3905121,
  '9865': 4450442,
  '9866': 3993011,
  '9867': 4058296,
  '9868': 383

In [5]:
def decode_tourism_index(flat_index, dataset=None):
    """
    Decode a flat value index of the tourism dataset to dimension codes and labels.

    The flat index is interpreted as a row-major position over the dataset's
    dimensions: ``dataset["size"]`` gives the number of categories of each
    dimension, in the same order as ``dataset["id"]``, and the last dimension
    varies fastest.

    Args:
        flat_index (int): The flattened index to decode.
        dataset (dict, optional): Mapping with "size", "id" and "dimension"
            entries. Defaults to the notebook-level ``data`` variable.

    Returns:
        dict: One entry per dimension id, each of the form
        ``{'code': ..., 'label': ..., 'index': ...}``. ``code`` and ``label``
        are None when no category is registered at the decoded position; the
        label falls back to the code when the dimension has no label for it.

    Raises:
        IndexError: If ``flat_index`` is negative or not smaller than the
            product of the dimension sizes.
    """
    if dataset is None:
        dataset = data

    sizes = dataset["size"]
    dimension_ids = dataset["id"]
    dimension_data = dataset["dimension"]

    # Reject indices outside the cube; the modulo arithmetic below would
    # otherwise silently wrap them onto an unrelated cell.
    total_cells = 1
    for size in sizes:
        total_cells *= size
    if not 0 <= flat_index < total_cells:
        raise IndexError(
            f"flat_index {flat_index} is out of range for a dataset with {total_cells} cells"
        )

    # Step 1: Convert flat index to coordinates
    coords = [0] * len(sizes)
    remaining = flat_index

    # Work from the rightmost (fastest-varying) to the leftmost dimension
    for position in range(len(sizes) - 1, -1, -1):
        remaining, coords[position] = divmod(remaining, sizes[position])

    # Step 2: Map coordinates to actual codes and labels
    result = {}
    for position, dim_id in enumerate(dimension_ids):
        coord = coords[position]
        categories = dimension_data[dim_id]['category']

        # Find the code that corresponds to this coordinate
        code = next((k for k, v in categories['index'].items() if v == coord), None)

        # Compare against None explicitly so a falsy code (e.g. "") still gets a label.
        if code is not None:
            label = categories.get('label', {}).get(code, code)
        else:
            label = None

        result[dim_id] = {
            'code': code,
            'label': label,
            'index': coord
        }

    return result

# Spot-check the decoder on a single flat index.
decode_tourism_index(272)


{'freq': {'code': 'A', 'label': 'Annual', 'index': 0},
 'c_dest': {'code': 'EU27_2020_FOR',
  'label': 'EU27 countries (from 2020) except reporting country',
  'index': 0},
 'purpose': {'code': 'TOTAL', 'label': 'Total', 'index': 0},
 'duration': {'code': 'N_GE1', 'label': '1 night or over', 'index': 0},
 'unit': {'code': 'NR', 'label': 'Number', 'index': 0},
 'geo': {'code': 'PL', 'label': 'Poland', 'index': 22},
 'time': {'code': '2020', 'label': '2020', 'index': 8}}

In [6]:
# Output column -> id of the dataset dimension whose label fills that column.
label_columns = {
    "Purpose": "purpose",
    "Duration": "duration",
    "Country": "geo",
    "Year": "time",
}

new_trips_dict = {"Purpose": [], "Duration": [], "Country": [], "Year": [], "NumTrips": []}

# Each entry of data["value"] maps a flat index (stored as a string) to a trip
# count; decode the index into labels and append one row per entry.
for flat_key, num_trips in data["value"].items():
    decoded = decode_tourism_index(int(flat_key))

    for column, dim_id in label_columns.items():
        new_trips_dict[column].append(decoded[dim_id]["label"])
    new_trips_dict["NumTrips"].append(num_trips)

new_trips_df = pd.DataFrame(new_trips_dict)
new_trips_df

Unnamed: 0,Purpose,Duration,Country,Year,NumTrips
0,Personal reasons,From 1 to 3 nights,Albania,2017,2447366
1,Personal reasons,From 1 to 3 nights,Albania,2019,2816246
2,Personal reasons,From 1 to 3 nights,Albania,2020,1856588
3,Personal reasons,From 1 to 3 nights,Austria,2012,5777112
4,Personal reasons,From 1 to 3 nights,Austria,2013,6314809
...,...,...,...,...,...
11804,Total,4 nights or over,Slovakia,2021,2384051
11805,Total,4 nights or over,Slovakia,2022,3839167
11806,Total,4 nights or over,Slovakia,2023,4631198
11807,Total,4 nights or over,United Kingdom,2012,69416611


In [7]:
# Keep only the rows whose Purpose label is 'Total'.
has_total_purpose = new_trips_df['Purpose'].eq('Total')
total_trips_df = new_trips_df.loc[has_total_purpose]

total_trips_df

Unnamed: 0,Purpose,Duration,Country,Year,NumTrips
2073,Total,From 1 to 3 nights,Albania,2017,2646033
2074,Total,From 1 to 3 nights,Albania,2019,2971522
2075,Total,From 1 to 3 nights,Albania,2020,1887456
2076,Total,From 1 to 3 nights,Austria,2012,7494367
2077,Total,From 1 to 3 nights,Austria,2013,8140062
...,...,...,...,...,...
11804,Total,4 nights or over,Slovakia,2021,2384051
11805,Total,4 nights or over,Slovakia,2022,3839167
11806,Total,4 nights or over,Slovakia,2023,4631198
11807,Total,4 nights or over,United Kingdom,2012,69416611


In [8]:
# NOTE(review): this cell is a bare string literal, so the filter written inside
# it is never executed; the cell only echoes the text back. The same exclusion
# is run for real in a later cell.
"""exclude_countries = [
    'European Union - 27 countries (from 2020)',
    'Euro area - 20 countries (from 2023)'
]

total_trips_df = total_trips_df[~total_trips_df['Country'].isin(exclude_countries)]
total_trips_df"""

"exclude_countries = [\n    'European Union - 27 countries (from 2020)',\n    'Euro area - 20 countries (from 2023)'\n]\n\ntotal_trips_df = total_trips_df[~total_trips_df['Country'].isin(exclude_countries)]\ntotal_trips_df"

In [9]:
# Cast the year labels to integers BEFORE filtering, and filter the converted
# frame itself. Filtering new_trips_df here instead would discard the cast,
# because new_trips_df still holds the years as strings.
trips_typed_df = pd.DataFrame(new_trips_dict)

# Convert 'Year' to integer (or datetime if needed)
trips_typed_df["Year"] = trips_typed_df["Year"].astype(int)

# Keep only the rows whose Purpose label is 'Total'.
total_trips_df = trips_typed_df[
    (trips_typed_df['Purpose'] == 'Total')
]

total_trips_df

Unnamed: 0,Purpose,Duration,Country,Year,NumTrips
2073,Total,From 1 to 3 nights,Albania,2017,2646033
2074,Total,From 1 to 3 nights,Albania,2019,2971522
2075,Total,From 1 to 3 nights,Albania,2020,1887456
2076,Total,From 1 to 3 nights,Austria,2012,7494367
2077,Total,From 1 to 3 nights,Austria,2013,8140062
...,...,...,...,...,...
11804,Total,4 nights or over,Slovakia,2021,2384051
11805,Total,4 nights or over,Slovakia,2022,3839167
11806,Total,4 nights or over,Slovakia,2023,4631198
11807,Total,4 nights or over,United Kingdom,2012,69416611


In [10]:
# Multi-country groupings that should not be plotted next to single countries.
exclude_countries = [
    'European Union - 27 countries (from 2020)',
    'Euro area - 20 countries (from 2023)',
]

# Drop every row whose Country label is one of the groupings above.
is_excluded = total_trips_df['Country'].isin(exclude_countries)
total_trips_df = total_trips_df.loc[~is_excluded]
total_trips_df

Unnamed: 0,Purpose,Duration,Country,Year,NumTrips
2073,Total,From 1 to 3 nights,Albania,2017,2646033
2074,Total,From 1 to 3 nights,Albania,2019,2971522
2075,Total,From 1 to 3 nights,Albania,2020,1887456
2076,Total,From 1 to 3 nights,Austria,2012,7494367
2077,Total,From 1 to 3 nights,Austria,2013,8140062
...,...,...,...,...,...
11804,Total,4 nights or over,Slovakia,2021,2384051
11805,Total,4 nights or over,Slovakia,2022,3839167
11806,Total,4 nights or over,Slovakia,2023,4631198
11807,Total,4 nights or over,United Kingdom,2012,69416611


In [11]:
# Plot the line graph.
# total_trips_df holds more than one Duration category per country and year, so
# the traces are split by Duration as well as by Country (dash style). With
# colour alone, each country's line would jump back and forth between the
# duration categories.
fig = px.line(
    total_trips_df,
    x='Year',
    y='NumTrips',
    color='Country',
    line_dash='Duration',
)
fig.update_layout(title='Number of Trips per Year by Country',
                  xaxis_title='Year',
                  yaxis_title='Number of Trips',
                  legend_title='Country')

In [12]:
# Bubble chart: one marker per row, placed by country and year, with both the
# colour and the marker size driven by the number of trips.
fig = px.scatter(total_trips_df, x = 'Country', y = 'Year', color = 'NumTrips', size = 'NumTrips')
fig.update_layout(
    title = {
        # Centered title (fixed typo: "Counter" -> "Country").
        'text': 'Number of Trips by Country per Year',
        "x": 0.5,
        'xanchor': 'center'
    },
    # The figure height scales with the number of plotted rows.
    height = 0.075 * len(total_trips_df)

)
fig.show()