In [109]:
import requests
import json
import pandas as pd
import plotly.express as px
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [67]:
def get_trip_data():
    # build url
    url = f'https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/tour_dem_tttot?lang=EN'
    url_text = requests.get(url)

    trip_dict = json.loads(url_text.text)

    return trip_dict

In [68]:
data = get_trip_data()
data

{'version': '2.0',
 'class': 'dataset',
 'label': 'Trips by duration, purpose and main destination',
 'source': 'ESTAT',
 'updated': '2025-02-27T23:00:00+0100',
 'value': {'9917': 2447366,
  '9919': 2816246,
  '9920': 1856588,
  '9756': 5777112,
  '9757': 6314809,
  '9758': 6709183,
  '9759': 6351613,
  '9760': 6882323,
  '9761': 6764655,
  '9762': 7361385,
  '9763': 7909105,
  '9764': 6288046,
  '9765': 6677951,
  '9766': 9499556,
  '9767': 9211332,
  '9528': 1489881,
  '9529': 1670786,
  '9530': 1435782,
  '9531': 2235671,
  '9532': 1698476,
  '9533': 2175530,
  '9534': 2469676,
  '9535': 2782431,
  '9536': 2219809,
  '9537': 3606225,
  '9538': 3316240,
  '9539': 3403263,
  '9540': 1941286,
  '9541': 1877711,
  '9542': 1855864,
  '9543': 1424295,
  '9544': 1596173,
  '9545': 2736847,
  '9546': 2335183,
  '9547': 2204807,
  '9548': 1208155,
  '9549': 1677700,
  '9550': 1967127,
  '9551': 2030006,
  '9864': 3905121,
  '9865': 4450442,
  '9866': 3993011,
  '9867': 4058296,
  '9868': 383

In [69]:
def decode_tourism_index(flat_index):
    """
    Decode a flat index from tourism dataset to dimension codes and labels
    
    Args:
        flat_index (int): The flattened index to decode
        sizes (list): List of dimension sizes [1, 1, 3, 3, 1, 36, 12]
        dimension_data (dict): Dictionary containing dimension metadata
        dimension_ids (list): List of dimension IDs ['freq', 'c_dest', 'purpose', 'duration', 'unit', 'geo', 'time']
    
    Returns:
        dict: Decoded dimension information
    """
    sizes = data["size"]
    dimension_ids = data["id"]
    dimension_data = data["dimension"]

    # Step 1: Convert flat index to coordinates
    coords = [0] * len(sizes)
    remaining = flat_index
    
    # Work from rightmost (time) to leftmost (freq) dimension
    for i in range(len(sizes) - 1, -1, -1):
        coords[i] = remaining % sizes[i]
        remaining = remaining // sizes[i]
    
    # Step 2: Map coordinates to actual codes and labels
    result = {}
    for i, dim_id in enumerate(dimension_ids):
        coord = coords[i]
        categories = dimension_data[dim_id]['category']
        
        # Find the code that corresponds to this coordinate
        code = next((k for k, v in categories['index'].items() if v == coord), None)
        
        result[dim_id] = {
            'code': code,
            'label': categories.get('label', {}).get(code, code) if code else None,
            'index': coord
        }
    
    return result

decode_tourism_index(272)


{'freq': {'code': 'A', 'label': 'Annual', 'index': 0},
 'c_dest': {'code': 'EU27_2020_FOR',
  'label': 'EU27 countries (from 2020) except reporting country',
  'index': 0},
 'purpose': {'code': 'TOTAL', 'label': 'Total', 'index': 0},
 'duration': {'code': 'N_GE1', 'label': '1 night or over', 'index': 0},
 'unit': {'code': 'NR', 'label': 'Number', 'index': 0},
 'geo': {'code': 'PL', 'label': 'Poland', 'index': 22},
 'time': {'code': '2020', 'label': '2020', 'index': 8}}

In [70]:
new_trips_dict = {"Purpose": [], "Duration": [], "Country": [], "Year": [], "NumTrips": []}

for key in data["value"].keys():
    key_info = decode_tourism_index(int(key))

    new_trips_dict["Purpose"].append(key_info["purpose"]["label"])
    new_trips_dict["Duration"].append(key_info["duration"]["label"])
    new_trips_dict["Country"].append(key_info["geo"]["label"])
    new_trips_dict["Year"].append(key_info["time"]["label"])
    new_trips_dict["NumTrips"].append(data["value"][key])

new_trips_df = pd.DataFrame(new_trips_dict)
new_trips_df

Unnamed: 0,Purpose,Duration,Country,Year,NumTrips
0,Personal reasons,From 1 to 3 nights,Albania,2017,2447366
1,Personal reasons,From 1 to 3 nights,Albania,2019,2816246
2,Personal reasons,From 1 to 3 nights,Albania,2020,1856588
3,Personal reasons,From 1 to 3 nights,Austria,2012,5777112
4,Personal reasons,From 1 to 3 nights,Austria,2013,6314809
...,...,...,...,...,...
11804,Total,4 nights or over,Slovakia,2021,2384051
11805,Total,4 nights or over,Slovakia,2022,3839167
11806,Total,4 nights or over,Slovakia,2023,4631198
11807,Total,4 nights or over,United Kingdom,2012,69416611


In [71]:
new_trips_df['Year'] = new_trips_df['Year'].astype(int)

In [72]:
new_trips_df_sorted = new_trips_df.sort_values(by = ['Country', 'Year'])
new_trips_df_sorted = new_trips_df_sorted.reset_index(drop = True)
new_trips_df_sorted

Unnamed: 0,Purpose,Duration,Country,Year,NumTrips
0,Personal reasons,From 1 to 3 nights,Albania,2017,2447366
1,Personal reasons,1 night or over,Albania,2017,3265076
2,Personal reasons,4 nights or over,Albania,2017,817709
3,"Professional, business",From 1 to 3 nights,Albania,2017,198667
4,"Professional, business",1 night or over,Albania,2017,348445
...,...,...,...,...,...
11804,"Professional, business",1 night or over,United Kingdom,2013,23515460
11805,"Professional, business",4 nights or over,United Kingdom,2013,5785092
11806,Total,From 1 to 3 nights,United Kingdom,2013,89975954
11807,Total,1 night or over,United Kingdom,2013,159413526


In [94]:
print(total_trips_df['Country'].unique())

['Albania' 'Austria' 'Belgium' 'Bulgaria' 'Croatia' 'Cyprus' 'Czechia'
 'Denmark' 'Estonia' 'Euro area – 20 countries (from 2023)' 'Finland'
 'France' 'Germany' 'Greece' 'Hungary' 'Ireland' 'Italy' 'Latvia'
 'Lithuania' 'Luxembourg' 'Malta' 'Montenegro' 'Netherlands'
 'North Macedonia' 'Norway' 'Poland' 'Portugal' 'Romania' 'Serbia'
 'Slovakia' 'Slovenia' 'Spain' 'Sweden' 'Switzerland']


In [73]:
total_trips_df = new_trips_df_sorted[
    (new_trips_df_sorted['Purpose'] == 'Total')
]

total_trips_df

Unnamed: 0,Purpose,Duration,Country,Year,NumTrips
6,Total,From 1 to 3 nights,Albania,2017,2646033
7,Total,1 night or over,Albania,2017,3613521
8,Total,4 nights or over,Albania,2017,967487
15,Total,From 1 to 3 nights,Albania,2017,287335
16,Total,1 night or over,Albania,2017,960690
...,...,...,...,...,...
11798,Total,1 night or over,United Kingdom,2013,52277882
11799,Total,4 nights or over,United Kingdom,2013,41835431
11806,Total,From 1 to 3 nights,United Kingdom,2013,89975954
11807,Total,1 night or over,United Kingdom,2013,159413526


In [97]:
exclude_countries = [
    'Euro area – 20 countries (from 2023)',
    'United Kingdom'
]

total_trips_df = total_trips_df[~total_trips_df['Country'].isin(exclude_countries)]
print(total_trips_df['Country'].unique())

['Albania' 'Austria' 'Belgium' 'Bulgaria' 'Croatia' 'Cyprus' 'Czechia'
 'Denmark' 'Estonia' 'Finland' 'France' 'Germany' 'Greece' 'Hungary'
 'Ireland' 'Italy' 'Latvia' 'Lithuania' 'Luxembourg' 'Malta' 'Montenegro'
 'Netherlands' 'North Macedonia' 'Norway' 'Poland' 'Portugal' 'Romania'
 'Serbia' 'Slovakia' 'Slovenia' 'Spain' 'Sweden' 'Switzerland']


In [98]:
total_trips_df_1to3 = total_trips_df[total_trips_df['Duration'] == 'From 1 to 3 nights']
total_trips_df_1plus = total_trips_df[total_trips_df['Duration'] == '1 night or over']
total_trips_df_4plus = total_trips_df[total_trips_df['Duration'] == '4 nights or over']

total_trips_df_1plus

Unnamed: 0,Purpose,Duration,Country,Year,NumTrips
7,Total,1 night or over,Albania,2017,3613521
16,Total,1 night or over,Albania,2017,960690
25,Total,1 night or over,Albania,2017,4574210
34,Total,1 night or over,Albania,2019,4317025
43,Total,1 night or over,Albania,2019,1033579
...,...,...,...,...,...
11723,Total,1 night or over,Switzerland,2022,7167832
11731,Total,1 night or over,Switzerland,2022,9712398
11740,Total,1 night or over,Switzerland,2022,11275095
11744,Total,1 night or over,Switzerland,2022,1562697


In [99]:
final_1to3_df = total_trips_df_1to3.groupby(['Country', 'Year'], as_index = False)['NumTrips'].sum()
final_1to3_df['Purpose'] = 'Total'
final_1to3_df['Duration'] = 'From 1 to 3 nights'

final_1to3_df = final_1to3_df[['Purpose', 'Duration', 'Country', 'Year', 'NumTrips']]
final_1to3_df.head(6)

Unnamed: 0,Purpose,Duration,Country,Year,NumTrips
0,Total,From 1 to 3 nights,Albania,2017,5866737
1,Total,From 1 to 3 nights,Albania,2019,6543304
2,Total,From 1 to 3 nights,Albania,2020,4161939
3,Total,From 1 to 3 nights,Austria,2012,23336036
4,Total,From 1 to 3 nights,Austria,2013,24729154
5,Total,From 1 to 3 nights,Austria,2014,25266124


In [100]:
final_1plus_df = total_trips_df_1plus.groupby(['Country', 'Year'], as_index = False)['NumTrips'].sum()
final_1plus_df['Purpose'] = 'Total'
final_1plus_df['Duration'] = '1 night or over'

final_1plus_df = final_1plus_df[['Purpose', 'Duration', 'Country', 'Year', 'NumTrips']]
final_1plus_df

Unnamed: 0,Purpose,Duration,Country,Year,NumTrips
0,Total,1 night or over,Albania,2017,9148421
1,Total,1 night or over,Albania,2019,10701208
2,Total,1 night or over,Albania,2020,6715933
3,Total,1 night or over,Austria,2012,43844400
4,Total,1 night or over,Austria,2013,44802455
...,...,...,...,...,...
345,Total,1 night or over,Switzerland,2018,43891490
346,Total,1 night or over,Switzerland,2019,39966568
347,Total,1 night or over,Switzerland,2020,32758516
348,Total,1 night or over,Switzerland,2021,34150483


In [101]:
final_4plus_df = total_trips_df_4plus.groupby(['Country', 'Year'], as_index = False)['NumTrips'].sum()
final_4plus_df['Purpose'] = 'Total'
final_4plus_df['Duration'] = '4 nights or over'

final_4plus_df = final_4plus_df[['Purpose', 'Duration', 'Country', 'Year', 'NumTrips']]
final_4plus_df

Unnamed: 0,Purpose,Duration,Country,Year,NumTrips
0,Total,4 nights or over,Albania,2017,3281684
1,Total,4 nights or over,Albania,2019,4157904
2,Total,4 nights or over,Albania,2020,2553994
3,Total,4 nights or over,Austria,2012,20508363
4,Total,4 nights or over,Austria,2013,20073300
...,...,...,...,...,...
345,Total,4 nights or over,Switzerland,2018,24682356
346,Total,4 nights or over,Switzerland,2019,22526692
347,Total,4 nights or over,Switzerland,2020,19173687
348,Total,4 nights or over,Switzerland,2021,20261896


In [102]:
# Plot the line graph
fig = px.line(final_1to3_df, x='Year', y='NumTrips', color='Country', color_discrete_sequence = px.colors.qualitative.Bold)
fig.update_layout(title='Number of Trips (1 to 3 days) per Year by Country',
                  xaxis_title='Year',
                  yaxis_title='Number of Trips',
                  legend_title='Country')

In [137]:
# Plot the line graph
fig = px.line(final_1plus_df, x='Year', y='NumTrips', color='Country', color_discrete_sequence = px.colors.qualitative.Bold)
fig.update_layout(title='Number of Trips (1 Plus Nights) per Year by Country',
                  xaxis_title='Year',
                  yaxis_title='Number of Trips',
                  legend_title='Country')

In [135]:
countries = ['Germany', 'France', 'Spain']
colors = {
    'Germany': 'crimson',
    'Spain': 'darkorange',
    'France':'royalblue'
}
top_countries = final_1plus_df[final_1plus_df['Country'].isin(countries)]

print(top_countries['Country'].dtype)


object


In [129]:
top_countries['Country'] = top_countries['Country'].astype(str)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [130]:
pivot_df = top_countries.pivot(index='Year', columns='Country', values='NumTrips').reset_index()
pivot_df

Country,Year,France,Germany,Spain
0,2012,450050272,495047972,274627767
1,2013,449900009,497164037,264701470
2,2014,452521191,473819894,255866630
3,2015,398313334,495751892,272822318
4,2016,392020852,511296156,290946608
5,2017,441549618,487154218,305415622
6,2018,440459824,535758223,310580348
7,2019,433136304,521035772,308633215
8,2020,338064748,329191395,162442754
9,2021,413350505,404983918,233549351


In [136]:
fig = go.Figure()

for country in countries:
    fig.add_trace(go.Bar(
        x = pivot_df['Year'],
        y = pivot_df[country],
        name = country,
        marker_color = colors[country]
    ))

fig.update_layout(
    barmode = 'group',
    title = 'Number of Trips for Top 3 Most Traveled Countries by Year (1 night or over)',
    xaxis_title = 'Year',
    yaxis_title = 'Number of Trips'
)

fig.show()