In [1]:
import pandas as pd
import numpy as np

import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
# NOTE(review): this silences every warning category for the rest of the session,
# so any deprecation or chained-assignment warning a later cell might raise stays hidden.
warnings.filterwarnings("ignore")

# UN Datasets

### Inbounds 

In [16]:
# Raw inputs: the `mapeo` lookup table (joined on 'country' further down) and the
# UNWTO inbound-arrivals workbook.
mapeo = pd.read_excel('data/raw/mapeo.xlsx')
inbounds_path = 'data/raw/unwto-inbound-arrivals-data.xlsx'

# The first 5 and the last 8 rows of the sheet are not read.
inbounds_read_options = {'skiprows': 5, 'skipfooter': 8}
df = pd.read_excel(inbounds_path, **inbounds_read_options)

In [17]:
# Where 'Unnamed: 5' is empty, take the label from 'Unnamed: 6' instead.
inbounds_label_primary = df['Unnamed: 5']
inbounds_label_fallback = df['Unnamed: 6']
df['Unnamed: 5'] = inbounds_label_primary.combine_first(inbounds_label_fallback)

In [18]:
# Reshape the raw sheet to long format: one row per (country, variable, year).
inbounds_renames = {
    'Basic data and indicators': 'country',
    'Unnamed: 5': 'variable',
}
inbounds_unused = [
    'C.', 'S.', 'C. & S.', 'Units', 'Notes', 'Series',
    'Unnamed: 39', 'Unnamed: 4', 'Unnamed: 6', 'Unnamed: 7',
]

inbounds_long = df.rename(columns=inbounds_renames).drop(columns=inbounds_unused)
# the country label is forward-filled onto the rows below it
inbounds_long = inbounds_long.assign(country=inbounds_long['country'].ffill())
inbounds_long = inbounds_long.query('variable.isin(["Overnights visitors (tourists)", "Total arrivals"])')
inbounds_long = inbounds_long.melt(
    id_vars=['country', 'variable'],
    var_name='year',
    value_name='inbounds',
)
# '..' is turned into NaN before the values are multiplied by 1,000
inbounds_long = inbounds_long.replace({'inbounds': {'..': np.nan}})
inbounds_long = inbounds_long.assign(inbounds=inbounds_long['inbounds'] * 1_000)

df = inbounds_long


In [19]:
# Pivot to wide format: one row per (country, year), one column per indicator.
inbounds_indicator_names = {
    'Overnights visitors (tourists)': 'inbounds_tourists',
    'Total arrivals': 'inbounds_total',
}
inbounds_wide = df.pivot(index=['country', 'year'], columns='variable', values='inbounds')
df = inbounds_wide.rename(columns=inbounds_indicator_names).reset_index()


In [20]:
# Add the region column by joining the `mapeo` lookup on 'country' (inner join).
inbounds = df.merge(mapeo, on='country')

In [21]:
# inbounds.to_excel('data/processed/inbounds.xlsx')

In [22]:
# Number of distinct countries left after the join.
inbounds['country'].nunique()

223

Fixing countries with empty values

In [23]:
# Rows for the year 2022 only.
year_2022_filter = 'year == 2022'
inbounds.query(year_2022_filter)

Unnamed: 0,country,year,inbounds_tourists,inbounds_total,region
27,AFGHANISTAN,2022,,,South Asia
55,ALBANIA,2022,7104700.0,7543800.0,Europe
83,ALGERIA,2022,,1398000.0,Africa
111,AMERICAN SAMOA,2022,,,East Asia and the Pacific
139,ANDORRA,2022,3555100.0,8426700.0,Europe
...,...,...,...,...,...
6131,"VENEZUELA, BOLIVARIAN REPUBLIC OF",2022,,,Americas
6159,VIET NAM,2022,,3661000.0,East Asia and the Pacific
6187,YEMEN,2022,,,Middle East
6215,ZAMBIA,2022,,,Africa


In [24]:
# Non-null `inbounds_tourists` observations per country, fewest first.
tourist_observations = df[['inbounds_tourists', 'country']]
observations_per_country = tourist_observations.groupby('country').count().reset_index()
missing_countries = observations_per_country.sort_values(by = 'inbounds_tourists')

print('complete countries:')
# countries with exactly 28 non-null observations
missing_countries.query('inbounds_tourists == 28').count()

complete countries:


variable
country              108
inbounds_tourists    108
dtype: int64

In [25]:
# Countries without a single non-null `inbounds_tourists` observation.
no_observation_filter = 'inbounds_tourists == 0'
missing_countries.query(no_observation_filter)

variable,country,inbounds_tourists
0,AFGHANISTAN,0
216,UZBEKISTAN,0
196,TAIWAN PROVINCE OF CHINA,0
187,SOUTH SUDAN,0
185,SOMALIA,0
138,NAURU,0
104,"KOREA, REPUBLIC OF",0
103,"KOREA, DEMOCRATIC PEOPLE´S REPUBLIC OF",0
98,JAPAN,0
93,IRAQ,0


In [26]:
# Countries with exactly 28 non-null `inbounds_tourists` observations.
tourist_values = df[['inbounds_tourists', 'country']]
tourist_counts = tourist_values.groupby('country').count().reset_index()
tourist_counts.sort_values(by = 'inbounds_tourists').query('inbounds_tourists == 28')

variable,country,inbounds_tourists
47,COOK ISLANDS,28
51,CUBA,28
189,SRI LANKA,28
168,SAINT LUCIA,28
188,SPAIN,28
...,...,...
140,NETHERLANDS,28
139,NEPAL,28
137,NAMIBIA,28
150,OMAN,28


### Arrivals by Regions

In [27]:
# UNWTO workbook with inbound arrivals broken down by region.
region_path = 'data/raw/unwto-inbound-arrivals-by-region-data.xlsx'

# The first 5 and the last 8 rows of the sheet are not read.
region_read_options = {'skiprows': 5, 'skipfooter': 8}
regions = pd.read_excel(region_path, **region_read_options)

In [28]:
# Where 'Unnamed: 5' is empty, take the label from 'Unnamed: 6' instead.
regions_label_primary = regions['Unnamed: 5']
regions_label_fallback = regions['Unnamed: 6']
regions['Unnamed: 5'] = regions_label_primary.combine_first(regions_label_fallback)

In [29]:
# Reshape the raw sheet to long format: one row per (country, variable, year).
regions_renames = {
    'Basic data and indicators': 'country',
    'Unnamed: 5': 'variable',
}
regions_unused = [
    'C.', 'S.', 'C. & S.', 'Units', 'Notes', 'Series',
    'Unnamed: 39', 'Unnamed: 4', 'Unnamed: 6', 'Unnamed: 7',
]

regions_long = regions.rename(columns=regions_renames).drop(columns=regions_unused)
# the country label is forward-filled onto the rows below it
regions_long = regions_long.assign(country=regions_long['country'].ffill())
regions_long = regions_long.query('variable.isin(["Total", "Africa", "Americas", "Europe", "Middle East", "East Asia and the Pacific", "South Asia", "Other not classified", "Total arrivals"])')
regions_long = regions_long.melt(
    id_vars=['country', 'variable'],
    var_name='year',
    value_name='inbounds',
)
# '..' is turned into NaN before the values are multiplied by 1,000
regions_long = regions_long.replace({'inbounds': {'..': np.nan}})
regions_long = regions_long.assign(inbounds=regions_long['inbounds'] * 1_000)

regions = regions_long


In [30]:
# Pivot to wide format: one row per (country, year), one column per region of origin.
regions_wide = regions.pivot(index=['country', 'year'], columns='variable', values='inbounds')
regions_wide = regions_wide.rename(columns={'Total': 'region_total'})
regions = regions_wide.reset_index()


In [31]:
# Show the pivoted `regions` frame as the cell output.
regions

variable,country,year,Africa,Americas,East Asia and the Pacific,Europe,Middle East,Other not classified,South Asia,region_total
0,AFGHANISTAN,1995,,,,,,,,
1,AFGHANISTAN,1996,,,,,,,,
2,AFGHANISTAN,1997,,,,,,,,
3,AFGHANISTAN,1998,,,,,,,,
4,AFGHANISTAN,1999,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
6239,ZIMBABWE,2018,2064000.0,120300.0,141900.0,237200.0,2400.0,,14100.0,2579900.0
6240,ZIMBABWE,2019,1871900.0,101200.0,113400.0,190500.0,4400.0,,13000.0,2294400.0
6241,ZIMBABWE,2020,567700.0,17100.0,14200.0,37450.0,300.0,,2600.0,639350.0
6242,ZIMBABWE,2021,269200.0,20700.0,17100.0,70600.0,300.0,,2800.0,380700.0


In [368]:
# Join the `mapeo` lookup onto the regional arrivals (inner join on 'country').
regions = regions.merge(mapeo, on='country')

In [129]:
# regions.to_excel('data/processed/arrivals_by_region.xlsx')

In [257]:
# `inbounds` already carries the columns of `mapeo` (it was built by merging the pivoted
# frame with `mapeo` on 'country'). Merging it with `mapeo` a second time would duplicate
# those columns as `region_x` / `region_y`, so the working frame is simply a copy of it.
df = inbounds.copy()

In [104]:
# Number of distinct countries in the working frame.
df['country'].nunique()

223

### Purpose of Trip

In [32]:
# Location of the raw UNWTO workbook with inbound arrivals by main purpose.
purpose_path = 'data/raw/unwto-inbound-arrivals-by-main-purpose-data.xlsx'

In [33]:
# Read the purpose-of-trip sheet; the first 5 and the last 8 rows are not read.
purpose_read_options = {'skiprows': 5, 'skipfooter': 8}
df_purpose = pd.read_excel(purpose_path, **purpose_read_options)


In [34]:
# Reshape the raw sheet to long format: one row per (country, variable, year).
# Here the variable labels are taken from 'Unnamed: 6'.
purpose_renames = {
    'Basic data and indicators': 'country',
    'Unnamed: 6': 'variable',
}
purpose_unused = [
    'C.', 'S.', 'C. & S.', 'Units', 'Notes', 'Series',
    'Unnamed: 39', 'Unnamed: 4', 'Unnamed: 5', 'Unnamed: 7',
]

purpose_long = df_purpose.rename(columns=purpose_renames).drop(columns=purpose_unused)
# the country label is forward-filled onto the rows below it
purpose_long = purpose_long.assign(country=purpose_long['country'].ffill())
purpose_long = purpose_long.query('variable.isin(["Personal", "Business and professional"])')
purpose_long = purpose_long.melt(
    id_vars=['country', 'variable'],
    var_name='year',
    value_name='purpose',
)
# '..' is turned into NaN before the values are multiplied by 1,000
purpose_long = purpose_long.replace({'purpose': {'..': np.nan}})
purpose_long = purpose_long.assign(purpose=purpose_long['purpose'] * 1_000)

df_purpose = purpose_long


In [35]:
# Pivot to wide format: one row per (country, year), one column per trip purpose.
purpose_indicator_names = {
    'Personal': 'personal_tourists',
    'Business and professional': 'business_tourists',
}
purpose_wide = df_purpose.pivot(index=['country', 'year'], columns='variable', values='purpose')
df_purpose = purpose_wide.rename(columns=purpose_indicator_names).reset_index()


In [36]:
# Show the pivoted `df_purpose` frame as the cell output.
df_purpose

variable,country,year,business_tourists,personal_tourists
0,AFGHANISTAN,1995,,
1,AFGHANISTAN,1996,,
2,AFGHANISTAN,1997,,
3,AFGHANISTAN,1998,,
4,AFGHANISTAN,1999,,
...,...,...,...,...
6239,ZIMBABWE,2018,302400.0,2277500.0
6240,ZIMBABWE,2019,220600.0,2073700.0
6241,ZIMBABWE,2020,100600.0,538700.0
6242,ZIMBABWE,2021,105700.0,275100.0


In [144]:
# df_purpose.to_excel('data/processed/purpose_of_trip.xlsx')

In [37]:
# Left-join the purpose columns onto the working frame, matching on country and year.
purpose_join_keys = ['country', 'year']
df = df.merge(df_purpose, on=purpose_join_keys, how='left')

In [119]:
# df.to_parquet('./data/inbounds.parquet')

### Arrival method

In [150]:
# UNWTO workbook with inbound arrivals broken down by mode of transport.
transport_path = 'data/raw/unwto-inbound-arrivals-by-transport-data.xlsx'

# The first 5 and the last 8 rows of the sheet are not read.
transport_read_options = {'skiprows': 5, 'skipfooter': 8}
df_transport = pd.read_excel(transport_path, **transport_read_options)

In [152]:
# Where 'Unnamed: 5' is empty, take the label from 'Unnamed: 6' instead.
transport_label_primary = df_transport['Unnamed: 5']
transport_label_fallback = df_transport['Unnamed: 6']
df_transport['Unnamed: 5'] = transport_label_primary.combine_first(transport_label_fallback)

In [155]:
# Reshape the raw sheet to long format: one row per (country, variable, year).
transport_renames = {
    'Basic data and indicators': 'country',
    'Unnamed: 5': 'variable',
}
transport_unused = [
    'C.', 'S.', 'C. & S.', 'Units', 'Notes', 'Series',
    'Unnamed: 39', 'Unnamed: 4', 'Unnamed: 6', 'Unnamed: 7',
]

transport_long = df_transport.rename(columns=transport_renames).drop(columns=transport_unused)
# the country label is forward-filled onto the rows below it
transport_long = transport_long.assign(country=transport_long['country'].ffill())
transport_long = transport_long.query('variable.isin(["Total", "Air", "Water", "Land"])')
transport_long = transport_long.melt(
    id_vars=['country', 'variable'],
    var_name='year',
    value_name='inbounds',
)
transport_long = transport_long.replace({'inbounds': {'..': np.nan}})
# this scaling step is only applied to inbound variables, because they are given in thousands
transport_long = transport_long.assign(inbounds=transport_long['inbounds'] * 1_000)

df_transport = transport_long

In [158]:
# Pivot to wide format: one row per (country, year), one column per transport mode.
transport_indicator_names = {
    'Total': 'inbounds_total',
    'Air': 'inbounds_air',
    'Water': 'inbounds_water',
    'Land': 'inbounds_land',
}
transport_wide = df_transport.pivot(index=['country', 'year'], columns='variable', values='inbounds')
df_transport = transport_wide.rename(columns=transport_indicator_names).reset_index()

In [161]:
# Persist the arrival-method table (the DataFrame index is written as well).
arrival_method_output = 'data/processed/arrival_method.xlsx'
df_transport.to_excel(arrival_method_output)

### Industries / Accommodation
"Number of establishments", "Number of rooms", "Number of bed-places", "Occupancy rate / rooms", "Occupancy rate / bed-places", "Average length of stay"

In [38]:
# UNWTO workbook with the tourism-industries indicators.
industries_path = 'data/raw/unwto-tourism-industries-data.xlsx'

# The first 5 and the last 4 rows of the sheet are not read.
industries_read_options = {'skiprows': 5, 'skipfooter': 4}
df_industries = pd.read_excel(industries_path, **industries_read_options)

In [None]:
# not needed here: only the 'Unnamed: 5' column is required
# df_industries['Unnamed: 5'] = df_industries['Unnamed: 5'].combine_first(df_industries['Unnamed: 6'])

In [39]:
# Reshape the raw sheet to long format: one row per (country, variable, year).
# The values are not multiplied by 1,000 in this section.
industries_renames = {
    'Basic data and indicators': 'country',
    'Unnamed: 5': 'variable',
}
industries_unused = [
    'C.', 'S.', 'C. & S.', 'Units', 'Notes',
    'Unnamed: 38', 'Unnamed: 4', 'Unnamed: 6', 'Unnamed: 7',
]

industries_long = df_industries.rename(columns=industries_renames).drop(columns=industries_unused)
# the country label is forward-filled onto the rows below it
industries_long = industries_long.assign(country=industries_long['country'].ffill())
industries_long = industries_long.query('variable.isin(["Number of establishments", "Number of rooms", "Number of bed-places", "Occupancy rate / rooms", "Occupancy rate / bed-places", "Average length of stay"])')
industries_long = industries_long.melt(
    id_vars=['country', 'variable'],
    var_name='year',
    value_name='inbounds',
)
# '..' is turned into NaN
industries_long = industries_long.replace({'inbounds': {'..': np.nan}})

df_industries = industries_long

In [40]:
# Pivot to wide format: one row per (country, year), one column per accommodation indicator.
industries_indicator_names = {
    "Number of establishments": 'establishments_number',
    "Number of rooms": 'rooms_number',
    "Number of bed-places": 'beds_number',
    "Occupancy rate / rooms": 'rooms_occupancy_rate',
    "Occupancy rate / bed-places": 'beds_occupancy_rate',
    "Average length of stay": 'avg_length_stay',
}
industries_wide = df_industries.pivot(index=['country', 'year'], columns='variable', values='inbounds')
df_industries = industries_wide.rename(columns=industries_indicator_names).reset_index()

In [41]:
# Show the pivoted `df_industries` frame as the cell output.
df_industries

variable,country,year,avg_length_stay,beds_number,establishments_number,rooms_number,beds_occupancy_rate,rooms_occupancy_rate
0,AFGHANISTAN,1995,,,,,,
1,AFGHANISTAN,1996,,,,,,
2,AFGHANISTAN,1997,,,,,,
3,AFGHANISTAN,1998,,,,,,
4,AFGHANISTAN,1999,,,,,,
...,...,...,...,...,...,...,...,...
6239,ZIMBABWE,2018,3.0,12772.0,96.0,6483.0,39.0,53.0
6240,ZIMBABWE,2019,3.0,13158.0,97.0,6676.0,32.0,44.0
6241,ZIMBABWE,2020,3.0,14551.0,95.0,7217.0,15.0,19.0
6242,ZIMBABWE,2021,3.0,14551.0,95.0,7217.0,18.0,27.0


In [42]:
# df_industries.to_excel('data/processed/accomodation_variables.xlsx')

# Age Segmentation

In [2]:
age_segmentation_path = 'data/raw/age_segmentation_data.xlsx'

# 'Estimates' sheet: columns C and K plus the block L:AF, skipping the first 16 rows.
estimates_raw = pd.read_excel(
    age_segmentation_path,
    sheet_name='Estimates',
    skiprows=16,
    usecols='C, K, L:AF',
)
estimates_renames = {
    'Region, subregion, country or area *': 'country',
    'Year': 'year',
}
df_age_segmentation = estimates_raw.rename(columns=estimates_renames)

# Discard the first 1732 rows of the sheet.
estimates_leading_rows = df_age_segmentation.index[:1732]
df_age_segmentation = df_age_segmentation.drop(index=estimates_leading_rows)

In [3]:
# 'Medium variant' sheet: same column selection as the 'Estimates' sheet.
medium_variant_raw = pd.read_excel(
    age_segmentation_path,
    sheet_name='Medium variant',
    skiprows=16,
    usecols='C, K, L:AF',
)
medium_variant_renames = {
    'Region, subregion, country or area *': 'country',
    'Year': 'year',
}
df_age = medium_variant_raw.rename(columns=medium_variant_renames)

# Discard the first 1742 rows of the sheet.
medium_variant_leading_rows = df_age.index[:1742]
df_age = df_age.drop(index=medium_variant_leading_rows)

In [4]:
# Stack the 'Estimates' rows on top of the 'Medium variant' rows.
age_frames = [df_age_segmentation, df_age]
df = pd.concat(age_frames)

In [5]:
# Continental and sub-regional aggregate rows that must not be kept as countries.
# Every entry needs its own comma: adjacent string literals are silently concatenated
# by Python ('Central America' 'Eastern Asia' -> 'Central AmericaEastern Asia'), which
# previously left both of those aggregates in the data.
countries_to_drop = [
    'AFRICA', 'EUROPE', 'ASIA', 'LATIN AMERICA AND THE CARIBBEAN', 'NORTHERN AMERICA', 'OCEANIA',
    'Northern Europe', 'Northern Africa', 'Western Africa', 'Western Asia', 'Western Europe',
    'Southern Africa', 'Southern Asia', 'Southern Europe', 'South America', 'Central Asia',
    'Central America', 'Eastern Asia', 'South-Eastern Asia', 'Eastern Europe', 'Eastern Africa',
]

# .copy() yields an independent frame, so columns can be added to it without writing
# into a slice of `df`.
df_filtered = df[~df['country'].isin(countries_to_drop)].copy()

In [6]:
# Work on an independent copy so the new columns are not written into a slice of `df`.
df_filtered = df_filtered.copy()

# Every column other than the identifiers is a 5-year age band, kept in the left-to-right
# order of the source sheet. The derived columns are excluded too, so the cell can be re-run.
non_age_columns = ['country', 'year', 'total', 'seniors', 'senior_%']
age_columns = [col for col in df_filtered.columns if col not in non_age_columns]
senior_columns = age_columns[age_columns.index('65-69'):]

# Sum every loaded age band. The previous hand-written sums stopped at '85-89', leaving the
# older bands loaded by `usecols` out of both the total and the senior head-count.
# NOTE(review): assumes every band to the right of '85-89' is an older age group - confirm
# against the sheet.
# skipna=False keeps the NaN propagation of the original `+` chain.
df_filtered['total'] = df_filtered[age_columns].sum(axis=1, skipna=False)
df_filtered['seniors'] = df_filtered[senior_columns].sum(axis=1, skipna=False)
df_filtered['senior_%'] = df_filtered['seniors'] / df_filtered['total'] * 100

In [7]:
# Multiply only the head-count columns by 1,000
# (presumably the source is expressed in thousands - TODO confirm).
# Multiplying the whole frame, as before, also inflated the 'senior_%' percentage by 1,000
# and repeated every 'country' string 1,000 times before it was restored.
# `year` is no longer round-tripped through `* 1000 / 1000`, so it keeps its original dtype.
unscaled_columns = ['country', 'year', 'senior_%']
count_columns = [col for col in df_filtered.columns if col not in unscaled_columns]

df = df_filtered.copy()
df[count_columns] = df[count_columns] * 1000

In [10]:
# Keep the years strictly between 1994 and 2025.
before_2025 = df.query('year < 2025')
age = before_2025.query('year > 1994')

In [11]:
# Persist the age table (the DataFrame index is written as well).
age_output = 'data/processed/age_variable.xlsx'
age.to_excel(age_output)

# Safety Index

In [344]:
# Load one Global Peace Index CSV per year (2008-2023) and stack them with a single concat.
# Collecting the frames first avoids re-copying the accumulated data on every iteration,
# avoids concatenating onto an empty DataFrame, and no longer overwrites the shared `df` name.
gpi_frames = [
    pd.read_csv(f'./data/raw/safety vars/GPI_2023_{gpi_year}-1.csv')
    for gpi_year in range(2008, 2024)
]
gpi = pd.concat(gpi_frames)

# Drop every 'band_*' column, give the overall index a descriptive name and replace the
# value 'Czech Republic' with 'Czechia' wherever it appears.
gpi_band_columns = [col for col in gpi.columns if 'band_' in col]
gpi = (
    gpi
    .drop(columns=gpi_band_columns)
    .rename(columns={'index_over': 'peace_index'})
    .replace('Czech Republic', 'Czechia')
)

In [357]:
# Mean peace index for 2023 over rows with a positive score.
gpi_2023 = gpi.query('year == 2023')
gpi_2023_scored = gpi_2023.query('peace_index > 0')
gpi_2023_scored['peace_index'].mean()

2.070771566374233

In [236]:
# Load one Global Terrorism Index CSV per year (2011-2023) and stack them with a single concat.
# Collecting the frames first avoids re-copying the accumulated data on every iteration,
# avoids concatenating onto an empty DataFrame, and no longer overwrites the shared `df` name.
gti_frames = [
    pd.read_csv(f'./data/raw/safety vars/GTI_{gti_year}_24.csv')
    for gti_year in range(2011, 2024)
]
gti = pd.concat(gti_frames)

# Drop every 'band_*' column and give the overall index a descriptive name.
gti_band_columns = [col for col in gti.columns if 'band_' in col]
gti = (
    gti
    .drop(columns=gti_band_columns)
    .rename(columns={'index_over': 'terrorism_index'})
)

In [238]:
# Show the stacked `gti` frame as the cell output.
gti

Unnamed: 0,code,name,year,rank,terrorism_index,index_inci,index_fata,index_inju,index_prop
0,AFG,Afghanistan,2011,3,8.784,580,1561,2528,132
1,AGO,Angola,2011,66,1.741,0,0,0,0
2,ALB,Albania,2011,92,0.423,0,0,0,0
3,ARE,United Arab Emirates,2011,114,0.000,0,0,0,0
4,ARG,Argentina,2011,60,1.962,2,0,0,0
...,...,...,...,...,...,...,...,...,...
158,XKO,Kosovo,2023,63,1.218,1,1,1,0
159,YEM,Yemen,2023,23,4.951,22,41,46,0
160,ZAF,South Africa,2023,89,0.000,0,0,0,0
161,ZMB,Zambia,2023,89,0.000,0,0,0,0


In [None]:
# gpi.to_excel('./data/raw/safety vars/global_peace_index.xlsx')
# gti.to_excel('./data/raw/safety vars/global_terrorism_index.xlsx')

In [239]:
# Combine the peace index and the terrorism index in one frame: outer join on
# (name, year), then rename 'name' to 'country' and sort by year.
peace_scores = gpi[['name', 'year', 'peace_index']]
terrorism_scores = gti[['name', 'year', 'terrorism_index']]
safety = peace_scores.merge(terrorism_scores, on = ['name', 'year'], how = 'outer')
safety = safety.rename(columns = {'name': 'country'}).sort_values(by = 'year')

In [240]:
# Sanity check on the merge: among rows after 2010 with a positive peace index,
# list those where either index is missing.
scored_after_2010 = safety.query('year > 2010').query('peace_index > 0')
result = scored_after_2010[['peace_index', 'terrorism_index']].isna().any(axis=1)
rows_with_missing_values = scored_after_2010[result]
rows_with_missing_values

Unnamed: 0,country,year,peace_index,terrorism_index


In [None]:
# Persist the combined safety table (the DataFrame index is written as well).
safety_output = 'data/raw/safety vars/safety_variables.xlsx'
safety.to_excel(safety_output)

# Michelin Stars by Country

In [388]:
# Michelin-star restaurant counts by country.
michelin_path = 'data/raw/michelin-stars-by-country---michelin-star-restaurants-by-country-2024.csv'
df_mich = pd.read_csv(michelin_path)

In [389]:
# First five rows of the Michelin table.
df_mich.iloc[:5]

Unnamed: 0,country,MichelinStarsTotalRestaurantsWithMichelinStars2023,MichelinStars1StarRestaurantsTotal2023,MichelinStars2StarRestaurantsTotal2023,MichelinStars3StarRestaurantsTotal2023,MichelinStarsGreenStarRestaurantsTotal2023
0,China,122,102,16.0,4.0,2.0
1,United States,223,177,33.0,13.0,13.0
2,Brazil,13,10,3.0,,
3,Japan,414,321,72.0,21.0,27.0
4,Vietnam,4,4,,,


In [391]:
# Shorten the name of the total-starred-restaurants column.
michelin_renames = {'MichelinStarsTotalRestaurantsWithMichelinStars2023': 'michelin_stars'}
df_mich = df_mich.rename(columns = michelin_renames)

In [393]:
# Persist the Michelin table (the DataFrame index is written as well).
michelin_output = 'data/processed/michelin_variable.xlsx'
df_mich.to_excel(michelin_output)

# World Heritage Sites UNESCO

In [383]:
# World Heritage sites list, restricted to the columns used below.
wh_columns = [
    'unique_number', 'name_en', 'short_description_en', 'longitude', 'latitude',
    'category', 'states_name_en', 'region_en', 'iso_code',
]
wh_raw = pd.read_excel('data/raw/whc-sites-2023.xls')
df_wh = wh_raw[wh_columns]

In [384]:
# A site shared by several states lists them comma-separated in 'states_name_en';
# split that text into a list per row.
state_lists = df_wh['states_name_en'].str.split(',')
df_wh['country'] = state_lists

# One row per (site, country): repeat each row once per list element and renumber the index.
wh_exploded = df_wh.explode('country')
df_wh = wh_exploded.reset_index(drop=True)

In [385]:
# Show the exploded `df_wh` frame as the cell output.
df_wh

Unnamed: 0,unique_number,name_en,short_description_en,longitude,latitude,category,states_name_en,region_en,iso_code,country
0,230,Cultural Landscape and Archaeological Remains ...,<p>The cultural landscape and archaeological r...,67.825250,34.846940,Cultural,Afghanistan,Asia and the Pacific,af,Afghanistan
1,234,Minaret and Archaeological Remains of Jam,"<p>The 65m-tall Minaret of Jam is a graceful, ...",64.515889,34.396417,Cultural,Afghanistan,Asia and the Pacific,af,Afghanistan
2,1590,Historic Centres of Berat and Gjirokastra,<p>Berat and Gjirokastra are inscribed as rare...,20.140833,40.074167,Cultural,Albania,Europe and North America,al,Albania
3,1563,Butrint,"<p>Inhabited since prehistoric times, Butrint ...",20.020950,39.745732,Cultural,Albania,Europe and North America,al,Albania
4,111,Al Qal'a of Beni Hammad,<p>In a mountainous site of extraordinary beau...,4.786840,35.818440,Cultural,Algeria,Arab States,dz,Algeria
...,...,...,...,...,...,...,...,...,...,...
1295,2435,Silk Roads: Zarafshan-Karakum Corridor,The Zarafshan-Karakum Corridor is a key sectio...,69.685558,39.441489,Cultural,"Tajikistan,Turkmenistan,Uzbekistan",Asia and the Pacific,"tj,tm,uz",Tajikistan
1296,2435,Silk Roads: Zarafshan-Karakum Corridor,The Zarafshan-Karakum Corridor is a key sectio...,69.685558,39.441489,Cultural,"Tajikistan,Turkmenistan,Uzbekistan",Asia and the Pacific,"tj,tm,uz",Turkmenistan
1297,2435,Silk Roads: Zarafshan-Karakum Corridor,The Zarafshan-Karakum Corridor is a key sectio...,69.685558,39.441489,Cultural,"Tajikistan,Turkmenistan,Uzbekistan",Asia and the Pacific,"tj,tm,uz",Uzbekistan
1298,593,Mosi-oa-Tunya / Victoria Falls,<p>These are among the most spectacular waterf...,25.855390,-17.924530,Natural,"Zambia,Zimbabwe",Africa,"zm,zw",Zambia


In [386]:
# Number of non-null site names per country, stored in the 'wh_sites' column.
site_names = df_wh[['name_en', 'country']]
sites = site_names.groupby('country').count().reset_index()
sites = sites.rename(columns = {'name_en': 'wh_sites'})

In [387]:
# Attach the per-country site count to every site row (left join on 'country').
df_wh = df_wh.merge(sites, on = 'country', how = 'left')
df_wh

Unnamed: 0,unique_number,name_en,short_description_en,longitude,latitude,category,states_name_en,region_en,iso_code,country,wh_sites
0,230,Cultural Landscape and Archaeological Remains ...,<p>The cultural landscape and archaeological r...,67.825250,34.846940,Cultural,Afghanistan,Asia and the Pacific,af,Afghanistan,2
1,234,Minaret and Archaeological Remains of Jam,"<p>The 65m-tall Minaret of Jam is a graceful, ...",64.515889,34.396417,Cultural,Afghanistan,Asia and the Pacific,af,Afghanistan,2
2,1590,Historic Centres of Berat and Gjirokastra,<p>Berat and Gjirokastra are inscribed as rare...,20.140833,40.074167,Cultural,Albania,Europe and North America,al,Albania,4
3,1563,Butrint,"<p>Inhabited since prehistoric times, Butrint ...",20.020950,39.745732,Cultural,Albania,Europe and North America,al,Albania,4
4,111,Al Qal'a of Beni Hammad,<p>In a mountainous site of extraordinary beau...,4.786840,35.818440,Cultural,Algeria,Arab States,dz,Algeria,7
...,...,...,...,...,...,...,...,...,...,...,...
1295,2435,Silk Roads: Zarafshan-Karakum Corridor,The Zarafshan-Karakum Corridor is a key sectio...,69.685558,39.441489,Cultural,"Tajikistan,Turkmenistan,Uzbekistan",Asia and the Pacific,"tj,tm,uz",Tajikistan,4
1296,2435,Silk Roads: Zarafshan-Karakum Corridor,The Zarafshan-Karakum Corridor is a key sectio...,69.685558,39.441489,Cultural,"Tajikistan,Turkmenistan,Uzbekistan",Asia and the Pacific,"tj,tm,uz",Turkmenistan,5
1297,2435,Silk Roads: Zarafshan-Karakum Corridor,The Zarafshan-Karakum Corridor is a key sectio...,69.685558,39.441489,Cultural,"Tajikistan,Turkmenistan,Uzbekistan",Asia and the Pacific,"tj,tm,uz",Uzbekistan,7
1298,593,Mosi-oa-Tunya / Victoria Falls,<p>These are among the most spectacular waterf...,25.855390,-17.924530,Natural,"Zambia,Zimbabwe",Africa,"zm,zw",Zambia,1


In [233]:
# Persist the World Heritage table (the DataFrame index is written as well).
world_heritage_output = 'data/processed/world_heritage_variable.xlsx'
df_wh.to_excel(world_heritage_output)