In [2]:
import pandas as pd
import altair as alt
from vega_datasets import data

In [3]:
# Read columns A, B and E..AE of the tourism workbook and give the first two
# columns readable names in the same expression.
df = (
    pd.read_excel('./data/unwto-tourism-industries-data.xlsx', usecols='A,B,E:AE')
    .rename(columns={'Basic data and indicators': 'Country', 'Unnamed: 1': 'Statistics'})
)

# Per-country coordinates table; merged onto `df` further down by country name.
df_coords = pd.read_csv('./data/GoogleDevCountryGeoCoords.csv')

In [4]:
# The country name is copied from the first row of every 8-row block onto the
# rest of that block.
# NOTE(review): this relies on `df` having the default 0..n-1 index and on
# every country occupying exactly ROWS_PER_COUNTRY rows -- confirm against the
# workbook layout.
ROWS_PER_COUNTRY = 8
for block_start in range(0, len(df), ROWS_PER_COUNTRY):
    # Clamp to the last existing row so a short trailing block can never make
    # .loc append new, empty rows to the frame.
    block_end = min(block_start + ROWS_PER_COUNTRY, len(df)) - 1
    df.loc[block_start:block_end, 'Country'] = df.loc[block_start, 'Country']

# Lower-cased keys on both sides make the join case-insensitive.
df['Country'] = df['Country'].str.title()
df['Country_key'] = df['Country'].str.lower()
df_coords['Country_key2'] = df_coords['name'].str.lower()

df_merged = df.merge(right=df_coords, how='left', left_on='Country_key', right_on='Country_key2')

# Prefer the coordinate file's spelling of the country name, but keep the
# title-cased source name for countries that found no match. Assigning `name`
# directly would blank those countries out, because `name` is NaN for every
# unmatched row of the left join.
df_merged['Country'] = df_merged['name'].fillna(df_merged['Country'])
df_merged = df_merged.drop(columns=['Country_key', 'Country_key2', 'country', 'name'])

In [5]:
# Sanity check: list the country names in `df` that have no counterpart in the
# coordinates table, so naming mismatches between the two sources are visible.

df_merge_test = df.merge(
    right=df_coords,
    how='left',
    left_on='Country_key',
    right_on='Country_key2',
)

# True for every row of `df` whose lower-cased key is missing from df_coords.
unmatched_mask = ~df['Country_key'].isin(df_coords['Country_key2'])
out = df[unmatched_mask]
out['Country'].unique()

array(['Bonaire', 'Curaçao', 'Eswatini', 'Macao', 'Montenegro', 'Reunion',
       'Saba', 'Sao Tome And Principe', 'Sint Eustatius',
       'Sint Maarten (Dutch Part)', 'South Sudan', 'State Of Palestine',
       'United States Virgin Islands', nan], dtype=object)

In [150]:
# Keep only the "Number of bed-places" rows. The filter is evaluated once and
# .drop() returns a new frame, so nothing below mutates a slice of df_merged
# (the in-place drop on a filtered slice is what raised SettingWithCopyWarning).
bed_places_rows = (
    df_merged[df_merged['Statistics'] == 'Number of bed-places']
    .drop(columns='Statistics')
)

# Long format for the line charts: one row per (Country, Year).
df_bed_places = pd.melt(
    bed_places_rows.drop(columns=['latitude', 'longitude']),
    id_vars='Country',
    var_name='Year',
    value_name='Number of bed-places',
)
# '..' cells are replaced by the string '0' so they plot at zero.
# NOTE(review): '..' looks like the workbook's "no data" marker -- confirm.
df_bed_places['Number of bed-places'] = df_bed_places['Number of bed-places'].replace('..', '0')

# Long format for the map: same reshaping but keeping the coordinates.
# dropna() removes every row with a missing value in any column (for example
# countries without coordinates); `dataAvailable` is False where the cell
# still holds the '..' marker.
df_bed_places_coords = (
    pd.melt(
        bed_places_rows,
        id_vars=['Country', 'latitude', 'longitude'],
        var_name='Year',
        value_name='Number of bed-places',
    )
    .dropna()
    .assign(dataAvailable=lambda frame: frame['Number of bed-places'] != '..')
)
df_bed_places.sample(50)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,Country,Year,Number of bed-places
5344,Uruguay,2018,26233
4847,Russia,2016,1046000
4088,Georgia,2013,32165
4834,Pakistan,2016,0
1651,India,2002,170962
504,Dominican Republic,1997,96263
4868,Somalia,2016,0
687,Belarus,1998,24920
2058,Cuba,2004,85605
1279,Russia,2000,346100


In [86]:
# this goes at the top of the notebook
def to_altair(x, path='chart.json'):
    """Write a DataFrame to a JSON file and return the file's path.

    The returned path can be handed to ``alt.Chart`` in place of the frame
    itself, so the chart references the file rather than embedding the rows.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame to export (this is ``self`` when called as ``df.to_altair()``).
    path : str, optional
        Destination file. Defaults to ``'chart.json'``, the name this helper
        has always used. Pass a distinct path for every distinct frame:
        exporting two frames to the same path overwrites the first file, and
        every chart that references that path will then see the second frame.

    Returns
    -------
    str
        ``path``, unchanged.
    """
    # One JSON object per row; dates are written as ISO-8601 strings.
    x.to_json(path, orient='records', date_format='iso')
    return path
pd.DataFrame.to_altair = to_altair # attach to DataFrame objects

# Dropdown choices: every distinct country in the bed-places table.
countries = list(df_bed_places['Country'].unique())

# Single-country selection driven by a dropdown; starts on the second entry.
country_checkbox = alt.binding_select(options=countries)
country_selector = alt.selection_single(
    fields=['Country'],
    init={'Country': countries[1]},
    bind=country_checkbox,
    name='Country'
)

# Hover selection: picks the nearest year under the mouse. With empty='none'
# nothing is highlighted until the mouse is over the chart.
mouseSelection = alt.selection_single(encodings=['x'], nearest=True, on='mouseover', empty='none')
opacityCondition = alt.condition(mouseSelection, alt.value(1), alt.value(0))

# Click selection on Country. It is defined in this cell so the cell runs on a
# fresh kernel; it used to rely on the name being left over from a later cell.
click_selector = alt.selection_multi(fields=['Country'])

# Export the data once and let every layer reference the same file.
bed_places_url = df_bed_places.to_altair()

# Line of bed-places per year for the country chosen in the dropdown.
bedPlaceChart = alt.Chart(bed_places_url).mark_line().encode(
    x=alt.X('Year:O'),
    y=alt.Y('Number of bed-places:Q'),
    color=alt.Color('Country:N'),
).transform_filter(
    country_selector
).add_selection(
    country_selector,
    click_selector
).properties(
    width=600,
    height=400
)

# Point marker that only becomes visible for the hovered year.
interactionDots = alt.Chart(bed_places_url).mark_point(size=90).encode(
    x=alt.X('Year:O'),
    y=alt.Y('Number of bed-places:Q'),
    opacity=opacityCondition
).transform_filter(
    country_selector,
).transform_filter(
    click_selector,
)

# Dashed rule at the hovered year; this layer owns the hover selection.
verticalLine = alt.Chart(bed_places_url).mark_rule(size=2, color='black', strokeDash=[15, 15]).encode(
    x=alt.X('Year:O'),
    y=alt.Y('Number of bed-places:Q'),
    opacity=opacityCondition
).transform_filter(
    country_selector,
).add_selection(
    mouseSelection
)

# Value label next to the hovered point (reuses the dots' data and filters).
textLabels = interactionDots.mark_text(
    align='left',
    fontSize=14,
    dx=7,
).encode(
    alt.Text('Number of bed-places:Q', formatType='number'),
    opacity=opacityCondition
)

bedPlaceChart + interactionDots + verticalLine + textLabels

In [145]:
# this goes at the top of the notebook
def to_altair(x, path='chart.json'):
    """Write a DataFrame to a JSON file and return the file's path.

    The returned path can be handed to ``alt.Chart`` in place of the frame
    itself, so the chart references the file rather than embedding the rows.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame to export (this is ``self`` when called as ``df.to_altair()``).
    path : str, optional
        Destination file. Defaults to ``'chart.json'``, the name this helper
        has always used. Pass a distinct path for every distinct frame:
        exporting two frames to the same path overwrites the first file, and
        every chart that references that path will then see the second frame.

    Returns
    -------
    str
        ``path``, unchanged.
    """
    # One JSON object per row; dates are written as ISO-8601 strings.
    x.to_json(path, orient='records', date_format='iso')
    return path
pd.DataFrame.to_altair = to_altair # attach to DataFrame objects

# ---- Data files -------------------------------------------------------------
# Charts reference their data by file name and the file is read when the chart
# is rendered, so each table needs its own file. to_altair() writes to
# 'chart.json'; exporting the coordinates table through it as well would
# overwrite the line-chart data, so that table is written to a separate file.
bed_places_url = df_bed_places.to_altair()
bed_places_coords_url = 'chart_coords.json'
df_bed_places_coords.to_json(bed_places_coords_url, orient='records', date_format='iso')

# ---- Selections -------------------------------------------------------------
# Dropdown choices: every distinct country in the bed-places table. Kept under
# its own name because `countries` is used for the map geometry further down.
country_names = list(df_bed_places['Country'].unique())

country_checkbox = alt.binding_select(options=country_names)
country_selector = alt.selection_single(
    fields=['Country'],
    init={'Country': country_names[1]},
    bind=country_checkbox,
    name='Country'
)

# Hover selection: nearest year under the mouse; nothing highlighted until the
# mouse is over the chart (empty='none').
mouseSelection = alt.selection_single(encodings=['x'], nearest=True, on='mouseover', empty='none')
opacityCondition = alt.condition(mouseSelection, alt.value(1), alt.value(0))

# Click selection on Country, shared between the map and the line chart.
click_selector = alt.selection_multi(fields=['Country'])

# ---- Line chart layers ------------------------------------------------------
# Lines for the dropdown country and/or the countries picked by clicking.
bedPlaceChart = alt.Chart(bed_places_url).mark_line().encode(
    x=alt.X('Year:O'),
    y=alt.Y('Number of bed-places:Q'),
    color=alt.Color('Country:N'),
).transform_filter(
    country_selector | click_selector
).add_selection(
    country_selector,
    click_selector
).properties(
    width=600,
    height=400
)

# Point markers that only become visible for the hovered year.
interactionDots = alt.Chart(bed_places_url).mark_point(size=90).encode(
    x=alt.X('Year:O'),
    y=alt.Y('Number of bed-places:Q'),
    color=alt.Color('Country:N'),
    opacity=opacityCondition
).transform_filter(
    country_selector | click_selector
)

# Dashed rule at the hovered year; this layer owns the hover selection.
verticalLine = alt.Chart(bed_places_url).mark_rule(size=2, color='black', strokeDash=[15, 15]).encode(
    x=alt.X('Year:O'),
    y=alt.Y('Number of bed-places:Q'),
    opacity=opacityCondition
).transform_filter(
    country_selector | click_selector
).add_selection(
    mouseSelection
)

# Value labels next to the hovered points (reuses the dots' data and filters).
textLabels = interactionDots.mark_text(
    align='left',
    fontSize=14,
    dx=7,
).encode(
    alt.Text('Number of bed-places:Q', formatType='number'),
    opacity=opacityCondition
)

# ---- Map layers -------------------------------------------------------------
countries_url = data.world_110m.url
countries = alt.topo_feature(countries_url, 'countries')

# Year slider that filters which year's values the map circles show.
slider = alt.binding_range(min=1995, max=2021, step=1, name='Year: ')
year_selector = alt.selection_single(
    name='year selector',
    fields=['Year'],
    bind=slider,
    init={'Year': 2021}
)

# Plain background map.
worldMap = alt.Chart(countries).mark_geoshape(
    fill='#F2F3F4',
    stroke='white',
    strokeWidth=0.5
).properties(
    width=900,
    height=500,
).project(
    'naturalEarth1'
)

# One circle per country for the selected year, coloured by bed-places;
# clicked countries are drawn larger and fully opaque.
circles = alt.Chart(bed_places_coords_url).mark_circle(size=100).encode(
    latitude='latitude:Q',
    longitude='longitude:Q',
    tooltip=['Country:N', 'Year:O', 'Number of bed-places:Q'],
    color='Number of bed-places:Q',
    opacity=alt.condition(click_selector, alt.value(1), alt.value(0.4)),
    size=alt.condition(click_selector, alt.value(200), alt.value(100))
).transform_filter(
    year_selector
).add_selection(
    click_selector,
    year_selector
)

# Light-gray circles for countries with no value in the selected year.
circlesNoData = alt.Chart(bed_places_coords_url).mark_circle(size=100).encode(
    latitude='latitude:Q',
    longitude='longitude:Q',
    tooltip=['Country:N', 'Year:O', 'Number of bed-places:Q'],
    color=alt.value('lightgray'),
    opacity=alt.condition(click_selector, alt.value(1), alt.value(0.4)),
    size=alt.condition(click_selector, alt.value(200), alt.value(150))
).transform_filter(
    year_selector
).transform_filter(
    # Altair expression, not a Python boolean test: `== False` is required here.
    alt.datum.dataAvailable == False
)

# Map on top, line chart underneath.
(worldMap + circles + circlesNoData) & (bedPlaceChart + interactionDots + verticalLine + textLabels)


In [9]:
# Inspect the vega_datasets entry whose .url supplies the map geometry.
data.world_110m

<vega_datasets.core.World_110M at 0x7f90d0c91828>