# Europe

### Import Libraries

In [1]:
import pandas as pd
import seaborn as sns
import plotly.graph_objects as go
from ipywidgets import widgets
import numpy as np 
import seaborn as sns
import geopandas as gpd
import matplotlib.pyplot as plt
from datetime import datetime
from ipywidgets.embed import embed_minimal_html

%matplotlib inline

## Key indicators

**Source: [https://digital-agenda-data.eu/](https://digital-agenda-data.eu/)**

### Key indicators used:
1) Broadband take-up and coverage
- Standard fixed broadband coverage/availability (bb_scov)
- NGA broadband coverage/availability (bb_ngacov)
- Households with a broadband connection (h_broad)
- Households with fixed broadband connection (h_bbfix)

2) Broadband speeds and prices
- Share of fixed broadband subscriptions >= 100 Mbps (bb_speed100)
- Households with no access to Internet at home, because the costs are too high (all households) (h_xcost)

3) Mobile market
- 4G mobile broadband coverage (total) (mbb_ltecov)

4) Internet usage
- Households with access to the Internet at home (h_iacc)
- Individuals who have never used the internet (all individuals) (i_iux)

5) Take up of internet services (all individuals)
- Using online banking (i_iubk)
- Uploading self-created content to be shared (i_iuupl)
- Participating in social networks, over the internet, last 3 months (i_iusnet)
- Looking for a job or sending a job application (i_iujob)
- Doing an online course (i_iuolc)
- Taking part in on-line consultations or voting to define civic or political issues (i_iuvote)

6) eGovernment
- Individuals interacting online with public authorities, last 12 months (i_iugov12)

7) Digital skills
- Individuals with at least basic digital skills (i_dsk_bab)
- Science and technology graduates (st_grad)

8) ICT Specialists
- Individuals who have obtained ICT skills through formal educational institutions (i_skedu)

9) Security and privacy
- Individuals caught a virus or other computer infection resulting in loss of information or time (i_secvir1)
- Security concerns kept individual from ordering or buying goods or services for private use (i_sbgood)

### Preprocessing

In [None]:
key_indicators = pd.read_csv("data/digital-agenda-scoreboard-key-indicators.csv")
# Drop useless columns
key_indicators = key_indicators.drop(['observation', 'flag', 'note'], axis=1)
# Filter out some indicators
my_indicators = ['bb_scov', 'bb_ngacov', 'h_broad', 'h_bbfix', 'bb_speed100', 'h_xcost', 'mbb_ltecov', 'h_iacc', 'i_iux', 'i_iubk', 'i_iuupl', 'i_iusnet', 'i_iujob', 'i_iuolc', 'i_iuvote', 'i_iugov12', 'i_dsk_s_bab', 'i_skedu', 'i_secvir1', 'i_sbgood']
key_indicators = key_indicators[key_indicators['indicator'].isin(my_indicators)]
# Map the indicators' codes to the corresponding name
indicators_mapping = {'bb_scov' : 'Standard fixed broadband coverage/availability',
           "bb_ngacov" : 'NGA broadband coverage/availability',
           'h_broad' : 'Households with a broadband connection',
           'h_bbfix' : 'Households with fixed broadband connection',
           'bb_speed100' : 'Share of fixed broadband subscriptions >= 100 Mbps',
           'h_xcost' : 'Households with no access to Internet at home, because the costs are too high',
           'mbb_ltecov' : '4G mobile broadband coverage',
           'h_iacc' : 'Households with access to the Internet at home',
           'i_iux' : 'Individuals who have never used the internet (all individuals)',
           'i_iubk' : 'Using online banking',
           'i_iuupl' : 'Uploading self-created content to be shared',
           'i_iusnet' : 'Participating in social networks, over the internet, last 3 months',
           'i_iujob' : 'Looking for a job or sending a job application',
           'i_iuolc' : 'Doing an online course',
           'i_iuvote' : 'Taking part in on-line consultations or voting to define civic or political issues',
           'i_iugov12' : 'Individuals interacting online with public authorities, last 12 months',
           'i_dsk_s_bab' : 'Individuals with at least basic digital skills in Software domain',
           'i_skedu' : 'Individuals who have obtained ICT skills through formal educational institutions',
           'i_secvir1' : 'Individuals caught a virus or other computer infection resulting in loss of information or time',
           'i_sbgood' : 'Security concerns kept individual from ordering or buying goods or services for private use',
          }
key_indicators['indicator'] = key_indicators['indicator'].replace(indicators_mapping)

    
# Mapping between 'ref_area' and the name of the country
countries_mapping =  { 'EE' : 'Estonia',
                        'EL' : 'Greece',
                        'ES' : 'Spain',
                        'FI' : 'Finland',
                        'FR' : 'France',
                        'HR' : 'Croatia',
                        'HU' : 'Hungary',
                        'IE' : 'Ireland',
                        'IS' : 'Iceland',
                        'DE' : 'Deutschland',
                        'CZ' : 'Czech Rep.',
                        'DK' : 'Denmark',
                        'IT' : 'Italy',
                        'LT' : 'Lithuania',
                        'LU' : 'Luxembourg',
                        'LV' : 'Latvia',
                        'MT' : 'Malta',
                        'NL' : 'Netherlands',
                        'NO' : 'Norway',
                        'PT' : 'Portugal',
                        'PL' : 'Poland',
                        'EU' : 'Europe',
                        'RO' : 'Romania',
                        'SE' : 'Sweden',
                        'SI' : 'Slovenia',
                        'SK' : 'Slovakia',
                        'UK' : 'United Kingdom',
                        'AT' : 'Austria',
                        'BE' : 'Belgium',
                        'BG' : 'Bulgaria',
                        'CY' : 'Cyprus',
                        'CH' : 'Switzerland'
                     }
key_indicators['ref_area'] = key_indicators['ref_area'].replace(countries_mapping)

# Drop the index column
key_indicators.reset_index(drop=True, inplace=True)

# In some cases the types in 'time_period' are mixed ==> cast all to strings 
key_indicators['time_period'] = key_indicators['time_period'].apply(lambda j: str(j))
key_indicators['time_period'] = key_indicators['time_period'].apply(lambda j: j[:4])
def convert_to_date(j):
   if len(j.split('-')) == 1:
      return str(datetime.strptime(j, '%Y'))
   elif len(j.split('-')) == 2:
      return str(datetime.strptime(j, '%Y-%m'))
   else:
      return str(datetime.strptime(j, '%Y-%m-%d'))
# key_indicators['time_period'] = key_indicators['time_period'].apply(lambda j: convert_to_date(j))

In [None]:
labels = list(key_indicators['indicator'].unique())
key_indicators.head() 


### Plot

In [None]:
#key_indicators['indicator'].unique()
key_indicators['time_period'].unique()

In [None]:
countries_list = [country for country in countries_mapping.values()]

for country in countries_list:
        # Check if there are multiple breakdowns: if so, keep only the 'total' one
        if len(key_indicators['breakdown'].unique()) > 1:
            if len(key_indicators['unit_measure'].unique()) > 1:
                key_indicators = key_indicators.loc[(key_indicators['breakdown'].str.contains('total')) & (key_indicators['unit_measure'] == 'pc_ind')]
            else:
                key_indicators = key_indicators.loc[(data['breakdown'].str.contains('total'))]
                
#key_indicators.loc[key_indicators['time_period'] == '2019'].to_csv('indicators.csv')

In [None]:
year1 = widgets.IntSlider(
    value=2019,
    min=2002,
    max=2019,
    step=1,
    description='Year:',
    continuous_update=True
)

year  = widgets.Dropdown(
    options=list(key_indicators['time_period'].unique()),
    value='2019',
    description='Years: ',
)


container = widgets.HBox([year])

indicators_menu = widgets.Dropdown(
    options=list(key_indicators['indicator'].unique()),
    value='Uploading self-created content to be shared',
    description='Indicators: ',
)

trace1 = go.Bar(x=key_indicators['ref_area'], y = key_indicators['value'], opacity=0.75, textposition='auto')
layout=go.Layout(
                        title=dict(
                            text='Europe digital divide key indicators'
                        ),
                        barmode='overlay',
                        xaxis={'categoryorder':'total descending'}
                    )
g = go.FigureWidget(data=trace1,
                    layout = layout
                    )

In [None]:
def response(change):
    filter_list = [i and j for i, j in
                    zip(key_indicators['time_period'] == year.value,
                    key_indicators['indicator'] == indicators_menu.value)]
    temp_df = key_indicators[filter_list]
    x1 = temp_df['ref_area']
    y1 = temp_df['value']
    #x2 = temp_df['dep_delay']
    with g.batch_update():
        g.data[0].x = x1
        g.data[0].y = y1
        #g.data[1].x = x2
        g.layout.barmode = 'overlay'
        g.layout.xaxis.title = 'Area'
        g.layout.yaxis.title = 'Percentage'


indicators_menu.observe(response, names="value")
year.observe(response, names="value")

In [None]:
container2 = widgets.HBox([indicators_menu])
xd = widgets.VBox([container,
              container2,
              g])
xd

In [None]:
embed_minimal_html('export.html', views=[xd], title='Widgets')


### Experiment (no widget)

In [None]:
 labels = ['Standard fixed broadband coverage/availability',
          'NGA broadband coverage/availability',
          'Households with a broadband connection',
          'Households with fixed broadband connection',
          'Share of fixed broadband subscriptions >= 100 Mbps',
          'Households with no access to Internet at home, because the costs are too high',
          '4G mobile broadband coverage',
          'Households with access to the Internet at home',
          'Individuals who have never used the internet',
          'Individuals using online banking',
          'Individuals uploading self-created content to be shared',
          'Individuals participating in social networks, over the internet, last 3 months',
          'Individuals looking for a job or sending a job application',
          'Individuals doing an online course',
          'Individuals taking part in on-line consultations or voting to define civic or political issues',
          'Individuals interacting online with public authorities, last 12 months',
          'Individuals with at least basic digital skills in Software domain',
          'Individuals who have obtained ICT skills through formal educational institutions']

In [None]:
def generate_rgb_colors():
    palette = sns.color_palette('Paired') + sns.color_palette('Spectral') + sns.color_palette('bright') + sns.color_palette('Dark2')
    return palette

countries_list = [country for country in countries_mapping.values()]

# Get the x,y data of all the countries given a label
def get_data(label):
    x_data = []
    y_data = []
    for country in countries_list:
        data = key_indicators.loc[(key_indicators['indicator'] == label) & (key_indicators['ref_area'] == country)]
        # Check if there are multiple breakdowns: if so, keep only the 'total' one
        if len(data['breakdown'].unique()) > 1:
            if len(data['unit_measure'].unique()) > 1:
                data = data.loc[(data['breakdown'].str.contains('total')) & (data['unit_measure'] == 'pc_ind')]
            else:
                data = data.loc[(data['breakdown'].str.contains('total'))]
        data = data.sort_values('time_period')
        x_data.append(list(data['time_period']))
        y_data.append(list(data['value']))
    return x_data, y_data

current_indicator = labels[0]

# Build Figure
fig = go.FigureWidget()
palette = generate_rgb_colors() # Obtain n different hex colors, where n is the number of countries

for index, country in enumerate(countries_list):
    current_data = key_indicators.loc[(key_indicators['indicator'] == current_indicator) & (key_indicators['ref_area'] == country)]
    # Check if there are multiple breakdowns: if so, keep only the 'total' one
    if len(current_data['breakdown'].unique()) > 1:
        current_data = current_data.loc[current_data['breakdown'].str.contains('total')]
    current_data = current_data.sort_values('time_period')
    #if (country == 'Italy') or (country == 'Europe'):
    visible = True
    #else:
        #visible = 'legendonly'
    fig.add_trace(go.Bar(
        x=list(current_data['ref_area']),
        y=list(current_data['value']),
        name=country,
        visible=visible,
        textposition='auto',
        text=country,
        color="time_period"))

# Create one button for each label
buttons = []
for index, label in enumerate(labels):
    x_data, y_data = get_data(label)
    btn = dict(label=label, method='update', args=[{'x': x_data, 'y': y_data}])
    buttons.append(btn)


fig.update_layout(width=1200, height=600, title='Select an indicator and compare countries',
                  xaxis_title='Country',
                  yaxis_title='Percentage',
                  legend_title_text='Year',
                  showlegend=True,
                  updatemenus=[
                    dict(active=0, buttons=buttons, xanchor='right', yanchor='top', showactive=True, x=1.1, y=1.17)
                  ], xaxis={'categoryorder':'total descending'})

fig.show()

## DESI

**Source: [https://digital-agenda-data.eu/datasets/desi/](https://digital-agenda-data.eu/datasets/desi/)**

### DESI indicators used: https://digital-agenda-data.eu/datasets/desi/indicators
#### Digital Economy and Society Index (desi)
DESI overall index, calculated as the weighted average of the five main DESI dimensions:
- 1 Connectivity (25%) 
- 2 Human Capital (25%) 
- 3 Use of Internet (15%)
- 4 Integration of Digital Technology (20%)
- 5 Digital Public Services (15%) 

#### DESI Dimensions 
#### DESI Sub-dimensions 
#### DESI Individual Indicators 

In [2]:
desi = pd.read_csv("data/DESI.csv")
# Drop useless columns
desi = desi.drop(['observation', 'flag', 'note'], axis=1)
desi = desi.loc[desi['indicator'] == 'desi']


# Mapping between 'ref_area' and the name of the country
countries_mapping =  { 'EE' : 'Estonia',
                        'EL' : 'Greece',
                        'ES' : 'Spain',
                        'FI' : 'Finland',
                        'FR' : 'France',
                        'HR' : 'Croatia',
                        'HU' : 'Hungary',
                        'IE' : 'Ireland',
                        'IS' : 'Iceland',
                        'DE' : 'Deutschland',
                        'CZ' : 'Czech Rep.',
                        'DK' : 'Denmark',
                        'IT' : 'Italy',
                        'LT' : 'Lithuania',
                        'LU' : 'Luxembourg',
                        'LV' : 'Latvia',
                        'MT' : 'Malta',
                        'NL' : 'Netherlands',
                        'NO' : 'Norway',
                        'PT' : 'Portugal',
                        'PL' : 'Poland',
                        'EU' : 'Europe',
                        'RO' : 'Romania',
                        'SE' : 'Sweden',
                        'SI' : 'Slovenia',
                        'SK' : 'Slovakia',
                        'UK' : 'United Kingdom',
                        'AT' : 'Austria',
                        'BE' : 'Belgium',
                        'BG' : 'Bulgaria',
                        'CY' : 'Cyprus',
                        'CH' : 'Switzerland'
                     }
desi['ref_area'] = desi['ref_area'].replace(countries_mapping)

# Mapping between 'ref_area' and the name of the country
bd_mapping =  { 'desi_1_conn' : 'Connectivity',
                        'desi_2_hc' : 'Human Capital',
                        'desi_3_ui' : 'Use of Internet Services',
                        'desi_4_idt' : 'Integration of Digital Technology',
                        'desi_5_dps' : 'Digital Public Services'
                     }
desi['breakdown'] = desi['breakdown'].replace(bd_mapping)

# Drop the index column
desi.reset_index(drop=True, inplace=True)

In [42]:
import plotly.express as px
desi = desi.sort_values('time_period')

fig = px.bar(desi, x="ref_area", y="value", animation_frame="time_period",
         color="breakdown", range_y=[0,80], barmode = 'relative', animation_group = 'ref_area',
            hover_name = 'value', hover_data = {'value' : False, 'ref_area' : False, "time_period" : False, 'breakdown' : False })

fig.update_layout(width = 1200, height = 600, title='Select a year and compare the countries',
                  xaxis_title='Country',
                  yaxis_title='Value',
                  legend_title_text='Indicators:',
                  showlegend=True,
                  xaxis={'categoryorder':'total descending', 'tickangle' : 60, 'showgrid':False},
                  margin=dict(l=20, r=10, t=50, b=200), hovermode="x"
                 )
#fig.add_trace(go.Bar(desi.loc[desi['ref_area'] == 'Europe']))

'''def update_trace(trace):
    totx = []
    toty = []


    for a in range(5):
        x = []
        y = []
        for id in trace.ids:
            if id == "Europe":
                x.append(3)
                y.append("black")
            else:
                x.append(0)
                y.append("black")
        totx.append(x)
        toty.append(y)


    for i in range(5):
        trace.marker.line.width = totx[i]
        trace.marker.line.color = toty[i]
'''

fig['layout']['sliders'][0]['pad']=dict(r= 40, t= 130)
fig['layout']['sliders'][0]['currentvalue']['prefix']= 'Year: '
fig['layout']['sliders'][0]['steps'][1]['args'][1]['redraw']= False

print(fig['layout']['sliders'])

# animation button
#fig["layout"].pop("updatemenus") # optional, drop animation buttons
fig['layout']['updatemenus'][0]['pad']=dict(r= 30, t= 150)
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 2000
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["frame"] = 200

#fig.update_traces(hovertemplate='%{y}%', overwrite=True)
#fig.for_each_trace(lambda trace: update_trace(trace))

fig.show()

(layout.Slider({
    'active': 0,
    'currentvalue': {'prefix': 'Year: '},
    'len': 0.9,
    'pad': {'r': 40, 't': 130},
    'steps': [{'args': [['2015'], {'frame': {'duration': 0, 'redraw': True},
                        'mode': 'immediate', 'fromcurrent': True, 'transition':
                        {'duration': 0, 'easing': 'linear'}}],
               'label': '2015',
               'method': 'animate'},
              {'args': [['2016'], {'frame': {'duration': 0, 'redraw': True},
                        'mode': 'immediate', 'fromcurrent': True, 'transition':
                        {'duration': 0, 'easing': 'linear'}, 'redraw': False}],
               'label': '2016',
               'method': 'animate'},
              {'args': [['2017'], {'frame': {'duration': 0, 'redraw': True},
                        'mode': 'immediate', 'fromcurrent': True, 'transition':
                        {'duration': 0, 'easing': 'linear'}}],
               'label': '2017',
               'method': 'ani

In [None]:
desi.head()

In [None]:
# PLOTLY
fig2 = go.Figure()

bds = desi['breakdown'].unique()
desi = desi.sort_values('time_period')






for bd in bds:
    print(desi.loc[desi['breakdown'] == bd])
    fig2.add_trace(go.Bar(
        visible = False,
        x=desi['ref_area'].unique(),
        y=desi.loc[desi['breakdown'] == bd]['value'],
        name=bd
    ))
        

# Here we modify the tickangle of the xaxis, resulting in rotated labels.
fig2.update_layout(width = 1200, height = 600, title='Select a year and compare the countries',
                  xaxis_title='Country',
                  yaxis_title='Value',
                  legend_title_text='Indicators:',
                  showlegend=True,
                  xaxis={'categoryorder':'total descending', 'tickangle' : 60, 'showgrid':False},
                  margin=dict(l=20, r=10, t=50, b=200), hovermode="x", barmode = 'relative'
                 )

fig2.update_traces(hovertemplate='%{y}')



fig2.show()

In [43]:
fig.write_html("barchart.html", auto_play = False)