In [1]:
import pandas as pd
from pyjstat import pyjstat
# Widen pandas' display limits so long country names and full tables are
# printed without truncation.
for _display_option in ('display.max_colwidth', 'display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, 500)

In [2]:
# Eurostat JSON-stat endpoints queried by the loader functions below.
# Each URL is split into base path, dataset code and query filters.

# dataset tin00074, filtered to nace_r2=ICT
url_ict = (
    'http://ec.europa.eu/eurostat/wdds/rest/data/v2.1/json/en/'
    'tin00074'
    '?nace_r2=ICT'
)

# dataset isoc_cicce_use, two sizen_r2 classes, unit PC_ENT, indicator E_CC
url_percentage_cloud_computing = (
    'http://ec.europa.eu/eurostat/wdds/rest/data/v2.1/json/en/'
    'isoc_cicce_use'
    '?sizen_r2=M_C10_S951_XK'
    '&sizen_r2=L_C10_S951_XK'
    '&unit=PC_ENT'
    '&indic_is=E_CC'
)

In [3]:
# ict data
def get_ictdata():
    """Download the Eurostat ICT dataset and average its values per country.

    Reads the JSON-stat resource at the module-level ``url_ict`` with
    pyjstat, converts it to a DataFrame, replaces missing values with 0,
    averages ``value`` over all rows of each ``geo`` entry and drops the
    entries whose name starts with ``'Euro'``. The resulting frame is also
    printed.

    Returns
    -------
    pandas.DataFrame
        Columns ``country`` and ``ict_percentage`` (rounded to 2 decimals),
        one row per remaining country.

    Raises
    ------
    Exception
        Any download or parsing error propagates unchanged. Previously the
        error was printed and the function then failed with an unrelated
        ``UnboundLocalError`` at the ``return`` statement.
    """
    # get ict data from the eurostat website and convert it to a dataframe
    dataset_ict = pyjstat.Dataset.read(url_ict)
    df = dataset_ict.write('dataframe')

    # keep only the columns needed for the per-country average
    df_ict = df[['geo', 'value']].rename(
        columns={'geo': 'country', 'value': 'ict_percentage'})

    # Average per country over all years. Missing observations are counted
    # as 0 (this pulls the average down for countries with gaps).
    # The value column is selected explicitly: a bare groupby().mean() would
    # also try to average the string 'time' column, which raises on
    # pandas >= 2.0.
    df_ict_f = (
        df_ict.fillna(0)
        .groupby('country')['ict_percentage']
        .mean()
        .reset_index()
    )

    # drop the aggregate rows (names starting with 'Euro'): only individual
    # countries are relevant for the analysis
    is_aggregate = df_ict_f['country'].str.startswith('Euro')
    df_ict_final = df_ict_f[~is_aggregate].round(decimals=2)

    print(f'\n ICT dataframe:\n', df_ict_final, f'\n')

    return df_ict_final

In [4]:
# cloud computing data
def get_cloud_computing():
    """Download the Eurostat cloud-computing dataset and average it per country.

    Reads the JSON-stat resource at the module-level
    ``url_percentage_cloud_computing`` with pyjstat, converts it to a
    DataFrame, replaces missing values with 0, averages ``value`` over all
    rows of each ``geo`` entry and drops the entries whose name starts with
    ``'Euro'``. The resulting frame is also printed.

    Returns
    -------
    pandas.DataFrame
        Columns ``country`` and ``percentage_cc_services`` (rounded to
        2 decimals), one row per remaining country.

    Raises
    ------
    Exception
        Any download or parsing error propagates unchanged. Previously the
        error was printed and the function then failed with an unrelated
        ``UnboundLocalError`` at the ``return`` statement.
    """
    # get cloud computing data from the eurostat website as a dataframe
    dataset_percentage_cloud_computing = pyjstat.Dataset.read(url_percentage_cloud_computing)
    df_cc = dataset_percentage_cloud_computing.write('dataframe')

    # keep only the columns needed for the average; missing values count as 0
    df_cc_ = df_cc[['geo', 'value']].rename(
        columns={'geo': 'country', 'value': 'percentage_cc_services'}).fillna(0)

    # Average per country. The value column is selected explicitly: a bare
    # groupby().mean() would also try to average the string 'time' column,
    # which raises on pandas >= 2.0.
    df_cc_final = (
        df_cc_.groupby('country')['percentage_cc_services']
        .mean()
        .reset_index()
    )

    # Drop the aggregate rows. A single 'Euro' prefix test is sufficient: it
    # already covers every name starting with 'European'. The former
    # expression `A == False & (B == False)` was parsed as
    # `A == (False & ...)` because `&` binds tighter than `==`, so the second
    # condition never took part in the filter.
    is_aggregate = df_cc_final['country'].str.startswith('Euro')
    df_cloudcomputing = df_cc_final[~is_aggregate].round(decimals=2)

    print(f'Cloud computing dataframe:\n', df_cloudcomputing, f'\n')

    return df_cloudcomputing


In [5]:
# gdp data
def get_gdp_data():
    """Load ``gdp_data.csv`` and compute an average GDP figure per country.

    The file is read from the current working directory as a ``|``-delimited
    CSV with ``,`` as the decimal separator. Rows whose ``2008`` cell starts
    with ``'Office'`` (countries where Futurice is already present) or is
    empty are dropped; remaining missing values are replaced with 0. The
    average is taken over the columns at positions 1 to 6. The resulting
    frame is also printed.

    Returns
    -------
    pandas.DataFrame
        Columns ``country`` and ``avg_gdp`` (rounded to 2 decimals); the
        index of the kept rows is preserved from the CSV.

    Raises
    ------
    FileNotFoundError
        If ``gdp_data.csv`` does not exist.
    Exception
        Any other read or conversion error propagates unchanged. Previously
        the error was printed and the function then failed with an unrelated
        ``UnboundLocalError`` at the ``return`` statement.
    """
    # reading the gdp data
    df_gdp = pd.read_csv('gdp_data.csv', header='infer', delimiter='|', decimal=',')

    # The '2008' column is text when it holds 'Office...' markers and numeric
    # when it does not; casting to str makes the prefix test work in both
    # cases instead of failing on a numeric column.
    first_year = df_gdp['2008']
    has_office = first_year.astype(str).str.startswith('Office')

    # not considering countries where Futurice is already present; rows with
    # an empty 2008 cell are dropped as well (same as before)
    df_gdp_ = df_gdp[first_year.notna() & ~has_office].fillna(0)

    # convert decimal commas (de culture) to decimal points (en) and parse
    df_gdp_['2008'] = (
        df_gdp_['2008'].astype(str).str.replace(',', '.', regex=False).astype(float)
    )

    # average over columns 1..6 — presumably the six yearly columns starting
    # at 2008; TODO confirm against the layout of gdp_data.csv
    df_gdp_['avg_gdp'] = df_gdp_.iloc[:, 1:7].mean(axis=1)
    df_gdp_avg = df_gdp_[['Country', 'avg_gdp']].round(decimals=2).rename(columns={'Country': 'country'})

    print(f'gdp dataframe:\n', df_gdp_avg , f'\n')

    return df_gdp_avg

In [6]:
# Build the three per-country input frames used by the attractiveness report.
# Each call prints its resulting frame as a side effect. The first two fetch
# from the Eurostat URLs; the third reads the local gdp_data.csv file.
df_ict_final = get_ictdata()
df_cloudcomputing = get_cloud_computing()
df_gdp_avg = get_gdp_data()


 ICT dataframe:
                                              country  ict_percentage
0                                            Austria            3.05
1                                            Belgium            3.71
2                             Bosnia and Herzegovina            1.19
3                                           Bulgaria            5.18
4                                            Croatia            4.18
5                                             Cyprus            0.00
6                                            Czechia            4.02
7                                            Denmark            2.31
8                                            Estonia            4.95
10                                           Finland            4.07
11                                            France            3.74
12  Germany (until 1990 former territory of the FRG)            4.14
13                                            Greece            1.82
14              

In [10]:
# Attractiveness by country
# attractiveness = average GDP
#                  * percentage of ICT sector from GDP
#                  * usage of cloud computing in enterprises in the country

# combine the two Eurostat frames on the country name (only countries present in both)
df_country = df_ict_final.merge(df_cloudcomputing, how='inner', on='country').fillna(0)

# attach the GDP averages (only countries present in the GDP file as well)
df_country_ = df_country.merge(df_gdp_avg, how='inner', on='country').fillna(0)

# calculating the attractive rate
df_country_['attractive_rate'] = (
    df_country_['ict_percentage']
    .mul(df_country_['percentage_cc_services'])
    .mul(df_country_['avg_gdp'])
    .round(decimals=2)
)

# ranking: highest attractive rate first
df_final_ = (
    df_country_[['country', 'attractive_rate']]
    .round(decimals=2)
    .sort_values('attractive_rate', ascending=False)
)

# keep only countries with a positive rate
df_final = df_final_[df_final_['attractive_rate'].gt(0)]

print(f'AttractivenessByCountry Report:\n',df_final)

# write the report to a csv file; a failure is reported but does not stop the notebook
try:
    df_final.to_csv('attractive_countries.csv', index=False)
    print('\n Report generation success !','\n check file attractive_countries.csv')
except Exception as e:
    print(e)


AttractivenessByCountry Report:
                    country  attractive_rate
7                   France        216284.36
11                   Italy        156197.13
22                   Spain         89193.82
0                  Belgium         76592.88
5                  Denmark         34059.26
17                  Poland         32996.42
4                  Czechia         22634.63
9                  Hungary         22490.03
15             Netherlands         12926.29
8                   Greece          9257.05
3                  Croatia          8549.94
20                Slovakia          8353.53
19                 Romania          7406.44
21                Slovenia          5135.11
2                 Bulgaria          4369.62
13               Lithuania          3846.75
14                   Malta          3142.35
6                  Estonia          3077.69
12                  Latvia          2591.20
18                Portugal          1984.22
16         North Macedonia           180.14