# World Happiness and Economic Freedom

In [1]:
import pandas as pd
import dwfunctions as dw
import requests
import json
from pandas.io.json import json_normalize

In [2]:
ef_2019_raw = pd.read_csv('economic_freedom_index2019.csv', encoding='latin-1')
dw.strip_column(ef_2019_raw, 'Country')
ef_2019_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 186 entries, 0 to 185
Data columns (total 34 columns):
CountryID                      186 non-null int64
Country Name                   186 non-null object
WEBNAME                        186 non-null object
Region                         186 non-null object
World Rank                     180 non-null float64
Region Rank                    180 non-null float64
2019 Score                     180 non-null float64
Property Rights                185 non-null float64
Judical Effectiveness          185 non-null float64
Government Integrity           185 non-null float64
Tax Burden                     180 non-null float64
Gov't Spending                 183 non-null float64
Fiscal Health                  183 non-null float64
Business Freedom               185 non-null float64
Labor Freedom                  184 non-null float64
Monetary Freedom               184 non-null float64
Trade Freedom                  182 non-null float64
Investment Freed

In [3]:
ef_2019_raw.head()

Unnamed: 0,CountryID,Country Name,WEBNAME,Region,World Rank,Region Rank,2019 Score,Property Rights,Judical Effectiveness,Government Integrity,...,Country,Population (Millions),"GDP (Billions, PPP)",GDP Growth Rate (%),5 Year GDP Growth Rate (%),GDP per Capita (PPP),Unemployment (%),Inflation (%),FDI Inflow (Millions),Public Debt (% of GDP)
0,1,Afghanistan,Afghanistan,Asia-Pacific,152.0,39.0,51.5,19.6,29.6,25.2,...,Afghanistan,35.5,$69.6,2.5,2.9,"$1,958",8.8,5.0,53.9,7.3
1,2,Albania,Albania,Europe,52.0,27.0,66.5,54.8,30.6,40.4,...,Albania,2.9,$36.0,3.9,2.5,"$12,507",13.9,2.0,1119.1,71.2
2,3,Algeria,Algeria,Middle East and North Africa,171.0,14.0,46.2,31.6,36.2,28.9,...,Algeria,41.5,$632.9,2.0,3.1,"$15,237",10.0,5.6,1203.0,25.8
3,4,Angola,Angola,Sub-Saharan Africa,156.0,33.0,50.6,35.9,26.6,20.5,...,Angola,28.2,$190.3,0.7,2.9,"$6,753",8.2,31.7,-2254.5,65.3
4,5,Argentina,Argentina,Americas,148.0,26.0,52.2,47.8,44.5,33.5,...,Argentina,44.1,$920.2,2.9,0.7,"$20,876",8.7,25.7,11857.0,52.6


In [4]:
ef_2019 = ef_2019_raw[['Country', '2019 Score']].rename(columns={'2019 Score':'Economic Freedom Score'}).sort_values(by=['Country'])

ef_2019.head()

Unnamed: 0,Country,Economic Freedom Score
0,Afghanistan,51.5
1,Albania,66.5
2,Algeria,46.2
3,Angola,50.6
4,Argentina,52.2


In [5]:
#2019 World Happiness Data is the 4th <table> element on page
#wh_2019_raw = pd.read_html('https://en.wikipedia.org/wiki/World_Happiness_Report')[4]
#wh_2019_raw.to_csv('worldHappiness2019.csv', index=False)

In [6]:
wh_2019 = pd.read_csv('worldHappiness2019.csv')
dw.strip_column(wh_2019, 'Country or region')
wh_2019.head()

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Finland,7.769,1.34,1.587,0.986,0.596,0.153,0.393
1,2,Denmark,7.6,1.383,1.573,0.996,0.592,0.252,0.41
2,3,Norway,7.554,1.488,1.582,1.028,0.603,0.271,0.341
3,4,Iceland,7.494,1.38,1.624,1.026,0.591,0.354,0.118
4,5,Netherlands,7.488,1.396,1.522,0.999,0.557,0.322,0.298


In [7]:
wh_2019 = wh_2019[['Country or region','Score']].rename(columns={'Country or region':'Country', 'Score':'Happiness Score'}).sort_values(by=['Country']).reset_index(drop=True)
wh_2019.head()

Unnamed: 0,Country,Happiness Score
0,Afghanistan,3.203
1,Albania,4.719
2,Algeria,5.211
3,Argentina,6.086
4,Armenia,4.559


In [8]:
ef_2019.info()
wh_2019.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 186 entries, 0 to 185
Data columns (total 2 columns):
Country                   186 non-null object
Economic Freedom Score    180 non-null float64
dtypes: float64(1), object(1)
memory usage: 4.4+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 2 columns):
Country            156 non-null object
Happiness Score    156 non-null float64
dtypes: float64(1), object(1)
memory usage: 2.6+ KB


In [9]:
wh_countries = dw.get_countries(wh_2019)
ef_countries = dw.get_countries(ef_2019)

dw.diff_df(wh_countries, ef_countries, 'WH', 'EF', False)

Unnamed: 0,Unnamed: 1,Country
WH,30,Congo (Brazzaville)
WH,31,Congo (Kinshasa)
WH,35,Czechia
WH,56,Hong Kong
WH,66,Ivory Coast
...,...,...
EF,166,Timor-Leste
EF,168,Tonga
EF,169,Trinidad and Tobago
EF,177,United States


In [10]:
newCountryName = {38:wh_countries.loc[30,'Country'], 37:wh_countries.loc[31,'Country'],
                  44:wh_countries.loc[35,'Country'], 72:wh_countries.loc[56,'Country'],
                  40:wh_countries.loc[66,'Country'], 92:wh_countries.loc[74,'Country'],
                  93:wh_countries.loc[75,'Country'], 25:wh_countries.loc[96,'Country'],
                  103:wh_countries.loc[104,'Country'], 150:wh_countries.loc[124,'Country'],
                  89:wh_countries.loc[128,'Country'], 157:wh_countries.loc[129,'Country'],
                  169:wh_countries.loc[140,'Country'], 177:wh_countries.loc[148,'Country'],
                 }

In [11]:
for key, value in newCountryName.items():
    dw.change_name(ef_2019, key, 'Country', value) 

In [12]:
wh_countries = dw.get_countries(wh_2019)
ef_countries = dw.get_countries(ef_2019)

In [13]:
discrepencies = dw.diff_df(wh_countries, ef_countries, 'WH', 'EF', False)

In [14]:
wh_2019 = dw.remove_country(discrepencies, 'WH', wh_2019)
ef_2019 = dw.remove_country(discrepencies, 'EF', ef_2019)

In [15]:
wh_countries = dw.get_countries(wh_2019)
ef_countries = dw.get_countries(ef_2019)
dw.diff_df(wh_countries, ef_countries, 'WH', 'EF', False)

Unnamed: 0,Unnamed: 1,Country


In [16]:
wh_ef = dw.merge_df(wh_2019, ef_2019, 'Country', 'left')
wh_ef.info()
wh_ef.head(10)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 154 entries, 0 to 153
Data columns (total 3 columns):
Country                   154 non-null object
Happiness Score           154 non-null float64
Economic Freedom Score    149 non-null float64
dtypes: float64(2), object(1)
memory usage: 4.8+ KB


Unnamed: 0,Country,Happiness Score,Economic Freedom Score
0,Afghanistan,3.203,51.5
1,Albania,4.719,66.5
2,Algeria,5.211,46.2
3,Argentina,6.086,52.2
4,Armenia,4.559,67.7
5,Australia,7.228,80.9
6,Austria,7.246,72.0
7,Azerbaijan,5.208,65.4
8,Bahrain,6.199,66.4
9,Bangladesh,4.456,55.6


In [17]:
wh_ef_2019 = wh_ef.dropna()
wh_ef_2019.info()
wh_ef_2019.head(10)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 149 entries, 0 to 153
Data columns (total 3 columns):
Country                   149 non-null object
Happiness Score           149 non-null float64
Economic Freedom Score    149 non-null float64
dtypes: float64(2), object(1)
memory usage: 4.7+ KB


Unnamed: 0,Country,Happiness Score,Economic Freedom Score
0,Afghanistan,3.203,51.5
1,Albania,4.719,66.5
2,Algeria,5.211,46.2
3,Argentina,6.086,52.2
4,Armenia,4.559,67.7
5,Australia,7.228,80.9
6,Austria,7.246,72.0
7,Azerbaijan,5.208,65.4
8,Bahrain,6.199,66.4
9,Bangladesh,4.456,55.6


## Add Region Column

In [18]:
regions = pd.read_csv("worldHappiness2015.csv", usecols=['Country','Region']).sort_values(by=['Country']).reset_index(drop=True)
dw.strip_column(regions, 'Country')

In [19]:
regions.head()

Unnamed: 0,Country,Region
0,Afghanistan,Southern Asia
1,Albania,Central and Eastern Europe
2,Algeria,Middle East and Northern Africa
3,Angola,Sub-Saharan Africa
4,Argentina,Latin America and Caribbean


In [20]:
wh_countries = dw.get_countries(wh_ef_2019)
region_countries = dw.get_countries(regions)

dw.diff_df(wh_countries, region_countries, 'WH', 'R', False)

Unnamed: 0,Unnamed: 1,Country
WH,35,Czechia
WH,42,Eswatini
WH,47,Gambia
WH,97,Namibia
WH,104,North Macedonia
WH,127,South Sudan
WH,138,Trinidad & Tobago
WH,146,United States of America
R,3,Angola
R,36,Czech Republic


In [22]:
newCountryName = {36:wh_countries.loc[35,'Country'], 133:wh_countries.loc[42,'Country'],
                  83:wh_countries.loc[104,'Country'], 131:wh_countries.loc[127,'Country'],
                  142:wh_countries.loc[138,'Country'], 150:wh_countries.loc[146,'Country']
                 }

In [23]:
for key, value in newCountryName.items():
    dw.change_name(regions, key, 'Country', value)

In [24]:
wh_countries = dw.get_countries(wh_ef_2019)
region_countries = dw.get_countries(regions)

discrepencies = dw.diff_df(wh_countries, region_countries, 'WH', 'R', False)

In [25]:
regions = dw.remove_country(discrepencies, 'R', regions)

In [26]:
wh_countries = dw.get_countries(wh_ef_2019)
region_countries = dw.get_countries(regions)

dw.diff_df(wh_countries, region_countries, 'WH', 'R', False)

Unnamed: 0,Unnamed: 1,Country
WH,47,Gambia
WH,97,Namibia


In [27]:
wh_ef_2019 = dw.merge_df(wh_ef_2019, regions, 'Country', 'left')
wh_ef_2019.info()
wh_ef_2019.head(10)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 149 entries, 0 to 148
Data columns (total 4 columns):
Country                   149 non-null object
Happiness Score           149 non-null float64
Economic Freedom Score    149 non-null float64
Region                    147 non-null object
dtypes: float64(2), object(2)
memory usage: 5.8+ KB


Unnamed: 0,Country,Happiness Score,Economic Freedom Score,Region
0,Afghanistan,3.203,51.5,Southern Asia
1,Albania,4.719,66.5,Central and Eastern Europe
2,Algeria,5.211,46.2,Middle East and Northern Africa
3,Argentina,6.086,52.2,Latin America and Caribbean
4,Armenia,4.559,67.7,Central and Eastern Europe
5,Australia,7.228,80.9,Australia and New Zealand
6,Austria,7.246,72.0,Western Europe
7,Azerbaijan,5.208,65.4,Central and Eastern Europe
8,Bahrain,6.199,66.4,Middle East and Northern Africa
9,Bangladesh,4.456,55.6,Southern Asia


In [28]:
#Add region for Gambia and Namibia
wh_ef_2019.Region[47] = 'Sub-Saharan Africa'
wh_ef_2019.Region[95] = 'Sub-Saharan Africa'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [29]:
wh_ef_2019.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 149 entries, 0 to 148
Data columns (total 4 columns):
Country                   149 non-null object
Happiness Score           149 non-null float64
Economic Freedom Score    149 non-null float64
Region                    149 non-null object
dtypes: float64(2), object(2)
memory usage: 10.8+ KB


## Add ISO 3 Country Codes to Dataframe

In [30]:
#Remove '#' to save .csv file to local drive
#iso_3 = pd.read_html('https://www.iban.com/country-codes')[0]
#iso_3 = iso_3[['Country', 'Alpha-3 code']]
#iso_3.to_csv('iso_3.csv', index=False)
iso_3 = pd.read_csv('iso_3.csv')

In [31]:
wh_countries = dw.get_countries(wh_ef_2019)
iso_countries = dw.get_countries(iso_3)
diff = dw.diff_df(wh_countries, iso_countries, 'WH', 'ISO', False)

In [32]:
diff.head(60)

Unnamed: 0,Unnamed: 1,Country
WH,14,Bolivia
WH,24,Central African Republic
WH,29,Comoros
WH,30,Congo (Brazzaville)
WH,31,Congo (Kinshasa)
WH,37,Dominican Republic
WH,47,Gambia
WH,61,Iran
WH,65,Ivory Coast
WH,71,Kosovo


In [36]:
diff.iloc[60:92]

Unnamed: 0,Unnamed: 1,Country
ISO,72,Falkland Islands (the) [Malvinas]
ISO,73,Faroe Islands (the)
ISO,74,Fiji
ISO,77,French Guiana
ISO,78,French Polynesia
ISO,79,French Southern Territories (the)
ISO,81,Gambia (the)
ISO,85,Gibraltar
ISO,87,Greenland
ISO,88,Grenada


In [37]:
diff.tail(60)

Unnamed: 0,Unnamed: 1,Country
ISO,143,Mayotte
ISO,145,Micronesia (Federated States of)
ISO,146,Moldova (the Republic of)
ISO,147,Monaco
ISO,150,Montserrat
ISO,155,Nauru
ISO,157,Netherlands (the)
ISO,158,New Caledonia
ISO,161,Niger (the)
ISO,163,Niue


In [38]:
newCountryName = {26:wh_countries.loc[14,'Country'], 42:wh_countries.loc[24,'Country'],
                  49:wh_countries.loc[29,'Country'], 50:wh_countries.loc[31,'Country'],
                  51:wh_countries.loc[30,'Country'], 63:wh_countries.loc[37,'Country'],
                  81:wh_countries.loc[47,'Country'], 105:wh_countries.loc[61,'Country'],
                  54:wh_countries.loc[65,'Country'], 122:wh_countries.loc[74,'Country'],
                  146:wh_countries.loc[89,'Country'], 157:wh_countries.loc[97,'Country'],
                  161:wh_countries.loc[100,'Country'], 132:wh_countries.loc[102,'Country'],
                  175:wh_countries.loc[108,'Country'], 183:wh_countries.loc[113,'Country'],
                  119:wh_countries.loc[123,'Country'], 217:wh_countries.loc[129,'Country'],
                  219:wh_countries.loc[131,'Country'], 225:wh_countries.loc[134,'Country'],
                  233:wh_countries.loc[140,'Country'], 234:wh_countries.loc[141,'Country'],
                  236:wh_countries.loc[142,'Country'], 240:wh_countries.loc[145,'Country'],
                  241:wh_countries.loc[146,'Country']
                 }

In [39]:
for key, value in newCountryName.items():
    dw.change_name(iso_3, key, 'Country', value)

In [40]:
wh_countries = dw.get_countries(wh_ef_2019)
iso_countries = dw.get_countries(iso_3)
dw.diff_df(wh_countries, iso_countries, 'WH', 'ISO', False)

Unnamed: 0,Unnamed: 1,Country
WH,71,Kosovo
ISO,1,Åland Islands
ISO,4,American Samoa
ISO,5,Andorra
ISO,6,Angola
ISO,...,...
ISO,242,Virgin Islands (British)
ISO,243,Virgin Islands (U.S.)
ISO,244,Wallis and Futuna
ISO,245,Western Sahara


In [41]:
wh_countries = dw.get_countries(wh_ef_2019)
iso_countries = dw.get_countries(iso_3)

In [42]:
discrepencies = dw.diff_df(wh_countries, iso_countries, 'WH', 'ISO', False)

In [43]:
iso_3 = dw.remove_country(discrepencies, 'ISO', iso_3)

In [44]:
wh_countries = dw.get_countries(wh_ef_2019)
iso_countries = dw.get_countries(iso_3)
dw.diff_df(wh_countries, iso_countries, 'WH', 'ISO', False)

Unnamed: 0,Unnamed: 1,Country
WH,71,Kosovo


In [45]:
wh_ef_2019.head()

Unnamed: 0,Country,Happiness Score,Economic Freedom Score,Region
0,Afghanistan,3.203,51.5,Southern Asia
1,Albania,4.719,66.5,Central and Eastern Europe
2,Algeria,5.211,46.2,Middle East and Northern Africa
3,Argentina,6.086,52.2,Latin America and Caribbean
4,Armenia,4.559,67.7,Central and Eastern Europe


In [46]:
wh_ef_2019 = dw.merge_df(wh_ef_2019, iso_3, 'Country', 'left')

In [47]:
wh_ef_2019 = wh_ef_2019[['Country', 'Alpha-3 code', 'Region', 'Happiness Score', 'Economic Freedom Score']]
wh_ef_2019.head(10)

Unnamed: 0,Country,Alpha-3 code,Region,Happiness Score,Economic Freedom Score
0,Afghanistan,AFG,Southern Asia,3.203,51.5
1,Albania,ALB,Central and Eastern Europe,4.719,66.5
2,Algeria,DZA,Middle East and Northern Africa,5.211,46.2
3,Argentina,ARG,Latin America and Caribbean,6.086,52.2
4,Armenia,ARM,Central and Eastern Europe,4.559,67.7
5,Australia,AUS,Australia and New Zealand,7.228,80.9
6,Austria,AUT,Western Europe,7.246,72.0
7,Azerbaijan,AZE,Central and Eastern Europe,5.208,65.4
8,Bahrain,BHR,Middle East and Northern Africa,6.199,66.4
9,Bangladesh,BGD,Southern Asia,4.456,55.6


In [48]:
wh_ef_2019.loc[71, 'Alpha-3 code'] = 'KSV'

In [49]:
wh_ef_2019.query('Country=="Kosovo"')

Unnamed: 0,Country,Alpha-3 code,Region,Happiness Score,Economic Freedom Score
71,Kosovo,KSV,Central and Eastern Europe,6.1,67.0


In [50]:
#Remove '#' to save .csv file to local drive
#wh_ef_2019.to_csv('wh_ef_final.csv', index=False)