In [1]:
import geojson
import json
import pandas as pd

The 2017 World Happiness Report data were taken from [Kaggle](https://www.kaggle.com/unsdsn/world-happiness).

In [2]:
# Read the 2017 World Happiness Report table and preview its first rows.
happy_csv_path = 'happy_2017.csv'
happy_data = pd.read_csv(happy_csv_path)
happy_data.head()

Unnamed: 0,Country,Happiness.Rank,Happiness.Score,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,Trust..Government.Corruption.,Dystopia.Residual
0,Norway,1,7.537,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2.277027
1,Denmark,2,7.522,7.581728,7.462272,1.482383,1.551122,0.792566,0.626007,0.35528,0.40077,2.313707
2,Iceland,3,7.504,7.62203,7.38597,1.480633,1.610574,0.833552,0.627163,0.47554,0.153527,2.322715
3,Switzerland,4,7.494,7.561772,7.426227,1.56498,1.516912,0.858131,0.620071,0.290549,0.367007,2.276716
4,Finland,5,7.469,7.527542,7.410458,1.443572,1.540247,0.809158,0.617951,0.245483,0.382612,2.430182


In [3]:
# (rows, columns) of the happiness table as loaded from the CSV.
happy_data.shape

(155, 12)

In [4]:
# Per-column dtypes of the happiness table as loaded from the CSV.
happy_data.dtypes

Country                           object
Happiness.Rank                     int64
Happiness.Score                  float64
Whisker.high                     float64
Whisker.low                      float64
Economy..GDP.per.Capita.         float64
Family                           float64
Health..Life.Expectancy.         float64
Freedom                          float64
Generosity                       float64
Trust..Government.Corruption.    float64
Dystopia.Residual                float64
dtype: object

The table of ISO 3166-1 alpha-3 country codes was taken from [the ISO-3166-Countries-with-Regional-Codes repository](https://github.com/lukes/ISO-3166-Countries-with-Regional-Codes/blob/master/all/all.csv).

In [5]:
# Read the ISO country-code lookup table and preview its first rows.
country_codes_path = 'country_codes.csv'
country_codes = pd.read_csv(country_codes_path)
country_codes.head()

Unnamed: 0,name,alpha-2,alpha-3,country-code,iso_3166-2,region,sub-region,intermediate-region,region-code,sub-region-code,intermediate-region-code
0,Afghanistan,AF,AFG,4,ISO 3166-2:AF,Asia,Southern Asia,,142.0,34.0,
1,Åland Islands,AX,ALA,248,ISO 3166-2:AX,Europe,Northern Europe,,150.0,154.0,
2,Albania,AL,ALB,8,ISO 3166-2:AL,Europe,Southern Europe,,150.0,39.0,
3,Algeria,DZ,DZA,12,ISO 3166-2:DZ,Africa,Northern Africa,,2.0,15.0,
4,American Samoa,AS,ASM,16,ISO 3166-2:AS,Oceania,Polynesia,,9.0,61.0,


In [6]:
# Outer-join on country name so rows present in only one table survive;
# the `_merge` indicator column records which side each row came from.
merged_data = happy_data.merge(
    country_codes,
    how='outer',
    left_on='Country',
    right_on='name',
    indicator=True,
)
merged_data.head()

Unnamed: 0,Country,Happiness.Rank,Happiness.Score,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,...,alpha-3,country-code,iso_3166-2,region,sub-region,intermediate-region,region-code,sub-region-code,intermediate-region-code,_merge
0,Norway,1.0,7.537,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,...,NOR,578.0,ISO 3166-2:NO,Europe,Northern Europe,,150.0,154.0,,both
1,Denmark,2.0,7.522,7.581728,7.462272,1.482383,1.551122,0.792566,0.626007,0.35528,...,DNK,208.0,ISO 3166-2:DK,Europe,Northern Europe,,150.0,154.0,,both
2,Iceland,3.0,7.504,7.62203,7.38597,1.480633,1.610574,0.833552,0.627163,0.47554,...,ISL,352.0,ISO 3166-2:IS,Europe,Northern Europe,,150.0,154.0,,both
3,Switzerland,4.0,7.494,7.561772,7.426227,1.56498,1.516912,0.858131,0.620071,0.290549,...,CHE,756.0,ISO 3166-2:CH,Europe,Western Europe,,150.0,155.0,,both
4,Finland,5.0,7.469,7.527542,7.410458,1.443572,1.540247,0.809158,0.617951,0.245483,...,FIN,246.0,ISO 3166-2:FI,Europe,Northern Europe,,150.0,154.0,,both


In [7]:
# Happiness-report countries whose name has no match in the code table.
only_in_happy = merged_data['_merge'] == 'left_only'
merged_data.loc[only_in_happy, 'Country'].values

array(['United States', 'United Kingdom', 'Czech Republic',
       'Taiwan Province of China', 'Russia', 'South Korea', 'Moldova',
       'Bolivia', 'North Cyprus', 'Hong Kong S.A.R., China', 'Kosovo',
       'Venezuela', 'Macedonia', 'Vietnam', 'Palestinian Territories',
       'Iran', 'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Ivory Coast',
       'Syria', 'Tanzania'], dtype=object)

In [8]:
# Code-table names that no happiness-report country matched.
only_in_codes = merged_data['_merge'] == 'right_only'
merged_data.loc[only_in_codes, 'name'].values

array(['Åland Islands', 'American Samoa', 'Andorra', 'Anguilla',
       'Antarctica', 'Antigua and Barbuda', 'Aruba', 'Bahamas',
       'Barbados', 'Bermuda', 'Bolivia (Plurinational State of)',
       'Bonaire, Sint Eustatius and Saba', 'Bouvet Island',
       'British Indian Ocean Territory', 'Brunei Darussalam',
       'Cabo Verde', 'Cayman Islands', 'Christmas Island',
       'Cocos (Keeling) Islands', 'Comoros', 'Congo',
       'Congo, Democratic Republic of the', 'Cook Islands',
       "Côte d'Ivoire", 'Cuba', 'Curaçao', 'Czechia', 'Djibouti',
       'Dominica', 'Equatorial Guinea', 'Eritrea', 'Eswatini',
       'Falkland Islands (Malvinas)', 'Faroe Islands', 'Fiji',
       'French Guiana', 'French Polynesia', 'French Southern Territories',
       'Gambia', 'Gibraltar', 'Greenland', 'Grenada', 'Guadeloupe',
       'Guam', 'Guernsey', 'Guinea-Bissau', 'Guyana',
       'Heard Island and McDonald Islands', 'Holy See', 'Hong Kong',
       'Iran (Islamic Republic of)', 'Isle of Man', 

In [9]:
# Maps each happiness-report country spelling that failed to match in the
# outer merge to the corresponding `name` used by the country-code table.
# 'North Cyprus' and 'Kosovo' are intentionally absent: they have no
# replacement here and stay unmatched.
rename_dict = {
    'United States': 'United States of America',
    'United Kingdom': 'United Kingdom of Great Britain and Northern Ireland',
    'Czech Republic': 'Czechia',
    'Taiwan Province of China': 'Taiwan, Province of China',
    'Russia': 'Russian Federation',
    'South Korea': 'Korea, Republic of',
    'Moldova': 'Moldova, Republic of',
    'Bolivia': 'Bolivia (Plurinational State of)',
    'Hong Kong S.A.R., China': 'Hong Kong',
    'Venezuela': 'Venezuela (Bolivarian Republic of)',
    'Macedonia': 'North Macedonia',
    'Vietnam': 'Viet Nam',
    'Palestinian Territories': 'Palestine, State of',
    'Iran': 'Iran (Islamic Republic of)',
    'Congo (Brazzaville)': 'Congo',
    'Congo (Kinshasa)': 'Congo, Democratic Republic of the',
    'Ivory Coast': "Côte d'Ivoire",
    'Syria': 'Syrian Arab Republic',
    'Tanzania': 'Tanzania, United Republic of',
}

# Apply the mapping to the `Country` column only. A frame-wide
# `DataFrame.replace` would test every cell of every column against the
# mapping and could rewrite an unrelated column that happened to contain one
# of these strings. `assign` returns a new frame, so the cell stays safe to
# re-run (none of the replacement names is itself a key of `rename_dict`).
happy_data = happy_data.assign(
    Country=happy_data['Country'].replace(rename_dict)
)
happy_data.head()

Unnamed: 0,Country,Happiness.Rank,Happiness.Score,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,Trust..Government.Corruption.,Dystopia.Residual
0,Norway,1,7.537,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2.277027
1,Denmark,2,7.522,7.581728,7.462272,1.482383,1.551122,0.792566,0.626007,0.35528,0.40077,2.313707
2,Iceland,3,7.504,7.62203,7.38597,1.480633,1.610574,0.833552,0.627163,0.47554,0.153527,2.322715
3,Switzerland,4,7.494,7.561772,7.426227,1.56498,1.516912,0.858131,0.620071,0.290549,0.367007,2.276716
4,Finland,5,7.469,7.527542,7.410458,1.443572,1.540247,0.809158,0.617951,0.245483,0.382612,2.430182


In [10]:
# These two entries were unmatched in the outer merge and have no entry in
# `rename_dict`, so they are removed before the final merge.
unmapped_countries = ['North Cyprus', 'Kosovo']
is_mapped = ~happy_data['Country'].isin(unmapped_countries)
happy_data = happy_data[is_mapped]

In [11]:
# Left-join the cleaned happiness table onto the code table: every happiness
# row is kept, and `_merge` flags any row that still lacks a code-table match.
merged_data = happy_data.merge(
    country_codes,
    how='left',
    left_on='Country',
    right_on='name',
    indicator=True,
)
merged_data.head()

Unnamed: 0,Country,Happiness.Rank,Happiness.Score,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,...,alpha-3,country-code,iso_3166-2,region,sub-region,intermediate-region,region-code,sub-region-code,intermediate-region-code,_merge
0,Norway,1,7.537,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,...,NOR,578,ISO 3166-2:NO,Europe,Northern Europe,,150.0,154.0,,both
1,Denmark,2,7.522,7.581728,7.462272,1.482383,1.551122,0.792566,0.626007,0.35528,...,DNK,208,ISO 3166-2:DK,Europe,Northern Europe,,150.0,154.0,,both
2,Iceland,3,7.504,7.62203,7.38597,1.480633,1.610574,0.833552,0.627163,0.47554,...,ISL,352,ISO 3166-2:IS,Europe,Northern Europe,,150.0,154.0,,both
3,Switzerland,4,7.494,7.561772,7.426227,1.56498,1.516912,0.858131,0.620071,0.290549,...,CHE,756,ISO 3166-2:CH,Europe,Western Europe,,150.0,155.0,,both
4,Finland,5,7.469,7.527542,7.410458,1.443572,1.540247,0.809158,0.617951,0.245483,...,FIN,246,ISO 3166-2:FI,Europe,Northern Europe,,150.0,154.0,,both


In [12]:
# Sanity check: list happiness rows that still have no code-table match.
still_unmatched = merged_data['_merge'] == 'left_only'
merged_data.loc[still_unmatched, 'Country'].values

array([], dtype=object)

In [13]:
# (rows, columns) of the left-merged table, including the `_merge` column.
merged_data.shape

(153, 24)

The country-boundary GeoJSON was taken from [the datasets/geo-countries repository](https://github.com/datasets/geo-countries/blob/master/data/countries.geojson).

In [14]:
# Load the country-boundary FeatureCollection. The encoding is given
# explicitly because `open()` otherwise falls back to the platform's locale
# encoding, which can fail or mis-decode non-ASCII text in the file.
with open('countries.geojson', 'r', encoding='utf-8') as f:
    countries_geojson = json.load(f)

# Map each feature's ISO_A3 code to its ADMIN name. If several features share
# a code, the last one in file order wins (plain dict assignment semantics).
countries = {
    feature['properties']['ISO_A3']: feature['properties']['ADMIN']
    for feature in countries_geojson['features']
}

In [15]:
# Rows whose alpha-3 code has no feature in the GeoJSON (expected: none).
geojson_iso_codes = list(countries)
absent_from_geojson = ~merged_data['alpha-3'].isin(geojson_iso_codes)
merged_data[absent_from_geojson]

Unnamed: 0,Country,Happiness.Rank,Happiness.Score,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,...,alpha-3,country-code,iso_3166-2,region,sub-region,intermediate-region,region-code,sub-region-code,intermediate-region-code,_merge


In [16]:
# Persist the merged table without the row index. All columns are written,
# including the `_merge` indicator column added by the merge.
merged_data.to_csv('happy_2017_cleaned.csv', index=False)

In [17]:
# Build a new FeatureCollection in which every feature carries an `id` (its
# ISO_A3 code) plus three happiness properties. Features whose code has no
# row in `merged_data` get None for all three properties.

# Build the code -> (rank, score, gdp) lookup in a single pass instead of
# running a boolean-mask scan of the whole table for every feature.
# `setdefault` keeps the first row per code, matching a `.values[0]` lookup.
stats_by_iso = {}
for iso_code, rank, score, gdp in zip(
    merged_data['alpha-3'].values,
    merged_data['Happiness.Rank'].values,
    merged_data['Happiness.Score'].values,
    merged_data['Economy..GDP.per.Capita.'].values,
):
    stats_by_iso.setdefault(iso_code, (rank, score, gdp))

new_geojson = {
    "type": "FeatureCollection",
    "features": []
}
for feature in countries_geojson['features']:
    iso_code = feature['properties']['ISO_A3']

    # Copy the feature and its properties so `countries_geojson` is left
    # untouched; the geometry is shared because it is never modified.
    properties = dict(feature['properties'])
    stats = stats_by_iso.get(iso_code)
    if stats is not None:
        rank, score, gdp = stats
        # Convert NumPy scalars to built-in types so they serialise as JSON
        # (NumPy integers are not JSON-serialisable as-is).
        properties['happiness_rank'] = int(rank)
        properties['happiness_score'] = float(round(score, 2))
        properties['gdp_per_capita'] = float(round(gdp, 2))
    else:
        properties['happiness_rank'] = None
        properties['happiness_score'] = None
        properties['gdp_per_capita'] = None

    enriched_feature = dict(feature)
    enriched_feature['id'] = iso_code
    enriched_feature['properties'] = properties
    new_geojson['features'].append(enriched_feature)

In [18]:
# Serialise the enriched FeatureCollection to disk.
with open('countries_cleaned.geojson', 'w') as geojson_file:
    geojson.dump(new_geojson, geojson_file)