In [None]:
# Import the packages we need
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
import seaborn as sns
import requests
from bs4 import BeautifulSoup

In [None]:
# Load the UGS data from the local CSV file
data = pd.read_csv('UGSdata.csv')
data

Unnamed: 0,Country,UGS
0,Finland,5.73
1,Iceland,5.47
2,Lithuania,5.46
3,New Zealand,5.33
4,Slovenia,5.32
5,Croatia,5.23
6,Montenegro,5.21
7,Italy,5.17
8,Slovakia,5.16
9,Estonia,5.15


In [None]:
# Download data from the World Bank on GDP per capita using API 

!pip install world-bank-data
import world_bank_data as wb

gdpcapita= pd.DataFrame(wb.get_series('NY.GDP.PCAP.CD'))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Reduce the World Bank download to one "GDP per Capita" value per country

# Round-trip through CSV so that Country / Series / Year are available as plain
# columns after reading back (presumably they are index levels in the API
# result - verify) and pandas re-infers the column dtypes.
gdpcapita.to_csv('gdpcapita.csv')

gdpcapita = (
    pd.read_csv('gdpcapita.csv')
    .drop('Series', axis=1)
    # keep only observations after 2010
    .loc[lambda frame: frame['Year'] > 2010]
    # discard rows with missing values
    .dropna()
    # within each country, order by year so the latest year comes last ...
    .sort_values(by=['Country', 'Year'])
    # ... and keep only that latest available observation
    .drop_duplicates(subset=['Country'], keep='last')
    .set_index('Country')
)

# Two columns remain (year, value): name them, then discard the year.
gdpcapita.columns = ["Year", "GDP per Capita"]
gdpcapita = gdpcapita.drop('Year', axis=1)

In [None]:
# Attach GDP per capita to the UGS data.
# Left join: every row of `data` is kept; countries without a match in
# `gdpcapita` get NaN.
# NOTE(review): Slovakia ends up with NaN in the output below - presumably the
# World Bank data uses a different country name; confirm before dropping it.

datamerge1 = data.merge(gdpcapita, how='left', on='Country')

In [None]:
# Display the result of the left join of the UGS data with GDP per capita
datamerge1

Unnamed: 0,Country,UGS,GDP per Capita
0,Finland,5.73,53654.750296
1,Iceland,5.47,68727.636665
2,Lithuania,5.46,23723.340251
3,New Zealand,5.33,48781.026633
4,Slovenia,5.32,29291.400623
5,Croatia,5.23,17685.325283
6,Montenegro,5.21,9465.703999
7,Italy,5.17,35657.497563
8,Slovakia,5.16,
9,Estonia,5.15,27943.70122


In [None]:
# Scrape the happiness scores: read_html returns a list with one DataFrame per
# HTML table it finds on the Wikipedia page.

wiki_list = pd.read_html(
    'https://en.wikipedia.org/wiki/World_Happiness_Report'
)
# Number of tables found on the page
len(wiki_list)

23

In [None]:
# Pick the table holding the country scores.
# NOTE(review): position 12 is hard-coded and depends on the current layout of
# the Wikipedia page - check the columns displayed below if the page changes.
table = wiki_list[12]
table

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Finland,7.809,1.285,1.500,0.961,0.662,0.160,0.478
1,2,Denmark,7.646,1.327,1.503,0.979,0.665,0.243,0.495
2,3,Switzerland,7.560,1.391,1.472,1.041,0.629,0.269,0.408
3,4,Iceland,7.504,1.327,1.548,1.001,0.662,0.362,0.145
4,5,Norway,7.488,1.424,1.495,1.008,0.670,0.288,0.434
...,...,...,...,...,...,...,...,...,...
148,149,Central African Republic,3.476,0.041,0.000,0.000,0.293,0.254,0.028
149,150,Rwanda,3.312,0.343,0.523,0.572,0.604,0.236,0.486
150,151,Zimbabwe,3.299,0.426,1.048,0.375,0.377,0.151,0.081
151,152,South Sudan,2.817,0.289,0.553,0.209,0.066,0.210,0.111


In [None]:
# Keep only the country name and its score, and rename the country column to
# 'Country' so it matches the key used for merging.
table = (
    table[['Country or region', 'Score']]
    .rename(columns={'Country or region': 'Country'})
)
table

Unnamed: 0,Country,Score
0,Finland,7.809
1,Denmark,7.646
2,Switzerland,7.560
3,Iceland,7.504
4,Norway,7.488
...,...,...
148,Central African Republic,3.476
149,Rwanda,3.312
150,Zimbabwe,3.299
151,South Sudan,2.817


In [None]:
# Add the happiness score to the UGS / GDP frame.
# Left join on 'Country': rows of datamerge1 without a score get NaN.

datamerge2 = datamerge1.merge(table, how='left', on='Country')
datamerge2

Unnamed: 0,Country,UGS,GDP per Capita,Score
0,Finland,5.73,53654.750296,7.809
1,Iceland,5.47,68727.636665,7.504
2,Lithuania,5.46,23723.340251,6.215
3,New Zealand,5.33,48781.026633,7.3
4,Slovenia,5.32,29291.400623,6.363
5,Croatia,5.23,17685.325283,5.505
6,Montenegro,5.21,9465.703999,5.546
7,Italy,5.17,35657.497563,6.387
8,Slovakia,5.16,,6.281
9,Estonia,5.15,27943.70122,6.022


In [None]:
# Remove every row (country) that has a missing value in any column

datamerge2 = datamerge2.dropna(axis=0, how='any')
datamerge2

Unnamed: 0,Country,UGS,GDP per Capita,Score
0,Finland,5.73,53654.750296,7.809
1,Iceland,5.47,68727.636665,7.504
2,Lithuania,5.46,23723.340251,6.215
3,New Zealand,5.33,48781.026633,7.3
4,Slovenia,5.32,29291.400623,6.363
5,Croatia,5.23,17685.325283,5.505
6,Montenegro,5.21,9465.703999,5.546
7,Italy,5.17,35657.497563,6.387
9,Estonia,5.15,27943.70122,6.022
10,United States,5.13,70248.629,6.94


In [None]:
# Add a GDP rank and order the table by GDP per capita

# Rank across the whole column (1 = highest GDP per capita). Ranking inside
# groupby('Country') compares each country only with itself, so with one row
# per country every rank would come out as 1.0.
# assign() returns a new frame instead of writing a column into the result of
# the earlier dropna(), and makes the cell safe to re-run.
datamerge2 = (
    datamerge2
    .assign(**{'GDP Rank': datamerge2['GDP per Capita'].rank(ascending=False)})
    # richest country first
    .sort_values('GDP per Capita', ascending=False)
)

In [None]:
# Write the final table (row index included) to disk

datamerge2.to_csv("happinessdata.csv", index=True)

