# World Happiness and Population Density

In [1]:
import pandas as pd
import dwfunctions as dw

In [2]:
# Load the 2017 population-density table, keeping only the two columns used in
# this analysis and giving them shorter, consistently capitalised names.
popden_columns = {
    'country': 'Country',
    'Population density (per km2, 2017)': 'Population Density',
}
popden_2017 = pd.read_csv('populationDensity.csv', usecols=list(popden_columns))
popden_2017 = popden_2017.rename(columns=popden_columns)
# Project helper; presumably trims stray whitespace from the column in place
# (its return value is not used) -- TODO confirm against dwfunctions.
dw.strip_column(popden_2017, 'Country')
popden_2017.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 229 entries, 0 to 228
Data columns (total 2 columns):
Country               229 non-null object
Population Density    229 non-null float64
dtypes: float64(1), object(1)
memory usage: 3.7+ KB


In [3]:
# Preview the first rows to confirm the renamed columns look as expected.
popden_2017.head()

Unnamed: 0,Country,Population Density
0,Afghanistan,54.4
1,Albania,106.9
2,Algeria,17.3
3,American Samoa,278.2
4,Andorra,163.8


In [4]:
# Load the 2017 World Happiness scores, ordered alphabetically by country with
# a fresh 0..n-1 index so row labels follow the alphabetical order.
wh_2017 = (
    pd.read_csv('worldHappiness2017.csv', usecols=['Country', 'Happiness Score'])
    .sort_values(by=['Country'])
    .reset_index(drop=True)
)
# Project helper; presumably trims stray whitespace from the column in place
# (its return value is not used) -- TODO confirm against dwfunctions.
dw.strip_column(wh_2017, 'Country')
wh_2017.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 155 entries, 0 to 154
Data columns (total 2 columns):
Country            155 non-null object
Happiness Score    155 non-null float64
dtypes: float64(1), object(1)
memory usage: 2.5+ KB


In [5]:
# Preview the first rows of the sorted happiness table.
wh_2017.head()

Unnamed: 0,Country,Happiness Score
0,Afghanistan,3.794
1,Albania,4.644
2,Algeria,5.872
3,Angola,3.795
4,Argentina,6.599


In [6]:
# Extract the country names from each dataset (project helper; the result is
# indexed with .loc[row, 'Country'] in later cells).
wh_countries = dw.get_countries(wh_2017)
popden_countries = dw.get_countries(popden_2017)

# List the country names that appear in only one of the two datasets, labelled
# 'WH' or 'PD' by source. The row numbers shown are reused by the renaming
# cell that follows.
# NOTE(review): meaning of the trailing False flag is not visible here -- confirm in dwfunctions.
dw.diff_df(wh_countries, popden_countries, 'WH', 'PD', False)

Unnamed: 0,Unnamed: 1,Country
WH,16,Bolivia
WH,31,Congo (Brazzaville)
WH,32,Congo (Kinshasa)
WH,36,Czech Republic
WH,55,"Hong Kong S.A.R., China"
...,...,...
PD,221,Vanuatu
PD,222,Venezuela (Bolivarian Republic of)
PD,223,Viet Nam
PD,224,Wallis and Futuna Islands


In [7]:
# Each pair is (row label in popden_2017, row label in wh_countries) for a
# country that is spelled differently in the two sources. The row labels were
# read by hand from the diff output, so they must be re-checked if either
# input file changes.
popden_to_wh_rows = [
    (24, 16), (56, 32),
    (48, 31), (54, 36),
    (43, 55), (97, 60),
    (201, 81), (165, 90),
    (192, 106), (167, 115),
    (55, 126), (198, 133),
    (216, 136), (217, 147),
    (222, 150), (223, 151),
]

# Resolve every replacement name up front, before any row is modified.
newCountryName = {
    popden_row: wh_countries.loc[wh_row, 'Country']
    for popden_row, wh_row in popden_to_wh_rows
}

# Overwrite the population-density spelling with the World Happiness spelling.
for popden_row, wh_name in newCountryName.items():
    dw.change_name(popden_2017, popden_row, 'Country', wh_name)

In [8]:
# Re-extract the country lists now that popden_2017 has been renamed.
wh_countries = dw.get_countries(wh_2017)
popden_countries = dw.get_countries(popden_2017)

In [9]:
# Countries still present in only one dataset after the manual renaming.
discrepencies = dw.diff_df(wh_countries, popden_countries, 'WH', 'PD', False)

# Drop the leftover unmatched countries from BOTH frames (presumably the rows
# listed under the given label -- TODO confirm in dwfunctions).
# Note that wh_2017 and popden_2017 are rebound here, so re-running this cell
# operates on the already-filtered frames.
wh_2017 = dw.remove_country(discrepencies, 'WH', wh_2017)
popden_2017 = dw.remove_country(discrepencies, 'PD', popden_2017)

In [10]:
# Sanity check: after renaming and filtering, the diff should come back empty.
wh_countries = dw.get_countries(wh_2017)
popden_countries = dw.get_countries(popden_2017)
dw.diff_df(wh_countries, popden_countries, 'WH', 'PD', False)

Unnamed: 0,Unnamed: 1,Country


In [11]:
# Combine happiness scores and population density on the shared 'Country' key.
# NOTE(review): 'left' is presumably the join type, keeping every wh_2017 row --
# confirm in dwfunctions.
wh_popden = dw.merge_df(wh_2017, popden_2017, 'Country', 'left')
# info() prints the non-null counts; head(10) is the cell's displayed result.
wh_popden.info()
wh_popden.head(10)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 151 entries, 0 to 150
Data columns (total 3 columns):
Country               151 non-null object
Happiness Score       151 non-null float64
Population Density    151 non-null float64
dtypes: float64(2), object(1)
memory usage: 4.7+ KB


Unnamed: 0,Country,Happiness Score,Population Density
0,Afghanistan,3.794,54.4
1,Albania,4.644,106.9
2,Algeria,5.872,17.3
3,Angola,3.795,23.9
4,Argentina,6.599,16.2
5,Armenia,4.376,102.9
6,Australia,7.284,3.2
7,Austria,7.006,106.0
8,Azerbaijan,5.234,118.9
9,Bahrain,6.087,1963.9


In [12]:
# Load the Country -> Region lookup from the 2015 report; the 2017 file is
# loaded without a Region column, so the region labels are taken from here.
wh_2015 = pd.read_csv('worldHappiness2015.csv', usecols=['Country', 'Region']).sort_values(by=['Country']).reset_index(drop=True)
# Clean the frame that was just loaded. This call used to target wh_2017
# (already cleaned when it was loaded), which left wh_2015's 'Country' values
# uncleaned even though they are the key for the upcoming region merge.
# dw.strip_column is a project helper; presumably trims whitespace in place -- TODO confirm.
dw.strip_column(wh_2015, 'Country')

In [13]:
# Attach the 2015 Region labels by country name. Countries with no match in
# wh_2015 are expected to end up without a Region and are filled in manually
# afterwards -- NOTE(review): confirm merge_df's 'left' semantics.
wh_popden = dw.merge_df(wh_popden, wh_2015, 'Country', 'left')

In [14]:
# Manually assign a Region to specific rows, keyed by wh_popden row label.
# The labels are hard-coded, so they must be re-checked whenever the input
# files (and therefore the row order) change.
region_by_row = {
    13: 'Latin America and Caribbean',
    55: 'Eastern Asia',
    94: 'Sub-Saharan Africa',
    121: 'Sub-Saharan Africa',
    124: 'Sub-Saharan Africa',
}
for row_label, region_name in region_by_row.items():
    wh_popden.loc[row_label, 'Region'] = region_name

## Add ISO 3 Country Codes to Dataframe

In [15]:
# Fetch the country-code table from the web page (first table on the page).
# This needs network access and depends on the page's current layout, so a
# re-run may yield different rows than the outputs recorded here.
iso_3 = pd.read_html('https://www.iban.com/country-codes')[0]
# Keep only the country name and its three-letter code.
iso_3 = iso_3[['Country', 'Alpha-3 code']]

In [16]:
# Compare country names between the merged frame and the ISO table; names
# appearing in only one of them are listed under 'WH' or 'ISO'. The row
# numbers shown are reused by the renaming cell that follows.
wh_countries = dw.get_countries(wh_popden)
iso_countries = dw.get_countries(iso_3)
dw.diff_df(wh_countries, iso_countries, 'WH', 'ISO', False)

Unnamed: 0,Unnamed: 1,Country
WH,16,Bolivia
WH,26,Central African Republic
WH,31,Congo (Brazzaville)
WH,32,Congo (Kinshasa)
WH,36,Czech Republic
...,...,...
ISO,241,Viet Nam
ISO,242,Virgin Islands (British)
ISO,243,Virgin Islands (U.S.)
ISO,244,Wallis and Futuna


In [17]:
# Each pair is (row label in iso_3, row label in wh_countries) for a country
# that is spelled differently in the two sources. The row labels were read by
# hand from the diff output and depend on the current contents of the scraped
# ISO table, so they must be re-checked if that page changes.
iso_to_wh_rows = [
    (26, 16), (42, 26),
    (50, 32), (51, 31),
    (59, 36), (63, 38),
    (100, 55), (105, 60),
    (132, 79), (146, 88),
    (157, 96), (161, 99),
    (170, 103), (175, 107),
    (183, 112), (119, 123),
    (211, 127), (216, 130),
    (219, 132), (233, 141),
    (234, 142), (236, 143),
    (240, 146), (241, 147),
]

# Resolve every replacement name up front, before any row is modified.
newCountryName = {
    iso_row: wh_countries.loc[wh_row, 'Country']
    for iso_row, wh_row in iso_to_wh_rows
}

# Overwrite the ISO spelling with the spelling used in wh_popden.
for iso_row, wh_name in newCountryName.items():
    dw.change_name(iso_3, iso_row, 'Country', wh_name)

In [18]:
# Re-run the comparison after renaming: only 'ISO' rows should remain, i.e.
# territories in the ISO table that have no counterpart in wh_popden.
wh_countries = dw.get_countries(wh_popden)
iso_countries = dw.get_countries(iso_3)
dw.diff_df(wh_countries, iso_countries, 'WH', 'ISO', False)

Unnamed: 0,Unnamed: 1,Country
ISO,1,Åland Islands
ISO,4,American Samoa
ISO,5,Andorra
ISO,7,Anguilla
ISO,8,Antarctica
ISO,...,...
ISO,239,Vanuatu
ISO,242,Virgin Islands (British)
ISO,243,Virgin Islands (U.S.)
ISO,244,Wallis and Futuna


In [19]:
# Rebuild both country lists from the current frames before computing the
# final set of discrepancies.
wh_countries = dw.get_countries(wh_popden)
iso_countries = dw.get_countries(iso_3)

In [20]:
# Capture the remaining name mismatches so they can be dropped from iso_3.
discrepencies = dw.diff_df(wh_countries, iso_countries, 'WH', 'ISO', False)

In [21]:
# Drop the ISO entries that have no matching country in wh_popden (presumably
# the rows listed under the 'ISO' label -- TODO confirm in dwfunctions).
iso_3 = dw.remove_country(discrepencies, 'ISO', iso_3)

In [22]:
# Sanity check: the two country lists should now agree, giving an empty diff.
wh_countries = dw.get_countries(wh_popden)
iso_countries = dw.get_countries(iso_3)
dw.diff_df(wh_countries, iso_countries, 'WH', 'ISO', False)

Unnamed: 0,Unnamed: 1,Country


In [23]:
# Attach the three-letter ISO code to each country by name.
wh_popden = dw.merge_df(wh_popden, iso_3, 'Country', 'left')
# Reorder the columns: identifiers first, then the two measures.
wh_popden = wh_popden[['Country', 'Alpha-3 code', 'Region', 'Happiness Score', 'Population Density']]
wh_popden.head(10)

Unnamed: 0,Country,Alpha-3 code,Region,Happiness Score,Population Density
0,Afghanistan,AFG,Southern Asia,3.794,54.4
1,Albania,ALB,Central and Eastern Europe,4.644,106.9
2,Algeria,DZA,Middle East and Northern Africa,5.872,17.3
3,Angola,AGO,Sub-Saharan Africa,3.795,23.9
4,Argentina,ARG,Latin America and Caribbean,6.599,16.2
5,Armenia,ARM,Central and Eastern Europe,4.376,102.9
6,Australia,AUS,Australia and New Zealand,7.284,3.2
7,Austria,AUT,Western Europe,7.006,106.0
8,Azerbaijan,AZE,Central and Eastern Europe,5.234,118.9
9,Bahrain,BHR,Middle East and Northern Africa,6.087,1963.9


In [24]:
# Final check: every column should report the same non-null count as there
# are rows, i.e. no missing codes, regions or measurements.
wh_popden.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 151 entries, 0 to 150
Data columns (total 5 columns):
Country               151 non-null object
Alpha-3 code          151 non-null object
Region                151 non-null object
Happiness Score       151 non-null float64
Population Density    151 non-null float64
dtypes: float64(2), object(3)
memory usage: 7.1+ KB


In [25]:
# Uncomment the line below to save the merged data as a .csv file in the working directory
#wh_popden.to_csv('wh_popden_final.csv', index=False)