In [1]:
# Dependencies
import pandas as pd

In [2]:
# Downloading the tables published on countrycode.org (country names, dialling codes, ISO codes, ...).
# pd.read_html returns a list of DataFrames, one per HTML table it finds on the page.
countries = pd.read_html('https://countrycode.org/')

In [3]:
# Viewing the raw result (a list of DataFrames, not a single DataFrame)
countries

[               COUNTRY COUNTRY CODE ISO CODES  POPULATION  AREA KM2  \
 0          Afghanistan           93  AF / AFG    29121286    647500   
 1              Albania          355  AL / ALB     2986952     28748   
 2              Algeria          213  DZ / DZA    34586184   2381740   
 3       American Samoa        1-684  AS / ASM       57881       199   
 4              Andorra          376  AD / AND       84000       468   
 ..                 ...          ...       ...         ...       ...   
 235  Wallis and Futuna          681  WF / WLF       16025       274   
 236     Western Sahara          212  EH / ESH      273008    266000   
 237              Yemen          967  YE / YEM    23495361    527970   
 238             Zambia          260  ZM / ZMB    13460305    752614   
 239           Zimbabwe          263  ZW / ZWE    11651858    390580   
 
           GDP $USD  
 0    20.65 Billion  
 1     12.8 Billion  
 2    215.7 Billion  
 3    462.2 Million  
 4      4.8 Billion  
 .

In [4]:
# Determining the type of the countries variable
type(countries)

list

In [5]:
# Taking the first element of the list: the DataFrame that holds the country table.
# NOTE: despite its name, countries_list is a DataFrame, not a Python list.
countries_list = countries[0]
countries_list

Unnamed: 0,COUNTRY,COUNTRY CODE,ISO CODES,POPULATION,AREA KM2,GDP $USD
0,Afghanistan,93,AF / AFG,29121286,647500,20.65 Billion
1,Albania,355,AL / ALB,2986952,28748,12.8 Billion
2,Algeria,213,DZ / DZA,34586184,2381740,215.7 Billion
3,American Samoa,1-684,AS / ASM,57881,199,462.2 Million
4,Andorra,376,AD / AND,84000,468,4.8 Billion
...,...,...,...,...,...,...
235,Wallis and Futuna,681,WF / WLF,16025,274,
236,Western Sahara,212,EH / ESH,273008,266000,
237,Yemen,967,YE / YEM,23495361,527970,43.89 Billion
238,Zambia,260,ZM / ZMB,13460305,752614,22.24 Billion


In [6]:
# Splitting the 'ISO CODES' column (e.g. 'AF / AFG') at the first "/" into two columns:
# column 0 holds the two-letter code, column 1 the three-letter code.
# The spaces around the slash are kept here; they are stripped in later cells.
iso_code_split = countries_list['ISO CODES'].str.split(pat="/", n=1, expand=True)
iso_code_split

Unnamed: 0,0,1
0,AF,AFG
1,AL,ALB
2,DZ,DZA
3,AS,ASM
4,AD,AND
...,...,...
235,WF,WLF
236,EH,ESH
237,YE,YEM
238,ZM,ZMB


In [7]:
# Adding the first part of the split (the two-letter code) back to the dataframe as a new column
countries_list['ISO CODE 2L'] = iso_code_split[0]
countries_list

Unnamed: 0,COUNTRY,COUNTRY CODE,ISO CODES,POPULATION,AREA KM2,GDP $USD,ISO CODE 2L
0,Afghanistan,93,AF / AFG,29121286,647500,20.65 Billion,AF
1,Albania,355,AL / ALB,2986952,28748,12.8 Billion,AL
2,Algeria,213,DZ / DZA,34586184,2381740,215.7 Billion,DZ
3,American Samoa,1-684,AS / ASM,57881,199,462.2 Million,AS
4,Andorra,376,AD / AND,84000,468,4.8 Billion,AD
...,...,...,...,...,...,...,...
235,Wallis and Futuna,681,WF / WLF,16025,274,,WF
236,Western Sahara,212,EH / ESH,273008,266000,,EH
237,Yemen,967,YE / YEM,23495361,527970,43.89 Billion,YE
238,Zambia,260,ZM / ZMB,13460305,752614,22.24 Billion,ZM


In [8]:
# Adding the second part of the split (the three-letter code) back to the dataframe as a new column
countries_list['ISO CODE 3L'] = iso_code_split[1]
countries_list

Unnamed: 0,COUNTRY,COUNTRY CODE,ISO CODES,POPULATION,AREA KM2,GDP $USD,ISO CODE 2L,ISO CODE 3L
0,Afghanistan,93,AF / AFG,29121286,647500,20.65 Billion,AF,AFG
1,Albania,355,AL / ALB,2986952,28748,12.8 Billion,AL,ALB
2,Algeria,213,DZ / DZA,34586184,2381740,215.7 Billion,DZ,DZA
3,American Samoa,1-684,AS / ASM,57881,199,462.2 Million,AS,ASM
4,Andorra,376,AD / AND,84000,468,4.8 Billion,AD,AND
...,...,...,...,...,...,...,...,...
235,Wallis and Futuna,681,WF / WLF,16025,274,,WF,WLF
236,Western Sahara,212,EH / ESH,273008,266000,,EH,ESH
237,Yemen,967,YE / YEM,23495361,527970,43.89 Billion,YE,YEM
238,Zambia,260,ZM / ZMB,13460305,752614,22.24 Billion,ZM,ZMB


In [9]:
# Pulling the country names out of the dataframe as a plain Python list
list_of_countries = list(countries_list['COUNTRY'])
list_of_countries[:5]

['Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Andorra']

In [10]:
# Pulling the two-letter ISO codes out of the dataframe as a plain Python list
# (each entry still carries the space left over from the split)
_2l_ISO_CODES = list(countries_list['ISO CODE 2L'])
_2l_ISO_CODES[:5]

['AF ', 'AL ', 'DZ ', 'AS ', 'AD ']

In [11]:
# Stripping the leading/trailing spaces from every two-letter code
_2l_ISO_CODES = [code.strip(' ') for code in _2l_ISO_CODES]
_2l_ISO_CODES[:5]

['AF', 'AL', 'DZ', 'AS', 'AD']

In [12]:
# Pulling the three-letter ISO codes out of the dataframe as a plain Python list
# (each entry still carries the space left over from the split)
_3l_ISO_CODES = list(countries_list['ISO CODE 3L'])
_3l_ISO_CODES[:5]

[' AFG', ' ALB', ' DZA', ' ASM', ' AND']

In [13]:
# Stripping the leading/trailing spaces from every three-letter code
_3l_ISO_CODES = [code.strip(' ') for code in _3l_ISO_CODES]
_3l_ISO_CODES[:5]

['AFG', 'ALB', 'DZA', 'ASM', 'AND']

In [14]:
# Assembling the column-name -> values mapping for the country lookup table.
# The three-letter code is stored under "LOCATION" so it can serve as a join key later.
countries_dict = dict([
    ("Country", list_of_countries),
    ("LOCATION", _3l_ISO_CODES),
    ("ISO CODE 2 letter", _2l_ISO_CODES),
])

In [15]:
# Turning the mapping into a dataframe: one column per key, one row per country
country_df = pd.DataFrame(data=countries_dict)
country_df

Unnamed: 0,Country,LOCATION,ISO CODE 2 letter
0,Afghanistan,AFG,AF
1,Albania,ALB,AL
2,Algeria,DZA,DZ
3,American Samoa,ASM,AS
4,Andorra,AND,AD
...,...,...,...
235,Wallis and Futuna,WLF,WF
236,Western Sahara,ESH,EH
237,Yemen,YEM,YE
238,Zambia,ZMB,ZM


# Happiness Index

In [16]:
# Loading the raw happiness data.
# The file carries two sets of columns side by side: LOCATION/TIME, and
# Code/Year/life-satisfaction score, separated by empty "Unnamed" columns.
df = pd.read_csv('raw_data/happinessdata.csv')
df

Unnamed: 0,LOCATION,TIME,Life satisfaction in Cantril Ladder (World Happiness Report 2019),Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Code,Year,Life satisfaction in Cantril Ladder (World Happiness Report 2019).1,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19
0,AFG,1960,,,,,,,,AFG,2008.0,3.723590,,,,,,,,1960.0
1,AFG,1961,,,,,,,,AFG,2009.0,4.401778,,,,,,,,1961.0
2,AFG,1962,,,,,,,,AFG,2010.0,4.758381,,,,,,,,1962.0
3,AFG,1963,,,,,,,,AFG,2011.0,3.831719,,,,,,,,1963.0
4,AFG,1964,,,,,,,,AFG,2012.0,3.782938,,,,,,,,1964.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16099,WLD,2016,,,,,,,,,,,,,,,,,,
16100,WLD,2017,,,,,,,,,,,,,,,,,,
16101,WLD,2018,,,,,,,,,,,,,,,,,,
16102,WLD,2019,,,,,,,,,,,,,,,,,,


In [17]:
# Separating the two column groups of the raw sheet:
#   df1 - the LOCATION/TIME pairs
#   df2 - the Code/Year/life-satisfaction observations
# .copy() gives each frame its own data instead of a slice tied to df, so later
# cells that modify df1/df2 do not raise SettingWithCopyWarning.
df1 = df[['LOCATION','TIME']].copy()
df2 = df[['Code','Year','Life satisfaction in Cantril Ladder (World Happiness Report 2019).1']].copy()

In [18]:
# Dropping rows that contain missing values.
# The result is reassigned instead of using inplace=True: in-place mutation of a
# frame sliced from df triggers SettingWithCopyWarning and offers no speed benefit.
df1 = df1.dropna()
df2 = df2.dropna()

In [19]:
# Viewing the LOCATION/TIME frame after dropping missing rows
df1

Unnamed: 0,LOCATION,TIME
0,AFG,1960
1,AFG,1961
2,AFG,1962
3,AFG,1963
4,AFG,1964
...,...,...
16099,WLD,2016
16100,WLD,2017
16101,WLD,2018
16102,WLD,2019


In [20]:
# Checking the column names of df1 (these are the join keys used in the merge below)
df1.columns

Index(['LOCATION', 'TIME'], dtype='object')

In [21]:
# Casting Year from float to int so it can be matched against the integer TIME column.
# assign() builds a new frame rather than writing a column into a frame derived
# from a slice of df, which would raise SettingWithCopyWarning.
df2 = df2.assign(Year=df2['Year'].astype("int"))
df2

Unnamed: 0,Code,Year,Life satisfaction in Cantril Ladder (World Happiness Report 2019).1
0,AFG,2008,3.723590
1,AFG,2009,4.401778
2,AFG,2010,4.758381
3,AFG,2011,3.831719
4,AFG,2012,3.782938
...,...,...,...
1743,ZWE,2014,4.184451
1744,ZWE,2015,3.703191
1745,ZWE,2016,3.735400
1746,ZWE,2017,3.638300


In [22]:
# Renaming Code/Year to LOCATION/TIME so the column names line up with df1
df3 = df2.rename(columns=dict(Code='LOCATION', Year='TIME'))
df3

Unnamed: 0,LOCATION,TIME,Life satisfaction in Cantril Ladder (World Happiness Report 2019).1
0,AFG,2008,3.723590
1,AFG,2009,4.401778
2,AFG,2010,4.758381
3,AFG,2011,3.831719
4,AFG,2012,3.782938
...,...,...,...
1743,ZWE,2014,4.184451
1744,ZWE,2015,3.703191
1745,ZWE,2016,3.735400
1746,ZWE,2017,3.638300


In [23]:
# Left-joining the scores onto the LOCATION/TIME frame: every row of df1 is kept,
# and rows without a matching score get NaN.
happiness_index = df1.merge(df3, how='left', on=['TIME','LOCATION'])
happiness_index

Unnamed: 0,LOCATION,TIME,Life satisfaction in Cantril Ladder (World Happiness Report 2019).1
0,AFG,1960,
1,AFG,1961,
2,AFG,1962,
3,AFG,1963,
4,AFG,1964,
...,...,...,...
16099,WLD,2016,
16100,WLD,2017,
16101,WLD,2018,
16102,WLD,2019,


In [24]:
# Replacing every NaN in the merged frame with 0.
# NOTE(review): after this step a score of 0 means "no observation", not a
# measured value - confirm downstream code treats it that way.
happiness_index_1 = happiness_index.fillna(value=0)
happiness_index_1

Unnamed: 0,LOCATION,TIME,Life satisfaction in Cantril Ladder (World Happiness Report 2019).1
0,AFG,1960,0.0
1,AFG,1961,0.0
2,AFG,1962,0.0
3,AFG,1963,0.0
4,AFG,1964,0.0
...,...,...,...
16099,WLD,2016,0.0
16100,WLD,2017,0.0
16101,WLD,2018,0.0
16102,WLD,2019,0.0


In [25]:
# Attaching country names and two-letter codes to the happiness data.
# The inner join keeps only LOCATION codes present in both frames.
happiness_index_final = country_df.merge(happiness_index_1, how='inner', on=['LOCATION'])
happiness_index_final

Unnamed: 0,Country,LOCATION,ISO CODE 2 letter,TIME,Life satisfaction in Cantril Ladder (World Happiness Report 2019).1
0,Afghanistan,AFG,AF,1960,0.00000
1,Afghanistan,AFG,AF,1961,0.00000
2,Afghanistan,AFG,AF,1962,0.00000
3,Afghanistan,AFG,AF,1963,0.00000
4,Afghanistan,AFG,AF,1964,0.00000
...,...,...,...,...,...
13171,Zimbabwe,ZWE,ZW,2016,3.73540
13172,Zimbabwe,ZWE,ZW,2017,3.63830
13173,Zimbabwe,ZWE,ZW,2018,3.61648
13174,Zimbabwe,ZWE,ZW,2019,0.00000


In [26]:
# Spot-checking the merged data for a single country (Bhutan, 1960 onwards)
happiness_index_final.loc[
    lambda frame: (frame['Country'] == 'Bhutan') & (frame['TIME'] >= 1960)
]


Unnamed: 0,Country,LOCATION,ISO CODE 2 letter,TIME,Life satisfaction in Cantril Ladder (World Happiness Report 2019).1
1342,Bhutan,BTN,BT,1960,0.000000
1343,Bhutan,BTN,BT,1961,0.000000
1344,Bhutan,BTN,BT,1962,0.000000
1345,Bhutan,BTN,BT,1963,0.000000
1346,Bhutan,BTN,BT,1964,0.000000
...,...,...,...,...,...
1398,Bhutan,BTN,BT,2016,5.133463
1399,Bhutan,BTN,BT,2017,7.507524
1400,Bhutan,BTN,BT,2018,7.221523
1401,Bhutan,BTN,BT,2019,7.164323


In [27]:
# Loading the five cleaned per-sector datasets that are combined below
# for the machine learning algorithms
agriculture, environment, finance, health, scitech = map(
    pd.read_csv,
    [
        'cleaned_data/agricultureData.csv',
        'cleaned_data/environmentData.csv',
        'cleaned_data/financeData.csv',
        'cleaned_data/healthData.csv',
        'cleaned_data/scitechData.csv',
    ],
)

In [28]:
# Listing the column names of each sector dataframe to find the shared join keys
industry_list = [agriculture,environment,finance,health,scitech]

for industry_frame in industry_list:
    print(industry_frame.columns)


Index(['Country', 'LOCATION', 'ISO CODE 2 letter', 'TIME',
       'Land area (hectare)', 'Agricultural land (% of land area)',
       'Forest area (% of land area)', 'Cereal yield (kg per hectare)',
       'Cash Crop yield(kg per hectare)',
       'Employment in agriculture (% of total employment) (modeled ILO estimate)',
       'Livestock production index (2004-2006 = 100)'],
      dtype='object')
Index(['Country', 'LOCATION', 'ISO CODE 2 letter', 'TIME',
       'PM2.5 air pollution, mean annual exposure (micrograms per cubic meter)',
       'Access to electricity (% of population)',
       'Renewable electricity output (% of total electricity output)',
       'Urban population (% of total population)',
       'Electric power consumption (kWh per capita)'],
      dtype='object')
Index(['Country', 'LOCATION', 'TIME',
       'Automated teller machines (ATMs) (per 100,000 adults)',
       'Borrowers from commercial banks (per 1,000 adults)',
       'broad_money_growth_%', 'Listed domesti

In [32]:
# Inner-joining agriculture and environment on their shared identifier columns.
# NOTE(review): the stored output shows 'ISO CODE 2 letter_x'/'_y' columns, which
# a join on 'ISO CODE 2 letter' would not produce - the output may be stale; re-run to confirm.
happines_ML_1 = agriculture.merge(
    environment,
    how='inner',
    on=['Country', 'LOCATION', 'ISO CODE 2 letter', 'TIME'],
)
happines_ML_1

Unnamed: 0,Country,LOCATION,ISO CODE 2 letter_x,TIME,Land area (hectare),Agricultural land (% of land area),Forest area (% of land area),Cereal yield (kg per hectare),Cash Crop yield(kg per hectare),Employment in agriculture (% of total employment) (modeled ILO estimate),Livestock production index (2004-2006 = 100),ISO CODE 2 letter_y,"PM2.5 air pollution, mean annual exposure (micrograms per cubic meter)",Access to electricity (% of population),Renewable electricity output (% of total electricity output),Urban population (% of total population),Electric power consumption (kWh per capita)
0,Afghanistan,AFG,AF,1960,0.0,0.000000,6.857399,0.0,0.00,77.550771,42.936300,AF,64.951772,22.25299274,2.871174,8.401,138.332032
1,Afghanistan,AFG,AF,1961,65286000.0,57.745918,6.171659,1115.1,223.02,78.334112,43.370000,AF,64.958268,22.27526801,3.190194,8.684,138.470503
2,Afghanistan,AFG,AF,1962,65286000.0,57.837821,6.233999,1079.0,215.80,87.037902,43.990000,AF,64.964764,22.29756558,3.544660,8.976,138.609112
3,Afghanistan,AFG,AF,1963,65286000.0,57.914407,6.296968,985.8,197.16,78.334112,47.030000,AF,64.971261,22.31988546,3.938511,9.276,138.747860
4,Afghanistan,AFG,AF,1964,65286000.0,58.010906,6.360574,1082.8,216.56,79.125366,48.560000,AF,64.977759,22.34222769,4.376123,9.586,138.886746
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13171,Zimbabwe,ZWE,ZW,2016,38685000.0,41.876696,35.542457,581.8,116.36,67.066002,106.250000,ZW,21.726947,39.92399216,52.197034,32.296,610.346034
13172,Zimbabwe,ZWE,ZW,2017,38685000.0,0.000000,0.000000,616.0,123.20,66.764999,106.249894,ZW,22.251671,40.48204803,51.675063,32.237,610.956991
13173,Zimbabwe,ZWE,ZW,2018,38685000.0,0.000000,0.000000,0.0,0.00,66.306999,106.249787,ZW,22.229419,41.04158401,51.158313,32.209,611.568559
13174,Zimbabwe,ZWE,ZW,2019,0.0,0.000000,0.000000,0.0,0.00,66.543999,106.249681,ZW,22.207190,41.04199443,50.646730,32.210,612.180740


In [33]:
# Adding the finance indicators (inner join on country, code and year)
happines_ML_2 = happines_ML_1.merge(
    finance,
    how='inner',
    on=['Country', 'LOCATION', 'TIME'],
)
happines_ML_2

Unnamed: 0,Country,LOCATION,ISO CODE 2 letter_x,TIME,Land area (hectare),Agricultural land (% of land area),Forest area (% of land area),Cereal yield (kg per hectare),Cash Crop yield(kg per hectare),Employment in agriculture (% of total employment) (modeled ILO estimate),...,Urban population (% of total population),Electric power consumption (kWh per capita),"Automated teller machines (ATMs) (per 100,000 adults)","Borrowers from commercial banks (per 1,000 adults)",broad_money_growth_%,"Listed domestic companies, total","Foreign direct investment, net inflows (% of GDP)","Inflation, consumer prices (annual %)","Stocks traded, total value (% of GDP)","Total reserves (includes gold, current US$)"
0,Afghanistan,AFG,AF,1960,0.0,0.000000,6.857399,0.0,0.00,77.550771,...,8.401,138.332032,0.000000,0.000000,0.000000,0,0.000000,0.000000,0.0,5.069080e+07
1,Afghanistan,AFG,AF,1961,65286000.0,57.745918,6.171659,1115.1,223.02,78.334112,...,8.684,138.470503,0.000000,0.000000,10.291859,0,0.000000,0.000000,0.0,4.244450e+07
2,Afghanistan,AFG,AF,1962,65286000.0,57.837821,6.233999,1079.0,215.80,87.037902,...,8.976,138.609112,0.000000,0.000000,17.827298,0,0.000000,0.000000,0.0,4.059210e+07
3,Afghanistan,AFG,AF,1963,65286000.0,57.914407,6.296968,985.8,197.16,78.334112,...,9.276,138.747860,0.000000,0.000000,15.579196,0,0.000000,0.000000,0.0,4.554780e+07
4,Afghanistan,AFG,AF,1964,65286000.0,58.010906,6.360574,1082.8,216.56,79.125366,...,9.586,138.886746,0.000000,0.000000,22.888116,0,0.000000,0.000000,0.0,4.461968e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13171,Zimbabwe,ZWE,ZW,2016,38685000.0,41.876696,35.542457,581.8,116.36,67.066002,...,32.296,610.346034,7.073321,40.671346,19.048048,0,1.669274,-1.566413,0.0,4.071932e+08
13172,Zimbabwe,ZWE,ZW,2017,38685000.0,0.000000,0.000000,616.0,123.20,66.764999,...,32.237,610.956991,6.889214,40.956438,38.646489,0,1.121496,0.909733,0.0,2.926212e+08
13173,Zimbabwe,ZWE,ZW,2018,38685000.0,0.000000,0.000000,0.0,0.00,66.306999,...,32.209,611.568559,6.625208,142.527944,28.048464,0,3.062893,0.000000,0.0,8.695109e+07
13174,Zimbabwe,ZWE,ZW,2019,0.0,0.000000,0.000000,0.0,0.00,66.543999,...,32.210,612.180740,6.398139,90.239497,249.835278,0,1.305924,0.000000,0.0,1.512405e+08


In [34]:
# Adding the health indicators (inner join on country, code and year)
happines_ML_3 = happines_ML_2.merge(
    health,
    how='inner',
    on=['Country', 'LOCATION','TIME'],
)
happines_ML_3

Unnamed: 0,Country,LOCATION,ISO CODE 2 letter_x,TIME,Land area (hectare),Agricultural land (% of land area),Forest area (% of land area),Cereal yield (kg per hectare),Cash Crop yield(kg per hectare),Employment in agriculture (% of total employment) (modeled ILO estimate),...,ISO CODE 2 letter,"Population, total",total_cancer_cases,total_cancer_deaths,total_obesity_numbers,birth_rate,death_rate,"Life expectancy at birth, total (years)","Immunization, DPT (% of children ages 12-23 months)","Immunization, measles (% of children ages 12-23 months)"
0,Afghanistan,AFG,AF,1960,0.0,0.000000,6.857399,0.0,0.00,77.550771,...,AF,8996973.0,0.000000,0.000000,0.00,461355.778467,289873.473087,32.446,0.0,0.0
1,Afghanistan,AFG,AF,1961,65286000.0,57.745918,6.171659,1115.1,223.02,78.334112,...,AF,9169410.0,0.000000,0.000000,0.00,471060.099930,290202.657090,32.962,0.0,0.0
2,Afghanistan,AFG,AF,1962,65286000.0,57.837821,6.233999,1079.0,215.80,87.037902,...,AF,9351441.0,0.000000,0.000000,0.00,481197.099537,290764.355013,33.471,0.0,0.0
3,Afghanistan,AFG,AF,1963,65286000.0,57.914407,6.296968,985.8,197.16,78.334112,...,AF,9543205.0,0.000000,0.000000,0.00,491761.353650,291554.455955,33.971,0.0,0.0
4,Afghanistan,AFG,AF,1964,65286000.0,58.010906,6.360574,1082.8,216.56,79.125366,...,AF,9744781.0,0.000000,0.000000,0.00,502723.507009,292557.815182,34.463,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13171,Zimbabwe,ZWE,ZW,2016,38685000.0,41.876696,35.542457,581.8,116.36,67.066002,...,ZW,14030390.0,79553.232681,9698.880172,2174710.45,461094.736960,116255.811540,60.294,90.0,95.0
13172,Zimbabwe,ZWE,ZW,2017,38685000.0,0.000000,0.000000,616.0,123.20,66.764999,...,ZW,14236745.0,80654.107052,9871.972337,0.00,451760.392340,114520.376780,60.812,89.0,90.0
13173,Zimbabwe,ZWE,ZW,2018,38685000.0,0.000000,0.000000,0.0,0.00,66.306999,...,ZW,14439018.0,0.000000,0.000000,0.00,442931.316168,113822.778894,61.195,89.0,88.0
13174,Zimbabwe,ZWE,ZW,2019,0.0,0.000000,0.000000,0.0,0.00,66.543999,...,ZW,14645468.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000,90.0,85.0


In [35]:
# Adding the science & technology indicators (inner join on country, code and year)
happines_ML_4 = happines_ML_3.merge(
    scitech,
    how='inner',
    on=['Country', 'LOCATION', 'TIME'],
)
happines_ML_4

Unnamed: 0,Country,LOCATION,ISO CODE 2 letter_x,TIME,Land area (hectare),Agricultural land (% of land area),Forest area (% of land area),Cereal yield (kg per hectare),Cash Crop yield(kg per hectare),Employment in agriculture (% of total employment) (modeled ILO estimate),...,birth_rate,death_rate,"Life expectancy at birth, total (years)","Immunization, DPT (% of children ages 12-23 months)","Immunization, measles (% of children ages 12-23 months)",ISO CODE 2 letter_y,High-technology exports (current US$),"Patent applications, residents",Scientific and technical journal articles,"Technical cooperation grants (BoP, current US$)"
0,Afghanistan,AFG,AF,1960,0.0,0.000000,6.857399,0.0,0.00,77.550771,...,461355.778467,289873.473087,32.446,0.0,0.0,AF,0.0,0,0.00,1510000
1,Afghanistan,AFG,AF,1961,65286000.0,57.745918,6.171659,1115.1,223.02,78.334112,...,471060.099930,290202.657090,32.962,0.0,0.0,AF,0.0,0,0.00,1620000
2,Afghanistan,AFG,AF,1962,65286000.0,57.837821,6.233999,1079.0,215.80,87.037902,...,481197.099537,290764.355013,33.471,0.0,0.0,AF,0.0,0,0.00,1870000
3,Afghanistan,AFG,AF,1963,65286000.0,57.914407,6.296968,985.8,197.16,78.334112,...,491761.353650,291554.455955,33.971,0.0,0.0,AF,0.0,0,0.00,1990000
4,Afghanistan,AFG,AF,1964,65286000.0,58.010906,6.360574,1082.8,216.56,79.125366,...,502723.507009,292557.815182,34.463,0.0,0.0,AF,0.0,0,0.00,2150000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13171,Zimbabwe,ZWE,ZW,2016,38685000.0,41.876696,35.542457,581.8,116.36,67.066002,...,461094.736960,116255.811540,60.294,90.0,95.0,ZW,13735165.0,8,313.80,59800000
13172,Zimbabwe,ZWE,ZW,2017,38685000.0,0.000000,0.000000,616.0,123.20,66.764999,...,451760.392340,114520.376780,60.812,89.0,90.0,ZW,11206210.0,0,340.45,52490000
13173,Zimbabwe,ZWE,ZW,2018,38685000.0,0.000000,0.000000,0.0,0.00,66.306999,...,442931.316168,113822.778894,61.195,89.0,88.0,ZW,9719357.0,0,359.33,54420000
13174,Zimbabwe,ZWE,ZW,2019,0.0,0.000000,0.000000,0.0,0.00,66.543999,...,0.000000,0.000000,0.000,90.0,85.0,ZW,27810712.0,0,0.00,0


In [38]:
# Removing the duplicated two-letter code column left behind by the merges
happines_ML_5 = happines_ML_4.drop(labels=['ISO CODE 2 letter_x'], axis='columns')
happines_ML_5

Unnamed: 0,Country,LOCATION,TIME,Land area (hectare),Agricultural land (% of land area),Forest area (% of land area),Cereal yield (kg per hectare),Cash Crop yield(kg per hectare),Employment in agriculture (% of total employment) (modeled ILO estimate),Livestock production index (2004-2006 = 100),...,birth_rate,death_rate,"Life expectancy at birth, total (years)","Immunization, DPT (% of children ages 12-23 months)","Immunization, measles (% of children ages 12-23 months)",ISO CODE 2 letter_y,High-technology exports (current US$),"Patent applications, residents",Scientific and technical journal articles,"Technical cooperation grants (BoP, current US$)"
0,Afghanistan,AFG,1960,0.0,0.000000,6.857399,0.0,0.00,77.550771,42.936300,...,461355.778467,289873.473087,32.446,0.0,0.0,AF,0.0,0,0.00,1510000
1,Afghanistan,AFG,1961,65286000.0,57.745918,6.171659,1115.1,223.02,78.334112,43.370000,...,471060.099930,290202.657090,32.962,0.0,0.0,AF,0.0,0,0.00,1620000
2,Afghanistan,AFG,1962,65286000.0,57.837821,6.233999,1079.0,215.80,87.037902,43.990000,...,481197.099537,290764.355013,33.471,0.0,0.0,AF,0.0,0,0.00,1870000
3,Afghanistan,AFG,1963,65286000.0,57.914407,6.296968,985.8,197.16,78.334112,47.030000,...,491761.353650,291554.455955,33.971,0.0,0.0,AF,0.0,0,0.00,1990000
4,Afghanistan,AFG,1964,65286000.0,58.010906,6.360574,1082.8,216.56,79.125366,48.560000,...,502723.507009,292557.815182,34.463,0.0,0.0,AF,0.0,0,0.00,2150000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13171,Zimbabwe,ZWE,2016,38685000.0,41.876696,35.542457,581.8,116.36,67.066002,106.250000,...,461094.736960,116255.811540,60.294,90.0,95.0,ZW,13735165.0,8,313.80,59800000
13172,Zimbabwe,ZWE,2017,38685000.0,0.000000,0.000000,616.0,123.20,66.764999,106.249894,...,451760.392340,114520.376780,60.812,89.0,90.0,ZW,11206210.0,0,340.45,52490000
13173,Zimbabwe,ZWE,2018,38685000.0,0.000000,0.000000,0.0,0.00,66.306999,106.249787,...,442931.316168,113822.778894,61.195,89.0,88.0,ZW,9719357.0,0,359.33,54420000
13174,Zimbabwe,ZWE,2019,0.0,0.000000,0.000000,0.0,0.00,66.543999,106.249681,...,0.000000,0.000000,0.000,90.0,85.0,ZW,27810712.0,0,0.00,0


In [40]:
# Listing the combined column names (used to build the rename mapping in the next cell)
happines_ML_5.columns

Index(['Country', 'LOCATION', 'TIME', 'Land area (hectare)',
       'Agricultural land (% of land area)', 'Forest area (% of land area)',
       'Cereal yield (kg per hectare)', 'Cash Crop yield(kg per hectare)',
       'Employment in agriculture (% of total employment) (modeled ILO estimate)',
       'Livestock production index (2004-2006 = 100)', 'ISO CODE 2 letter_y',
       'PM2.5 air pollution, mean annual exposure (micrograms per cubic meter)',
       'Access to electricity (% of population)',
       'Renewable electricity output (% of total electricity output)',
       'Urban population (% of total population)',
       'Electric power consumption (kWh per capita)',
       'Automated teller machines (ATMs) (per 100,000 adults)',
       'Borrowers from commercial banks (per 1,000 adults)',
       'broad_money_growth_%', 'Listed domestic companies, total',
       'Foreign direct investment, net inflows (% of GDP)',
       'Inflation, consumer prices (annual %)',
       'Stocks trad

In [45]:
# Dropping the remaining duplicated ISO column, then shortening the verbose
# indicator names to snake_case-style labels.
# Entries that map a name to itself are no-ops, and keys that are absent from the
# frame are ignored by rename().
# NOTE(review): some target names do not match their source - 'agri_land_area_ha'
# and 'fores_area_ha' come from "% of land area" columns, and 'cereal_yield_kh_ha'
# looks like a typo for kg. Left unchanged here because they define the exported
# column names; confirm before renaming.
happines_ML_6 = happines_ML_5.drop(columns=['ISO CODE 2 letter_y']).rename(columns={
    # identifiers
    'LOCATION': 'country_code',
    'TIME': 'Year',
    # agriculture
    'Land area (hectare)': 'land_area_ha',
    'Agricultural land (% of land area)': 'agri_land_area_ha',
    'Forest area (% of land area)': 'fores_area_ha',
    'Cereal yield (kg per hectare)': 'cereal_yield_kh_ha',
    'Cash Crop yield(kg per hectare)': 'cash_crop_yield_kg_ha',
    'Employment in agriculture (% of total employment) (modeled ILO estimate)': 'employment_in_agriculture_%',
    'Livestock production index (2004-2006 = 100)': 'live_stock_production_100_index',
    'ISO CODE 2 letter_y': 'ISO CODE 2 letter_y',
    # environment
    'PM2.5 air pollution, mean annual exposure (micrograms per cubic meter)': 'PM2.5 air pollution',
    'Access to electricity (% of population)': 'access_to_electricity_pop_%',
    'Renewable electricity output (% of total electricity output)': 'renewable_electricity_%',
    'Urban population (% of total population)': 'urban_pop_%',
    'Electric power consumption (kWh per capita)': 'electricity_consumption_kwh',
    # finance
    'Automated teller machines (ATMs) (per 100,000 adults)': 'atm_per_100000',
    'Borrowers from commercial banks (per 1,000 adults)': 'number_of_bank_loans_per_1000',
    'broad_money_growth_%': 'broad_money_growth_%',
    'Listed domestic companies, total': 'listed_domestic_companies',
    'Foreign direct investment, net inflows (% of GDP)': 'foreign_investment',
    'Inflation, consumer prices (annual %)': 'inflation_%',
    'Stocks traded, total value (% of GDP)': 'stocks_traded',
    'Total reserves (includes gold, current US$)': 'total_asset_reserves',
    # health
    'Population, total': 'population',
    'total_cancer_cases': 'total_cancer_cases',
    'total_cancer_deaths': 'total_cancer_deaths',
    'total_obesity_numbers': 'total_obesity_numbers',
    'Life expectancy at birth, total (years)': 'life_expectency',
    'Immunization, DPT (% of children ages 12-23 months)': 'dtp_immunisation',
    'Immunization, measles (% of children ages 12-23 months)': 'measles_immunisation',
    # science & technology
    'High-technology exports (current US$)': 'high_tech_exports',
    'Patent applications, residents': 'patent_applications',
    'Scientific and technical journal articles': 'scientific_publications',
    'Technical cooperation grants (BoP, current US$)': 'government_technical_grants',
})

In [46]:
# Viewing the renamed, combined dataset before export
happines_ML_6

Unnamed: 0,Country,country_code,Year,land_area_ha,agri_land_area_ha,fores_area_ha,cereal_yield_kh_ha,cash_crop_yield_kg_ha,employment_in_agriculture_%,live_stock_production_100_index,...,total_obesity_numbers,birth_rate,death_rate,life_expectency,dtp_immunisation,measles_immunisation,high_tech_exports,patent_applications,scientific_publications,government_technical_grants
0,Afghanistan,AFG,1960,0.0,0.000000,6.857399,0.0,0.00,77.550771,42.936300,...,0.00,461355.778467,289873.473087,32.446,0.0,0.0,0.0,0,0.00,1510000
1,Afghanistan,AFG,1961,65286000.0,57.745918,6.171659,1115.1,223.02,78.334112,43.370000,...,0.00,471060.099930,290202.657090,32.962,0.0,0.0,0.0,0,0.00,1620000
2,Afghanistan,AFG,1962,65286000.0,57.837821,6.233999,1079.0,215.80,87.037902,43.990000,...,0.00,481197.099537,290764.355013,33.471,0.0,0.0,0.0,0,0.00,1870000
3,Afghanistan,AFG,1963,65286000.0,57.914407,6.296968,985.8,197.16,78.334112,47.030000,...,0.00,491761.353650,291554.455955,33.971,0.0,0.0,0.0,0,0.00,1990000
4,Afghanistan,AFG,1964,65286000.0,58.010906,6.360574,1082.8,216.56,79.125366,48.560000,...,0.00,502723.507009,292557.815182,34.463,0.0,0.0,0.0,0,0.00,2150000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13171,Zimbabwe,ZWE,2016,38685000.0,41.876696,35.542457,581.8,116.36,67.066002,106.250000,...,2174710.45,461094.736960,116255.811540,60.294,90.0,95.0,13735165.0,8,313.80,59800000
13172,Zimbabwe,ZWE,2017,38685000.0,0.000000,0.000000,616.0,123.20,66.764999,106.249894,...,0.00,451760.392340,114520.376780,60.812,89.0,90.0,11206210.0,0,340.45,52490000
13173,Zimbabwe,ZWE,2018,38685000.0,0.000000,0.000000,0.0,0.00,66.306999,106.249787,...,0.00,442931.316168,113822.778894,61.195,89.0,88.0,9719357.0,0,359.33,54420000
13174,Zimbabwe,ZWE,2019,0.0,0.000000,0.000000,0.0,0.00,66.543999,106.249681,...,0.00,0.000000,0.000000,0.000,90.0,85.0,27810712.0,0,0.00,0


In [47]:
# Exporting the combined dataset.
# index=False keeps the row index out of the file; otherwise it is written as an
# unnamed first column that shows up as "Unnamed: 0" when the CSV is read back.
# The other cleaned_data files loaded above have no such column.
# NOTE(review): in the visible cells happiness_index_final is never merged into
# happines_ML_6, so this file appears to lack the life-satisfaction column -
# confirm whether that is intended.
happines_ML_6.to_csv('cleaned_data/happinessData.csv', index=False)