In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import time
import random
import country_converter as coco
import copy
import sqlite3
import matplotlib.pyplot as plt
from plotnine import *
pd.options.display.max_rows = 10000
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold # Cross validation 
from sklearn.model_selection import cross_validate # Cross validation 
from sklearn.model_selection import GridSearchCV # Cross validation + param. tuning.

# For pre-processing data 
from sklearn import preprocessing as pp 
from sklearn.compose import ColumnTransformer 

# For splits and CV
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold # Cross validation 
from sklearn.model_selection import cross_validate # Cross validation 
from sklearn.model_selection import GridSearchCV # Cross validation + param. tuning.

# Machine learning methods 
from sklearn.linear_model import LinearRegression as LM
from sklearn.neighbors import KNeighborsRegressor as KNN
from sklearn.tree import DecisionTreeRegressor as DTree
from sklearn.ensemble import BaggingRegressor as Bag
from sklearn.ensemble import RandomForestRegressor as RF

# For evaluating our model's performance
import sklearn.metrics as m

# Pipeline to combine modeling elements
from sklearn.pipeline import Pipeline
warnings.filterwarnings('ignore')

In [2]:
# Download the UN member-states page.
url_un = "https://www.un.org/about-us/member-states"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
page_un = requests.get(url_un, timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error page downstream.
page_un.raise_for_status()
# Show the HTTP status code as a connection check.
page_un.status_code

200

In [3]:
# Parse the downloaded HTML into a BeautifulSoup tree.
soup_un = BeautifulSoup(page_un.content, 'html.parser')
# Preview only the start of the document: printing the whole page floods the
# notebook output and buries the cells that follow.
print(soup_un.prettify()[:2000])

<!DOCTYPE html>
<html dir="ltr" lang="en">
 <head profile="http://www.w3.org/1999/xhtml/vocab">
  <meta charset="utf-8"/>
  <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
  <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
  <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
  <meta content="" name="description"/>
  <meta content="United Nations" name="author"/>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="Drupal 7 (http://drupal.org)" name="Generator"/>
  <link href="/en/about-us/member-states" rel="canonical"/>
  <link href="/en/node/119289" rel="shortlink"/>
  <link href="https://www.un.org/sites/un2.un.org/themes/bootstrap_un2/favicon.ico" rel="shortcut icon" type="image/vnd.microsoft.icon"/>
  <title>
   Member States | United Nations
  </title>
  <meta content="Member States | United Nations" name="DC.Title"/>
  <meta content="" name="DC

In [4]:
# List every <h2> element on the page to see where the country names live.
soup_un.find_all(name='h2')

[<h2 class="element-invisible">Search the United Nations</h2>,
 <h2 class="mb-0">Afghanistan</h2>,
 <h2 class="mb-0">Albania</h2>,
 <h2 class="mb-0">Algeria</h2>,
 <h2 class="mb-0">Andorra</h2>,
 <h2 class="mb-0">Angola</h2>,
 <h2 class="mb-0">Antigua and Barbuda</h2>,
 <h2 class="mb-0">Argentina</h2>,
 <h2 class="mb-0">Armenia</h2>,
 <h2 class="mb-0">Australia</h2>,
 <h2 class="mb-0">Austria</h2>,
 <h2 class="mb-0">Azerbaijan</h2>,
 <h2 class="mb-0">Bahamas</h2>,
 <h2 class="mb-0">Bahrain</h2>,
 <h2 class="mb-0">Bangladesh</h2>,
 <h2 class="mb-0">Barbados</h2>,
 <h2 class="mb-0"><a href="https://www.un.org/en/about-us/member-states/belarus">Belarus</a></h2>,
 <h2 class="mb-0">Belgium</h2>,
 <h2 class="mb-0">Belize</h2>,
 <h2 class="mb-0"><a href="https://www.un.org/en/about-us/member-states/benin">Benin</a></h2>,
 <h2 class="mb-0">Bhutan</h2>,
 <h2 class="mb-0"><a href="https://www.un.org/en/about-us/member-states/bolivia">Bolivia (Plurinational State of)</a></h2>,
 <h2 class="mb-0"><

In [5]:
# Build a one-column DataFrame of UN member-state names.
# Country headings are selected by their CSS class ("mb-0") instead of discarding
# the first <h2> by position: the "Search the United Nations" heading carries a
# different class, so this keeps working if headings are added or reordered.
un_list = [tag.get_text() for tag in soup_un.find_all('h2', class_='mb-0')]
un_df = pd.DataFrame(un_list, columns=['Name'])
# Convert to country_converter short names so every later merge shares one key.
un_df['Name'] = coco.convert(names=un_df['Name'], to='name_short')
un_df

Unnamed: 0,Name
0,Afghanistan
1,Albania
2,Algeria
3,Andorra
4,Angola
5,Antigua and Barbuda
6,Argentina
7,Armenia
8,Australia
9,Austria


In [6]:
# Download the RSF 2020 ranking page.
url_pfi20 = "https://rsf.org/en/ranking/2020"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
page_pfi20 = requests.get(url_pfi20, timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error page downstream.
page_pfi20.raise_for_status()
# Show the HTTP status code as a connection check.
page_pfi20.status_code

200

In [7]:
# Collect the text of every <span> on the 2020 ranking page into a single column.
soup_pfi20 = BeautifulSoup(page_pfi20.content, 'html.parser')
pfi20_list = [span.get_text() for span in soup_pfi20.find_all('span')]
# Drop the first 11 rows, renumber from zero, then drop rows 719-726.
# NOTE(review): presumably these are page-chrome spans around the ranking table;
# the offsets depend on the page layout at scrape time — confirm before re-running.
pfi20_df = (
    pd.DataFrame(pfi20_list)
    .drop(labels=range(0, 11), axis=0)
    .reset_index(drop=True)
    .drop(labels=range(719, 727), axis=0)
)
pfi20_df

Unnamed: 0,0
0,1
1,Norway
2,7.84
3,
4,2
5,Finland
6,7.93
7,
8,3
9,Denmark


In [8]:
# Rows repeat in groups of four: offset 0 holds the rank, offset 1 the country name.
row_offset = pfi20_df.index % 4
pfi20_df_rank = pfi20_df.loc[row_offset == 0].reset_index(drop=True)
pfi20_df_name = pfi20_df.loc[row_offset == 1].reset_index(drop=True)

In [9]:
# Label the single column of each frame, then place names and ranks side by side.
pfi20_df_rank.columns = ['PFI_Rank']
pfi20_df_name.columns = ['Name']
pfi20_df = pd.concat([pfi20_df_name, pfi20_df_rank], axis='columns')
pfi20_df

Unnamed: 0,Name,PFI_Rank
0,Norway,1
1,Finland,2
2,Denmark,3
3,Sweden,4
4,Netherlands,5
5,Jamaica,6
6,Costa Rica,7
7,Switzerland,8
8,New Zealand,9
9,Portugal,10


In [10]:
# Standardise the RSF country labels to country_converter short names so they can
# be joined against the UN list. Labels that coco cannot resolve to exactly one
# country are reported as warnings in the cell output.
pfi20_short_names = coco.convert(names=pfi20_df['Name'], to='name_short')
pfi20_df['Name'] = pfi20_short_names

OECS not found in regex
More then one regular expression match for Morocco / Western Sahara
More then one regular expression match for Morocco / Western Sahara
More then one regular expression match for Morocco / Western Sahara
More then one regular expression match for Morocco / Western Sahara


In [11]:
# Remove the entry that country_converter could not map to a single country.
# The row used to be dropped by a hard-coded label (132), which targets the wrong
# row if the ranking order changes and raises KeyError when the cell is re-run.
# Filtering on the value itself is position-independent and safe to re-run.
# NOTE(review): assumes the dropped row is the "Morocco / Western Sahara" entry,
# which coco returns as a list of matches rather than a string — confirm.
pfi20_df = pfi20_df[pfi20_df['Name'].map(lambda name: isinstance(name, str))]

In [12]:
# Keep only the RSF entries whose name matches a row of the UN member list.
pfi20_df = pd.merge(pfi20_df, un_df, on='Name', how='inner')
pfi20_df

Unnamed: 0,Name,PFI_Rank
0,Norway,1
1,Finland,2
2,Denmark,3
3,Sweden,4
4,Netherlands,5
5,Jamaica,6
6,Costa Rica,7
7,Switzerland,8
8,New Zealand,9
9,Portugal,10


In [13]:
# Turn the country name into a "2020, <country>" key shared by all yearly tables.
pfi20_df = (
    pfi20_df
    .rename(columns={'Name': 'Year&Country'})
    .assign(**{'Year&Country': lambda frame: "2020, " + frame['Year&Country'].astype(str)})
)
pfi20_df

Unnamed: 0,Year&Country,PFI_Rank
0,"2020, Norway",1
1,"2020, Finland",2
2,"2020, Denmark",3
3,"2020, Sweden",4
4,"2020, Netherlands",5
5,"2020, Jamaica",6
6,"2020, Costa Rica",7
7,"2020, Switzerland",8
8,"2020, New Zealand",9
9,"2020, Portugal",10


In [14]:
# Scrape the RSF 2019 ranking page into a (Name, PFI_Rank) table.
url_pfi19 = "https://rsf.org/en/ranking/2019"
# Bound the wait and fail on HTTP errors so a bad response is never parsed as data.
page_pfi19 = requests.get(url_pfi19, timeout=30)
page_pfi19.raise_for_status()
soup_pfi19 = BeautifulSoup(page_pfi19.content, 'html.parser')
pfi19_list = [span.get_text() for span in soup_pfi19.find_all('span')]
# Drop the first 11 rows, renumber from zero, then drop rows 719-726.
# NOTE(review): presumably page-chrome spans around the ranking table; the offsets
# depend on the page layout at scrape time — confirm before re-running.
pfi19_df = (
    pd.DataFrame(pfi19_list)
    .drop(labels=range(0, 11), axis=0)
    .reset_index(drop=True)
    .drop(labels=range(719, 727), axis=0)
)
# Rows are read in groups of four: offset 0 is taken as the rank, offset 1 as the name.
pfi19_df_rank = pfi19_df[pfi19_df.index % 4 == 0].reset_index(drop=True)
pfi19_df_name = pfi19_df[pfi19_df.index % 4 == 1].reset_index(drop=True)
pfi19_df_rank.columns = ['PFI_Rank']
pfi19_df_name.columns = ['Name']
pfi19_df = pd.concat([pfi19_df_name, pfi19_df_rank], axis=1)
# Standardise names to country_converter short names for the later UN merge.
pfi19_df['Name'] = coco.convert(names=pfi19_df['Name'], to='name_short')
pfi19_df

OECS not found in regex
More then one regular expression match for Morocco / Western Sahara
More then one regular expression match for Morocco / Western Sahara
More then one regular expression match for Morocco / Western Sahara
More then one regular expression match for Morocco / Western Sahara


Unnamed: 0,Name,PFI_Rank
0,Norway,1
1,Finland,2
2,Sweden,3
3,Netherlands,4
4,Denmark,5
5,Switzerland,6
6,New Zealand,7
7,Jamaica,8
8,Belgium,9
9,Costa Rica,10


In [15]:
# Finish the 2019 table: remove the ambiguous entry, keep UN members, build the key.
# The ambiguous row used to be dropped by a hard-coded label (134), which targets
# the wrong row if the ranking order changes; filtering on the value does not
# depend on row position.
# NOTE(review): assumes the dropped row is the "Morocco / Western Sahara" entry,
# which coco returns as a list of matches rather than a string — confirm.
pfi19_df = pfi19_df[pfi19_df['Name'].map(lambda name: isinstance(name, str))]
pfi19_df = pfi19_df.merge(un_df, how='inner', on='Name')
pfi19_df = pfi19_df.rename(columns={'Name': 'Year&Country'})
pfi19_df['Year&Country'] = "2019, " + pfi19_df['Year&Country'].astype(str)
pfi19_df

Unnamed: 0,Year&Country,PFI_Rank
0,"2019, Norway",1
1,"2019, Finland",2
2,"2019, Sweden",3
3,"2019, Netherlands",4
4,"2019, Denmark",5
5,"2019, Switzerland",6
6,"2019, New Zealand",7
7,"2019, Jamaica",8
8,"2019, Belgium",9
9,"2019, Costa Rica",10


In [16]:
# Scrape the RSF 2018 ranking page into a (Name, PFI_Rank) table.
url_pfi18 = "https://rsf.org/en/ranking/2018"
# Bound the wait and fail on HTTP errors so a bad response is never parsed as data.
page_pfi18 = requests.get(url_pfi18, timeout=30)
page_pfi18.raise_for_status()
soup_pfi18 = BeautifulSoup(page_pfi18.content, 'html.parser')
pfi18_list = [span.get_text() for span in soup_pfi18.find_all('span')]
# Drop the first 11 rows, renumber from zero, then drop rows 719-726.
# NOTE(review): presumably page-chrome spans around the ranking table; the offsets
# depend on the page layout at scrape time — confirm before re-running.
pfi18_df = (
    pd.DataFrame(pfi18_list)
    .drop(labels=range(0, 11), axis=0)
    .reset_index(drop=True)
    .drop(labels=range(719, 727), axis=0)
)
# Rows are read in groups of four: offset 0 is taken as the rank, offset 1 as the name.
pfi18_df_rank = pfi18_df[pfi18_df.index % 4 == 0].reset_index(drop=True)
pfi18_df_name = pfi18_df[pfi18_df.index % 4 == 1].reset_index(drop=True)
pfi18_df_rank.columns = ['PFI_Rank']
pfi18_df_name.columns = ['Name']
pfi18_df = pd.concat([pfi18_df_name, pfi18_df_rank], axis=1)
# Standardise names to country_converter short names for the later UN merge.
pfi18_df['Name'] = coco.convert(names=pfi18_df['Name'], to='name_short')
pfi18_df

OECS not found in regex
More then one regular expression match for Morocco / Western Sahara
More then one regular expression match for Morocco / Western Sahara
More then one regular expression match for Morocco / Western Sahara
More then one regular expression match for Morocco / Western Sahara


Unnamed: 0,Name,PFI_Rank
0,Norway,1
1,Sweden,2
2,Netherlands,3
3,Finland,4
4,Switzerland,5
5,Jamaica,6
6,Belgium,7
7,New Zealand,8
8,Denmark,9
9,Costa Rica,10


In [17]:
# Finish the 2018 table: remove the ambiguous entry, keep UN members, build the key.
# The ambiguous row used to be dropped by a hard-coded label (134), which targets
# the wrong row if the ranking order changes; filtering on the value does not
# depend on row position.
# NOTE(review): assumes the dropped row is the "Morocco / Western Sahara" entry,
# which coco returns as a list of matches rather than a string — confirm.
pfi18_df = pfi18_df[pfi18_df['Name'].map(lambda name: isinstance(name, str))]
pfi18_df = pfi18_df.merge(un_df, how='inner', on='Name')
pfi18_df = pfi18_df.rename(columns={'Name': 'Year&Country'})
pfi18_df['Year&Country'] = "2018, " + pfi18_df['Year&Country'].astype(str)
pfi18_df

Unnamed: 0,Year&Country,PFI_Rank
0,"2018, Norway",1
1,"2018, Sweden",2
2,"2018, Netherlands",3
3,"2018, Finland",4
4,"2018, Switzerland",5
5,"2018, Jamaica",6
6,"2018, Belgium",7
7,"2018, New Zealand",8
8,"2018, Denmark",9
9,"2018, Costa Rica",10


In [18]:
# Scrape the RSF 2017 ranking page into a (Name, PFI_Rank) table.
url_pfi17 = "https://rsf.org/en/ranking/2017"
# Bound the wait and fail on HTTP errors so a bad response is never parsed as data.
page_pfi17 = requests.get(url_pfi17, timeout=30)
page_pfi17.raise_for_status()
soup_pfi17 = BeautifulSoup(page_pfi17.content, 'html.parser')
pfi17_list = [span.get_text() for span in soup_pfi17.find_all('span')]
# Drop the first 11 rows, renumber from zero, then drop rows 719-726.
# NOTE(review): presumably page-chrome spans around the ranking table; the offsets
# depend on the page layout at scrape time — confirm before re-running.
pfi17_df = (
    pd.DataFrame(pfi17_list)
    .drop(labels=range(0, 11), axis=0)
    .reset_index(drop=True)
    .drop(labels=range(719, 727), axis=0)
)
# Rows are read in groups of four: offset 0 is taken as the rank, offset 1 as the name.
pfi17_df_rank = pfi17_df[pfi17_df.index % 4 == 0].reset_index(drop=True)
pfi17_df_name = pfi17_df[pfi17_df.index % 4 == 1].reset_index(drop=True)
pfi17_df_rank.columns = ['PFI_Rank']
pfi17_df_name.columns = ['Name']
pfi17_df = pd.concat([pfi17_df_name, pfi17_df_rank], axis=1)
# Standardise names to country_converter short names for the later UN merge.
pfi17_df['Name'] = coco.convert(names=pfi17_df['Name'], to='name_short')
pfi17_df

OECS not found in regex
More then one regular expression match for Morocco / Western Sahara
More then one regular expression match for Morocco / Western Sahara
More then one regular expression match for Morocco / Western Sahara
More then one regular expression match for Morocco / Western Sahara


Unnamed: 0,Name,PFI_Rank
0,Norway,1
1,Sweden,2
2,Finland,3
3,Denmark,4
4,Netherlands,5
5,Costa Rica,6
6,Switzerland,7
7,Jamaica,8
8,Belgium,9
9,Iceland,10


In [19]:
# Finish the 2017 table: remove the ambiguous entry, keep UN members, build the key.
# The ambiguous row used to be dropped by a hard-coded label (132), which targets
# the wrong row if the ranking order changes; filtering on the value does not
# depend on row position.
# NOTE(review): assumes the dropped row is the "Morocco / Western Sahara" entry,
# which coco returns as a list of matches rather than a string — confirm.
pfi17_df = pfi17_df[pfi17_df['Name'].map(lambda name: isinstance(name, str))]
pfi17_df = pfi17_df.merge(un_df, how='inner', on='Name')
pfi17_df = pfi17_df.rename(columns={'Name': 'Year&Country'})
pfi17_df['Year&Country'] = "2017, " + pfi17_df['Year&Country'].astype(str)
pfi17_df

Unnamed: 0,Year&Country,PFI_Rank
0,"2017, Norway",1
1,"2017, Sweden",2
2,"2017, Finland",3
3,"2017, Denmark",4
4,"2017, Netherlands",5
5,"2017, Costa Rica",6
6,"2017, Switzerland",7
7,"2017, Jamaica",8
8,"2017, Belgium",9
9,"2017, Iceland",10


In [20]:
# Scrape the RSF 2016 ranking page into a (Name, PFI_Rank) table.
url_pfi16 = "https://rsf.org/en/ranking/2016"
# Bound the wait and fail on HTTP errors so a bad response is never parsed as data.
page_pfi16 = requests.get(url_pfi16, timeout=30)
page_pfi16.raise_for_status()
soup_pfi16 = BeautifulSoup(page_pfi16.content, 'html.parser')
pfi16_list = [span.get_text() for span in soup_pfi16.find_all('span')]
# Drop the first 11 rows, renumber from zero, then drop rows 719-726.
# NOTE(review): presumably page-chrome spans around the ranking table; the offsets
# depend on the page layout at scrape time — confirm before re-running.
pfi16_df = (
    pd.DataFrame(pfi16_list)
    .drop(labels=range(0, 11), axis=0)
    .reset_index(drop=True)
    .drop(labels=range(719, 727), axis=0)
)
# Rows are read in groups of four: offset 0 is taken as the rank, offset 1 as the name.
pfi16_df_rank = pfi16_df[pfi16_df.index % 4 == 0].reset_index(drop=True)
pfi16_df_name = pfi16_df[pfi16_df.index % 4 == 1].reset_index(drop=True)
pfi16_df_rank.columns = ['PFI_Rank']
pfi16_df_name.columns = ['Name']
pfi16_df = pd.concat([pfi16_df_name, pfi16_df_rank], axis=1)
# Standardise names to country_converter short names for the later UN merge.
pfi16_df['Name'] = coco.convert(names=pfi16_df['Name'], to='name_short')
pfi16_df

OECS not found in regex
More then one regular expression match for Morocco / Western Sahara
More then one regular expression match for Morocco / Western Sahara
More then one regular expression match for Morocco / Western Sahara
More then one regular expression match for Morocco / Western Sahara


Unnamed: 0,Name,PFI_Rank
0,Finland,1
1,Netherlands,2
2,Norway,3
3,Denmark,4
4,New Zealand,5
5,Costa Rica,6
6,Switzerland,7
7,Sweden,8
8,Ireland,9
9,Jamaica,10


In [21]:
# Finish the 2016 table: remove the ambiguous entry, keep UN members, build the key.
# The ambiguous row used to be dropped by a hard-coded label (130), which targets
# the wrong row if the ranking order changes; filtering on the value does not
# depend on row position.
# NOTE(review): assumes the dropped row is the "Morocco / Western Sahara" entry,
# which coco returns as a list of matches rather than a string — confirm.
pfi16_df = pfi16_df[pfi16_df['Name'].map(lambda name: isinstance(name, str))]
pfi16_df = pfi16_df.merge(un_df, how='inner', on='Name')
pfi16_df = pfi16_df.rename(columns={'Name': 'Year&Country'})
pfi16_df['Year&Country'] = "2016, " + pfi16_df['Year&Country'].astype(str)
pfi16_df

Unnamed: 0,Year&Country,PFI_Rank
0,"2016, Finland",1
1,"2016, Netherlands",2
2,"2016, Norway",3
3,"2016, Denmark",4
4,"2016, New Zealand",5
5,"2016, Costa Rica",6
6,"2016, Switzerland",7
7,"2016, Sweden",8
8,"2016, Ireland",9
9,"2016, Jamaica",10


In [22]:
# Scrape the RSF 2015 ranking page into a (Name, PFI_Rank) table.
url_pfi15 = "https://rsf.org/en/ranking/2015"
# Bound the wait and fail on HTTP errors so a bad response is never parsed as data.
page_pfi15 = requests.get(url_pfi15, timeout=30)
page_pfi15.raise_for_status()
soup_pfi15 = BeautifulSoup(page_pfi15.content, 'html.parser')
pfi15_list = [span.get_text() for span in soup_pfi15.find_all('span')]
# Drop the first 11 rows, renumber from zero, then drop rows 707-714 (this year's
# trailing offset differs from the other yearly cells).
# NOTE(review): presumably page-chrome spans around the ranking table; the offsets
# depend on the page layout at scrape time — confirm before re-running.
pfi15_df = (
    pd.DataFrame(pfi15_list)
    .drop(labels=range(0, 11), axis=0)
    .reset_index(drop=True)
    .drop(labels=range(707, 715), axis=0)
)
# Rows are read in groups of four: offset 0 is taken as the rank, offset 1 as the name.
pfi15_df_rank = pfi15_df[pfi15_df.index % 4 == 0].reset_index(drop=True)
pfi15_df_name = pfi15_df[pfi15_df.index % 4 == 1].reset_index(drop=True)
pfi15_df_rank.columns = ['PFI_Rank']
pfi15_df_name.columns = ['Name']
pfi15_df = pd.concat([pfi15_df_name, pfi15_df_rank], axis=1)
# Standardise names to country_converter short names for the later UN merge.
pfi15_df['Name'] = coco.convert(names=pfi15_df['Name'], to='name_short')
pfi15_df

More then one regular expression match for Morocco / Western Sahara
More then one regular expression match for Morocco / Western Sahara
More then one regular expression match for Morocco / Western Sahara
More then one regular expression match for Morocco / Western Sahara


Unnamed: 0,Name,PFI_Rank
0,Finland,1
1,Norway,2
2,Denmark,3
3,Netherlands,4
4,Sweden,5
5,New Zealand,6
6,Austria,7
7,Canada,8
8,Jamaica,9
9,Estonia,10


In [23]:
# Finish the 2015 table: remove the ambiguous entry, keep UN members, build the key.
# The ambiguous row used to be dropped by a hard-coded label (126), which targets
# the wrong row if the ranking order changes; filtering on the value does not
# depend on row position.
# NOTE(review): assumes the dropped row is the "Morocco / Western Sahara" entry,
# which coco returns as a list of matches rather than a string — confirm.
pfi15_df = pfi15_df[pfi15_df['Name'].map(lambda name: isinstance(name, str))]
pfi15_df = pfi15_df.merge(un_df, how='inner', on='Name')
pfi15_df = pfi15_df.rename(columns={'Name': 'Year&Country'})
pfi15_df['Year&Country'] = "2015, " + pfi15_df['Year&Country'].astype(str)
pfi15_df

Unnamed: 0,Year&Country,PFI_Rank
0,"2015, Finland",1
1,"2015, Norway",2
2,"2015, Denmark",3
3,"2015, Netherlands",4
4,"2015, Sweden",5
5,"2015, New Zealand",6
6,"2015, Austria",7
7,"2015, Canada",8
8,"2015, Jamaica",9
9,"2015, Estonia",10


In [24]:
# Stack the six yearly PFI tables (2020 first, 2015 last) under a fresh 0..n-1 index.
pfi_frames = [pfi20_df, pfi19_df, pfi18_df, pfi17_df, pfi16_df, pfi15_df]
pfi_df = pd.concat(pfi_frames, ignore_index=True)
pfi_df

Unnamed: 0,Year&Country,PFI_Rank
0,"2020, Norway",1
1,"2020, Finland",2
2,"2020, Denmark",3
3,"2020, Sweden",4
4,"2020, Netherlands",5
5,"2020, Jamaica",6
6,"2020, Costa Rica",7
7,"2020, Switzerland",8
8,"2020, New Zealand",9
9,"2020, Portugal",10


In [27]:
# Load the 2020 IEF spreadsheet and reduce it to one score per UN member state,
# keyed as "2020, <country>".
# NOTE(review): the path is an absolute location on one machine — confirm the
# data directory before running elsewhere.
ief20_path = '/Users/Lawrence/Desktop/Georgetown_G1/PPOL_564_DS/Final_Project/PPOL_564_Final_Project_Zhan_Naiqiu_nz155/Index_Econ/index2020_data.xls'
ief20_df = (
    pd.read_excel(ief20_path, index_col=0)
    .loc[:, ['Country Name', '2020 Score']]
    .rename(columns={'Country Name': 'Name', '2020 Score': 'IEF_Score'})
)
# Use the same short-name convention as un_df so the merge keys line up.
ief20_df['Name'] = coco.convert(names=ief20_df['Name'], to='name_short')
ief20_df = (
    ief20_df
    .merge(un_df, how='inner', on='Name')
    .rename(columns={'Name': 'Year&Country'})
)
ief20_df['Year&Country'] = "2020, " + ief20_df['Year&Country'].astype(str)
ief20_df

Unnamed: 0,Year&Country,IEF_Score
0,"2020, Afghanistan",54.7
1,"2020, Albania",66.9
2,"2020, Algeria",46.9
3,"2020, Angola",52.2
4,"2020, Argentina",53.1
5,"2020, Armenia",70.6
6,"2020, Australia",82.6
7,"2020, Austria",73.3
8,"2020, Azerbaijan",69.3
9,"2020, Bahamas",64.5


In [28]:
# Load the 2019 IEF spreadsheet and reduce it to one score per UN member state,
# keyed as "2019, <country>".
# NOTE(review): the path is an absolute location on one machine — confirm the
# data directory before running elsewhere.
ief19_path = '/Users/Lawrence/Desktop/Georgetown_G1/PPOL_564_DS/Final_Project/PPOL_564_Final_Project_Zhan_Naiqiu_nz155/Index_Econ/index2019_data.xls'
ief19_df = (
    pd.read_excel(ief19_path, index_col=0)
    .loc[:, ['Country Name', '2019 Score']]
    .rename(columns={'Country Name': 'Name', '2019 Score': 'IEF_Score'})
)
# Use the same short-name convention as un_df so the merge keys line up.
ief19_df['Name'] = coco.convert(names=ief19_df['Name'], to='name_short')
ief19_df = (
    ief19_df
    .merge(un_df, how='inner', on='Name')
    .rename(columns={'Name': 'Year&Country'})
)
ief19_df['Year&Country'] = "2019, " + ief19_df['Year&Country'].astype(str)
ief19_df

Unnamed: 0,Year&Country,IEF_Score
0,"2019, Afghanistan",51.5
1,"2019, Albania",66.5
2,"2019, Algeria",46.2
3,"2019, Angola",50.6
4,"2019, Argentina",52.2
5,"2019, Armenia",67.7
6,"2019, Australia",80.9
7,"2019, Austria",72.0
8,"2019, Azerbaijan",65.4
9,"2019, Bahamas",62.9


In [29]:
# Load the 2018 IEF spreadsheet and reduce it to one score per UN member state,
# keyed as "2018, <country>".
# NOTE(review): the path is an absolute location on one machine — confirm the
# data directory before running elsewhere.
ief18_path = '/Users/Lawrence/Desktop/Georgetown_G1/PPOL_564_DS/Final_Project/PPOL_564_Final_Project_Zhan_Naiqiu_nz155/Index_Econ/index2018_data.xls'
ief18_df = (
    pd.read_excel(ief18_path, index_col=0)
    .loc[:, ['Country Name', '2018 Score']]
    .rename(columns={'Country Name': 'Name', '2018 Score': 'IEF_Score'})
)
# Use the same short-name convention as un_df so the merge keys line up.
ief18_df['Name'] = coco.convert(names=ief18_df['Name'], to='name_short')
ief18_df = (
    ief18_df
    .merge(un_df, how='inner', on='Name')
    .rename(columns={'Name': 'Year&Country'})
)
ief18_df['Year&Country'] = "2018, " + ief18_df['Year&Country'].astype(str)
ief18_df

Unnamed: 0,Year&Country,IEF_Score
0,"2018, Afghanistan",51.3
1,"2018, Albania",64.5
2,"2018, Algeria",44.7
3,"2018, Angola",48.6
4,"2018, Argentina",52.3
5,"2018, Armenia",68.7
6,"2018, Australia",80.9
7,"2018, Austria",71.8
8,"2018, Azerbaijan",64.3
9,"2018, Bahamas",63.3


In [30]:
# Load the 2017 IEF spreadsheet and reduce it to one score per UN member state,
# keyed as "2017, <country>".
# NOTE(review): the path is an absolute location on one machine — confirm the
# data directory before running elsewhere.
ief17_path = '/Users/Lawrence/Desktop/Georgetown_G1/PPOL_564_DS/Final_Project/PPOL_564_Final_Project_Zhan_Naiqiu_nz155/Index_Econ/index2017_data.xls'
ief17_df = (
    pd.read_excel(ief17_path, index_col=0)
    .loc[:, ['Country Name', '2017 Score']]
    .rename(columns={'Country Name': 'Name', '2017 Score': 'IEF_Score'})
)
# Use the same short-name convention as un_df so the merge keys line up.
ief17_df['Name'] = coco.convert(names=ief17_df['Name'], to='name_short')
ief17_df = (
    ief17_df
    .merge(un_df, how='inner', on='Name')
    .rename(columns={'Name': 'Year&Country'})
)
ief17_df['Year&Country'] = "2017, " + ief17_df['Year&Country'].astype(str)
ief17_df

Unnamed: 0,Year&Country,IEF_Score
0,"2017, Afghanistan",48.885716
1,"2017, Albania",64.353003
2,"2017, Algeria",46.528581
3,"2017, Angola",48.515521
4,"2017, Argentina",50.409088
5,"2017, Armenia",70.306017
6,"2017, Australia",81.015797
7,"2017, Austria",72.265759
8,"2017, Azerbaijan",63.599004
9,"2017, Bahamas",61.081741


In [31]:
# Load the 2016 IEF spreadsheet and reduce it to one score per UN member state,
# keyed as "2016, <country>".
# NOTE(review): the path is an absolute location on one machine — confirm the
# data directory before running elsewhere.
ief16_path = '/Users/Lawrence/Desktop/Georgetown_G1/PPOL_564_DS/Final_Project/PPOL_564_Final_Project_Zhan_Naiqiu_nz155/Index_Econ/index2016_data.xls'
ief16_df = (
    pd.read_excel(ief16_path, index_col=0)
    .loc[:, ['Country Name', '2016 Score']]
    .rename(columns={'Country Name': 'Name', '2016 Score': 'IEF_Score'})
)
# Use the same short-name convention as un_df so the merge keys line up.
ief16_df['Name'] = coco.convert(names=ief16_df['Name'], to='name_short')
ief16_df = (
    ief16_df
    .merge(un_df, how='inner', on='Name')
    .rename(columns={'Name': 'Year&Country'})
)
ief16_df['Year&Country'] = "2016, " + ief16_df['Year&Country'].astype(str)
ief16_df

Unnamed: 0,Year&Country,IEF_Score
0,"2016, Afghanistan",
1,"2016, Albania",65.92
2,"2016, Algeria",50.06
3,"2016, Angola",48.94
4,"2016, Argentina",43.77
5,"2016, Armenia",66.96
6,"2016, Australia",80.34
7,"2016, Austria",71.67
8,"2016, Azerbaijan",60.2
9,"2016, Bahamas",70.92


In [32]:
# Load the 2015 IEF spreadsheet and reduce it to one score per UN member state,
# keyed as "2015, <country>".
# NOTE(review): the path is an absolute location on one machine — confirm the
# data directory before running elsewhere.
ief15_df = pd.read_excel('/Users/Lawrence/Desktop/Georgetown_G1/PPOL_564_DS/Final_Project/PPOL_564_Final_Project_Zhan_Naiqiu_nz155/Index_Econ/index2015_data.xls', index_col = 0)
ief15_df = ief15_df.loc[:, ['Country Name', '2015 Score']]
# This sheet contains rows with no country name. Passed to coco.convert they only
# produce "nan not found" warnings and an unmatched name, so drop them up front
# instead of relying on the inner merge below to discard them.
ief15_df = ief15_df.dropna(subset=['Country Name'])
ief15_df = ief15_df.rename(columns={'Country Name': 'Name', '2015 Score': 'IEF_Score'})
# Use the same short-name convention as un_df so the merge keys line up.
ief15_df['Name'] = coco.convert(names=ief15_df['Name'], to='name_short')
ief15_df = ief15_df.merge(un_df, how='inner', on='Name')
ief15_df = ief15_df.rename(columns={'Name': 'Year&Country'})
ief15_df['Year&Country'] = "2015, " + ief15_df['Year&Country'].astype(str)
ief15_df

nan not found in ISO3
nan not found in ISO3
nan not found in ISO3
nan not found in ISO3
nan not found in ISO3
nan not found in ISO3


Unnamed: 0,Year&Country,IEF_Score
0,"2015, Afghanistan",
1,"2015, Albania",65.650034
2,"2015, Algeria",48.881858
3,"2015, Angola",47.885804
4,"2015, Argentina",44.138922
5,"2015, Armenia",67.125092
6,"2015, Australia",81.387287
7,"2015, Austria",71.185607
8,"2015, Azerbaijan",61.029808
9,"2015, Bahamas",68.711722


In [39]:
# Stack the six yearly IEF tables under a fresh index, then discard rows that
# have a missing value (for example a country with no score that year).
ief_frames = [ief20_df, ief19_df, ief18_df, ief17_df, ief16_df, ief15_df]
ief_df = pd.concat(ief_frames, ignore_index=True).dropna()

Unnamed: 0,Year&Country,IEF_Score
0,"2020, Afghanistan",54.7
1,"2020, Albania",66.9
2,"2020, Algeria",46.9
3,"2020, Angola",52.2
4,"2020, Argentina",53.1
5,"2020, Armenia",70.6
6,"2020, Australia",82.6
7,"2020, Austria",73.3
8,"2020, Azerbaijan",69.3
9,"2020, Bahamas",64.5


In [41]:
# Join PFI ranks with IEF scores on the shared "<year>, <country>" key; only
# keys present in both tables are kept.
final_df = pd.merge(pfi_df, ief_df, on='Year&Country', how='inner')

In [42]:
# Display the merged table: one row per year/country with PFI_Rank and IEF_Score.
final_df

Unnamed: 0,Year&Country,PFI_Rank,IEF_Score
0,"2020, Norway",1,73.4
1,"2020, Finland",2,75.7
2,"2020, Denmark",3,78.3
3,"2020, Sweden",4,74.9
4,"2020, Netherlands",5,77.0
5,"2020, Jamaica",6,68.5
6,"2020, Costa Rica",7,65.8
7,"2020, Switzerland",8,82.0
8,"2020, New Zealand",9,84.1
9,"2020, Portugal",10,67.0
