# Introduction

## Import Relevant Libraries

In [1]:
import pandas as pd 
import altair as alt
import geopandas as gpd
import os
import re
import warnings
warnings.filterwarnings('ignore')
from altair_data_server import data_server
alt.data_transformers.register('data_server', data_server)
alt.data_transformers.enable('data_server')
from scipy.interpolate import PchipInterpolator

# Independent/Dependent Variables

## Happiness (Dependent Variable)

In [2]:
happiness_path = 'Datasets(Raw)/Indep_Dep_Vars/Happiness/'

replaces = {"Eswatini, Kingdom of" : "Kingdom of Eswatini",
            "Swaziland" : "Kingdom of Eswatini",
            "Czech Republic" : "Czechia"
            }

dataframes = []

for filename in os.scandir(happiness_path):
    print("Working on: " + filename.path)

    curr_df = pd.read_csv(filename)

    # some countries have an asterisk next to their name
    curr_df['Country'] = curr_df["Country"].str.replace("*","",regex=True)

    # replace certain country names so it's all uniform
    curr_df['Country'] = curr_df['Country'].replace(replaces)

    # add "_year" after the Happiness Score for columns
    match = re.search(r"\d{4}", filename.path)
    year = match.group(0) if match else None
    curr_df.rename(columns = {"Happiness Score" : "Happiness Score " + year,
                            "Social support":"Social Support " + year,
                            "Freedom to make life choices": "Freedom " + year,
                            "Generosity": "Generosity " + year,
                            'Perceptions of corruption':"Corruption " + year
                            },
                   inplace = True)
    
    # Congo is sometimes split up, so turn it into one row
    congo_rows = curr_df[curr_df['Country'].isin(['Congo (Brazzaville)', 'Congo (Kinshasa)'])]
    if not congo_rows.empty:
        avg_row = congo_rows.mean(numeric_only=True)
        new_row = congo_rows.iloc[0].copy()
        new_row[new_row.index.str.startswith('Happiness Score ')] = avg_row
        # Set the new country name
        new_row['Country'] = 'Congo'
        # Remove the original rows from the DataFrame
        curr_df = curr_df[~curr_df['Country'].isin(['Congo (Brazzaville)', 'Congo (Kinshasa)'])]
        # Add the new row with the average values to the DataFrame
        curr_df = curr_df.append(new_row, ignore_index=True)

    dataframes.append(curr_df)

Working on: Datasets(Raw)/Indep_Dep_Vars/Happiness/2020.csv
Working on: Datasets(Raw)/Indep_Dep_Vars/Happiness/2021.csv
Working on: Datasets(Raw)/Indep_Dep_Vars/Happiness/2022.csv
Working on: Datasets(Raw)/Indep_Dep_Vars/Happiness/2019.csv


In [3]:
happiness_df = dataframes[0][['Country']]

for i, df in enumerate(dataframes):
    filtered_columns = df.filter(regex=r"Happiness Score")

    country_happiness_df = pd.concat([df['Country'], filtered_columns], axis=1)

    # Merge the result DataFrame with the country_happiness_df on the 'Country' column
    happiness_df = happiness_df.merge(country_happiness_df, on='Country', how='right')

# Split the DataFrame into the first column and the remaining columns
first_column = happiness_df.iloc[:, 0:1]
remaining_columns = happiness_df.iloc[:, 1:]

# Sort the remaining columns
sorted_remaining_columns = remaining_columns.sort_index(axis=1)

# Concatenate the first column with the sorted remaining columns
happiness_df = pd.concat([first_column, sorted_remaining_columns], axis=1)

happiness_df['Happiness Score 2022'] = happiness_df['Happiness Score 2022'].str.replace(',','.')
happiness_df['Happiness Score 2022'] = pd.to_numeric(happiness_df['Happiness Score 2022'], errors ='ignore').astype(pd.Float64Dtype())

happiness_df.dropna(subset=['Happiness Score 2019', 'Happiness Score 2020',
                            'Happiness Score 2021',	'Happiness Score 2022'],
                    inplace=True)

happiness_df.reset_index(drop=True, inplace=True)
happiness_df.columns = ['Country', 2019, 2020, 2021, 2022]
happiness_df

Unnamed: 0,Country,Happiness Score 2019,Happiness Score 2020,Happiness Score 2021,Happiness Score 2022
0,Finland,7.769,7.8087,7.842,7.821
1,Denmark,7.600,7.6456,7.620,7.636
2,Norway,7.554,7.4880,7.392,7.365
3,Iceland,7.494,7.5045,7.554,7.557
4,Netherlands,7.488,7.4489,7.464,7.415
...,...,...,...,...,...
137,Yemen,3.380,3.5274,3.658,4.197
138,Rwanda,3.334,3.3123,3.415,3.268
139,Tanzania,3.231,3.4762,3.623,3.702
140,Afghanistan,3.203,2.5669,2.523,2.404


In [4]:
happiness_df.to_csv('Datasets(Cleaned)/Indep_Dep_Vars/Happiness.csv', index=False)

## Level of Authoritarianism (Independent Variable)

In [5]:
authoritarianism_path = 'Datasets(Raw)/Indep_Dep_Vars/democracy.csv'
auth_df = pd.read_csv(authoritarianism_path)
auth_df = auth_df[auth_df['Year'] >= 2019]
auth_df = auth_df.pivot(index='Entity', columns='Year', values='democracy_eiu')
auth_df.columns = [f'Democracy Score {col}' for col in auth_df.columns]
auth_df.reset_index(inplace=True)
auth_df.rename(columns={'Entity':'Country'}, inplace=True)
auth_df

Unnamed: 0,Country,Democracy Score 2019,Democracy Score 2020,Democracy Score 2021,Democracy Score 2022
0,Afghanistan,2.850000,2.85000,0.320000,0.320000
1,Africa,4.221200,4.11640,4.071600,4.075000
2,Albania,5.890000,6.08000,6.110000,6.410000
3,Algeria,4.010000,3.77000,3.770000,3.660000
4,Angola,3.720000,3.66000,3.370000,3.960000
...,...,...,...,...,...
169,Vietnam,3.080000,2.94000,2.940000,2.730000
170,World,5.439641,5.36976,5.281377,5.293174
171,Yemen,1.950000,1.95000,1.950000,1.950000
172,Zambia,5.090000,4.86000,5.720000,5.800000


In [6]:
countries = pd.read_csv('Datasets(Raw)/geoJSON/countries.csv')
countries = set(countries['Country'])
auth_countries = set(auth_df['Country'])

# Find the countries in auth_df that are not in the countries set
countries_not_in_set = auth_countries - countries

print(countries_not_in_set)

{'Fiji', 'Hong Kong', 'Palestine', 'Oman', 'Eswatini', 'Cape Verde', 'South America', 'Djibouti', 'Guinea-Bissau', 'Sudan', 'Taiwan', 'Equatorial Guinea', 'Guyana', 'Bhutan', 'Syria', 'Qatar', 'Angola', "Cote d'Ivoire", 'Cuba', 'Papua New Guinea', 'Haiti', 'North America', 'Oceania', 'Africa', 'Trinidad and Tobago', 'North Macedonia', 'Eritrea', 'Democratic Republic of Congo', 'Central African Republic', 'Europe', 'Burundi', 'Suriname', 'North Korea', 'World', 'Timor', 'Asia'}


In [7]:
# original : replace to 
replaces = {"Eswatini" : "Kingdom of Eswatini",
            "Palestine" : "Palestinian Territories",
            "Cote d'Ivoire" : "Ivory Coast",
            "North Macedonia" : "Kosovo"
            }

removes = ['World','Qatar','Burundi','Central African Republic',
           'South America','Timor','Sudan','Asia','Guyana','North Macedonia',
           'Guinea-Bissau', 'Cuba','Equatorial Guinea','Bhutan','Suriname',
           'Haiti','Cape Verde','Europe','Syria','Eritrea','Hong Kong',
           'Africa','Papua New Guinea','Oman','North America', 'Angola',
           'Djibouti','North Korea','Fiji', 'Trinidad and Tobago', 'Taiwan', 'Oceania',"Democratic Republic of Congo"]

In [8]:
# Replace values in the 'Entity' column using the replaces dictionary
auth_df['Country'] = auth_df['Country'].replace(replaces)

# Remove rows corresponding to the values in the removes list
auth_df = auth_df[~auth_df['Country'].isin(removes)]

In [9]:
dem_countries = set(auth_df['Country'])

# Find the countries in auth_df that are not in the countries set
countries_not_in_set = dem_countries - countries

print(countries_not_in_set)

set()


In [10]:
test = countries - dem_countries
test

set()

In [11]:
# Define the custom function to determine the label based on the democracy score
def get_label(score):
    if 0 <= score <= 4:
        return "Authoritarian"
    elif 4 < score <= 6:
        return "Hybrid Regime"
    elif 6 < score <= 8:
        return "Flawed Democracy"
    elif 8 < score <= 10:
        return "Full Democracy"
    else:
        return "Unknown"

# Apply the custom function to create the label columns
auth_df['2019 Label'] = auth_df['Democracy Score 2019'].apply(get_label)
auth_df['2020 Label'] = auth_df['Democracy Score 2020'].apply(get_label)
auth_df['2021 Label'] = auth_df['Democracy Score 2021'].apply(get_label)
auth_df['2022 Label'] = auth_df['Democracy Score 2022'].apply(get_label)

In [12]:
auth_df.reset_index(inplace=True, drop=True)

In [13]:
auth_df

Unnamed: 0,Country,Democracy Score 2019,Democracy Score 2020,Democracy Score 2021,Democracy Score 2022,2019 Label,2020 Label,2021 Label,2022 Label
0,Afghanistan,2.85,2.85,0.32,0.32,Authoritarian,Authoritarian,Authoritarian,Authoritarian
1,Albania,5.89,6.08,6.11,6.41,Hybrid Regime,Flawed Democracy,Flawed Democracy,Flawed Democracy
2,Algeria,4.01,3.77,3.77,3.66,Hybrid Regime,Authoritarian,Authoritarian,Authoritarian
3,Argentina,7.02,6.95,6.81,6.85,Flawed Democracy,Flawed Democracy,Flawed Democracy,Flawed Democracy
4,Armenia,5.54,5.35,5.49,5.63,Hybrid Regime,Hybrid Regime,Hybrid Regime,Hybrid Regime
...,...,...,...,...,...,...,...,...,...
137,Venezuela,2.88,2.76,2.11,2.23,Authoritarian,Authoritarian,Authoritarian,Authoritarian
138,Vietnam,3.08,2.94,2.94,2.73,Authoritarian,Authoritarian,Authoritarian,Authoritarian
139,Yemen,1.95,1.95,1.95,1.95,Authoritarian,Authoritarian,Authoritarian,Authoritarian
140,Zambia,5.09,4.86,5.72,5.80,Hybrid Regime,Hybrid Regime,Hybrid Regime,Hybrid Regime


In [14]:
auth_df.to_csv('Datasets(Cleaned)/Indep_Dep_Vars/Authoritarianism.csv', index=False)

# Control Variables

## GDP Per Capita in Terms of Purchasing Power Parity Adjusted to Constant Dollars
[Source](https://data.worldbank.org/indicator/NY.GDP.PCAP.PP.CD)

In [15]:
gdp_path = 'Datasets(Raw)/Controls/GDP_per_capita_PPP_(current_international_$).csv'

gdp_df = pd.read_csv(gdp_path)

gdp_df.drop(columns = ['Country Code','Indicator Name','Indicator Code'], inplace = True)

gdp_df.dropna(how='all', axis=1, inplace=True)

columns_to_drop = list(range(1990, 2019))
columns_to_drop = [str(year) for year in columns_to_drop]

gdp_df.drop(columns=columns_to_drop, inplace=True)

gdp_df.dropna(subset=['2019', '2020', '2021'], inplace=True)

gdp_df.reset_index(drop=True, inplace=True)

gdp_df.rename(columns={'Country Name':'Country'}, inplace=True)

columns_to_round = ['2019', '2020', '2021']
gdp_df[columns_to_round] = gdp_df[columns_to_round].round(2)

gdp_df

Unnamed: 0,Country,2019,2020,2021
0,Aruba,42501.64,34971.01,42698.36
1,Africa Eastern and Southern,3777.97,3621.06,3839.47
2,Afghanistan,2167.70,2076.14,1665.81
3,Africa Western and Central,4264.73,4174.50,4409.45
4,Angola,6881.08,6362.64,6491.13
...,...,...,...,...
233,Samoa,6613.98,6394.38,6079.76
234,Kosovo,11796.15,11292.86,13055.90
235,South Africa,14436.83,13517.78,14624.42
236,Zambia,3514.69,3358.01,3555.92


In [16]:
gdp_countries = set(gdp_df['Country'])

# Find the countries in auth_df that are not in the countries set
countries_not_in_set = gdp_countries - countries

print(countries_not_in_set)

{'European Union', 'Fiji', 'Oman', 'Caribbean small states', 'Middle income', 'Latin America & Caribbean', 'Brunei Darussalam', 'Bahamas, The', 'Seychelles', 'Lower middle income', 'Equatorial Guinea', 'Middle East & North Africa (IDA & IBRD countries)', 'Timor-Leste', 'Fragile and conflict affected situations', 'St. Vincent and the Grenadines', 'Middle East & North Africa', 'Grenada', 'IDA & IBRD total', 'Europe & Central Asia', 'Trinidad and Tobago', 'Egypt, Arab Rep.', 'Congo, Dem. Rep.', 'Cabo Verde', 'Suriname', 'Low & middle income', 'World', 'Upper middle income', 'West Bank and Gaza', 'Lao PDR', 'Eswatini', 'Central Europe and the Baltics', 'Djibouti', 'Bermuda', 'Late-demographic dividend', 'Marshall Islands', 'East Asia & Pacific', 'Gambia, The', 'East Asia & Pacific (IDA & IBRD countries)', 'IDA total', 'North America', 'Turks and Caicos Islands', 'North Macedonia', 'Low income', 'Tuvalu', 'IDA blend', 'Euro area', 'Africa Western and Central', 'Tonga', 'Barbados', 'Guinea-B

In [17]:
# original : replace to 
replaces = {"Cote d'Ivoire" : "Ivory Coast",
            'Iran, Islamic Rep.' : 'Iran',
            'Korea, Rep.' : 'South Korea',
            'Slovak Republic' : 'Slovakia',
            'Egypt, Arab Rep.' : 'Egypt',
            'Turkiye' : 'Turkey',
            'Congo, Rep.' : 'Congo',
            'Eswatini' : 'Kingdom of Eswatini',
            'Russian Federation' : 'Russia',
            'Gambia, The' : 'Gambia',
            'Kyrgyz Republic' : 'Kyrgyzstan',
            'Middle East & North Africa (excluding high income)' : 'Kuwait' ,
            'Lao PDR' : 'Laos',
            'West Bank and Gaza' : 'Palestinian Territories',
            'Latin America & Caribbean' : 'Venezuela',
            'Europe & Central Asia' : 'Turkmenistan',
            'Pre-demographic dividend' : 'Yemen'
            }

removes = ['Early-demographic dividend', 'Sudan', 'East Asia & Pacific', 
           'Sub-Saharan Africa', 'Bahamas, The', 'Aruba', 'Turks and Caicos Islands', 'East Asia & Pacific (IDA & IBRD countries)', 'Euro area', 'Qatar', 'Bermuda', 'Late-demographic dividend', 'North Macedonia', 'Dominica', 'IDA & IBRD total', 'Brunei Darussalam', 'Somalia', 'Post-demographic dividend', 'Cayman Islands', 'Other small states', 'Low & middle income', 'Samoa', 'Djibouti', 'North America', 'World', 'Micronesia, Fed. Sts.' , 'Nauru', 'Sub-Saharan Africa (IDA & IBRD countries)', 'Central Europe and the Baltics', 'Palau', 'IDA blend', 'Puerto Rico', 'Suriname', 'Upper middle income', 'Africa Eastern and Southern', 'Tonga', 'Low income', 'Middle income', 'Pacific island small states', 'Trinidad and Tobago', 'European Union','IDA only', 'St. Lucia', 'Middle East & North Africa', 'Timor-Leste', 'High income', 'East Asia & Pacific (excluding high income)', 'Burundi', 'Europe & Central Asia (excluding high income)',  'South Asia', 'St. Vincent and the Grenadines', 'Hong Kong SAR, China', 'Fragile and conflict affected situations', 'Europe & Central Asia (IDA & IBRD countries)', 'Kiribati', 'Curacao', 'Guinea-Bissau', 'Latin America & Caribbean (excluding high income)', 'OECD members', 'Heavily indebted poor countries (HIPC)', 'Sao Tome and Principe', 'Oman', 'Middle East & North Africa (IDA & IBRD countries)','Solomon Islands', 'Belize', 'Cabo Verde', 'Congo, Dem. Rep.', 'Antigua and Barbuda', 'Central African Republic', 'South Asia (IDA & IBRD)','Guyana', 'Latin America & the Caribbean (IDA & IBRD countries)', 'Fiji', 'Macao SAR, China', 'IDA total', 'Tuvalu', 'Marshall Islands', 'Barbados', 'Vanuatu', 'Sub-Saharan Africa (excluding high income)', 'Least developed countries: UN classification','Gambia, The', 'Caribbean small states','Bhutan', 'Equatorial Guinea', 'St. Kitts and Nevis', 'Arab World', 'Africa Western and Central', 'Seychelles', 'Papua New Guinea', 'Haiti', 'Maldives', 'Angola', 'IBRD only', 'Small states', 'Grenada', 'Lower middle income']

In [18]:
# Replace values in the 'Entity' column using the replaces dictionary
gdp_df['Country'] = gdp_df['Country'].replace(replaces)

# Remove rows corresponding to the values in the removes list
gdp_df = gdp_df[~gdp_df['Country'].isin(removes)]

In [19]:
gdp_countries = set(gdp_df['Country'])

# Find the countries in auth_df that are not in the countries set
countries_not_in_set = gdp_countries - countries

print(countries_not_in_set)

set()


In [20]:
test = countries - gdp_countries
test

set()

In [21]:
def interpolate_gdp(row):
    years = [2019, 2020, 2021]
    gdp = [row['2019'], row['2020'], row['2021']]
    interpolator = PchipInterpolator(years, gdp)
    return float(interpolator(2022))

gdp_df['2022'] = gdp_df.apply(interpolate_gdp, axis=1)

# Round to 2 decimal places
gdp_df['2022'] = gdp_df['2022'].round(2)


In [22]:
gdp_df.reset_index(inplace=True, drop=True)
gdp_df

Unnamed: 0,Country,2019,2020,2021,2022
0,Afghanistan,2167.70,2076.14,1665.81,1139.17
1,Albania,14407.37,14033.98,15810.43,18333.66
2,United Arab Emirates,74811.69,71374.18,76609.20,88719.24
3,Argentina,23003.28,20763.29,23649.67,31016.03
4,Armenia,14921.82,14089.24,15592.49,18760.90
...,...,...,...,...,...
137,Vietnam,10684.69,11022.96,11676.11,12544.13
138,Kosovo,11796.15,11292.86,13055.90,15825.52
139,South Africa,14436.83,13517.78,14624.42,17569.16
140,Zambia,3514.69,3358.01,3555.92,4067.19


In [23]:
gdp_df.to_csv('Datasets(Cleaned)/Controls/GDP.csv', index = False)

## Healthy Life Expectancy

In [24]:
hle_path = 'Datasets(Raw)/Controls/HLE_by_country.csv'

hle_df = pd.read_csv(hle_path)

hle_df.rename(columns={'Unnamed: 0':'Country', 'Unnamed: 1':'Year'}, inplace=True)
selected_columns = [
    'Country',
    'Year',
    'Life expectancy at birth (years)',
    'Life expectancy at age 60 (years)',
    'Healthy life expectancy (HALE) at birth (years)',
    'Healthy life expectancy (HALE) at age 60 (years)',
]
# this is for both sexes
hle_df = hle_df[selected_columns]
hle_df.drop(0, inplace=True)

columns_to_convert = [
    'Life expectancy at birth (years)',
    'Life expectancy at age 60 (years)',
    'Healthy life expectancy (HALE) at birth (years)',
    'Healthy life expectancy (HALE) at age 60 (years)',
]
hle_df[columns_to_convert] = hle_df[columns_to_convert].astype(float)

hle_df['Year'] = hle_df['Year'].astype(int)

hle_df

Unnamed: 0,Country,Year,Life expectancy at birth (years),Life expectancy at age 60 (years),Healthy life expectancy (HALE) at birth (years),Healthy life expectancy (HALE) at age 60 (years)
1,Afghanistan,2019,63.2,15.2,53.9,10.8
2,Afghanistan,2015,61.7,15.6,52.6,11.2
3,Afghanistan,2010,59.9,15.1,51.1,10.9
4,Afghanistan,2000,55.0,13.9,46.8,10.2
5,Albania,2019,78.0,21.0,69.1,16.6
...,...,...,...,...,...,...
728,Zambia,2000,44.5,13.2,39.0,10.0
729,Zimbabwe,2019,60.7,15.6,53.1,11.5
730,Zimbabwe,2015,58.5,15.1,51.2,11.2
731,Zimbabwe,2010,51.5,14.1,45.2,10.5


In [25]:
pivot_df = hle_df.melt(id_vars=["Country", "Year"], 
                       value_vars=['Life expectancy at birth (years)', 
                                   'Life expectancy at age 60 (years)', 
                                   'Healthy life expectancy (HALE) at birth (years)', 
                                   'Healthy life expectancy (HALE) at age 60 (years)'], 
                       var_name='Measure', 
                       value_name='Value')

pivot_df['Column'] = pivot_df['Measure'] + " " + pivot_df['Year'].astype(str)

hle = pivot_df.pivot(index='Country', columns='Column', values='Value').reset_index()


In [26]:
hle

Column,Country,Healthy life expectancy (HALE) at age 60 (years) 2000,Healthy life expectancy (HALE) at age 60 (years) 2010,Healthy life expectancy (HALE) at age 60 (years) 2015,Healthy life expectancy (HALE) at age 60 (years) 2019,Healthy life expectancy (HALE) at birth (years) 2000,Healthy life expectancy (HALE) at birth (years) 2010,Healthy life expectancy (HALE) at birth (years) 2015,Healthy life expectancy (HALE) at birth (years) 2019,Life expectancy at age 60 (years) 2000,Life expectancy at age 60 (years) 2010,Life expectancy at age 60 (years) 2015,Life expectancy at age 60 (years) 2019,Life expectancy at birth (years) 2000,Life expectancy at birth (years) 2010,Life expectancy at birth (years) 2015,Life expectancy at birth (years) 2019
0,Afghanistan,10.2,10.9,11.2,10.8,46.8,51.1,52.6,53.9,13.9,15.1,15.6,15.2,55.0,59.9,61.7,63.2
1,Albania,15.0,16.7,16.7,16.6,65.2,67.6,69.0,69.1,19.0,21.3,21.1,21.0,73.5,76.2,77.8,78.0
2,Algeria,14.6,15.6,15.9,16.0,62.7,65.5,66.0,66.4,19.8,21.4,21.8,22.0,72.2,75.9,76.5,77.1
3,Angola,10.7,11.9,12.4,12.6,42.9,50.6,53.7,54.8,14.4,16.0,16.7,17.0,49.3,58.1,61.7,63.1
4,Antigua and Barbuda,15.7,15.8,15.8,15.8,65.5,66.7,66.8,67.0,20.3,20.5,20.4,20.6,74.6,75.9,76.1,76.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178,Venezuela (Bolivarian Republic of),16.0,16.5,16.4,16.3,64.7,65.3,65.1,64.4,21.3,21.9,21.8,21.7,74.1,74.8,74.7,73.9
179,Viet Nam,14.3,14.5,14.7,14.8,63.3,64.5,64.9,65.3,18.9,19.2,19.4,19.6,71.4,72.7,73.2,73.7
180,Yemen,12.6,13.3,13.5,13.3,54.5,58.6,58.4,57.5,16.8,17.8,18.0,17.8,62.7,67.7,67.5,66.6
181,Zambia,10.0,11.7,12.3,12.6,39.0,49.4,52.7,54.4,13.2,15.4,16.1,16.5,44.5,56.7,60.5,62.5


In [27]:
hle_countries = set(hle['Country'])

# Find the countries in auth_df that are not in the countries set
countries_not_in_set = hle_countries - countries

print(countries_not_in_set)

{'Fiji', 'Micronesia (Federated States of)', 'Oman', 'Eswatini', "Lao People's Democratic Republic", 'South Sudan', 'Tonga', 'Somalia', 'Türkiye', 'Barbados', 'Brunei Darussalam', 'Djibouti', 'Seychelles', 'Guinea-Bissau', 'Sudan', 'Maldives', 'Democratic Republic of the Congo', 'Russian Federation', 'Equatorial Guinea', 'Viet Nam', 'Guyana', 'Bahamas', 'Republic of Korea', 'Solomon Islands', 'United Republic of Tanzania', 'Iran (Islamic Republic of)', 'Timor-Leste', 'United States of America', 'Bhutan', 'Qatar', 'Angola', 'Cuba', 'Grenada', 'Papua New Guinea', 'Haiti', "Democratic People's Republic of Korea", 'Samoa', 'Venezuela (Bolivarian Republic of)', "Côte d'Ivoire", 'Belize', 'Kiribati', 'United Kingdom of Great Britain and Northern Ireland', 'Bolivia (Plurinational State of)', 'Trinidad and Tobago', 'Eritrea', 'North Macedonia', 'Central African Republic', 'Saint Vincent and the Grenadines', 'Sao Tome and Principe', 'Syrian Arab Republic', 'Antigua and Barbuda', 'Republic of Mo

In [28]:
# original : replace to 
replaces = {"Viet Nam" : "Vietnam",
            'United Republic of Tanzania' : "Tanzania",
            'Bolivia (Plurinational State of)' : "Bolivia",
            'Iran (Islamic Republic of)' : "Iran",
            "Democratic People's Republic of Korea" : "South Korea", 
            "Côte d'Ivoire" : "Ivory Coast", 
            'Türkiye' : "Turkey",
            'Republic of Moldova' : "Moldova",
            'Eswatini' : "Kingdom of Eswatini",
            "Lao People's Democratic Republic" : "Laos",
            'United States of America' : "United States",
            'Democratic Republic of the Congo' : "Congo",
            'United Kingdom of Great Britain and Northern Ireland' : "United Kingdom",
            "Russian Federation":"Russia",
            "Republic of Korea" : "South Korea",
            'Venezuela (Bolivarian Republic of)' : "Venezuela",
            'North Macedonia' : "Kosovo",
            'Syrian Arab Republic' : 'Palestinian Territories'
            }

removes = ['South Sudan', 'Angola','Saint Vincent and the Grenadines', "Fiji", 
           "Vanuatu", 'Djibouti', 'Bahamas', 'Guyana','Cabo Verde', 'Somalia', 'Seychelles', 'Cuba','Tonga', 'Guinea-Bissau', 'Belize',  'Eritrea', 'Haiti', 'Sao Tome and Principe', 'Burundi', 'Oman', 'Qatar', 'Suriname', 'Maldives', 'Samoa', 'Grenada', 'Barbados', 'Trinidad and Tobago','Brunei Darussalam', 'Timor-Leste', 'Antigua and Barbuda', 'Saint Lucia', 'Equatorial Guinea','Micronesia (Federated States of)', 'Sudan', 'Central African Republic','Papua New Guinea','Kiribati', 'Bhutan', 'Solomon Islands'
           ]

In [29]:
# Replace values in the 'Entity' column using the replaces dictionary
hle['Country'] = hle['Country'].replace(replaces)

# Remove rows corresponding to the values in the removes list
hle = hle[~hle['Country'].isin(removes)]

In [30]:
hle_countries = set(hle['Country'])

# Find the countries in auth_df that are not in the countries set
countries_not_in_set = hle_countries - countries

print(countries_not_in_set)

set()


In [31]:
test = countries - hle_countries
test

set()

In [32]:
hle = hle.drop_duplicates(subset='Country', keep='first')
hle.reset_index(inplace = True, drop=True)

In [33]:
hle

Column,Country,Healthy life expectancy (HALE) at age 60 (years) 2000,Healthy life expectancy (HALE) at age 60 (years) 2010,Healthy life expectancy (HALE) at age 60 (years) 2015,Healthy life expectancy (HALE) at age 60 (years) 2019,Healthy life expectancy (HALE) at birth (years) 2000,Healthy life expectancy (HALE) at birth (years) 2010,Healthy life expectancy (HALE) at birth (years) 2015,Healthy life expectancy (HALE) at birth (years) 2019,Life expectancy at age 60 (years) 2000,Life expectancy at age 60 (years) 2010,Life expectancy at age 60 (years) 2015,Life expectancy at age 60 (years) 2019,Life expectancy at birth (years) 2000,Life expectancy at birth (years) 2010,Life expectancy at birth (years) 2015,Life expectancy at birth (years) 2019
0,Afghanistan,10.2,10.9,11.2,10.8,46.8,51.1,52.6,53.9,13.9,15.1,15.6,15.2,55.0,59.9,61.7,63.2
1,Albania,15.0,16.7,16.7,16.6,65.2,67.6,69.0,69.1,19.0,21.3,21.1,21.0,73.5,76.2,77.8,78.0
2,Algeria,14.6,15.6,15.9,16.0,62.7,65.5,66.0,66.4,19.8,21.4,21.8,22.0,72.2,75.9,76.5,77.1
3,Argentina,15.6,16.0,16.2,16.3,65.1,66.3,66.9,67.1,20.2,20.6,21.0,21.1,74.1,75.4,76.2,76.6
4,Armenia,14.5,14.6,15.0,15.7,63.5,64.8,66.0,67.1,18.8,18.8,19.4,20.4,71.9,73.1,74.5,76.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137,Venezuela,16.0,16.5,16.4,16.3,64.7,65.3,65.1,64.4,21.3,21.9,21.8,21.7,74.1,74.8,74.7,73.9
138,Vietnam,14.3,14.5,14.7,14.8,63.3,64.5,64.9,65.3,18.9,19.2,19.4,19.6,71.4,72.7,73.2,73.7
139,Yemen,12.6,13.3,13.5,13.3,54.5,58.6,58.4,57.5,16.8,17.8,18.0,17.8,62.7,67.7,67.5,66.6
140,Zambia,10.0,11.7,12.3,12.6,39.0,49.4,52.7,54.4,13.2,15.4,16.1,16.5,44.5,56.7,60.5,62.5


In [34]:
# For 'Life expectancy at birth' metrics
le_at_birth = hle.filter(regex='Country|Life expectancy at birth')
new_column_names = {
    "Life expectancy at birth (years) 2000": "2000",
    "Life expectancy at birth (years) 2010": "2010",
    "Life expectancy at birth (years) 2015": "2015",
    "Life expectancy at birth (years) 2019": "2019",
    # Add mappings for other columns as needed.
}
le_at_birth.rename(columns= new_column_names, inplace=True)

# For 'Healthy life expectancy (HALE) at birth' metrics
hle_at_birth = hle.filter(regex='Country|Healthy life expectancy \(HALE\) at birth')
new_column_names = {
    "Healthy life expectancy (HALE) at birth (years) 2000": "2000",
    "Healthy life expectancy (HALE) at birth (years) 2010": "2010",
    "Healthy life expectancy (HALE) at birth (years) 2015": "2015",
    "Healthy life expectancy (HALE) at birth (years) 2019": "2019",
    # Add mappings for other columns as needed.
}
hle_at_birth.rename(columns =new_column_names, inplace=True)

# For 'Life expectancy at age 60' metrics
le_at_60 = hle.filter(regex='Country|Life expectancy at age 60')
new_column_names = {
    "Life expectancy at age 60 (years) 2000": "2000",
    "Life expectancy at age 60 (years) 2010": "2010",
    "Life expectancy at age 60 (years) 2015": "2015",
    "Life expectancy at age 60 (years) 2019": "2019",
    # Add mappings for other columns as needed.
}
le_at_60.rename(columns =new_column_names, inplace=True)

# For 'Healthy life expectancy (HALE) at age 60' metrics
hle_at_60 = hle.filter(regex='Country|Healthy life expectancy \(HALE\) at age 60')
new_column_names = {
    "Healthy life expectancy (HALE) at age 60 (years) 2000": "2000",
    "Healthy life expectancy (HALE) at age 60 (years) 2010": "2010",
    "Healthy life expectancy (HALE) at age 60 (years) 2015": "2015",
    "Healthy life expectancy (HALE) at age 60 (years) 2019": "2019",
    # Add mappings for other columns as needed.
}
hle_at_60.rename(columns =new_column_names, inplace=True)

In [35]:
def interpolate_data(df):
    # Years present in the original data
    years_original = ['2000', '2010', '2015', '2019']
    
    # Years to be interpolated
    years_to_interpolate = ['2020', '2021', '2022']

    # Iterate over each row (i.e., each country)
    for index, row in df.iterrows():
        # Retrieve original values for the years present in the data
        values = [row[year] for year in years_original]

        # Create the interpolator
        interpolator = PchipInterpolator(list(map(int, years_original)), values)
        
        # Interpolate the missing years
        for year in years_to_interpolate:
            df.loc[index, year] = interpolator(float(year))
            df[year] = df[year].round(2)
        df.reset_index(inplace=True, drop=True)
    return df




In [36]:
le_at_birth = interpolate_data(le_at_birth)
hle_at_birth = interpolate_data(hle_at_birth)
le_at_60 = interpolate_data(le_at_60)
hle_at_60 = interpolate_data(hle_at_60)

In [37]:
le_at_birth.to_csv('Datasets(Cleaned)/Controls/Life_Expectancy/le_at_birth.csv',index=False)
hle_at_birth.to_csv('Datasets(Cleaned)/Controls/Life_Expectancy/hle_at_birth.csv',index=False)
le_at_60.to_csv('Datasets(Cleaned)/Controls/Life_Expectancy/le_at_60.csv',index=False)
hle_at_60.to_csv('Datasets(Cleaned)/Controls/Life_Expectancy/hle_at_60.csv',index=False)

## Social Support

In [38]:
ss_df = dataframes[0][['Country']]

for i, df in enumerate(dataframes):
    filtered_columns = df.filter(regex=r"Social Support")

    country_ss_df = pd.concat([df['Country'], filtered_columns], axis=1)

    # Merge the result DataFrame with the country_happiness_df on the 'Country' column
    ss_df = ss_df.merge(country_ss_df, on='Country', how='right')

# Split the DataFrame into the first column and the remaining columns
first_column = ss_df.iloc[:, 0:1]
remaining_columns = ss_df.iloc[:, 1:]

# Sort the remaining columns
sorted_remaining_columns = remaining_columns.sort_index(axis=1)

# Concatenate the first column with the sorted remaining columns
ss_df = pd.concat([first_column, sorted_remaining_columns], axis=1)

ss_df['Social Support 2022'] = ss_df['Social Support 2022'].str.replace(',','.')
ss_df['Social Support 2022'] = pd.to_numeric(ss_df['Social Support 2022'], errors ='ignore').astype(pd.Float64Dtype())

ss_df.dropna(subset=['Social Support 2019', 'Social Support 2020',
                            'Social Support 2021',	'Social Support 2022'],
                    inplace=True)

ss_df.reset_index(drop=True, inplace=True)
ss_df.columns = ['Country', 2019, 2020, 2021, 2022]
ss_df

Unnamed: 0,Country,Social Support 2019,Social Support 2020,Social Support 2021,Social Support 2022
0,Finland,1.587,0.954330,0.954,1.258
1,Denmark,1.573,0.955991,0.954,1.243
2,Norway,1.582,0.952487,0.954,1.239
3,Iceland,1.624,0.974670,0.983,1.32
4,Netherlands,1.522,0.939139,0.942,1.206
...,...,...,...,...,...
137,Yemen,1.163,0.817981,0.832,1.043
138,Rwanda,0.711,0.540835,0.552,0.133
139,Tanzania,0.885,0.688933,0.702,0.597
140,Afghanistan,0.517,0.470367,0.463,0.0


In [39]:
ss_df.to_csv('Datasets(Cleaned)/Controls/social_support.csv', index=False)

## Freedom

In [40]:
freedom_df = dataframes[0][['Country']]

for i, df in enumerate(dataframes):
    filtered_columns = df.filter(regex=r"Freedom")

    country_freedom_df = pd.concat([df['Country'], filtered_columns], axis=1)

    # Merge the result DataFrame with the country_happiness_df on the 'Country' column
    freedom_df = freedom_df.merge(country_freedom_df, on='Country', how='right')

# Split the DataFrame into the first column and the remaining columns
first_column = freedom_df.iloc[:, 0:1]
remaining_columns = freedom_df.iloc[:, 1:]

# Sort the remaining columns
sorted_remaining_columns = remaining_columns.sort_index(axis=1)

# Concatenate the first column with the sorted remaining columns
freedom_df = pd.concat([first_column, sorted_remaining_columns], axis=1)

freedom_df['Freedom 2022'] = freedom_df['Freedom 2022'].str.replace(',','.')
freedom_df['Freedom 2022'] = pd.to_numeric(freedom_df['Freedom 2022'], errors ='ignore').astype(pd.Float64Dtype())

freedom_df.dropna(subset=['Freedom 2019', 'Freedom 2020', 'Freedom 2021', 'Freedom 2022'], inplace=True)

freedom_df.drop(columns=['Explained by: Freedom to make life choices_x',
                         'Explained by: Freedom to make life choices_y'],inplace=True)

freedom_df.reset_index(drop=True, inplace=True)
freedom_df.columns = ['Country', 2019, 2020, 2021, 2022]
freedom_df

Unnamed: 0,Country,Freedom 2019,Freedom 2020,Freedom 2021,Freedom 2022
0,Finland,0.596,0.949172,0.949,0.736
1,Denmark,0.592,0.951444,0.946,0.719
2,Norway,0.603,0.955750,0.960,0.728
3,Iceland,0.591,0.948892,0.955,0.718
4,Netherlands,0.557,0.908548,0.913,0.651
...,...,...,...,...,...
137,Yemen,0.143,0.599920,0.602,0.33
138,Rwanda,0.555,0.900589,0.897,0.621
139,Tanzania,0.417,0.821540,0.833,0.578
140,Afghanistan,0.000,0.396573,0.382,0.0


In [41]:
freedom_df.to_csv('Datasets(Cleaned)/Controls/freedom.csv',index=False)

## Generosity

In [44]:
generosity_df = dataframes[0][['Country']]

for i, df in enumerate(dataframes):
    filtered_columns = df.filter(regex=r"Generosity")

    country_generosity_df = pd.concat([df['Country'], filtered_columns], axis=1)

    # Merge the result DataFrame with the country_happiness_df on the 'Country' column
    generosity_df = generosity_df.merge(country_generosity_df, on='Country', how='right')

# Split the DataFrame into the first column and the remaining columns
first_column = generosity_df.iloc[:, 0:1]
remaining_columns = generosity_df.iloc[:, 1:]

# Sort the remaining columns
sorted_remaining_columns = remaining_columns.sort_index(axis=1)

# Concatenate the first column with the sorted remaining columns
generosity_df = pd.concat([first_column, sorted_remaining_columns], axis=1)

generosity_df['Generosity 2022'] = generosity_df['Generosity 2022'].str.replace(',','.')
generosity_df['Generosity 2022'] = pd.to_numeric(generosity_df['Generosity 2022'], errors ='ignore').astype(pd.Float64Dtype())

generosity_df.dropna(subset=['Generosity 2019', 'Generosity 2020', 'Generosity 2021', 'Generosity 2022'], inplace=True)

generosity_df.drop(columns=['Explained by: Generosity_x',
                         'Explained by: Generosity_y'],inplace=True)

generosity_df.reset_index(drop=True, inplace=True)

generosity_df.columns = ['Country', 2019, 2020, 2021, 2022]
generosity_df

Unnamed: 0,Country,2019,2020,2021,2022
0,Finland,0.153,-0.059482,-0.098,0.109
1,Denmark,0.252,0.066202,0.030,0.188
2,Norway,0.271,0.134533,0.093,0.217
3,Iceland,0.354,0.246944,0.160,0.27
4,Netherlands,0.322,0.207612,0.175,0.271
...,...,...,...,...,...
137,Yemen,0.108,-0.157735,-0.147,0.09
138,Rwanda,0.217,0.055484,0.061,0.187
139,Tanzania,0.276,0.109669,0.183,0.248
140,Afghanistan,0.158,-0.096429,-0.102,0.089


In [None]:
generosity_df.to_csv('Datasets(Cleaned)/Controls/generosity.csv', index=False)

## Corruption

In [47]:
corruption_df = dataframes[0][['Country']]

for i, df in enumerate(dataframes):
    filtered_columns = df.filter(regex=r"Corruption")

    country_corruption_df = pd.concat([df['Country'], filtered_columns], axis=1)

    # Merge the result DataFrame with the country_happiness_df on the 'Country' column
    corruption_df = corruption_df.merge(country_corruption_df, on='Country', how='right')

# Split the DataFrame into the first column and the remaining columns
first_column = corruption_df.iloc[:, 0:1]
remaining_columns = corruption_df.iloc[:, 1:]

# Sort the remaining columns
sorted_remaining_columns = remaining_columns.sort_index(axis=1)

# Concatenate the first column with the sorted remaining columns
corruption_df = pd.concat([first_column, sorted_remaining_columns], axis=1)

corruption_df['Corruption 2022'] = corruption_df['Corruption 2022'].str.replace(',','.')
corruption_df['Corruption 2022'] = pd.to_numeric(corruption_df['Corruption 2022'], errors ='ignore').astype(pd.Float64Dtype())

corruption_df.dropna(subset=['Corruption 2019', 'Corruption 2020', 'Corruption 2021', 'Corruption 2022'], inplace=True)


corruption_df.reset_index(drop=True, inplace=True)

corruption_df.columns = ['Country', 2019, 2020, 2021, 2022]
corruption_df

Unnamed: 0,Country,2019,2020,2021,2022
0,Finland,0.393,0.195445,0.186,0.534
1,Denmark,0.410,0.168489,0.179,0.532
2,Norway,0.341,0.263218,0.270,0.474
3,Iceland,0.118,0.711710,0.673,0.191
4,Netherlands,0.298,0.364717,0.338,0.419
...,...,...,...,...,...
137,Yemen,0.077,0.800288,0.800,0.098
138,Rwanda,0.411,0.183541,0.167,0.544
139,Tanzania,0.147,0.619799,0.577,0.27
140,Afghanistan,0.025,0.933687,0.924,0.005


In [None]:
corruption_df.to_csv('Datasets(Cleaned)/Controls/corruption.csv', index=False)

# Instrumental Variables

## Political Regime Characteristics (1800-2019)
[Source](https://www.systemicpeace.org/inscrdata.htm) 

In [38]:
pr_df = pd.read_excel('Datasets(Raw)/Instruments/regime.xls')
pr_df.rename(columns={'country':'Country',"year":"Year"}, inplace=True)
keep = ["Country", "Year", "polity"]
pr_df = pr_df[keep]

In [39]:
# Unpivot/Melt the DataFrame
pr_melted = pr_df.melt(id_vars=["Country", "Year"], 
                       value_vars=[ "polity"],
                       var_name="Metric", 
                       value_name="Value")

# Combine 'Year' and 'Metric' into a single column
pr_melted['YearMetric'] = pr_melted['Year'].astype(str)

# Drop duplicates based on 'Country' and 'YearMetric'
pr_melted.drop_duplicates(subset=['Country', 'YearMetric'], inplace=True)

# Pivot the DataFrame
pr_pivoted = pr_melted.pivot(index='Country', columns='YearMetric', values='Value')

# Display the pivoted DataFrame
pr_pivoted.reset_index(inplace=True)

pr_pivoted


YearMetric,Country,1776,1777,1778,1779,1780,1781,1782,1783,1784,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Afghanistan,,,,,,,,,,...,-66.0,-66.0,-66.0,-1.0,-1.0,-1.0,-1.0,-1.0,,
1,Albania,,,,,,,,,,...,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,,
2,Algeria,,,,,,,,,,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,,
3,Angola,,,,,,,,,,...,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,,
4,Argentina,,,,,,,,,,...,8.0,8.0,8.0,8.0,9.0,9.0,9.0,9.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
190,Yemen North,,,,,,,,,,...,,,,,,,,,,
191,Yemen South,,,,,,,,,,...,,,,,,,,,,
192,Yugoslavia,,,,,,,,,,...,,,,,,,,,,
193,Zambia,,,,,,,,,,...,7.0,7.0,7.0,7.0,7.0,6.0,6.0,6.0,,


In [40]:
pr_countries = set(pr_pivoted['Country'])

# Find the countries in auth_df that are not in the countries set
countries_not_in_set = pr_countries - countries

print(countries_not_in_set)

{'Burundi', 'Korea', 'Swaziland', 'Prussia', 'Sudan-North', 'Qatar', 'South Sudan', 'Guyana', 'UAE', 'Equatorial Guinea', 'Guinea-Bissau', 'Papua New Guinea', 'Serbia and Montenegro', 'Yemen North', 'Slovak Republic', 'Eritrea', 'Saxony', 'Central African Republic', 'Haiti', 'Czech Republic', 'Djibouti', 'Bosnia', 'Orange Free State', 'Oman', 'Parma', 'Timor Leste', 'Germany East', 'Sardinia', 'Sudan', 'Fiji', 'Congo-Brazzaville', 'Germany West', 'Suriname', 'Taiwan', 'Korea South', 'Wuerttemburg', 'Yemen South', 'Myanmar (Burma)', 'Tuscany', 'Two Sicilies', 'Syria', 'Angola', 'USSR', 'Vietnam North', 'Somalia', 'Trinidad and Tobago', 'Solomon Islands', 'Bavaria', 'Congo Kinshasa', 'Cape Verde', 'United Province CA', 'Bhutan', 'Cuba', 'Yugoslavia', "Cote D'Ivoire", 'Modena', 'Korea North', 'Czechoslovakia', 'Papal States', 'Baden', 'South Vietnam', 'Gran Colombia', 'Macedonia', 'Congo Brazzaville'}


In [41]:
# original : replace to 
replaces = { "Korea": "South Korea",
            "Czechoslovakia": "Czechia",
            "Swaziland": "Kingdom of Eswatini",
            'Serbia and Montenegro' : "Serbia",
            'Myanmar (Burma)' : "Myanmar",
            "Vietnam North" : "Vietnam",
            'UAE' : "United Arab Emirates",
            "Cote D'Ivoire" : "Ivory Coast",
            'USSR': "Russia",
            'Slovak Republic' : "Slovakia",
            "Bosnia" : "Bosnia and Herzegovina",
            'Congo-Brazzaville': "Congo",
            "Prussia" : "Iceland",
            'Two Sicilies' : "Malta",
            "Syria":"Palestinian Territories"
            }

removes = ['Guyana', 'Angola', 'Prussia', 'Cape Verde', 'Suriname', 
           'Equatorial Guinea', 'Korea North','Taiwan', 'Haiti', 'Saxony', 'Yugoslavia', 'Djibouti', 'South Sudan', 
            'Czech Republic', 'Tuscany', 'Modena', 'Sudan', 
           'Papal States', 'Trinidad and Tobago','Cuba', 'Fiji', 
           'Guinea-Bissau','Burundi', 'Oman', 'South Vietnam', 'Eritrea', 'Yemen South', 'Timor Leste', 'Bavaria', 'Somalia',  'Gran Colombia', 'Germany East', 'Central African Republic', 'Papua New Guinea', 'Orange Free State', 'Germany West', 'Sardinia', 'Baden', 'Wuerttemburg', 'Congo Kinshasa', 'United Province CA', 'Macedonia', 'Korea South', 'Sudan-North', 'Qatar', 'Parma', 'Bhutan','Yemen North', 'Solomon Islands','Congo Brazzaville'
           ]

In [42]:
# Replace values in the 'Entity' column using the replaces dictionary
pr_pivoted['Country'] = pr_pivoted['Country'].replace(replaces)

# Remove rows corresponding to the values in the removes list
pr_pivoted = pr_pivoted[~pr_pivoted['Country'].isin(removes)]

In [43]:
pr_countries = set(pr_pivoted['Country'])

# Find the countries in auth_df that are not in the countries set
countries_not_in_set = pr_countries - countries

print(countries_not_in_set)

set()


In [44]:
test = countries - pr_countries
test

set()

In [45]:
cols_to_drop = [str(year) for year in range(1776, 1800)]
pr_pivoted = pr_pivoted.drop(columns=cols_to_drop)
pr_pivoted

YearMetric,Country,1800,1801,1802,1803,1804,1805,1806,1807,1808,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Afghanistan,-6.0,-6.0,-6.0,-6.0,-6.0,-6.0,-6.0,-6.0,-6.0,...,-66.0,-66.0,-66.0,-1.0,-1.0,-1.0,-1.0,-1.0,,
1,Albania,,,,,,,,,,...,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,,
2,Algeria,,,,,,,,,,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,,
4,Argentina,,,,,,,,,,...,8.0,8.0,8.0,8.0,9.0,9.0,9.0,9.0,,
5,Armenia,,,,,,,,,,...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,7.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186,Vietnam,,,,,,,,,,...,-7.0,-7.0,-7.0,-7.0,-7.0,-7.0,-7.0,-7.0,,
187,Vietnam,,,,,,,,,,...,,,,,,,,,,
189,Yemen,,,,,,,,,,...,-2.0,3.0,3.0,-77.0,-77.0,-77.0,-77.0,-77.0,,
193,Zambia,,,,,,,,,,...,7.0,7.0,7.0,7.0,7.0,6.0,6.0,6.0,,


In [46]:
pr_pivoted.to_csv("Datasets(Cleaned)/Instruments/political_regime.csv",index=False)

## UCDP/PRIO Armed Conflict Dataset (1946 - 2021)
Conflict Type: 1 means extrasystemic, 2 means interstate, 3 means intrastate, and 4 means internationalized intrastate

In [47]:
armed_conflict_df = pd.read_csv('Datasets(Raw)/Instruments/armed_conflict.csv')
armed_conflict_df.rename(columns={'side_a':'Country A',
                      'side_b':'Country B',
                      "year":"Year",
                      "type_of_conflict":"Conflict Type",
                      "cumulative_intensity":"Cumulative Intensity"}, inplace=True)
keep = ['Country A', 'Country B','Year','Conflict Type','Cumulative Intensity']
armed_conflict_df = armed_conflict_df[keep]
armed_conflict_df

Unnamed: 0,Country A,Country B,Year,Conflict Type,Cumulative Intensity
0,Government of India,GNLA,2012,3,0
1,Government of India,GNLA,2014,3,0
2,Government of Egypt,Government of Israel,1967,2,1
3,Government of Egypt,Government of Israel,1969,2,1
4,Government of Egypt,Government of Israel,1970,2,1
...,...,...,...,...,...
2563,Government of Myanmar (Burma),LNUP,1979,3,0
2564,Government of Myanmar (Burma),LNUP,1980,3,0
2565,Government of Myanmar (Burma),LNUP,1981,3,0
2566,Government of Myanmar (Burma),LNUP,1982,3,0


In [48]:
armed_conflict_df['Country A'] = armed_conflict_df['Country A'].str.replace('Government of ', '', regex=False)
armed_conflict_df['Country B'] = armed_conflict_df['Country B'].str.replace('Government of ', '', regex=False)


In [49]:
armed_conflict_df

Unnamed: 0,Country A,Country B,Year,Conflict Type,Cumulative Intensity
0,India,GNLA,2012,3,0
1,India,GNLA,2014,3,0
2,Egypt,Israel,1967,2,1
3,Egypt,Israel,1969,2,1
4,Egypt,Israel,1970,2,1
...,...,...,...,...,...
2563,Myanmar (Burma),LNUP,1979,3,0
2564,Myanmar (Burma),LNUP,1980,3,0
2565,Myanmar (Burma),LNUP,1981,3,0
2566,Myanmar (Burma),LNUP,1982,3,0


In [50]:
a_countries = set(armed_conflict_df['Country A'])

# Find the countries in auth_df that are not in the countries set
countries_not_in_set = a_countries - countries

print(countries_not_in_set)

{'Burundi', 'Australia, United Kingdom, United States of America', 'Serbia (Yugoslavia)', 'South Sudan', 'Guinea-Bissau', 'Papua New Guinea', 'Russia (Soviet Union)', 'Bosnia-Herzegovina', 'Eritrea', 'Central African Republic', 'Haiti', 'DR Congo (Zaire)', 'Djibouti', 'Oman', 'Cambodia (Kampuchea)', 'Sudan', 'Zimbabwe (Rhodesia)', 'Hyderabad', 'North Korea', 'Yemen (North Yemen)', 'Suriname', 'United States of America', 'Myanmar (Burma)', 'Syria', 'North Macedonia', 'Angola', 'Egypt, Iraq, Jordan, Lebanon, Syria', 'Somalia', 'Trinidad and Tobago', 'Cuba', 'South Vietnam', 'South Yemen', 'Grenada'}


In [60]:
# original : replace to 
replaces = {'Zimbabwe (Rhodesia)': "Zimbabwe",
            'Myanmar (Burma)' : "Myanmar",
            'Serbia (Yugoslavia)' : "Serbia",
            'South Yemen' : "Yemen", 
            'Cambodia (Kampuchea)' : "Cambodia",
            "Yemen (North Yemen)":"Yemen",
            'Bosnia-Herzegovina' : 'Bosnia and Herzegovina',
            'South Vietnam' : "Vietnam",
            "United States of America":"United States",
            'Russia (Soviet Union)' : "Russia",

            }

removes = ['Grenada', 'Cuba', 'North Korea', 'South Sudan', 'Sudan', 'Syria', 'Hyderabad', 'Australia, United Kingdom, United States of America', 'Eritrea', 'DR Congo (Zaire)', 'Guinea-Bissau', 'Egypt, Iraq, Jordan, Lebanon, Syria', 'Djibouti', 'Burundi', 'Angola','Papua New Guinea', 'Oman', 'Haiti', 'Somalia', 'Central African Republic','North Macedonia','Suriname', 'Trinidad and Tobago'
           ]

In [61]:
# Replace values in the 'Entity' column using the replaces dictionary
armed_conflict_df['Country A'] = armed_conflict_df['Country A'].replace(replaces)

# Remove rows corresponding to the values in the removes list
armed_conflict_df = armed_conflict_df[~armed_conflict_df['Country A'].isin(removes)]

In [62]:
a_countries = set(armed_conflict_df['Country A'])

# Find the countries in auth_df that are not in the countries set
countries_not_in_set = a_countries - countries

print(countries_not_in_set)

set()


In [63]:
test = countries - pr_countries
test

set()

In [64]:
b_countries = set(armed_conflict_df['Country B'])

# Find the countries in auth_df that are not in the countries set
countries_not_in_set = b_countries - countries

print(countries_not_in_set)

{'Burundi', 'FSLN', 'Myanmar ', 'Eritrea', 'Djibouti', 'UFRA', 'Sudan', 'Manipur', 'CPP', 'FLN , MNA', 'Khmer Issarak', 'Syria', 'Angola', 'Military faction (forces of Abd as-Salam Arif), NCRC', 'Somalia', 'Solomon Islands', 'Palestine', 'Cuba', 'Lashkar-e-Islam, TTP, TTP - TA', 'Macedonia'}


In [68]:
# original : replace to 
replaces = {'al-Murabitun, AQIM' : "Mali",
            'ELN, MIR' : "Colombia",
            'First Liberation Army, Second Liberation Army' : 'Myanmar',
            'OPON Forces' : "Nigeria",
            'ABSDF' : "Myanmar",
            'UTO':"Japan",
            'AIAI':"Philippines",
            'Istiqlal' : 'Morocco',
            'LNUP': 'Liberia',
            'MRTA, Sendero Luminoso': 'Peru',
            'PDPA': 'Afghanistan',
            'ZANU, ZAPU': 'Zimbabwe',
            'BLA': 'Pakistan' ,
            'EGP, FAR II, ORPA': 'Guatemala',
            'JVP': 'Sri Lanka',
            'ASG, BIFM, MNLF - NM': 'Philippines',
            'Military faction (forces of Suret Husseinov)': 'Azerbaijan',
            'LAA, NSF': 'Lebanon',
            'FARC, M-19': 'Colombia',
            'FARC dissidents': 'Colombia',
            'Lashkar-e-Islam, TTP': 'Pakistan',
            "Harakat-i Inqilab-i Islami-yi Afghanistan , Hizb-i Islami-yi Afghanistan, Hizb-i Islami-yi Afghanistan - Khalis faction, Jabha-yi Nijat-i Milli-yi Afghanistan , Jam'iyyat-i Islami-yi Afghanistan, Mahaz-i Milli-yi Islami-yi Afghanistan": 'Afghanistan',
            'NDPVF': 'Nigeria',
            'FLM': 'Mauritania',
            'MNLF': 'Philippines',
            'SSIA': 'Sri Lanka',
            'RCSS, SSPP': 'Myanmar',
            'FAR I': 'Chad',
            'ELN, FARC, M-19': 'Colombia',
            'ULA': 'Peru',
            'ASG, BIFM, MILF': 'Philippines',
            'ELN': 'Colombia',
            'Military faction (forces of Samuel Doe)': 'Liberia',
            'LNPA, LTS(p)A': 'Liberia',
            'Cobras, Cocoyes': 'Ivory Coast',
            'CPB, CPB-RF': 'Philippines',
            'AIS, GIA': 'Algeria',
            'Presidential guard': "Bolivia",
            'Fatah': 'Palestinian Territories',
            'MEK': 'Iran',
            'ABSU': 'Nigeria',
            'ATTF': 'Chad',
            'Résistance Armée Tunisienne': 'Tunisia',
            'EPRDF': 'Ethiopia',
            'IZL [Etzel]': 'Israel',
            'PBCP-J': 'Philippines',
            'MPA/Republic of Anjouan': 'Comoros',
            'KDP': 'Iraq',
            'Serbian irregulars, Serbian Republic of Bosnia-Herzegovina': 'Bosnia and Herzegovina',
            'ETIM': 'China',
            'KNPP': 'Myanmar',
            'MCC, PWG': 'India',
            'Forces of Michel Aoun': 'Lebanon',
            'NRF, Taleban': 'Afghanistan',
            'UPA': 'Ukraine',
            'ADF, LRA, WNBF': 'Congo',
            'CPM': 'India',
            'EPRLF, LTTE, TELO': 'Sri Lanka',
            'NC': 'Nepal',
            'Kashmir insurgents': 'India',
            'ATTF, NLFT': 'India',
            'NDFB - RD': 'India',
            'LRM': 'Myanmar',
            'Cocoyes, Ntsiloulous': 'Congo',
            'ELN, EPL, FARC, M-19': 'Colombia',
            'SSRA, SURA': 'Sudan',
            'CPB, CPB-RF, PVO - "White Band" faction': 'Philippines',
            'KCP, PREPAK': 'India',
            'PLA, UNLF': 'India',
            "Jama'atu Ahlis Sunna Lidda'awati wal-Jihad": 'Nigeria',
            'NLFT': 'India',
            'IPOB': 'Nigeria',
            'CMA': 'Cambodia',
            'MTD': 'Chad',
            'CPT': 'Philippines',
            'Republic of South Ossetia': 'Georgia',
            'Military faction (forces of Eduardo A. Lonardi Doucet), Military faction (forces of Samuel Toranzo Calderón)': 'Uruguay',
            'NRA, UNRF': 'Uganda',
            'JIG': 'Nigeria',
            'GUNT': 'Chad',
            'ELN, FARC': 'Colombia',
            'FAN': 'Argentina',
            'ETA': 'Spain',
            'Tibet': 'China',
            'NLFT-B': 'India' ,
            'United Armed Forces of Novorossiya': 'Ukraine',
            'Hizb-i Islami-yi Afghanistan': 'Afghanistan',
            'BLA, UBA': 'Pakistan',
            'Military faction (forces of Alfredo Stroessner)': 'Paraguay',
            'ONLF': 'Ethiopia',
            'Independent Nasserite Movement': 'Yemen',
            'Kikosi Maalum': 'Tanzania',
            'Parliamentary Forces': 'Sierra Leone',
            'NNC': 'Nigeria',
            'KNU, KNUP': 'Myanmar',
            'KDP, PUK': 'Iraq' ,
            'PSLF': 'Philippines',
            'CNR, CSNPD, FNT, MDD': 'Djibouti',
            "Indonesian People's Army": 'Indonesia',
            'SSPP': 'Myanmar',
            'EDU, EPRP': 'Ethiopia',
            'SSA': 'Myanmar',
            'Fronasa, Kikosi Maalum, UNLF': 'Uganda',
            'APF': 'Nepal',
            'ASG, MILF, MNLF - HM': 'Philippines',
            'RCSS': 'Myanmar',
            'PFT, UTO': 'Kyrgyzstan',
            'Taiwan': 'China' ,
            'MFL-MUF': 'Sri Lanka',
            'CPI-ML-J, MCC, PWG': 'India',
            'RPF': 'Rwanda',
            'NUG': 'Myanmar',
            'PAIGC': 'Guinea',
            'DSE': 'Myanmar ',
            'FACT': 'Chad',
            'FUNCINPEC, KPNLF, KR': 'Cambodia',
            'UFDD': 'Chad',
            'ALiR': 'Albania',
            'EPDM, TPLF': 'Ethiopia',
            'Mujahid Party': 'Bangladesh',
            'EPRLF, LTTE': 'Sri Lanka' ,
            'NLA': 'Macedonia',
            'Military faction (forces of Benjamin Mejia)': 'Honduras',
            "Military faction (forces loyal to Léon M'Ba)": 'Gabon',
            "Hizb-i Islami-yi Afghanistan, Hizb-i Wahdat, Jam'iyyat-i Islami-yi Afghanistan": 'Afghanistan',
            'AFRC, RUF': 'Sierra Leone',
            'FUNA': 'Angola',
            'MNR': 'Bolivia',
            'First Liberation Army': 'Nepal',
            "Hizb-i Islami-yi Afghanistan, Hizb-i Islami-yi Afghanistan - Khalis faction, Hizb-i Wahdat, Jam'iyyat-i Islami-yi Afghanistan": 'Afghanistan',
            'Ambazonia insurgents': 'Cameroon',
            'Patani insurgents': 'Thailand' ,
            'EPRDF, Forces of Harar garrison': 'Ethiopia',
            'UWSA': 'Myanmar',
            'UNLFW': 'Myanmar',
            'al-Mahdi Army, Ansar al-Islam, IS, RJF': 'Iraq',
            'MPA': 'Philippines',
            'Hezbollah': 'Lebanon',
            'AQAP, Forces of Hadi': 'Yemen',
            'National Guard and Mkhedrioni': 'Georgia',
            'FLN': 'Algeria',
            'FLOSY': 'Yemen',
            'Republic of Croatia': 'Croatia',
            "Lord's Army, LRA, UPA": 'Uganda',
            'FLAA': 'Mexico',
            'FUCD': 'Colombia',
            'CPI': 'India',
            'NDFB-S': 'India',
            'SURA': 'Russia',
            'Fatah, PNA': 'Palestinian Territories',
            'FDLR': 'Congo',
            'FARF': 'Colombia',
            'ANC': 'South Africa',
            'al-Mahdi Army, IS': 'Iraq',
            "God's Army, KNU": 'Myanmar',
            'Hizb-i Islami-yi Afghanistan, Hizb-i Wahdat, Junbish-i Milli-yi Islami': 'Afghanistan',
            'ASG, MILF, MNLF - NM': 'Philippines',
            'MDRM': 'Macedonia',
            'ERP': 'Guatemala',
            'URNG': 'Guatemala',
            'Ansar al-Islam, IS, RJF': 'Iraq',
            'Renamo': 'Mozambique',
            'RFDG': 'Guinea',
            'Ansar Bayt al-Maqdis': 'Egypt',
            'Ansar Dine, Military faction (Red Berets)': 'Mali',
            'al-Mahdi Army, Ansar al-Islam, IS': 'Iraq',
            "Harakat-i Inqilab-i Islami-yi Afghanistan , Hizb-i Islami-yi Afghanistan, Hizb-i Islami-yi Afghanistan - Khalis faction, Ittihad-i Islami Bara-yi Azadi-yi Afghanistan , Jabha-yi Nijat-i Milli-yi Afghanistan , Jam'iyyat-i Islami-yi Afghanistan, Mahaz-i Milli-yi Islami-yi Afghanistan": 'Afghanistan',
            'PBCP, PBCP-J': 'Bangladesh',
            'FNL': 'Burundi',
            'Military faction (forces of Augusto Pinochet, Toribio Merino and Leigh Guzman)': 'Chile',
            'MDJT': 'Chad',
            'MNJ': 'Peru',
            'DPR, LPR, United Armed Forces of Novorossiya': 'Ukraine',
            'Jondullah': 'Iran',
            'CPN-M': 'Nepal',
            'FAP': 'Peru',
            'NRA, UFM, UNRF': 'Uganda',
            'ASG, MILF': 'Philippines',
            'North Kalimantan Liberation Army': 'Indonesia',
            'Forces of the House of Representatives': 'Libya',
            'AMB, Fatah, Hamas, PIJ, PNA': 'Palestinian Territories',
            'TKP-ML': 'Turkey',
            'Yemen (North Yemen)': 'Yemen',
            'Forces of Khalifa al-Ghawil, Forces of the House of Representatives, PFLL': 'Libya',
            'LURD': 'Liberia',
            'Military faction (forces of Charles Arube)': 'Solomon Islands',
            'Sikh insurgents': 'India',
            'Taleban': 'Afghanistan',
            'PMR': 'Moldova' ,
            'Forces of Khudoberdiyev': 'Tajikistan',
            'HSM, NRA, UPDA': 'Somalia',
            'FAN, FAT': 'Angola',
            'Forces of Norodom Ranariddh, KR': 'Cambodia',
            'Military faction (Guatemala)': 'Guatemala',
            'Military faction (forces of Mohamed Madbouh)': 'Egypt',
            'Amal, NSF': 'Lebanon',
            'OLA': 'Peru',
            'Islamic Legion, MOSANAT, Revolutionary Forces of 1 April': 'Libya',
            'Military faction (forces of Ekow Dennis and Edward Adjei-Ampofo)': 'Ghana',
            'Military faction (forces of Abdul Wahab al-Shawaf)': 'Yemen',
            'BMA': 'Thailand',
            'SSA, SSNLO, SURA': 'Sudan',
            'TRC': 'Chad',
            'AIAI, ONLF': 'Ethiopia',
            'Military faction (constitutionalists)': 'Congo',
            'NLC': 'Nigeria',
            'Ansar Dine, AQIM': 'Mali',
            'PUK': 'Iraq',
            'TTP': 'Pakistan',
            'UCK': 'Kosovo',
            'APCO': 'Philippines',
            'KPNLF, KR': 'Cambodia',
            'OLF': 'Ethiopia',
            'Bandera Roja': 'Venezuela',
            'OAS': 'Algeria',
            'LRA': 'Uganda',
            'MKP': 'Greece',
            'PF, ZANU': 'Zimbabwe',
            'Frolinat': 'Chad',
            'INPFL, NPFL': 'Liberia',
            'Zviadists': 'Georgia',
            'Monima': 'Comoros',
            'AQIM, JAK-T': 'Algeria',
            'ASG, MNLF': 'Philippines',
            'Mukti Bahini': 'Bangladesh',
            'AIS': 'Algeria',
            'Hizb-i Islami-yi Afghanistan, Hizb-i Wahdat, Junbish-i Milli-yi Islami, Taleban': 'Afghanistan',
            'Military faction (forces of Moisés Giroldi)': 'Panama',
            'Non PLO groups, PLO': 'Palestinian Territories',
            'Military faction (forces of Ibrahim Saleh)': 'Yemen',
            'RIRA': 'United Kingdom',
            'NSCN-K': 'Myanmar' ,
            'Vietnam (North Vietnam)': 'Vietnam',
            'LPR': 'Ukraine',
            'FDR': 'Congo',
            'Jondullah, PJAK': 'Iran',
            'KNUP': 'Myanmar',
            'Fatah, Hamas, PIJ': 'Palestinian Territories',
            'DPR, LPR': 'Ukraine',
            "Hizb-i Islami-yi Afghanistan, Hizb-i Islami-yi Afghanistan - Khalis faction, Hizb-i Wahdat, Jam'iyyat-i Islami-yi Afghanistan, Military faction (forces of Shahnawaz Tanay)": 'Afghanistan',
            'ASG, BIFM': 'Philippines',
            'JSM': 'Algeria',
            'MQM': 'Pakistan',
            'Myanmar (Burma)': 'Myanmar',
            'PLA': 'India',
            'ATNMC': 'India',
            'CNRD, FDLR, FDLR-RUD': 'Congo',
            'KNUFNS, KPNLF, KR': 'Cambodia',
            'RUF, WSB': 'Sierra Leone',
            'DKBA 5': 'Myanmar' ,
            'PFLP, PFLP-GC': 'Palestine',
            'ALF': 'Sudan',
            'AFRC, Kamajors, RUF': 'Sierra Leone',
            'Military faction (forces of Hezekiah Ochuka)': 'Kenya',
            'Military faction (forces of Mengistu Neway)': 'Ethiopia',
            'AQIM': 'Algeria',
            'ELN, EPL, FARC': 'Colombia',
            'RUF': 'Sierra Leone',
            "Takfir wa'l Hijra": 'Egypt',
            'Ninjas': 'Congo',
            'PLO, Rejectionist Front': 'Palestine',
            'Republic of Slovenia': 'Slovenia',
            'ELN, FARC dissidents': 'Colombia',
            'Republic of South Moluccas': 'Indonesia',
            'FPR': 'Rwanda',
            'Lebanese Forces - Hobeika faction, NUF': 'Lebanon',
            'Popular Revolutionary Movement': 'Peru',
            'LTTE, TELO': 'Sri Lanka',
            'JSS/SB': 'Uganda',
            "Jam'iyyat-i Islami-yi Afghanistan": 'Afghanistan',
            'MPF': 'Philippines',
            'Forces of Carlos Castillo Armas': 'Guatemala',
            'MTA': 'Turkey',
            'Republic of Azerbaijan': 'Azerbaijan',
            "Harakat-i Inqilab-i Islami-yi Afghanistan , Harakat-i Islami-yi Afghanistan, Hizb-i Islami-yi Afghanistan, Hizb-i Islami-yi Afghanistan - Khalis faction, Ittihad-i Islami Bara-yi Azadi-yi Afghanistan , Jabha-yi Nijat-i Milli-yi Afghanistan , Jam'iyyat-i Islami-yi Afghanistan, Mahaz-i Milli-yi Islami-yi Afghanistan": 'Afghanistan',
            'AQIM, JNIM': 'Algeria',
            'CPP, Military faction (forces of Honasan, Abenina & Zumel)': 'Philippines',
            'Democratic Republic of Yemen': 'Yemen',
            'Military faction (forces of Nicolae Ceausescu), NSF': 'Romania',
            'KDPI': 'Iran',
            'Ansar al-Islam, IS': 'Iraq',
            'CCO': 'Colombia',
            'ULFA': 'India',
            'CDR': 'Cuba',
            'KNUFNS': 'Myanmar',
            'PNDF': 'Congo',
            'EDU, TPLF': 'Ethiopia',
            'MNF': 'Iraq',
            'NSH': 'Sudan',
            'IMU, Jamaat-ul-Ahrar, Lashkar-e-Islam, TTP': 'Afghanistan',
            'ZANU': 'Zimbabwe',
            'CNR, CSNPD, FNT': 'Chad',
            'Military faction (forces of Jerry John Rawlings)': 'Ghana',
            'NPFL': 'Liberia',
            'HSM, UPA, UPDA': 'Somalia',
            'NRC': 'Nigeria',
            'Forces of Mullo Abdullo': 'Tajikistan',
            'Croatian irregulars, Republic of Croatia': 'Croatia',
            'LURD, MODEL': 'Liberia',
            'ASG, BIFM-K': 'Philippines',
            'AQIM, MUJAO': 'Algeria',
            'KIO': 'Myanmar' ,
            'UIFSA': 'Uganda',
            'PIJ': 'Palestine',
            'FARC': 'Colombia',
            'MILF': 'Philippines',
            'FMLN': 'El Salvador',
            'Contras': 'Nicaragua',
            'Ahlul Sunnah Jamaa': 'Nigeria',
            'FAR I, FAR II': 'Rwanda',
            'Serbian irregulars, Serbian Republic of Krajina': 'Croatia',
            'Sultanate of Sulu': 'Philippines',
            'CCMSR': 'Chad',
            'Autonomous Province of Western Bosnia': 'Bosnia and Herzegovina',
            'IMU': 'Uzbekistan',
            'CPA, RPF': 'Rwanda',
            'ASL': 'Syria',
            'Hizb-i Islami-yi Afghanistan, Taleban': 'Afghanistan',
            'UNLF': 'India',
            'FUNA, NRA, UNRF': 'Uganda',
            'KCP, PREPAK, UNLF': 'India',
            'EGP, FAR I, FAR II, ORPA': 'Guatemala',
            'SLM': 'Sudan',
            'DPR': 'Ukraine',
            'BRAS': 'Brazil',
            'AQAP, Forces of Hadi, GPC': 'Yemen',
            'EGP, FAR II': 'Guatemala',
            'Syria': 'Syria',
            'PNA': 'Palestine',
            'SCIRI': 'Iraq',
            'Lao Issara': 'Laos',
            'FAN, FAP': 'Paraguay',
            'Opposition coalition (Febreristas, Liberals and Communists)': 'Paraguay',
            'Military faction (forces of Patrick Nzeogwu)': 'Nigeria',
            'Forces of Muammar Gaddafi, NTC': 'Libya',
            'Ansar Dine, AQIM, MUJAO, Signed-in-Blood Battalion': 'Mali',
            "Harakit Sawa'id Misr": 'Egypt',
            'Palestinian insurgents': 'Palestine',
            'MNDAA': 'Myanmar' ,
            'GIA': 'Algeria',
            'TAK, Yurtta Sulh Konseyi': 'Turkey',
            "Harakit Sawa'id Misr, Jama'at Ansar al-Islam": 'Egypt',
            'Military faction (Lesotho)': 'Lesotho',
            'DKBA 5, KNU': 'Myanmar' ,
            'Republic of Artsakh': 'Azerbaijan',
            'Ntsiloulous': 'Congo',
            'FRCI': 'Ivory Coast',
            'MPLA, UNITA': 'Angola',
            'MFDC': 'Senegal',
            'Military faction (colorados)': 'Paraguay',
            'United Kingdom, United States of America': 'United Kingdom',
            'al-Harakat al-Islamiyah, ASG, BIFM, Maute group': 'Philippines',
            'Ansarallah, AQAP': 'Yemen',
            'Croatian irregulars, Croatian Republic of Bosnia-Herzegovina': 'Croatia',
            'PWG': 'India',
            'KNU': 'Myanmar',
            'ASG': 'Philippines',
            'al-Qaida': 'Afghanistan',
            'FDSI-CI, FRCI': 'Ivory Coast',
            'LTTE': 'Sri Lanka',
            'AQIM, GIA': 'Algeria',
            'ALP, RPF': 'Afghanistan',
            'Ogaden Liberation Front': 'Ethiopia',
            'DHKP-C': 'Turkey',
            'Mau Mau': 'Kenya',
            'CPI-ML': 'India',
            'Fatah, Hamas, PFLP, PIJ, PNA': 'Palestine',
            'OPM': 'Indonesia',
            'ARDUF': 'Eritrea',
            'Viet minh': 'Vietnam',
            'Puerto Rican Nationalist Party': 'United States',
            'IGLF': 'Libya',
            'NSF': 'Nepal',
            'BLA, BRA': 'Pakistan',
            'Al-Shabaab': 'Somalia',
            'Darul Islam, Permesta Movement, PRRI': 'Indonesia',
            'Maidan': 'Ukraine',
            'National Liberation Army': 'Colombia',
            'PKK': 'Turkey',
            "Hizb-i Islami-yi Afghanistan, Hizb-i Islami-yi Afghanistan - Khalis faction, Hizb-i Wahdat, Jam'iyyat-i Islami-yi Afghanistan, Mahaz-i Milli-yi Islami-yi Afghanistan": 'Afghanistan',
            'Frelimo': 'Mozambique',
            'AN': 'Angola',
            'CPB': 'Colombia',
            'LRA, UPA': 'Uganda',
            'ZAPU': 'Zimbabwe',
            'PULF': 'Manipur',
            "Jam'iyyat-i Islami-yi Afghanistan, Taleban, UIFSA": 'Afghanistan',
            'EZLN': 'Mexico',
            'CPB, PVO - "White Band" faction': 'Philippines',
            'ARSA': 'Myanmar',
            'MLN/Tupamaros': 'Uruguay',
            'EPRDF, Military faction (forces of Amsha Desta and Merid Negusie)': 'Ethiopia',
            'Forces of Khudoberdiyev, UTO': 'Tajikistan',
            'Forces of Mullo Abdullo, IMU': 'Afghanistan',
            'ERP, FPL': 'El Salvador',
            'Chechen Republic of Ichkeria': 'Russia',
            'NRA': 'Nepal',
            'YSP - AFI': 'Yemen' ,
            'Republic of Armenia': 'Armenia',
            'Ansarallah, AQAP, Forces of Hadi': 'Yemen',
            'Republic of Abkhazia': 'Georgia',
            'NDFB': 'India',
            'ADF': 'Congo',
            'SWAPO': 'Namibia',
            'AQAP': 'Yemen',
            'KDP-QM, PUK': 'Iraq',
            'ERP, Montoneros': 'Argentina',
            'ADF, LRA':  'Uganda',
            'Republic of Biafra': 'Nigeria',
            'Forces of the House of Representatives, Zintan Military Council': 'Libya',
            'JNIM': 'Mali',
            'SNUF': 'Somalia',
            'TPLF': 'Ethiopia',
            "al-Gama'a al-Islamiyya": 'Egypt',
            'GAM': 'Indonesia' ,
            'NUF': 'Nigeria' ,
            'BLA, BLF, BRA': 'Pakistan',
            'Hamas': 'Palestinian Territories' ,
            'SALF': 'Sudan',
            'Russia (Soviet Union)': 'Russia',
            'GNLA': 'India',
            'FNLA, MPLA, UNITA': 'Angola',
            'NDF': 'Philippines',
            'BDPS': 'Myanmar' ,
            'FIAA': 'Chad',
            'Military faction (free Officers Movement)': 'Egypt' ,
            'ADF, LRA, UNRF II':  'Congo',
            'FNLA': 'Angola',
            'High Council of Afghanistan Islamic Emirate, Taleban': 'Afghanistan',
            'Neutralists, Pathet Lao': 'Laos',
            'Huk': 'Philippines',
            'Baloch Ittehad, BLA': 'Pakistan',
            'FLN, MNA': 'Algeria',
            'United States of America': 'United States',
            'Taiwanese insurgents': 'China',
            'CPI-Maoist': 'India',
            'Forces of Norodom Ranariddh, KR, Military faction (Forces of Hun Sen)': 'Cambodia',
            'EPR': 'El Salvador',
            'EPRP, TPLF': 'Ethiopia',
            'Darul Islam': 'Indonesia',
            'AMB, Hamas, PIJ': 'Palestinian Territories',
            'PJAK': 'Iran' ,
            'Islamic Legion, MPS': 'Libya',
            'Fatah, Hamas, PIJ, PRC': 'Palestinian Territories',
            'MPCI, MPIGO': 'Ivory Coast' ,
            'Military faction (forces of Idi Amin)': 'Uganda',
            'IS': 'Iraq',
            'Pathet Lao': 'Laos',
            'Military faction (forces of Asaminew Tsige)': 'Ethiopia',
            'FUCD, RAFD, UFDD': 'Chad',
            'APLP, Mujahid Party': 'Sri Lanka',
            'Military faction (forces of Hugo Chávez)': 'Venezuela',
            'MDD, Military faction (forces of Maldoum Bada Abbas)': 'Chad',
            'ELN, EPL - Megateo, FARC': 'Colombia',
            'MILF, MNLF': 'Philippines',
            'Forces of Michel Aoun, Lebanese Forces': 'Lebanon',
            'FNLA, MPLA': 'Angola',
            'EOKA': 'Cyprus' ,
            'Jaish al-Adl': 'Iran',
            'Wahhabi movement of the Buinaksk district': 'Russia',
            'Forces of Hadi': 'Yemen',
            'PIRA': 'Ireland',
            'NMSP': 'Myanmar',
            'NSCN-IM': 'India',
            'Hamas, PIJ': 'Palestinian Territories',
            'EPDM, EPRP, TPLF': 'Ethiopia',
            'MJP, MPIGO': 'Ivory Coast' ,
            'Ansar al-Sunnah': 'Iraq',
            'Somalia': 'Somalia',
            'FPLA': 'Angola',
            'RSO': 'Russia' ,
            'Serbian Republic of Krajina': 'Croatia',
            'MIM': 'India' ,
            'Forces of the Caucasus Emirate': 'Russia' ,
            'Military faction (forces of Andres Rodriguez)': 'Paraguay',
            'ANLP, CPA': 'Sudan',
            'ELF, EPLF': 'Eritrea',
            'BLF': 'Pakistan',
            'Croatian Republic of Bosnia-Herzegovina': 'Bosnia and Herzegovina' ,
            'Sendero Luminoso': 'Peru',
            'KR': 'Cambodia',
            'FPRN': 'Nicaragua',


            
            }

removes = ['EPLF','France, Israel, United Kingdom','PF', 'CRA','UPC','Forest Brothers','ELF','POLISARIO','Cocoyes, Ninjas, Ntsiloulous', 'TNV', 'UFR','FARF, MDD','Opposition coalition','Military faction (navy)','WSLF','CNRT','Royalists','Popular Front','Burundi', 'FSLN', 'Myanmar ', 'Eritrea', 'Djibouti', 'UFRA', 'Sudan', 'Manipur', 'CPP', 'FLN , MNA', 'Khmer Issarak', 'Syria', 'Angola', 'Military faction (forces of Abd as-Salam Arif), NCRC', 'Somalia', 'Solomon Islands', 'Palestine', 'Cuba', 'Lashkar-e-Islam, TTP, TTP - TA', 'Macedonia'
           ]

In [69]:
# Replace values in the 'Entity' column using the replaces dictionary
armed_conflict_df['Country B'] = armed_conflict_df['Country B'].replace(replaces)

# Remove rows corresponding to the values in the removes list
armed_conflict_df = armed_conflict_df[~armed_conflict_df['Country B'].isin(removes)]

In [70]:
b_countries = set(armed_conflict_df['Country B'])

# Find the countries in auth_df that are not in the countries set
countries_not_in_set = b_countries - countries

print(countries_not_in_set)

set()


In [73]:
armed_conflict_df

Unnamed: 0,Country A,Country B,Year,Conflict Type,Cumulative Intensity
0,India,India,2012,3,0
1,India,India,2014,3,0
2,Egypt,Israel,1967,2,1
3,Egypt,Israel,1969,2,1
4,Egypt,Israel,1970,2,1
...,...,...,...,...,...
2563,Myanmar,Liberia,1979,3,0
2564,Myanmar,Liberia,1980,3,0
2565,Myanmar,Liberia,1981,3,0
2566,Myanmar,Liberia,1982,3,0


In [74]:
armed_conflict_df.to_csv('Datasets(Cleaned)/Instruments/conflicts.csv')