# Data Preprocessing

## Setup

In [1]:
# Importing packages
import numpy as np
import pandas as pd
import os
import seaborn as sns
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import matplotlib as plt
import seaborn as sns
import statsmodels.api as sm
from plotnine import ggplot, aes, geom_point
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder


In [2]:
# Reading in data

## Reading in DECO data
deco = pd.read_excel("./DECO_v.1.0.xlsx")

## Reading in world indicators data
ind = pd.read_excel("./World Bank Development Indicators.xlsx")

## Reading in country abbreviations
abbr = pd.read_excel("./Country Abbreviations.xlsx")

## Display all columns
pd.set_option('display.max_columns', None)

## Preprocessing

### Creating dummy variables and one-hot encoding

#### Creating dummy variables for strictly categorical variables

In [3]:
# creating categorical dummies

## country
country_dummies = pd.get_dummies(deco["country"]) 

## region
region_dummies = pd.get_dummies(deco["region"]) 

## electoral violence side a
electoral_side_a_dummies = pd.get_dummies(deco["electoral_side_a"]) 

## electoral violence side b
electoral_side_b_dummies = pd.get_dummies(deco["electoral_side_b"]) 

## electoral uncertainty dummy (numeric: 0=low, 1=some, 2=high)
electoral_uncertainty_dummies = pd.get_dummies(deco["electoral_vio_uncertainty"]).rename(columns = {0: 'electoral_vio_uncertainty_0', 1: 'electoral_vio_uncertainty_1', 2:'electoral_vio_uncertainty_2'}, inplace = False) 

## type of violence (numeric: 1=state-based conflict, 2=non-state conflict, 3=one-sided violence, 4=violence in civilian protestsand miscellaneous incident)
type_violence_dummies = pd.get_dummies(deco["type_of_violence"]).rename(columns = {1: 'type_of_violence_1', 2: 'type_of_violence_2', 3:'type_of_violence_3', 4:'type_of_violence_4'}, inplace = False)  

## violent perpetrators (numeric: 1=side a, 2=side b, 3=symmetric, 4=unclear)
electoral_perpetrator_dummies = pd.get_dummies(deco["electoral_perpetrator"]).rename(columns = {1: 'electoral_perpetrator_1', 2: 'electoral_perpetrator_2', 3:'electoral_perpetrator_3', 4:'electoral_perpetrator_4'}, inplace = False)  

## electoral side a 
electoral_side_a_dummies = pd.get_dummies(deco["electoral_side_a"]).rename(columns = {"Public Officials": "side_a_PublicOfficials", "Security forces": "side_a_SecurityForces", "Political supporters": "side_a_PoliticalSupporters", "Unclear": "side_a_Unclear", "Rebel group": "side_a_RebelGroup", "Politicians": "side_a_Politicians", "Milita": "side_a_Militia", "Other": "side_a_Other", "Civilians": "side_a_Civilians", "External actors": "side_a_ExternActors"})

## electoral side b dummies
electoral_side_b_dummies = pd.get_dummies(deco["electoral_side_b"]).rename(columns = {"Political supporters": "side_b_PoliticalSupporters", "Rebel group": "side_b_RebelGroup", "Unclear": "side_b_Unclear", "Civilians": "side_b_Civilians", "Other": "side_b_Other", "Milita": "side_b_Militia", "Politicians": "side_b_Politicians", "Security forces": "side_b_SecurityForces", "Public officials": "side_b_PublicOfficials"})


# concatenating dummy variables to new data frame (deco_clean)
deco_clean = pd.concat([deco, 
                        country_dummies, 
                        region_dummies,
                        electoral_uncertainty_dummies,
                        type_violence_dummies,
                        electoral_perpetrator_dummies,
                        electoral_side_a_dummies,
                        electoral_side_b_dummies
                       ], axis=1)

#### Creating dummy vairables for categories where values can take on more than 1 attribute at a time (e.g. item1;item2...)

In [4]:
# Defining function to parse and one-hot encode categorical variables with >1 attribute

def dummy_multiple_attr(df, column, delimter):
    '''splitting data by delimeter'''
    split_data = df[column].str.split(delimter).explode()
    '''creating list of unique values for input column'''
    data_list = list(split_data[~split_data.str.contains(delimter)].unique())
    '''cleaning and stripping each item in list'''
    data_list = [i.lstrip() for i in data_list]
    '''removing excess punction'''
    #data_list.remove(".")
    '''for loop: interating through each unique value and one-hot encoding column for each item'''
    for i in data_list:
        df[f'{column}_{i}'] = np.where(df[column].str.lower().str.contains(str(i)), 1, 0)


In [5]:
# electoral purposes
dummy_multiple_attr(deco_clean, 'electoral_purpose', ";")

In [6]:
# electoral targets
dummy_multiple_attr(deco_clean, 'electoral_targets', ";")

In [7]:
# electoral type
dummy_multiple_attr(deco_clean, 'electoral_type', ";")

In [8]:
# electoral timing
dummy_multiple_attr(deco_clean, 'electoral_timing', ";")

In [9]:
# Consolidating electoral purpose columns


deco_clean['electoral_purpose_secure elections'] = np.where(((deco_clean['electoral_purpose_secure election']==1)|
                                                            (deco_clean['electoral_purpose_Secure elections']==1)|
                                                           (deco_clean['electoral_purpose_secure elections']==1)), 1 ,0)

deco_clean['electoral_purpose_crack down on protest and/or riot'] = np.where(((deco_clean['electoral_purpose_crack down on prostest and/or riot']==1)|
                                                                            (deco_clean['electoral_purpose_crack down on protest and/or riot']==1) |
                                                                             (deco_clean['electoral_purpose_crack down on protest and/or riot/retaliate violent events']==1)|
                                                                             (deco_clean['electoral_purpose_crack down on riot and/or protest']==1)|
                                                                             (deco_clean['electoral_purpose_crack down on riots and/or protests']==1)|
                                                                             (deco_clean['electoral_purpose_crackdown on protest and/or riot']==1)), 1, 0)

deco_clean['electoral_purpose_divergent affiliations'] = np.where(((deco_clean['electoral_purpose_divergent affiliations']==1)|
                                                                  deco_clean['electoral_purpose_diverging affiliations']==1), 1, 0)

deco_clean['electoral_purpose_unclear'] = np.where(((deco_clean['electoral_purpose_unclear']==1)|
                                                  (deco_clean['electoral_purpose_Unclear']==1)), 1, 0)

deco_clean.drop(columns=['electoral_purpose_secure election','electoral_purpose_Secure elections','electoral_purpose_crack down on prostest and/or riot','electoral_purpose_crack down on protest and/or riot/retaliate violent events','electoral_purpose_crack down on riot and/or protest','electoral_purpose_crack down on riots and/or protests','electoral_purpose_crackdown on protest and/or riot','electoral_purpose_diverging affiliations',])
# drop 'electoral_purpose_secure election', 'electoral_purpose_Secure elections', 

Unnamed: 0,id,type_of_violence,conflict_new_id,conflict_name,dyad_new_id,side_a_new_id,side_a,side_b_new_id,side_b,country_id,country,region,source_article,year,date_start,date_end,deaths_a,deaths_b,civilian_deaths,unknown,best,high,low,latitude,longitude,electoral_vio,electoral_vio_uncertainty,electoral_vio_source,electoral_purpose,electoral_side_a,electoral_side_a_2,electoral_side_a_inc,electoral_side_b,electoral_side_b_2,electoral_side_b_inc,electoral_perpetrator,electoral_targets,electoral_type,electoral_timing,relid,ucdp_ged,Afghanistan,Albania,Algeria,Angola,Bangladesh,Benin,Bolivia,Brazil,Burundi,Cambodia (Kampuchea),Central African Republic,Colombia,Congo,DR Congo (Zaire),Djibouti,Dominican Republic,Ecuador,Egypt,El Salvador,Ethiopia,Fiji,Gambia,Georgia,Ghana,Guatemala,Guinea,Guinea-Bissau,Haiti,Honduras,India,Indonesia,Iran,Iraq,Israel,Italy,Ivory Coast,Jamaica,Kenya,Kosovo,Kyrgyzstan,Lebanon,Lesotho,Libya,Madagascar (Malagasy),Malawi,Malaysia,Mali,Mauritania,Mexico,Moldova,Mozambique,Myanmar (Burma),Namibia,Nepal,Nicaragua,Nigeria,Pakistan,Papua New Guinea,Peru,Philippines,Russia (Soviet Union),Senegal,Serbia (Yugoslavia),Sierra Leone,Somalia,South Africa,Spain,Sri Lanka,Sudan,Tanzania,Thailand,Tunisia,Turkey,Uganda,Ukraine,United Kingdom,Venezuela,Yemen (North Yemen),Zambia,Zimbabwe (Rhodesia),Africa,Americas,Asia,Europe,Middle East,electoral_vio_uncertainty_0,electoral_vio_uncertainty_1,electoral_vio_uncertainty_2,type_of_violence_1,type_of_violence_2,type_of_violence_3,type_of_violence_4,electoral_perpetrator_1,electoral_perpetrator_2,electoral_perpetrator_3,electoral_perpetrator_4,side_a_Civilians,side_a_ExternActors,side_a_Militia,side_a_Other,side_a_PoliticalSupporters,side_a_Politicians,Public officials,side_a_RebelGroup,side_a_SecurityForces,side_a_Unclear,side_b_Civilians,side_b_Militia,side_b_Other,side_b_PoliticalSupporters,side_b_Politicians,side_b_PublicOfficials,side_b_RebelGroup,side_b_SecurityForces,side_b_Unclear,electoral_purpose_disrupt,electoral_purpose_divergent affiliations,electoral_purpose_crack down on protest and/or riot,electoral_purpose_protect results,electoral_purpose_unclear,electoral_purpose_spell,electoral_purpose_eliminate opponent,electoral_purpose_retaliate violent acts,electoral_purpose_overturn results,electoral_purpose_secure elections,electoral_purpose_preemptive violence,electoral_purpose_prevent from voting,electoral_purpose_shape results,electoral_purpose_retaliate for voting for other candidate,electoral_purpose_repress opposition,electoral_purpose_punishment,electoral_purpose_oppose democratic conduct,electoral_purpose_intimidate opponent,electoral_purpose_demand democratic conduct,electoral_purpose_hinder electoral misconduct,electoral_purpose_retaliate for voting for alternative candidate,electoral_purpose_retaliate for voting for candidate,electoral_purpose_forcibly displace groups in order to shape results,electoral_purpose_retaliate acts in electoral process,electoral_purpose_prevent electoral misconduct,electoral_purpose_Unknown,electoral_purpose_call for change in electoral process,electoral_purpose_intimidate voters,electoral_purpose_.,electoral_purpose_Unclear,electoral_targets_2,electoral_targets_6,electoral_targets_3,electoral_targets_5,electoral_targets_10,electoral_targets_1,electoral_targets_9,electoral_targets_7,electoral_targets_11,electoral_targets_-99,electoral_targets_8,electoral_targets_4,electoral_type_2,electoral_type_1,electoral_type_3,electoral_type_4,electoral_type_5,"electoral_type_2,3","electoral_type_1,3","electoral_type_1,4",electoral_timing_2,electoral_timing_1,electoral_timing_3,electoral_timing_4,electoral_timing_8
0,95,1,230,Yemen (North Yemen):Government,10855,123,Government of Yemen (North Yemen),1091,Ansarallah,678,Yemen (North Yemen),Middle East,"Agence France Presse, 2012-02-21, Four killed ...",2012,2012-02-21,2012-02-21,0,0,1,0,1,1,1,12.779444,45.036667,1,0,,disrupt,Public officials,,1,Political supporters,,0,2,2,2,2,YEM-2012-1-X2626-4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0
1,96,1,230,Yemen (North Yemen):Government,10855,123,Government of Yemen (North Yemen),1091,Ansarallah,678,Yemen (North Yemen),Middle East,"Agence France Presse, 2012-02-21, Four killed ...",2012,2012-02-21,2012-02-21,1,0,0,0,1,1,1,12.779444,45.036667,1,0,,disrupt,Security forces,,1,Political supporters,,0,2,6,2,2,YEM-2012-1-X2626-5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0
2,175,2,5036,Supporters of General Peoples Congress - Suppo...,5646,3425,Supporters of General Peoples Congress,2374,Supporters of Islah party,678,Yemen (North Yemen),Middle East,"Reuters 1997-04-21 ""Yemeni man killed in pre-e...",1997,1997-04-20,1997-04-20,0,0,0,1,1,1,1,13.969184,43.996728,1,0,,divergent affiliations,Political supporters,,1,Political supporters,,1,3,3,1,1,YEM-1997-2-X4857-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0
3,265,1,230,Yemen (North Yemen):Government,10855,123,Government of Yemen (North Yemen),1091,Ansarallah,678,Yemen (North Yemen),Middle East,"Agence France Presse, 2012-02-09, Two killed i...",2012,2012-02-09,2012-02-09,0,2,0,0,2,2,2,13.695723,44.73137,1,0,,crack down on protest and/or riot,Security forces,,1,Political supporters,,0,1,5,2,1,YEM-2012-1-X2626-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0
4,266,1,230,Yemen (North Yemen):Government,10855,123,Government of Yemen (North Yemen),1091,Ansarallah,678,Yemen (North Yemen),Middle East,"Rueters News, 2012-02-20,WRAPUP 3-Violence hit...",2012,2012-02-20,2012-02-20,0,1,0,0,1,1,1,13.695723,44.73137,1,0,,crack down on protest and/or riot,Security forces,,1,Political supporters,,0,1,5,2,1,YEM-2012-1-X2626-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4228,253925,3,469,Government of Kenya - Civilians,936,91,Government of Kenya,1,Civilians,501,Kenya,Africa,"""Human Rights Watch,2018-02-25,Kenya: Fresh Ev...",2017,2017-10-31,2017-10-31,0,0,1,0,1,1,1,-1.283333,36.816667,1,1,,crack down on protest and/or riot,Security forces,,1,Civilians,,0,1,5,2,3,KEN-2017-3-936-70,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0
4229,253926,4,13998,Government of Kenya - Opponents of Kenyatta,15173,91,Government of Kenya,6777,Opponents of Kenyatta,501,Kenya,Africa,"""Human Rights Watch,2018-02-25,Kenya: Fresh Ev...",2017,2017-11-17,2017-11-17,0,0,0,1,1,1,1,-1.283333,36.816667,1,2,,crack down on protest and/or riot,Security forces,,1,Civilians,,0,1,5,2,3,KEN-2017-4-15173-57,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0
4230,253927,3,469,Government of Kenya - Civilians,936,91,Government of Kenya,1,Civilians,501,Kenya,Africa,"""Human Rights Watch,2018-02-25,Kenya: Fresh Ev...",2017,2017-11-20,2017-11-20,0,0,1,0,1,1,1,-1.283333,36.816667,1,2,,crack down on protest and/or riot,Security forces,,1,Civilians,,0,1,5,2,3,KEN-2017-3-936-73,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0
4231,253928,3,469,Government of Kenya - Civilians,936,91,Government of Kenya,1,Civilians,501,Kenya,Africa,"""Human Rights Watch,2018-02-25,Kenya: Fresh Ev...",2017,2017-11-28,2017-11-28,0,0,1,0,1,1,1,-1.283333,36.816667,1,2,,crack down on protest and/or riot,Security forces,,1,Civilians,,0,1,5,2,3,KEN-2017-3-936-74,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0


### Missing values

In [10]:
# Checking for missing values
missing_values_count = deco_clean.isnull().sum() # generating array of missing values and columns
pd.set_option('display.max_rows', None) # setting view to all rows
pd.DataFrame(missing_values_count).reset_index().rename(columns={"index":"column",0:"n"}).sort_values(by="n", ascending=False) # data

Unnamed: 0,column,n
33,electoral_side_b_2,4231
30,electoral_side_a_2,3974
27,electoral_vio_source,3261
12,source_article,9
23,latitude,3
24,longitude,3
29,electoral_side_a,1
141,side_a_PoliticalSupporters,0
142,side_a_Politicians,0
143,Public officials,0


In [11]:
# Filling in missing values

## filling in NAs for electoral_side_a
deco_clean['electoral_side_a'] = deco_clean['electoral_side_a'].fillna("Unclear")

## filling in NAs for longitude and latitude
deco_clean['longitude'] = deco_clean['longitude'].fillna(-99)
deco_clean['latitude'] = deco_clean['latitude'].fillna(-99)

## filling in NAs for electoral_vio_source
deco_clean['electoral_vio_source'] = deco_clean['electoral_vio_source'].fillna("None/Unknown")

## filling in NAs for source article
deco_clean['source_article'] = deco_clean['source_article'].fillna("None/Unknown")


### Merging DECO data with indicators

In [12]:
# Creating country-year identifier for indicators
ind['country_year'] = ind['Country Abbreviation'] + ind['Year']
ind.head()


Unnamed: 0,Country,Year,Country Abbreviation,GDP (constant 2015 US$),GDP growth (annual %),GDP per capita (constant 2015 US$),Gini index,"Inflation, consumer prices (annual %)",Political Stability and Absence of Violence/Terrorism: Estimate,"Unemployment, total (% of total labor force) (modeled ILO estimate)",country_year
0,Afghanistan,1985,AFG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,AFG1985
1,Afghanistan,1986,AFG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,AFG1986
2,Afghanistan,1987,AFG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,AFG1987
3,Afghanistan,1988,AFG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,AFG1988
4,Afghanistan,1989,AFG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,AFG1989


In [13]:
# List of countries in DECO data
list(deco_clean['country'].unique())

['Yemen (North Yemen)',
 'Algeria',
 'Egypt',
 'Sudan',
 'Burundi',
 'Cambodia (Kampuchea)',
 'Benin',
 'Madagascar (Malagasy)',
 'Lesotho',
 'Djibouti',
 'Guinea',
 'Ghana',
 'Gambia',
 'Malawi',
 'Mali',
 'Nigeria',
 'South Africa',
 'Mozambique',
 'Namibia',
 'Tanzania',
 'Angola',
 'Zimbabwe (Rhodesia)',
 'DR Congo (Zaire)',
 'Guinea-Bissau',
 'Somalia',
 'Papua New Guinea',
 'Central African Republic',
 'Uganda',
 'Sierra Leone',
 'Ivory Coast',
 'Ethiopia',
 'Senegal',
 'Mauritania',
 'Libya',
 'Kenya',
 'Congo',
 'Philippines',
 'Iraq',
 'Myanmar (Burma)',
 'Pakistan',
 'Thailand',
 'India',
 'Spain',
 'United Kingdom',
 'Albania',
 'Honduras',
 'Ecuador',
 'Venezuela',
 'Brazil',
 'Indonesia',
 'Mexico',
 'Fiji',
 'Bangladesh',
 'Sri Lanka',
 'Nepal',
 'Jamaica',
 'Italy',
 'Kyrgyzstan',
 'Dominican Republic',
 'Bolivia',
 'Iran',
 'Colombia',
 'El Salvador',
 'Afghanistan',
 'Turkey',
 'Nicaragua',
 'Peru',
 'Tunisia',
 'Ukraine',
 'Kosovo',
 'Israel',
 'Moldova',
 'Lebanon',


In [14]:
ind['Country Abbreviation'].unique()

array(['AFG', 'ALB', 'DZA', 'ASM', 'AND', 'AGO', 'ATG', 'ARG', 'ARM',
       'ABW', 'AUS', 'AUT', 'AZE', 'BHS', 'BHR', 'BGD', 'BRB', 'BLR',
       'BEL', 'BLZ', 'BEN', 'BMU', 'BTN', 'BOL', 'BIH', 'BWA', 'BRA',
       'VGB', 'BRN', 'BGR', 'BFA', 'BDI', 'CPV', 'KHM', 'CMR', 'CAN',
       'CYM', 'CAF', 'TCD', 'CHL', 'CHN', 'COL', 'COM', 'COD', 'COG',
       'CRI', 'CIV', 'HRV', 'CUB', 'CUW', 'CYP', 'CZE', 'DNK', 'DJI',
       'DMA', 'DOM', 'ECU', 'EGY', 'SLV', 'GNQ', 'ERI', 'EST', 'SWZ',
       'ETH', 'FRO', 'FJI', 'FIN', 'FRA', 'PYF', 'GAB', 'GMB', 'GEO',
       'DEU', 'GHA', 'GIB', 'GRC', 'GRL', 'GRD', 'GUM', 'GTM', 'GIN',
       'GNB', 'GUY', 'HTI', 'HND', 'HKG', 'HUN', 'ISL', 'IND', 'IDN',
       'IRN', 'IRQ', 'IRL', 'IMN', 'ISR', 'ITA', 'JAM', 'JPN', 'JOR',
       'KAZ', 'KEN', 'KIR', 'PRK', 'KOR', 'XKK', 'KWT', 'KGZ', 'LAO',
       'LVA', 'LBN', 'LSO', 'LBR', 'LBY', 'LIE', 'LTU', 'LUX', 'MAC',
       'MDG', 'MWI', 'MYS', 'MDV', 'MLI', 'MLT', 'MHL', 'MRT', 'MUS',
       'MEX', 'FSM',

In [15]:
# Cleaning countries so that they can match abbreviations data
deco_clean['country_new'] = np.where(deco_clean['country']=="Yemen (North Yemen)", "Yemen", 
                                np.where(deco_clean['country']=="Cambodia (Kampuchea)", "Cambodia",
                                        np.where(deco_clean['country']== 'Madagascar (Malagasy)', "Madagascar",
                                                np.where(deco_clean['country']=="Zimbabwe (Rhodesia)", "Zimbabwe",
                                                        np.where(deco_clean['country']=='DR Congo (Zaire)', "DR Congo",
                                                                np.where(deco_clean['country']=='Myanmar (Burma)', "Myanmar",
                                                                        np.where(deco_clean['country']=='Russia (Soviet Union)', "Russia",
                                                                                np.where(deco_clean['country']=='Serbia (Yugoslavia)', "Serbia", deco_clean['country']))))))))

In [16]:
# Merging country abbreviations onto DECO data
deco_clean= pd.merge(deco_clean, abbr, left_on='country_new', right_on="Country", how="left")

In [17]:
# Creating country-year identifier
deco_clean['year_string'] = deco_clean['year'].apply(lambda x: str(x))
deco_clean['country_year'] = deco_clean['Abbreviation'] + deco_clean['year_string']
deco_clean.head(100)

Unnamed: 0,id,type_of_violence,conflict_new_id,conflict_name,dyad_new_id,side_a_new_id,side_a,side_b_new_id,side_b,country_id,country,region,source_article,year,date_start,date_end,deaths_a,deaths_b,civilian_deaths,unknown,best,high,low,latitude,longitude,electoral_vio,electoral_vio_uncertainty,electoral_vio_source,electoral_purpose,electoral_side_a,electoral_side_a_2,electoral_side_a_inc,electoral_side_b,electoral_side_b_2,electoral_side_b_inc,electoral_perpetrator,electoral_targets,electoral_type,electoral_timing,relid,ucdp_ged,Afghanistan,Albania,Algeria,Angola,Bangladesh,Benin,Bolivia,Brazil,Burundi,Cambodia (Kampuchea),Central African Republic,Colombia,Congo,DR Congo (Zaire),Djibouti,Dominican Republic,Ecuador,Egypt,El Salvador,Ethiopia,Fiji,Gambia,Georgia,Ghana,Guatemala,Guinea,Guinea-Bissau,Haiti,Honduras,India,Indonesia,Iran,Iraq,Israel,Italy,Ivory Coast,Jamaica,Kenya,Kosovo,Kyrgyzstan,Lebanon,Lesotho,Libya,Madagascar (Malagasy),Malawi,Malaysia,Mali,Mauritania,Mexico,Moldova,Mozambique,Myanmar (Burma),Namibia,Nepal,Nicaragua,Nigeria,Pakistan,Papua New Guinea,Peru,Philippines,Russia (Soviet Union),Senegal,Serbia (Yugoslavia),Sierra Leone,Somalia,South Africa,Spain,Sri Lanka,Sudan,Tanzania,Thailand,Tunisia,Turkey,Uganda,Ukraine,United Kingdom,Venezuela,Yemen (North Yemen),Zambia,Zimbabwe (Rhodesia),Africa,Americas,Asia,Europe,Middle East,electoral_vio_uncertainty_0,electoral_vio_uncertainty_1,electoral_vio_uncertainty_2,type_of_violence_1,type_of_violence_2,type_of_violence_3,type_of_violence_4,electoral_perpetrator_1,electoral_perpetrator_2,electoral_perpetrator_3,electoral_perpetrator_4,side_a_Civilians,side_a_ExternActors,side_a_Militia,side_a_Other,side_a_PoliticalSupporters,side_a_Politicians,Public officials,side_a_RebelGroup,side_a_SecurityForces,side_a_Unclear,side_b_Civilians,side_b_Militia,side_b_Other,side_b_PoliticalSupporters,side_b_Politicians,side_b_PublicOfficials,side_b_RebelGroup,side_b_SecurityForces,side_b_Unclear,electoral_purpose_disrupt,electoral_purpose_divergent affiliations,electoral_purpose_crack down on protest and/or riot,electoral_purpose_protect results,electoral_purpose_unclear,electoral_purpose_spell,electoral_purpose_eliminate opponent,electoral_purpose_retaliate violent acts,electoral_purpose_overturn results,electoral_purpose_secure elections,electoral_purpose_preemptive violence,electoral_purpose_prevent from voting,electoral_purpose_shape results,electoral_purpose_retaliate for voting for other candidate,electoral_purpose_repress opposition,electoral_purpose_punishment,electoral_purpose_crack down on prostest and/or riot,electoral_purpose_oppose democratic conduct,electoral_purpose_intimidate opponent,electoral_purpose_demand democratic conduct,electoral_purpose_hinder electoral misconduct,electoral_purpose_retaliate for voting for alternative candidate,electoral_purpose_crack down on riot and/or protest,electoral_purpose_retaliate for voting for candidate,electoral_purpose_forcibly displace groups in order to shape results,electoral_purpose_retaliate acts in electoral process,electoral_purpose_prevent electoral misconduct,electoral_purpose_Unknown,electoral_purpose_secure election,electoral_purpose_crackdown on protest and/or riot,electoral_purpose_call for change in electoral process,electoral_purpose_Secure elections,electoral_purpose_intimidate voters,electoral_purpose_crack down on riots and/or protests,electoral_purpose_.,electoral_purpose_crack down on protest and/or riot/retaliate violent events,electoral_purpose_Unclear,electoral_purpose_diverging affiliations,electoral_targets_2,electoral_targets_6,electoral_targets_3,electoral_targets_5,electoral_targets_10,electoral_targets_1,electoral_targets_9,electoral_targets_7,electoral_targets_11,electoral_targets_-99,electoral_targets_8,electoral_targets_4,electoral_type_2,electoral_type_1,electoral_type_3,electoral_type_4,electoral_type_5,"electoral_type_2,3","electoral_type_1,3","electoral_type_1,4",electoral_timing_2,electoral_timing_1,electoral_timing_3,electoral_timing_4,electoral_timing_8,country_new,Country,Abbreviation,year_string,country_year
0,95,1,230,Yemen (North Yemen):Government,10855,123,Government of Yemen (North Yemen),1091,Ansarallah,678,Yemen (North Yemen),Middle East,"Agence France Presse, 2012-02-21, Four killed ...",2012,2012-02-21,2012-02-21,0,0,1,0,1,1,1,12.779444,45.036667,1,0,None/Unknown,disrupt,Public officials,,1,Political supporters,,0,2,2,2,2,YEM-2012-1-X2626-4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,Yemen,Yemen,YEM,2012,YEM2012
1,96,1,230,Yemen (North Yemen):Government,10855,123,Government of Yemen (North Yemen),1091,Ansarallah,678,Yemen (North Yemen),Middle East,"Agence France Presse, 2012-02-21, Four killed ...",2012,2012-02-21,2012-02-21,1,0,0,0,1,1,1,12.779444,45.036667,1,0,None/Unknown,disrupt,Security forces,,1,Political supporters,,0,2,6,2,2,YEM-2012-1-X2626-5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,Yemen,Yemen,YEM,2012,YEM2012
2,175,2,5036,Supporters of General Peoples Congress - Suppo...,5646,3425,Supporters of General Peoples Congress,2374,Supporters of Islah party,678,Yemen (North Yemen),Middle East,"Reuters 1997-04-21 ""Yemeni man killed in pre-e...",1997,1997-04-20,1997-04-20,0,0,0,1,1,1,1,13.969184,43.996728,1,0,None/Unknown,divergent affiliations,Political supporters,,1,Political supporters,,1,3,3,1,1,YEM-1997-2-X4857-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,Yemen,Yemen,YEM,1997,YEM1997
3,265,1,230,Yemen (North Yemen):Government,10855,123,Government of Yemen (North Yemen),1091,Ansarallah,678,Yemen (North Yemen),Middle East,"Agence France Presse, 2012-02-09, Two killed i...",2012,2012-02-09,2012-02-09,0,2,0,0,2,2,2,13.695723,44.73137,1,0,None/Unknown,crack down on protest and/or riot,Security forces,,1,Political supporters,,0,1,5,2,1,YEM-2012-1-X2626-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,Yemen,Yemen,YEM,2012,YEM2012
4,266,1,230,Yemen (North Yemen):Government,10855,123,Government of Yemen (North Yemen),1091,Ansarallah,678,Yemen (North Yemen),Middle East,"Rueters News, 2012-02-20,WRAPUP 3-Violence hit...",2012,2012-02-20,2012-02-20,0,1,0,0,1,1,1,13.695723,44.73137,1,0,None/Unknown,crack down on protest and/or riot,Security forces,,1,Political supporters,,0,1,5,2,1,YEM-2012-1-X2626-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,Yemen,Yemen,YEM,2012,YEM2012
5,280,2,5036,Supporters of General Peoples Congress - Suppo...,5646,3425,Supporters of General Peoples Congress,2374,Supporters of Islah party,678,Yemen (North Yemen),Middle East,"AP 2001-02-21 ""Death toll in Yemeni polling is...",2001,2001-02-20,2001-02-20,0,0,0,2,2,2,2,14.542742,44.405145,1,0,None/Unknown,divergent affiliations,Political supporters,,1,Political supporters,,0,3,3,3;4;5,2,YEM-2001-2-X4857-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,Yemen,Yemen,YEM,2001,YEM2001
6,307,2,5036,Supporters of General Peoples Congress - Suppo...,5646,3425,Supporters of General Peoples Congress,2374,Supporters of Islah party,678,Yemen (North Yemen),Middle East,"Reuters 1997-04-30 ""Four killed in more Yemen ...",1997,1997-04-30,1997-04-30,0,0,0,4,4,4,4,16.0,43.25,1,0,None/Unknown,divergent affiliations,Political supporters,,1,Political supporters,,1,3,3,1,2,YEM-1997-2-X4857-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,Yemen,Yemen,YEM,1997,YEM1997
7,339,1,230,Yemen (North Yemen):Government,10855,123,Government of Yemen (North Yemen),1091,Ansarallah,678,Yemen (North Yemen),Middle East,"Agence France Presse, 2012-02-21, Four killed ...",2012,2012-02-21,2012-02-21,0,1,0,1,2,2,2,13.056667,44.881944,1,0,None/Unknown,crack down on protest and/or riot,Security forces,,1,Political supporters,,0,1,5,2,2,YEM-2012-1-X2626-7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,Yemen,Yemen,YEM,2012,YEM2012
8,343,1,11148,Yemen (North Yemen):,11758,123,Government of Yemen (North Yemen),2374,Supporters of Islah party,678,Yemen (North Yemen),Middle East,"AFP 2001-02-25 ""Six more killed in Yemen elect...",2001,2001-02-25,2001-02-25,3,3,0,0,6,6,6,14.0,44.166667,1,0,None/Unknown,protect results,Security forces,,1,Political supporters,,0,3,3;6,3;4;5,3,YEM-2001-1-X4850-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,0,0,Yemen,Yemen,YEM,2001,YEM2001
9,344,2,5036,Supporters of General Peoples Congress - Suppo...,5646,3425,Supporters of General Peoples Congress,2374,Supporters of Islah party,678,Yemen (North Yemen),Middle East,"Reuters 2001-02-14 ""One Yemeni killed in pre-p...",2001,2001-02-14,2001-02-14,0,1,0,0,1,1,1,14.0,44.166667,1,0,None/Unknown,divergent affiliations,Political supporters,,1,Political supporters,,0,3,3,3;4;5,1,YEM-2001-2-X4857-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,Yemen,Yemen,YEM,2001,YEM2001


In [18]:
# Merging indicators with DECO data on country-year identifier
deco_clean = pd.merge(deco_clean, ind, on='country_year', how="left")
deco_clean.head(200)

Unnamed: 0,id,type_of_violence,conflict_new_id,conflict_name,dyad_new_id,side_a_new_id,side_a,side_b_new_id,side_b,country_id,country,region,source_article,year,date_start,date_end,deaths_a,deaths_b,civilian_deaths,unknown,best,high,low,latitude,longitude,electoral_vio,electoral_vio_uncertainty,electoral_vio_source,electoral_purpose,electoral_side_a,electoral_side_a_2,electoral_side_a_inc,electoral_side_b,electoral_side_b_2,electoral_side_b_inc,electoral_perpetrator,electoral_targets,electoral_type,electoral_timing,relid,ucdp_ged,Afghanistan,Albania,Algeria,Angola,Bangladesh,Benin,Bolivia,Brazil,Burundi,Cambodia (Kampuchea),Central African Republic,Colombia,Congo,DR Congo (Zaire),Djibouti,Dominican Republic,Ecuador,Egypt,El Salvador,Ethiopia,Fiji,Gambia,Georgia,Ghana,Guatemala,Guinea,Guinea-Bissau,Haiti,Honduras,India,Indonesia,Iran,Iraq,Israel,Italy,Ivory Coast,Jamaica,Kenya,Kosovo,Kyrgyzstan,Lebanon,Lesotho,Libya,Madagascar (Malagasy),Malawi,Malaysia,Mali,Mauritania,Mexico,Moldova,Mozambique,Myanmar (Burma),Namibia,Nepal,Nicaragua,Nigeria,Pakistan,Papua New Guinea,Peru,Philippines,Russia (Soviet Union),Senegal,Serbia (Yugoslavia),Sierra Leone,Somalia,South Africa,Spain,Sri Lanka,Sudan,Tanzania,Thailand,Tunisia,Turkey,Uganda,Ukraine,United Kingdom,Venezuela,Yemen (North Yemen),Zambia,Zimbabwe (Rhodesia),Africa,Americas,Asia,Europe,Middle East,electoral_vio_uncertainty_0,electoral_vio_uncertainty_1,electoral_vio_uncertainty_2,type_of_violence_1,type_of_violence_2,type_of_violence_3,type_of_violence_4,electoral_perpetrator_1,electoral_perpetrator_2,electoral_perpetrator_3,electoral_perpetrator_4,side_a_Civilians,side_a_ExternActors,side_a_Militia,side_a_Other,side_a_PoliticalSupporters,side_a_Politicians,Public officials,side_a_RebelGroup,side_a_SecurityForces,side_a_Unclear,side_b_Civilians,side_b_Militia,side_b_Other,side_b_PoliticalSupporters,side_b_Politicians,side_b_PublicOfficials,side_b_RebelGroup,side_b_SecurityForces,side_b_Unclear,electoral_purpose_disrupt,electoral_purpose_divergent affiliations,electoral_purpose_crack down on protest and/or riot,electoral_purpose_protect results,electoral_purpose_unclear,electoral_purpose_spell,electoral_purpose_eliminate opponent,electoral_purpose_retaliate violent acts,electoral_purpose_overturn results,electoral_purpose_secure elections,electoral_purpose_preemptive violence,electoral_purpose_prevent from voting,electoral_purpose_shape results,electoral_purpose_retaliate for voting for other candidate,electoral_purpose_repress opposition,electoral_purpose_punishment,electoral_purpose_crack down on prostest and/or riot,electoral_purpose_oppose democratic conduct,electoral_purpose_intimidate opponent,electoral_purpose_demand democratic conduct,electoral_purpose_hinder electoral misconduct,electoral_purpose_retaliate for voting for alternative candidate,electoral_purpose_crack down on riot and/or protest,electoral_purpose_retaliate for voting for candidate,electoral_purpose_forcibly displace groups in order to shape results,electoral_purpose_retaliate acts in electoral process,electoral_purpose_prevent electoral misconduct,electoral_purpose_Unknown,electoral_purpose_secure election,electoral_purpose_crackdown on protest and/or riot,electoral_purpose_call for change in electoral process,electoral_purpose_Secure elections,electoral_purpose_intimidate voters,electoral_purpose_crack down on riots and/or protests,electoral_purpose_.,electoral_purpose_crack down on protest and/or riot/retaliate violent events,electoral_purpose_Unclear,electoral_purpose_diverging affiliations,electoral_targets_2,electoral_targets_6,electoral_targets_3,electoral_targets_5,electoral_targets_10,electoral_targets_1,electoral_targets_9,electoral_targets_7,electoral_targets_11,electoral_targets_-99,electoral_targets_8,electoral_targets_4,electoral_type_2,electoral_type_1,electoral_type_3,electoral_type_4,electoral_type_5,"electoral_type_2,3","electoral_type_1,3","electoral_type_1,4",electoral_timing_2,electoral_timing_1,electoral_timing_3,electoral_timing_4,electoral_timing_8,country_new,Country_x,Abbreviation,year_string,country_year,Country_y,Year,Country Abbreviation,GDP (constant 2015 US$),GDP growth (annual %),GDP per capita (constant 2015 US$),Gini index,"Inflation, consumer prices (annual %)",Political Stability and Absence of Violence/Terrorism: Estimate,"Unemployment, total (% of total labor force) (modeled ILO estimate)"
0,95,1,230,Yemen (North Yemen):Government,10855,123,Government of Yemen (North Yemen),1091,Ansarallah,678,Yemen (North Yemen),Middle East,"Agence France Presse, 2012-02-21, Four killed ...",2012,2012-02-21,2012-02-21,0,0,1,0,1,1,1,12.779444,45.036667,1,0,None/Unknown,disrupt,Public officials,,1,Political supporters,,0,2,2,2,2,YEM-2012-1-X2626-4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,Yemen,Yemen,YEM,2012,YEM2012,"Yemen, Rep.",2012,YEM,56340080000.0,2.39299,2148.466478,0.0,9.885387,-2.430663,13.425
1,96,1,230,Yemen (North Yemen):Government,10855,123,Government of Yemen (North Yemen),1091,Ansarallah,678,Yemen (North Yemen),Middle East,"Agence France Presse, 2012-02-21, Four killed ...",2012,2012-02-21,2012-02-21,1,0,0,0,1,1,1,12.779444,45.036667,1,0,None/Unknown,disrupt,Security forces,,1,Political supporters,,0,2,6,2,2,YEM-2012-1-X2626-5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,Yemen,Yemen,YEM,2012,YEM2012,"Yemen, Rep.",2012,YEM,56340080000.0,2.39299,2148.466478,0.0,9.885387,-2.430663,13.425
2,175,2,5036,Supporters of General Peoples Congress - Suppo...,5646,3425,Supporters of General Peoples Congress,2374,Supporters of Islah party,678,Yemen (North Yemen),Middle East,"Reuters 1997-04-21 ""Yemeni man killed in pre-e...",1997,1997-04-20,1997-04-20,0,0,0,1,1,1,1,13.969184,43.996728,1,0,None/Unknown,divergent affiliations,Political supporters,,1,Political supporters,,1,3,3,1,1,YEM-1997-2-X4857-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,Yemen,Yemen,YEM,1997,YEM1997,"Yemen, Rep.",1997,YEM,35526370000.0,5.231112,2076.511507,0.0,2.176727,0.0,10.224
3,265,1,230,Yemen (North Yemen):Government,10855,123,Government of Yemen (North Yemen),1091,Ansarallah,678,Yemen (North Yemen),Middle East,"Agence France Presse, 2012-02-09, Two killed i...",2012,2012-02-09,2012-02-09,0,2,0,0,2,2,2,13.695723,44.73137,1,0,None/Unknown,crack down on protest and/or riot,Security forces,,1,Political supporters,,0,1,5,2,1,YEM-2012-1-X2626-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,Yemen,Yemen,YEM,2012,YEM2012,"Yemen, Rep.",2012,YEM,56340080000.0,2.39299,2148.466478,0.0,9.885387,-2.430663,13.425
4,266,1,230,Yemen (North Yemen):Government,10855,123,Government of Yemen (North Yemen),1091,Ansarallah,678,Yemen (North Yemen),Middle East,"Rueters News, 2012-02-20,WRAPUP 3-Violence hit...",2012,2012-02-20,2012-02-20,0,1,0,0,1,1,1,13.695723,44.73137,1,0,None/Unknown,crack down on protest and/or riot,Security forces,,1,Political supporters,,0,1,5,2,1,YEM-2012-1-X2626-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,Yemen,Yemen,YEM,2012,YEM2012,"Yemen, Rep.",2012,YEM,56340080000.0,2.39299,2148.466478,0.0,9.885387,-2.430663,13.425
5,280,2,5036,Supporters of General Peoples Congress - Suppo...,5646,3425,Supporters of General Peoples Congress,2374,Supporters of Islah party,678,Yemen (North Yemen),Middle East,"AP 2001-02-21 ""Death toll in Yemeni polling is...",2001,2001-02-20,2001-02-20,0,0,0,2,2,2,2,14.542742,44.405145,1,0,None/Unknown,divergent affiliations,Political supporters,,1,Political supporters,,0,3,3,3;4;5,2,YEM-2001-2-X4857-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,0,0,0,0,Yemen,Yemen,YEM,2001,YEM2001,"Yemen, Rep.",2001,YEM,43076690000.0,3.803646,2250.204155,0.0,11.911591,0.0,11.727
6,307,2,5036,Supporters of General Peoples Congress - Suppo...,5646,3425,Supporters of General Peoples Congress,2374,Supporters of Islah party,678,Yemen (North Yemen),Middle East,"Reuters 1997-04-30 ""Four killed in more Yemen ...",1997,1997-04-30,1997-04-30,0,0,0,4,4,4,4,16.0,43.25,1,0,None/Unknown,divergent affiliations,Political supporters,,1,Political supporters,,1,3,3,1,2,YEM-1997-2-X4857-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,Yemen,Yemen,YEM,1997,YEM1997,"Yemen, Rep.",1997,YEM,35526370000.0,5.231112,2076.511507,0.0,2.176727,0.0,10.224
7,339,1,230,Yemen (North Yemen):Government,10855,123,Government of Yemen (North Yemen),1091,Ansarallah,678,Yemen (North Yemen),Middle East,"Agence France Presse, 2012-02-21, Four killed ...",2012,2012-02-21,2012-02-21,0,1,0,1,2,2,2,13.056667,44.881944,1,0,None/Unknown,crack down on protest and/or riot,Security forces,,1,Political supporters,,0,1,5,2,2,YEM-2012-1-X2626-7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,Yemen,Yemen,YEM,2012,YEM2012,"Yemen, Rep.",2012,YEM,56340080000.0,2.39299,2148.466478,0.0,9.885387,-2.430663,13.425
8,343,1,11148,Yemen (North Yemen):,11758,123,Government of Yemen (North Yemen),2374,Supporters of Islah party,678,Yemen (North Yemen),Middle East,"AFP 2001-02-25 ""Six more killed in Yemen elect...",2001,2001-02-25,2001-02-25,3,3,0,0,6,6,6,14.0,44.166667,1,0,None/Unknown,protect results,Security forces,,1,Political supporters,,0,3,3;6,3;4;5,3,YEM-2001-1-X4850-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,0,0,Yemen,Yemen,YEM,2001,YEM2001,"Yemen, Rep.",2001,YEM,43076690000.0,3.803646,2250.204155,0.0,11.911591,0.0,11.727
9,344,2,5036,Supporters of General Peoples Congress - Suppo...,5646,3425,Supporters of General Peoples Congress,2374,Supporters of Islah party,678,Yemen (North Yemen),Middle East,"Reuters 2001-02-14 ""One Yemeni killed in pre-p...",2001,2001-02-14,2001-02-14,0,1,0,0,1,1,1,14.0,44.166667,1,0,None/Unknown,divergent affiliations,Political supporters,,1,Political supporters,,0,3,3,3;4;5,1,YEM-2001-2-X4857-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,Yemen,Yemen,YEM,2001,YEM2001,"Yemen, Rep.",2001,YEM,43076690000.0,3.803646,2250.204155,0.0,11.911591,0.0,11.727


### Final cleaning and preprocessing before model 

In [20]:
# Dropping obsolete variables
deco_clean_final = deco_clean.drop(columns = ['id',
                                              'conflict_name', # already label encoded as "conflict_new_id"
                                              'type_of_violence', # already one hot encoded
                                              'side_a', # already label encoded as "side_a_new_id"
                                              'side_b', # already label encoded as "side_b_new_id"
                                              'country', # already one-hot encoded and label encoded as "country_id"
                                              'region', # already one-hot encoded 
                                              'source_article', # dropping
                                              'electoral_vio_source', # dropping
                                              'electoral_purpose', # already one-hot encoded
                                              'electoral_side_a', # already one-hot encoded
                                              'electoral_side_a_2', # dropping
                                              'electoral_side_b', # already one-hot encoded
                                              'electoral_side_b_2', # dropping
                                              'country_id', # countries already one hot encoded
                                              'relid', # dropping
                                              'longitude',
                                              'latitude',
                                              'date_start',
                                              'date_end',
                                              'electoral_purpose_.',
                                              'country_new',
                                              'Country_x',
                                              'Country_y',
                                              'Abbreviation',
                                              'year_string',
                                              'country_year',
                                              'Country Abbreviation',
                                              'country_year',
                                              "Year"
                                    
                                              
                                    ])
# Exporting CSV
deco_clean.to_csv('deco_clean.csv', index=False) # for exploratory data analysis
deco_clean_final.to_csv('deco_clean_final.csv', index=False) # for ML 