In [3]:
import numpy as np
import pandas as pd
from inv_dict import wb_cow_dict

import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn import metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, r2_score
from sklearn.utils import resample
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
from statsmodels.regression.linear_model import OLS
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.inspection import plot_partial_dependence
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999

In [4]:
def get_cc(val):
    """Return ``wb_cow_dict[val]`` when ``val`` is a key of ``wb_cow_dict``, else 0.

    Presumably maps a World Bank country name to its numeric country code
    (inferred from the dictionary's name -- confirm against ``inv_dict``).
    The 0 fallback marks names that have no entry in the dictionary.
    """
    return wb_cow_dict[val] if val in wb_cow_dict else 0
    
def get_year(val):
    """Convert a year label (for example the column name ``'1960'``) to ``int``."""
    year = int(val)
    return year

def add_wd_rows(reign_df, wdi_df, variable_list):
    """Attach one column per requested indicator to a copy of ``reign_df``.

    Parameters
    ----------
    reign_df : DataFrame
        Must contain a ``'yearcode'`` column (the join key).
    wdi_df : DataFrame
        Wide table with ``'Indicator Name'``, ``'Country Name'`` and one
        column per year labelled ``'1960'`` .. ``'2019'``.
    variable_list : iterable of str
        Indicator names to add; each becomes a column with that name.

    Returns
    -------
    DataFrame
        ``reign_df`` rows inner-joined with every indicator, so rows whose
        ``yearcode`` has no match for an indicator are dropped.
    """
    year_columns = [str(year) for year in np.arange(1960, 2020)]
    merged = reign_df.copy()
    for indicator in variable_list:
        indicator_rows = wdi_df[wdi_df['Indicator Name'] == indicator]
        # Wide (one column per year) -> long (one row per country-year).
        long_form = pd.melt(indicator_rows, id_vars=['Country Name'], value_vars=year_columns)
        long_form['ccode'] = long_form['Country Name'].apply(get_cc)
        long_form['year'] = long_form['variable'].apply(get_year)
        # Same key layout as reign_df['yearcode']: country code * 10000 + year.
        long_form['yearcode'] = long_form['year'] + 10000 * long_form['ccode']
        long_form[indicator] = long_form['value']
        lookup = long_form[[indicator, 'yearcode']].set_index('yearcode')
        merged = merged.join(lookup, on='yearcode', how='inner')
    return merged

In [5]:
# Load the raw REIGN table (the file name suggests the June 2020 release).
df = pd.read_csv('../data/REIGN_2020_6.csv')

In [48]:
# Row labels of the USA observations recorded for 2020.
df.index[(df['country'] == 'USA') & (df['year'] == 2020)]

Int64Index([852, 853, 854, 855, 856, 857], dtype='int64')

In [14]:
# All observations from the year 2020.
twnty = df.loc[df['year'].eq(2020)]

In [41]:
# Narrow the 2020 observations to month 6.
twntyjune = twnty.loc[twnty['month'].eq(6)]

In [42]:
# 'couprisk' values for June 2020, ascending, so the largest values sit at the end.
risks = twntyjune.loc[:, 'couprisk'].sort_values(ascending=True)

In [43]:
# Row labels of the 100 largest 'couprisk' values (last 100 of the ascending sort).
risks.tail(100).index

Int64Index([ 53371, 129741, 105317,  87371,  13835, 128331,  12105,  86066,
              4869, 107722,  12969,  22815,  90452, 104140,  10765,  23672,
             14690,  82834, 132376,  21075,  71002,  98732, 107364,  95287,
            111713, 108798, 116628, 112574, 127247, 108433,  59632, 119884,
             78176, 100443,  89919, 126395,  80574, 124245,  20212,  73182,
            116023, 101307,  79592,  91226,  76786,  87996,  92710,  64278,
             99593,  62092,  70273,  63538,  82291,  81753, 117484,  71729,
            120757,  85700,   4006,  61425, 128880,  65006,  18143,  81427,
             89284,    857,  94434,  97873,  93540, 103013,  84841,  69501,
             72460,  66454,  74633,  77468, 134617,  75359,  76086,  65731,
             84165,  67200,  68781,  62817,  38354,  60744,  86721,  91931,
             73910,   9912,  78884,  67926,  80056, 105901,  97019, 108072,
             96158,  83509,  88729,  94326],
           dtype='int64')

In [44]:
# Print the country for each of the 20 largest June-2020 'couprisk' rows.
for row_label in risks.tail(20).index:
    print(df.loc[df.index == row_label, 'country'])

84165    Zimbabwe
Name: country, dtype: object
67200    Guinea
Name: country, dtype: object
68781    Liberia
Name: country, dtype: object
62817    Mali
Name: country, dtype: object
38354    Poland
Name: country, dtype: object
60744    Guinea Bissau
Name: country, dtype: object
86721    Lesotho
Name: country, dtype: object
91931    Algeria
Name: country, dtype: object
73910    Cen African Rep
Name: country, dtype: object
9912    St Kitts and Nevis
Name: country, dtype: object
78884    Burundi
Name: country, dtype: object
67926    Burkina Faso
Name: country, dtype: object
80056    Somalia
Name: country, dtype: object
105901    United Arab Emirates
Name: country, dtype: object
97019    Iraq
Name: country, dtype: object
108072    Tajikistan
Name: country, dtype: object
96158    Turkey
Name: country, dtype: object
83509    Zambia
Name: country, dtype: object
88729    Madagascar
Name: country, dtype: object
94326    Sudan
Name: country, dtype: object


In [34]:
# Load the pickled frame that is later passed to add_wd_rows as its first argument
# (presumably REIGN aggregated to one row per leader-year -- confirm where it is built).
yearagg = pd.read_pickle('../data/year_agg.pkl')

In [36]:
# Indicator names (values of wdi_df['Indicator Name']) to join onto the REIGN frame.
variable_list = [
    'Life expectancy at birth, female (years)',
    'GDP growth (annual %)',
    'Mineral rents (% of GDP)',
    'Oil rents (% of GDP)',
    'Trade (% of GDP)',
    'Foreign direct investment, net inflows (% of GDP)',
    'Natural gas rents (% of GDP)',
    'Population ages 0-14 (% of total population)',
    'Rural population (% of total population)',
    'Population growth (annual %)',
    'Arable land (hectares per person)',
    'Merchandise exports (current US$)',
    'Merchandise imports (current US$)',
    'Primary education, duration (years)',
]

In [45]:
# Additional columns removed from the joined frame when joint_df_x is built.
new_drops = [
    'ref_recent',
    'ref_ant',
    'Party-Military',
    'Party-Personal-Military Hybrid',
    'Personal Dictatorship',
    'Provisional - Military',
    'Warlordism',
    'anticipation',
    'tenure_months',
    'militarycareer',
    'age',
    'Natural gas rents (% of GDP)',
    'Rural population (% of total population)',
    'Arable land (hectares per person)',
    'lead_recent',
    'exec_ant',
    'leg_ant',
    'leg_recent',
    'indirect_recent',
    'election_now',
    'Merchandise exports (current US$)',
    'precip',
    'defeat_recent',
    'prev_conflict',
    'exec_recent',
    'loss',
    'delayed',
    'change_recent',
]

In [43]:
# add_wd_rows needs the World Development Indicators table. It is loaded here
# because the only other place that binds `wdi_df` is a cell further down the
# notebook, so a top-to-bottom run would otherwise fail with a NameError.
wdi_df = pd.read_pickle('../data/wdi_complete.pkl')
# Join every indicator in variable_list onto the yearly REIGN frame via 'yearcode'.
joint_df = add_wd_rows(yearagg, wdi_df, variable_list)

In [48]:
# Rows of the joined frame whose country code is 2 (the USA rows in this data).
joint_us = joint_df.loc[joint_df['ccode'].eq(2)]

In [None]:
# NOTE(review): this cell was never executed (no execution count). It drops
# government-type columns such as 'Parliamentary Democracy' and 'Party-Military',
# but those only exist after pd.get_dummies is joined in (us_dumb), so it looks
# like it would raise KeyError on `joint_df`. The later cell that builds
# joint_df_x from us_dumb appears to supersede it -- confirm and remove.
joint_df_x = joint_df.drop(['direct_recent', 'Merchandise imports (current US$)', 'Foreign direct investment, net inflows (% of GDP)', 'elected', 'Parliamentary Democracy', 'Primary education, duration (years)'], axis =1).drop(new_drops, axis =1)

In [56]:
# One indicator column per distinct value of 'government'.
dummies = pd.get_dummies(joint_df['government'])
# Append the indicator columns to the joined frame (aligned on the row index).
us_dumb = joint_df.join(dummies)

In [57]:
us_dumb

Unnamed: 0,ccode,country,leader,year,month,elected,age,male,militarycareer,tenure_months,government,anticipation,ref_ant,leg_ant,exec_ant,irreg_lead_ant,election_now,election_recent,leg_recent,exec_recent,lead_recent,ref_recent,direct_recent,indirect_recent,victory_recent,defeat_recent,change_recent,nochange_recent,delayed,lastelection,loss,irregular,prev_conflict,precip,yearcode,coupyear,coupsuc,"Life expectancy at birth, female (years)",GDP growth (annual %),Mineral rents (% of GDP),Oil rents (% of GDP),Trade (% of GDP),"Foreign direct investment, net inflows (% of GDP)",Natural gas rents (% of GDP),Population ages 0-14 (% of total population),Rural population (% of total population),Population growth (annual %),Arable land (hectares per person),Merchandise exports (current US$),Merchandise imports (current US$),"Primary education, duration (years)",Dominant Party,Foreign/Occupied,Indirect Military,Military,Military-Personal,Monarchy,Oligarchy,Parliamentary Democracy,Party-Military,Party-Personal,Party-Personal-Military Hybrid,Personal Dictatorship,Presidential Democracy,Provisional - Civilian,Provisional - Military,Warlordism
11,2.0,USA,Eisenhower,1960.0,1.0,1.0,70.0,1,1.0,85.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.663562,4.465908,7.626082,0.0,0.035185,21960.0,False,False,73.1,,,,,,,30.691081,30.004,1.701993,,1.962600e+10,1.637100e+10,,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
12,2.0,USA,Eisenhower,1961.0,1.0,1.0,71.0,1,1.0,97.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.098612,1.098612,7.631916,0.0,-0.521275,21961.0,False,False,73.6,2.300000,,,,,,30.802588,29.623,1.657730,0.983336,2.019000e+10,1.593800e+10,,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
13,2.0,USA,Kennedy,1961.0,1.0,1.0,44.0,1,0.0,1.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.098612,1.098612,7.631916,0.0,-0.521275,21961.0,False,False,73.6,2.300000,,,,,,30.802588,29.623,1.657730,0.983336,2.019000e+10,1.593800e+10,,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
14,2.0,USA,Kennedy,1962.0,1.0,1.0,45.0,1,0.0,13.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.708050,2.708050,7.637716,0.0,0.272822,21962.0,False,False,73.5,6.100000,,,,,,30.752514,29.243,1.537997,0.949378,2.097300e+10,1.778100e+10,,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
15,2.0,USA,Kennedy,1963.0,1.0,1.0,46.0,1,0.0,25.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.295837,3.295837,7.643483,0.0,-0.218459,21963.0,False,False,73.4,4.400000,,,,,,30.580357,28.866,1.439165,0.948912,2.242700e+10,1.862100e+10,,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11363,990.0,Samoa,Tuilaepa Sailele Malielegaoi,2015.0,1.0,1.0,70.0,1,0.0,194.0,Parliamentary Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.850147,5.780744,6.458338,0.0,-0.143210,9902015.0,False,False,,1.404056,0.0,0.0,164.145234,,0.0,,12.762,0.037633,0.053752,3.790000e+08,6.750000e+08,6.0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
11364,990.0,Samoa,Tuilaepa Sailele Malielegaoi,2016.0,1.0,1.0,71.0,1,0.0,206.0,Parliamentary Democracy,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.077538,5.817111,6.476973,0.0,-0.391420,9902016.0,False,False,,-2.769231,0.0,0.0,162.883436,,0.0,,12.802,-0.127294,0.053820,3.880000e+08,6.750000e+08,6.0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
11365,990.0,Samoa,Tuilaepa Sailele Malielegaoi,2017.0,1.0,1.0,72.0,1,0.0,218.0,Parliamentary Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.397895,5.852202,6.495265,0.0,0.025780,9902017.0,False,False,,-5.854430,0.0,0.0,162.458472,,0.0,,12.830,-0.217311,,3.970000e+08,6.150000e+08,6.0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
11366,990.0,Samoa,Tuilaepa Sailele Malielegaoi,2018.0,1.0,1.0,73.0,1,0.0,230.0,Parliamentary Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.135494,5.886104,6.513230,0.0,0.278135,9902018.0,False,False,,2.184874,,,168.238994,,,,12.847,-0.279066,,4.300000e+08,6.300000e+08,6.0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0


In [58]:
# Reduce the dummy-encoded frame to the modelling columns: first remove a fixed
# set of columns, then everything listed in new_drops.
joint_df_x = (
    us_dumb
    .drop(columns=[
        'direct_recent',
        'Merchandise imports (current US$)',
        'Foreign direct investment, net inflows (% of GDP)',
        'elected',
        'Parliamentary Democracy',
        'Primary education, duration (years)',
    ])
    .drop(columns=new_drops)
)

In [65]:
# USA (ccode 2) rows of the modelling frame for 2016.
US_2016 = joint_df_x.loc[joint_df_x['ccode'].eq(2) & joint_df_x['year'].eq(2016)]

Unnamed: 0,ccode,country,leader,year,month,male,government,irreg_lead_ant,election_recent,victory_recent,nochange_recent,lastelection,irregular,yearcode,coupyear,coupsuc,"Life expectancy at birth, female (years)",GDP growth (annual %),Mineral rents (% of GDP),Oil rents (% of GDP),Trade (% of GDP),Population ages 0-14 (% of total population),Population growth (annual %),Dominant Party,Foreign/Occupied,Indirect Military,Military,Military-Personal,Monarchy,Oligarchy,Party-Personal,Presidential Democracy,Provisional - Civilian
75,2.0,USA,Obama,2016.0,1.0,1,Presidential Democracy,0.0,0.0,0.0,0.0,3.663562,7.909489,22016.0,False,False,81.1,1.567215,0.075769,0.09245,26.514001,19.023342,0.716669,0,0,0,0,0,0,0,0,1,0


In [9]:
# Every row with country code 2.0 (the rows displayed below are all 'USA').
USA = df.loc[df['ccode'].eq(2.0)]

In [18]:
USA

Unnamed: 0,ccode,country,leader,year,month,elected,age,male,militarycareer,tenure_months,government,anticipation,ref_ant,leg_ant,exec_ant,irreg_lead_ant,election_now,election_recent,leg_recent,exec_recent,lead_recent,ref_recent,direct_recent,indirect_recent,victory_recent,defeat_recent,change_recent,nochange_recent,delayed,lastelection,loss,irregular,prev_conflict,pt_suc,pt_attempt,precip,couprisk,pctile_risk
0,2.0,USA,Truman,1950.0,1.0,1.0,66.0,1,0.0,58.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.639057,5.327876,7.565793,0.0,0.0,0.0,-0.069058,,
1,2.0,USA,Truman,1950.0,2.0,1.0,66.0,1,0.0,59.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.70805,5.332719,7.566311,0.0,0.0,0.0,-0.113721,,
2,2.0,USA,Truman,1950.0,3.0,1.0,66.0,1,0.0,60.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.772589,5.337538,7.566829,0.0,0.0,0.0,-0.108042,,
3,2.0,USA,Truman,1950.0,4.0,1.0,66.0,1,0.0,61.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.833213,5.342334,7.567346,0.0,0.0,0.0,-0.0416,,
4,2.0,USA,Truman,1950.0,5.0,1.0,66.0,1,0.0,62.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.890372,5.347107,7.567863,0.0,0.0,0.0,-0.123601,,
5,2.0,USA,Truman,1950.0,6.0,1.0,66.0,1,0.0,63.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.944439,5.351858,7.568379,0.0,0.0,0.0,-0.178496,,
6,2.0,USA,Truman,1950.0,7.0,1.0,66.0,1,0.0,64.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.995732,5.356586,7.568896,0.0,0.0,0.0,-0.04266,,
7,2.0,USA,Truman,1950.0,8.0,1.0,66.0,1,0.0,65.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.044522,5.361292,7.569412,0.0,0.0,0.0,-0.07059,,
8,2.0,USA,Truman,1950.0,9.0,1.0,66.0,1,0.0,66.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.091042,5.365976,7.569928,0.0,0.0,0.0,0.035557,,
9,2.0,USA,Truman,1950.0,10.0,1.0,66.0,1,0.0,67.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.135494,5.370638,7.570443,0.0,0.0,0.0,-0.138818,,


In [11]:
# Largest value in the 'pctile_risk' column.
# NOTE(review): the recorded result (1.0487376) is above 1 -- confirm the column's scale.
df['pctile_risk'].max()

1.0487376

In [25]:
# The USA observation(s) for June 2020.
june2020 = USA.loc[USA['year'].eq(2020) & USA['month'].eq(6)]

In [26]:
june2020

Unnamed: 0,ccode,country,leader,year,month,elected,age,male,militarycareer,tenure_months,government,anticipation,ref_ant,leg_ant,exec_ant,irreg_lead_ant,election_now,election_recent,leg_recent,exec_recent,lead_recent,ref_recent,direct_recent,indirect_recent,victory_recent,defeat_recent,change_recent,nochange_recent,delayed,lastelection,loss,irregular,prev_conflict,pt_suc,pt_attempt,precip,couprisk,pctile_risk
857,2.0,USA,Trump,2020.0,6.0,1.0,74.0,1,0.0,42.0,Presidential Democracy,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.78419,3.78419,7.928766,2.0,0.0,0.0,0.390137,0.001107,0.602049


In [28]:
# Persist the June-2020 USA row(s); note the target file name has no extension.
june2020.to_pickle('../data/usajune2020')

In [31]:
# Load the World Development Indicators table consumed by add_wd_rows.
wdi_df = pd.read_pickle('../data/wdi_complete.pkl')
# Encode 'government' on the full frame so every government type gets a column,
# then attach those columns (by row index) to the June-2020 USA row(s).
dummies = pd.get_dummies(df['government'])
df_dumb = june2020.join(dummies)
# Explicit check in place of two bare column look-ups whose results were discarded.
assert {'pt_attempt', 'pt_suc'} <= set(df_dumb.columns), "missing pt_attempt/pt_suc columns"
# NOTE(review): this rebinds `df` from the full table to the reduced June-2020
# frame, so the cell cannot be re-run (and earlier cells that read `df` no longer
# see the raw data) without reloading the CSV first.
df = df_dumb.drop(columns=['ccode', 'country', 'leader', 'month', 'government'])

In [32]:
df

Unnamed: 0,year,elected,age,male,militarycareer,tenure_months,anticipation,ref_ant,leg_ant,exec_ant,irreg_lead_ant,election_now,election_recent,leg_recent,exec_recent,lead_recent,ref_recent,direct_recent,indirect_recent,victory_recent,defeat_recent,change_recent,nochange_recent,delayed,lastelection,loss,irregular,prev_conflict,pt_suc,pt_attempt,precip,couprisk,pctile_risk,Dominant Party,Foreign/Occupied,Indirect Military,Military,Military-Personal,Monarchy,Oligarchy,Parliamentary Democracy,Party-Military,Party-Personal,Party-Personal-Military Hybrid,Personal Dictatorship,Presidential Democracy,Provisional - Civilian,Provisional - Military,Warlordism
857,2020.0,1.0,74.0,1,0.0,42.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.78419,3.78419,7.928766,2.0,0.0,0.0,0.390137,0.001107,0.602049,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0


In [33]:
def add_wd_rows(reign_df, wdi_df, variable_list):
    """Attach one column per requested indicator to a copy of ``reign_df``.

    NOTE(review): this re-defines ``add_wd_rows`` with the same logic as the
    definition near the top of the notebook; the later definition silently
    shadows the earlier one, so one of the two should be removed.

    Parameters
    ----------
    reign_df : DataFrame
        Must contain a ``'yearcode'`` column (the join key).
    wdi_df : DataFrame
        Wide table with ``'Indicator Name'``, ``'Country Name'`` and one
        column per year labelled ``'1960'`` .. ``'2019'``.
    variable_list : iterable of str
        Indicator names to add; each becomes a column with that name.

    Returns
    -------
    DataFrame
        ``reign_df`` rows inner-joined with every indicator, so rows whose
        ``yearcode`` has no match for an indicator are dropped.
    """
    year_columns = [str(year) for year in np.arange(1960, 2020)]
    merged = reign_df.copy()
    for indicator in variable_list:
        indicator_rows = wdi_df[wdi_df['Indicator Name'] == indicator]
        # Wide (one column per year) -> long (one row per country-year).
        long_form = pd.melt(indicator_rows, id_vars=['Country Name'], value_vars=year_columns)
        long_form['ccode'] = long_form['Country Name'].apply(get_cc)
        long_form['year'] = long_form['variable'].apply(get_year)
        # Same key layout as reign_df['yearcode']: country code * 10000 + year.
        long_form['yearcode'] = long_form['year'] + 10000 * long_form['ccode']
        long_form[indicator] = long_form['value']
        lookup = long_form[[indicator, 'yearcode']].set_index('yearcode')
        merged = merged.join(lookup, on='yearcode', how='inner')
    return merged

In [67]:
# USA (ccode 2) rows of the modelling frame for 2016.
US_2016 = joint_df_x.loc[joint_df_x['ccode'].eq(2) & joint_df_x['year'].eq(2016)]

In [69]:
# USA (ccode 2) rows of the modelling frame for 2017.
US_2017 = joint_df_x.loc[joint_df_x['ccode'].eq(2) & joint_df_x['year'].eq(2017)]

In [72]:
# USA (ccode 2) rows of the modelling frame for 2018.
US_2018 = joint_df_x.loc[joint_df_x['ccode'].eq(2) & joint_df_x['year'].eq(2018)]

In [68]:
US_2016

Unnamed: 0,ccode,country,leader,year,month,male,government,irreg_lead_ant,election_recent,victory_recent,nochange_recent,lastelection,irregular,yearcode,coupyear,coupsuc,"Life expectancy at birth, female (years)",GDP growth (annual %),Mineral rents (% of GDP),Oil rents (% of GDP),Trade (% of GDP),Population ages 0-14 (% of total population),Population growth (annual %),Dominant Party,Foreign/Occupied,Indirect Military,Military,Military-Personal,Monarchy,Oligarchy,Party-Personal,Presidential Democracy,Provisional - Civilian
75,2.0,USA,Obama,2016.0,1.0,1,Presidential Democracy,0.0,0.0,0.0,0.0,3.663562,7.909489,22016.0,False,False,81.1,1.567215,0.075769,0.09245,26.514001,19.023342,0.716669,0,0,0,0,0,0,0,0,1,0


In [70]:
US_2017

Unnamed: 0,ccode,country,leader,year,month,male,government,irreg_lead_ant,election_recent,victory_recent,nochange_recent,lastelection,irregular,yearcode,coupyear,coupsuc,"Life expectancy at birth, female (years)",GDP growth (annual %),Mineral rents (% of GDP),Oil rents (% of GDP),Trade (% of GDP),Population ages 0-14 (% of total population),Population growth (annual %),Dominant Party,Foreign/Occupied,Indirect Military,Military,Military-Personal,Monarchy,Oligarchy,Party-Personal,Presidential Democracy,Provisional - Civilian
76,2.0,USA,Obama,2017.0,1.0,1,Presidential Democracy,0.0,1.0,0.0,0.0,1.098612,7.913887,22017.0,False,False,81.1,2.21701,0.08085,0.177276,27.14232,18.858528,0.631008,0,0,0,0,0,0,0,0,1,0
77,2.0,USA,Trump,2017.0,1.0,1,Presidential Democracy,0.0,1.0,0.0,0.0,1.098612,7.913887,22017.0,False,False,81.1,2.21701,0.08085,0.177276,27.14232,18.858528,0.631008,0,0,0,0,0,0,0,0,1,0


In [73]:
US_2018

Unnamed: 0,ccode,country,leader,year,month,male,government,irreg_lead_ant,election_recent,victory_recent,nochange_recent,lastelection,irregular,yearcode,coupyear,coupsuc,"Life expectancy at birth, female (years)",GDP growth (annual %),Mineral rents (% of GDP),Oil rents (% of GDP),Trade (% of GDP),Population ages 0-14 (% of total population),Population growth (annual %),Dominant Party,Foreign/Occupied,Indirect Military,Military,Military-Personal,Monarchy,Oligarchy,Party-Personal,Presidential Democracy,Provisional - Civilian
78,2.0,USA,Trump,2018.0,1.0,1,Presidential Democracy,0.0,0.0,0.0,0.0,2.70805,7.918265,22018.0,False,False,81.1,2.927323,,,27.543903,18.70904,0.522337,0,0,0,0,0,0,0,0,1,0


In [75]:
june2020.columns

Index(['ccode', 'country', 'leader', 'year', 'month', 'elected', 'age', 'male',
       'militarycareer', 'tenure_months', 'government', 'anticipation',
       'ref_ant', 'leg_ant', 'exec_ant', 'irreg_lead_ant', 'election_now',
       'election_recent', 'leg_recent', 'exec_recent', 'lead_recent',
       'ref_recent', 'direct_recent', 'indirect_recent', 'victory_recent',
       'defeat_recent', 'change_recent', 'nochange_recent', 'delayed',
       'lastelection', 'loss', 'irregular', 'prev_conflict', 'pt_suc',
       'pt_attempt', 'precip', 'couprisk', 'pctile_risk'],
      dtype='object')

In [74]:
['year', 'male', 'irreg_lead_ant', 'election_recent', 'victory_recent',
       'nochange_recent', 'lastelection', 'irregular', 'yearcode',
       'Dominant Party', 'Foreign/Occupied', 'Indirect Military', 'Military',
       'Military-Personal', 'Monarchy', 'Oligarchy', 'Party-Personal',
       'Presidential Democracy', 'Provisional - Civilian', 'pt_attempt',
       'pt_suc', 'Life expectancy at birth, female (years)',
       'GDP growth (annual %)', 'Mineral rents (% of GDP)',
       'Oil rents (% of GDP)', 'Trade (% of GDP)',
       'Population ages 0-14 (% of total population)',
       'Population growth (annual %)', 'constant']

['year',
 'male',
 'irreg_lead_ant',
 'election_recent',
 'victory_recent',
 'nochange_recent',
 'lastelection',
 'irregular',
 'yearcode',
 'Dominant Party',
 'Foreign/Occupied',
 'Indirect Military',
 'Military',
 'Military-Personal',
 'Monarchy',
 'Oligarchy',
 'Party-Personal',
 'Presidential Democracy',
 'Provisional - Civilian',
 'pt_attempt',
 'pt_suc',
 'Life expectancy at birth, female (years)',
 'GDP growth (annual %)',
 'Mineral rents (% of GDP)',
 'Oil rents (% of GDP)',
 'Trade (% of GDP)',
 'Population ages 0-14 (% of total population)',
 'Population growth (annual %)',
 'constant']

In [81]:
# Start the hand-built 2020 USA feature row from the June-2020 REIGN observation,
# keeping only the columns not listed here.
# NOTE(review): the feature list shown in the preceding cell includes 'male',
# 'irreg_lead_ant', 'election_recent' and 'yearcode', none of which survive this
# drop, while 'defeat_recent', 'change_recent', 'delayed', 'loss' and
# 'prev_conflict' are kept here but absent from that list -- confirm against the
# columns the downstream model expects.
us_j2020d = june2020.drop(columns=[
    'ccode', 'country', 'leader',
    'elected', 'age', 'male', 'militarycareer', 'tenure_months',
    'government', 'anticipation',
    'ref_ant', 'leg_ant', 'exec_ant', 'irreg_lead_ant',
    'election_now', 'election_recent',
    'leg_recent', 'exec_recent', 'lead_recent', 'ref_recent',
    'direct_recent', 'indirect_recent',
    'couprisk', 'pctile_risk', 'precip',
])

In [85]:
US_2016

Unnamed: 0,ccode,country,leader,year,month,male,government,irreg_lead_ant,election_recent,victory_recent,nochange_recent,lastelection,irregular,yearcode,coupyear,coupsuc,"Life expectancy at birth, female (years)",GDP growth (annual %),Mineral rents (% of GDP),Oil rents (% of GDP),Trade (% of GDP),Population ages 0-14 (% of total population),Population growth (annual %),Dominant Party,Foreign/Occupied,Indirect Military,Military,Military-Personal,Monarchy,Oligarchy,Party-Personal,Presidential Democracy,Provisional - Civilian
75,2.0,USA,Obama,2016.0,1.0,1,Presidential Democracy,0.0,0.0,0.0,0.0,3.663562,7.909489,22016.0,False,False,81.1,1.567215,0.075769,0.09245,26.514001,19.023342,0.716669,0,0,0,0,0,0,0,0,1,0


In [78]:
US_2018

Unnamed: 0,ccode,country,leader,year,month,male,government,irreg_lead_ant,election_recent,victory_recent,nochange_recent,lastelection,irregular,yearcode,coupyear,coupsuc,"Life expectancy at birth, female (years)",GDP growth (annual %),Mineral rents (% of GDP),Oil rents (% of GDP),Trade (% of GDP),Population ages 0-14 (% of total population),Population growth (annual %),Dominant Party,Foreign/Occupied,Indirect Military,Military,Military-Personal,Monarchy,Oligarchy,Party-Personal,Presidential Democracy,Provisional - Civilian
78,2.0,USA,Trump,2018.0,1.0,1,Presidential Democracy,0.0,0.0,0.0,0.0,2.70805,7.918265,22018.0,False,False,81.1,2.927323,,,27.543903,18.70904,0.522337,0,0,0,0,0,0,0,0,1,0


In [82]:
us_j2020d

Unnamed: 0,year,month,victory_recent,defeat_recent,change_recent,nochange_recent,delayed,lastelection,loss,irregular,prev_conflict,pt_suc,pt_attempt
857,2020.0,6.0,0.0,0.0,0.0,0.0,0.0,3.78419,3.78419,7.928766,2.0,0.0,0.0


In [83]:
# Hand-entered covariates for the 2020 USA row, set in one step so a fresh
# top-to-bottom run reproduces the recorded final frame (same columns, same order).
# Value provenance, as far as the displayed US_2016 / US_2018 rows show:
#   - life expectancy 81.1 matches the 2016-2018 rows;
#   - mineral and oil rents match the 2016 row (both are NaN in the 2018 row);
#   - trade, population 0-14 and population growth match the 2018 row;
#   - GDP growth -5.0 matches no displayed row (presumably a manual 2020 estimate
#     -- TODO confirm the source).
us_2020_covariates = {
    'Life expectancy at birth, female (years)': 81.1,
    'GDP growth (annual %)': -5.0,
    'Mineral rents (% of GDP)': 0.075769,
    'Oil rents (% of GDP)': 0.09245,
    'Trade (% of GDP)': 27.543903,
    'Population ages 0-14 (% of total population)': 18.70904,
    'Population growth (annual %)': 0.522337,
}
# Government-type indicator columns. 'Military', 'Oligarchy' and 'Party-Personal'
# appear in the recorded final frame but had no cell creating them, so they are
# set here explicitly. Only 'Presidential Democracy' is 1 for this row.
us_2020_government_dummies = {
    'Dominant Party': 0,
    'Foreign/Occupied': 0,
    'Indirect Military': 0,
    'Military': 0,
    'Military-Personal': 0,
    'Monarchy': 0,
    'Oligarchy': 0,
    'Party-Personal': 0,
    'Presidential Democracy': 1,
    'Provisional - Civilian': 0,
}
# assign() appends new columns in dict order and overwrites existing ones in
# place, so re-running this cell is harmless.
us_j2020d = us_j2020d.assign(**us_2020_covariates, **us_2020_government_dummies)

In [109]:
# Remove 'month' from the feature row. errors='ignore' keeps the cell re-runnable:
# without it a second execution raises KeyError because the column is already gone.
us_j2020d = us_j2020d.drop(columns='month', errors='ignore')

In [107]:
# Intercept column for the feature row.
# NOTE(review): the feature lists shown elsewhere in this notebook spell this
# column 'constant' (lower case) -- confirm which spelling the model expects.
us_j2020d['Constant'] = 1

In [111]:
# Persist the hand-built 2020 USA feature row.
us_j2020d.to_pickle('../data/updatedus2020.pkl')

In [None]:
['year', 'male', 'irreg_lead_ant', 'election_recent', 'victory_recent',
       'nochange_recent', 'lastelection', 'irregular', 'yearcode',
       'Dominant Party', 'Foreign/Occupied', 'Indirect Military', 'Military',
       'Military-Personal', 'Monarchy', 'Oligarchy', 'Party-Personal',
       'Presidential Democracy', 'Provisional - Civilian',
       'Life expectancy at birth, female (years)', 'GDP growth (annual %)',
       'Mineral rents (% of GDP)', 'Oil rents (% of GDP)', 'Trade (% of GDP)',
       'Population ages 0-14 (% of total population)',
       'Population growth (annual %)', 'constant']

In [112]:
us_j2020d 

Unnamed: 0,year,victory_recent,defeat_recent,change_recent,nochange_recent,delayed,lastelection,loss,irregular,prev_conflict,pt_suc,pt_attempt,"Life expectancy at birth, female (years)",GDP growth (annual %),Mineral rents (% of GDP),Oil rents (% of GDP),Trade (% of GDP),Population ages 0-14 (% of total population),Population growth (annual %),Dominant Party,Foreign/Occupied,Indirect Military,Military,Military-Personal,Monarchy,Oligarchy,Party-Personal,Presidential Democracy,Provisional - Civilian,Constant
857,2020.0,0.0,0.0,0.0,0.0,0.0,3.78419,3.78419,7.928766,2.0,0.0,0.0,81.1,-5.0,0.075769,0.09245,27.543903,18.70904,0.522337,0,0,0,0,0,0,0,0,1,0,1
