In [1]:
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn import metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, r2_score
from sklearn.utils import resample
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
from statsmodels.regression.linear_model import OLS
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.inspection import plot_partial_dependence
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999

In [3]:
def upsampler(X_train, y_train, target = 'pt_attempt'):
    '''
    Args: X_train and y_train
    Optional: what is the target
    Returns: y_train, and X_train with the target rows sampled with replacement to equal 
    the number of non-target rows (makes X_train much bigger)
    '''
    y_train = pd.Series(y_train)
    
    X = pd.concat([X_train, y_train], axis=1) 
    no_coup = X[X[target]==0]
    coup = X[X[target]==1]
    coups_upsampled = resample(coup,
                          replace=True, # sample with replacement
                          n_samples=len(no_coup), # match number in majority class
                          random_state=29)
    upsampled = pd.concat([no_coup, coups_upsampled])
    y_up = upsampled[target]
    X_up = upsampled.drop(target, axis = 1)
    return X_up, y_up

def metric_test(model, X_test, y_test):
    '''
    Prints out the accuracy, recall, precision, and f1 score for the 
    fit model when it predicts on the test data
    '''
    preds = model.predict(X_test)
    print('accuracy = ' + str(accuracy_score(y_test, preds)))
    print('recall = ' + str(recall_score(y_test, preds)))
    print('precision = ' + str(precision_score(y_test, preds)))
    print('f1 score = ' + str(f1_score(y_test, preds)))
    
def get_feature_weights(model, feature_labels):
    '''
    returns coefficients for features in a model (intended for logistic regression) 
    args: model, feature_labels
    returns: a sorted series in ascending order of feature weights.
    '''
    d_log_vals = {}
    for idx, feat in enumerate(model.coef_[0]):
        d_log_vals[feature_labels[idx]] = feat  
    s_log_vals = (pd.Series(d_log_vals)).sort_values()
    return s_log_vals

In [8]:
def prep_for_pipeline(reign_df):
    dummies = pd.get_dummies(reign_df['government'])
    df_dumb = reign_df.join(dummies)
    df_dumb['pt_attempt'] = df_dumb['coupyear']
    df_dumb['pt_suc'] = df_dumb['coupsuc']
    df = df_dumb.drop(['ccode', 'country', 'leader', 'month', 'government', 'coupyear', 'coupsuc', 'yearcode'], axis = 1)
    return df

In [9]:
no_na_df_ready = prep_for_pipeline(no_na_df)

In [10]:
no_na_df_ready

Unnamed: 0,year,elected,age,male,militarycareer,tenure_months,anticipation,ref_ant,leg_ant,exec_ant,irreg_lead_ant,election_now,election_recent,leg_recent,exec_recent,lead_recent,ref_recent,direct_recent,indirect_recent,victory_recent,defeat_recent,change_recent,nochange_recent,delayed,lastelection,loss,irregular,prev_conflict,precip,risk,"Life expectancy at birth, female (years)",GDP growth (annual %),Mineral rents (% of GDP),Oil rents (% of GDP),Trade (% of GDP),Rural population (% of total population),Dominant Party,Foreign/Occupied,Indirect Military,Military,Military-Personal,Monarchy,Oligarchy,Parliamentary Democracy,Party-Military,Party-Personal,Party-Personal-Military Hybrid,Personal Dictatorship,Presidential Democracy,Provisional - Civilian,Provisional - Military,Warlordism,pt_attempt,pt_suc
28,1975.0,0.0,62.0,1,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.295837,4.317488,7.710205,0.0,-0.290521,0.364317,76.600,-0.205464,0.191036,1.384686,15.516374,26.347,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,False,False
29,1976.0,0.0,63.0,1,0.0,18.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.663562,4.465908,7.715570,0.0,0.216198,0.371216,76.800,5.388139,0.116460,1.364022,16.048846,26.337,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,False,False
30,1977.0,0.0,64.0,1,0.0,30.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.098612,1.098612,7.720905,0.0,-0.830674,0.331233,77.200,4.624159,0.152244,1.440894,16.417895,26.327,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,False,False
31,1977.0,1.0,53.0,1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.098612,1.098612,7.720905,0.0,-0.830674,0.311463,77.200,4.624159,0.152244,1.440894,16.417895,26.327,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,False,False
32,1978.0,1.0,54.0,1,0.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.708050,2.708050,7.726213,0.0,0.267359,0.377282,77.300,5.535303,0.064704,1.392541,16.972834,26.318,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11117,2013.0,0.0,61.0,1,0.0,26.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.240276,6.240276,6.240276,0.0,0.277648,0.235936,72.282,-3.120604,0.000000,0.000000,82.932608,76.618,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,False,False
11118,2014.0,0.0,62.0,1,0.0,38.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.263398,6.263398,6.263398,0.0,0.322612,0.253491,72.363,2.072607,0.000000,0.000000,74.240186,76.672,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,False,False
11119,2015.0,0.0,74.0,1,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.285998,6.285998,6.285998,0.0,-0.143210,0.353019,72.449,3.711847,0.000000,0.000000,81.744735,76.725,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,False,False
11120,2016.0,0.0,75.0,1,0.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.308098,6.308098,6.308098,0.0,-0.391420,0.347084,72.544,3.379474,0.000000,0.000000,94.783586,76.778,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,False,False


In [11]:
clf = RandomForestClassifier(max_depth=5, n_estimators=1000)

In [12]:
rfpipe = Pipeline([('scaler', StandardScaler()),('rf', clf)])

In [14]:
y = no_na_df_ready['pt_attempt']
X = no_na_df_ready.drop(['pt_attempt','pt_suc'], axis = 1)

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= .25, random_state= 40, stratify = y)

In [16]:
X_up, y_up = upsampler(X_train, y_train)

In [17]:
rfpipe.fit(X_up, y_up)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('rf',
                 RandomForestClassifier(bootstrap=True, class_weight=None,
                                        criterion='gini', max_depth=5,
                                        max_features='auto',
                                        max_leaf_nodes=None,
                                        min_impurity_decrease=0.0,
                                        min_impurity_split=None,
                                        min_samples_leaf=1, min_samples_split=2,
                                        min_weight_fraction_leaf=0.0,
                                        n_estimators=1000, n_jobs=None,
                                        oob_score=False, random_state=None,
                                        verbose=0, warm_start=False))],
         verbose=False)

In [19]:
metric_test(rfpipe, X_train, y_train)

accuracy = 0.8319050758075148
recall = 0.9606299212598425
precision = 0.1383219954648526
f1 score = 0.24182358771060455


In [18]:
metric_test(rfpipe, X_test, y_test)

accuracy = 0.8279499011206328
recall = 0.6666666666666666
precision = 0.10181818181818182
f1 score = 0.17665615141955834


In [32]:
lasso = LogisticRegressionCV(
        cv=5, dual=False,
        penalty='l1', 
        scoring='recall',
        solver='saga', 
        n_jobs = 2,
        tol=0.0001,
        max_iter=1000,)

In [33]:
e_l_pipe = Pipeline([('scaler', StandardScaler()),('el', lasso)])

In [34]:
e_l_pipe.fit(X_up, y_up)



Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('el',
                 LogisticRegressionCV(Cs=10, class_weight=None, cv=5,
                                      dual=False, fit_intercept=True,
                                      intercept_scaling=1.0, l1_ratios=None,
                                      max_iter=1000, multi_class='warn',
                                      n_jobs=2, penalty='l1', random_state=None,
                                      refit=True, scoring='recall',
                                      solver='saga', tol=0.0001, verbose=0))],
         verbose=False)

In [35]:
metric_test(e_l_pipe, X_test, y_test)

accuracy = 0.7864205669083718
recall = 0.6428571428571429
precision = 0.08035714285714286
f1 score = 0.14285714285714285


In [36]:
get_feature_weights(lasso, X.columns)

GDP growth (annual %)                      -0.915417
leg_recent                                 -0.804036
Party-Personal-Military Hybrid             -0.783097
election_recent                            -0.662122
Foreign/Occupied                           -0.581966
Monarchy                                   -0.539572
year                                       -0.526401
indirect_recent                            -0.442937
Oligarchy                                  -0.411716
irregular                                  -0.396590
leg_ant                                    -0.312316
Party-Personal                             -0.283001
Trade (% of GDP)                           -0.252938
Warlordism                                 -0.221285
male                                       -0.199229
Life expectancy at birth, female (years)   -0.141908
exec_ant                                   -0.132818
nochange_recent                            -0.121702
precip                                     -0.

In [37]:

zero_fill_df_ready = prep_for_pipeline(zero_fill_df)

In [38]:
y = zero_fill_df_ready['pt_attempt']
X = zero_fill_df_ready.drop(['pt_attempt','pt_suc'], axis = 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= .25, random_state= 40, stratify = y)
X_up, y_up = upsampler(X_train, y_train)

In [39]:
rfpipe = Pipeline([('scaler', StandardScaler()),('rf', clf)])
rfpipe.fit(X_up, y_up)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('rf',
                 RandomForestClassifier(bootstrap=True, class_weight=None,
                                        criterion='gini', max_depth=5,
                                        max_features='auto',
                                        max_leaf_nodes=None,
                                        min_impurity_decrease=0.0,
                                        min_impurity_split=None,
                                        min_samples_leaf=1, min_samples_split=2,
                                        min_weight_fraction_leaf=0.0,
                                        n_estimators=1000, n_jobs=None,
                                        oob_score=False, random_state=None,
                                        verbose=0, warm_start=False))],
         verbose=False)

In [40]:
metric_test(rfpipe, X_test, y_test)

accuracy = 0.8010939830929885
recall = 0.5714285714285714
precision = 0.0784313725490196
f1 score = 0.13793103448275862


In [41]:
lasso = LogisticRegressionCV(
        cv=5, dual=False,
        penalty='l1', 
        scoring='recall',
        solver='saga', 
        n_jobs = 2,
        tol=0.0001,
        max_iter=1000,)

e_l_pipe = Pipeline([('scaler', StandardScaler()),('el', lasso)])

e_l_pipe.fit(X_up, y_up)

metric_test(e_l_pipe, X_test, y_test)



accuracy = 0.7802088513177524
recall = 0.6607142857142857
precision = 0.08043478260869565
f1 score = 0.1434108527131783


In [42]:
get_feature_weights(lasso, X.columns)

year                                       -0.529019
GDP growth (annual %)                      -0.432046
Foreign/Occupied                           -0.402644
Monarchy                                   -0.374214
Trade (% of GDP)                           -0.335679
election_recent                            -0.281104
indirect_recent                            -0.252245
victory_recent                             -0.233445
Oligarchy                                  -0.224486
male                                       -0.205669
leg_ant                                    -0.163296
Life expectancy at birth, female (years)   -0.147792
irregular                                  -0.129507
militarycareer                             -0.107997
precip                                     -0.065849
Parliamentary Democracy                    -0.059299
Party-Personal                             -0.053660
Presidential Democracy                     -0.047902
Oil rents (% of GDP)                       -0.

In [46]:
no_na_df_ready_bare = no_na_df_ready.drop('risk', axis =1)

In [47]:
no_na_df_ready_bare
y = no_na_df_ready_bare['pt_attempt']
X = no_na_df_ready_bare.drop(['pt_attempt','pt_suc'], axis = 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= .25, random_state= 40, stratify = y)
X_up, y_up = upsampler(X_train, y_train)

In [48]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= .25, random_state= 40, stratify = y)
X_up, y_up = upsampler(X_train, y_train)

In [49]:
rfpipe.fit(X_up, y_up)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('rf',
                 RandomForestClassifier(bootstrap=True, class_weight=None,
                                        criterion='gini', max_depth=5,
                                        max_features='auto',
                                        max_leaf_nodes=None,
                                        min_impurity_decrease=0.0,
                                        min_impurity_split=None,
                                        min_samples_leaf=1, min_samples_split=2,
                                        min_weight_fraction_leaf=0.0,
                                        n_estimators=1000, n_jobs=None,
                                        oob_score=False, random_state=None,
                                        verbose=0, warm_start=False))],
         verbose=False)

In [50]:
metric_test(rfpipe, X_test, y_test)

accuracy = 0.8259723137771918
recall = 0.6428571428571429
precision = 0.09782608695652174
f1 score = 0.169811320754717


In [51]:
lasso = LogisticRegressionCV(
        cv=5, dual=False,
        penalty='l1', 
        scoring='recall',
        solver='saga', 
        n_jobs = 2,
        tol=0.0001,
        max_iter=1000,)

e_l_pipe = Pipeline([('scaler', StandardScaler()),('el', lasso)])

e_l_pipe.fit(X_up, y_up)

metric_test(e_l_pipe, X_test, y_test)



accuracy = 0.7982860909690178
recall = 0.6904761904761905
precision = 0.09006211180124224
f1 score = 0.15934065934065933


In [52]:
get_feature_weights(lasso, X.columns)

GDP growth (annual %)                      -0.777050
irregular                                  -0.725310
Monarchy                                   -0.436633
year                                       -0.434318
Party-Personal-Military Hybrid             -0.372051
leg_recent                                 -0.363413
election_recent                            -0.334976
Trade (% of GDP)                           -0.275109
Party-Personal                             -0.222480
Life expectancy at birth, female (years)   -0.205013
Foreign/Occupied                           -0.187896
Oligarchy                                  -0.182930
Dominant Party                             -0.174464
male                                       -0.167501
indirect_recent                            -0.149061
leg_ant                                    -0.136158
Warlordism                                 -0.091412
precip                                     -0.091344
Parliamentary Democracy                    -0.

In [55]:
df_expanded = pd.read_pickle('../data/pickles/late_joined_expanded_no_na.pkl')

In [56]:
df_expanded

Unnamed: 0,ccode,country,leader,year,month,elected,age,male,militarycareer,tenure_months,government,anticipation,ref_ant,leg_ant,exec_ant,irreg_lead_ant,election_now,election_recent,leg_recent,exec_recent,lead_recent,ref_recent,direct_recent,indirect_recent,victory_recent,defeat_recent,change_recent,nochange_recent,delayed,lastelection,loss,irregular,prev_conflict,precip,yearcode,coupyear,coupsuc,risk,"Life expectancy at birth, female (years)",GDP growth (annual %),Mineral rents (% of GDP),Oil rents (% of GDP),Trade (% of GDP),"Foreign direct investment, net inflows (% of GDP)",Natural gas rents (% of GDP),Population ages 0-14 (% of total population),Rural population (% of total population),Population growth (annual %)
28,2.0,USA,Ford,1975.0,1.0,0.0,62.0,1,0.0,6.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.295837,4.317488,7.710205,0.0,-0.290521,21975.0,False,False,0.364317,76.600,-0.205464,0.191036,1.384686,15.516374,0.137100,0.843346,25.005105,26.347,0.985986
29,2.0,USA,Ford,1976.0,1.0,0.0,63.0,1,0.0,18.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.663562,4.465908,7.715570,0.0,0.216198,21976.0,False,False,0.371216,76.800,5.388139,0.116460,1.364022,16.048846,0.155332,0.823074,24.546818,26.337,0.950220
30,2.0,USA,Ford,1977.0,1.0,0.0,64.0,1,0.0,30.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.098612,1.098612,7.720905,0.0,-0.830674,21977.0,False,False,0.331233,77.200,4.624159,0.152244,1.440894,16.417895,0.140261,0.809130,24.050240,26.327,1.005772
31,2.0,USA,Carter,1977.0,1.0,1.0,53.0,1,0.0,1.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.098612,1.098612,7.720905,0.0,-0.830674,21977.0,False,False,0.311463,77.200,4.624159,0.152244,1.440894,16.417895,0.140261,0.809130,24.050240,26.327,1.005772
32,2.0,USA,Carter,1978.0,1.0,1.0,54.0,1,0.0,13.0,Presidential Democracy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.708050,2.708050,7.726213,0.0,0.267359,21978.0,False,False,0.377282,77.300,5.535303,0.064704,1.392541,16.972834,0.233883,0.708539,23.545173,26.318,1.059573
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11117,955.0,Tonga,Tu'ivakano,2013.0,1.0,0.0,61.0,1,0.0,26.0,Monarchy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.240276,6.240276,6.240276,0.0,0.277648,9552013.0,False,False,0.235936,72.282,-3.120604,0.000000,0.000000,82.932608,1.391103,0.000000,36.850483,76.618,-0.947661
11118,955.0,Tonga,Tu'ivakano,2014.0,1.0,0.0,62.0,1,0.0,38.0,Monarchy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.263398,6.263398,6.263398,0.0,0.322612,9552014.0,False,False,0.253491,72.363,2.072607,0.000000,0.000000,74.240186,2.982906,0.000000,36.635388,76.672,-0.729801
11119,955.0,Tonga,Pohiva,2015.0,1.0,0.0,74.0,1,0.0,2.0,Monarchy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.285998,6.285998,6.285998,0.0,-0.143210,9552015.0,False,False,0.353019,72.449,3.711847,0.000000,0.000000,81.744735,2.881435,0.000000,36.406664,76.725,-0.244786
11120,955.0,Tonga,Pohiva,2016.0,1.0,0.0,75.0,1,0.0,14.0,Monarchy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.308098,6.308098,6.308098,0.0,-0.391420,9552016.0,False,False,0.347084,72.544,3.379474,0.000000,0.000000,94.783586,1.427198,0.000000,36.130640,76.778,0.348664


In [57]:
df_expanded_ready = prep_for_pipeline(df_expanded)

In [58]:
no_na_df_ready_bare
y = df_expanded_ready['pt_attempt']
X = df_expanded_ready.drop(['pt_attempt','pt_suc'], axis = 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= .25, random_state= 40, stratify = y)
X_up, y_up = upsampler(X_train, y_train)

In [59]:
e_l_pipe.fit(X_up, y_up)
rfpipe.fit(X_up, y_up)



Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('rf',
                 RandomForestClassifier(bootstrap=True, class_weight=None,
                                        criterion='gini', max_depth=5,
                                        max_features='auto',
                                        max_leaf_nodes=None,
                                        min_impurity_decrease=0.0,
                                        min_impurity_split=None,
                                        min_samples_leaf=1, min_samples_split=2,
                                        min_weight_fraction_leaf=0.0,
                                        n_estimators=1000, n_jobs=None,
                                        oob_score=False, random_state=None,
                                        verbose=0, warm_start=False))],
         verbose=False)

In [60]:
metric_test(e_l_pipe, X_test, y_test)


accuracy = 0.7832880434782609
recall = 0.6428571428571429
precision = 0.08157099697885196
f1 score = 0.1447721179624665


In [61]:
metric_test(rfpipe, X_test, y_test)

accuracy = 0.828125
recall = 0.6904761904761905
precision = 0.10780669144981413
f1 score = 0.1864951768488746


In [None]:
accuracy = 0.7982860909690178
recall = 0.6904761904761905
precision = 0.09006211180124224
f1 score = 0.15934065934065933


In [None]:
accuracy = 0.8017241379310345
recall = 0.7380952380952381
precision = 0.09717868338557993
f1 score = 0.17174515235457063

In [62]:
get_feature_weights(lasso, X.columns)

election_recent                                     -0.856256
leg_recent                                          -0.637179
Monarchy                                            -0.600438
GDP growth (annual %)                               -0.599766
Foreign/Occupied                                    -0.541078
Life expectancy at birth, female (years)            -0.400319
Foreign direct investment, net inflows (% of GDP)   -0.358235
ref_recent                                          -0.346892
year                                                -0.301419
Oligarchy                                           -0.300346
Provisional - Military                              -0.269146
indirect_recent                                     -0.266642
lead_recent                                         -0.234602
Oil rents (% of GDP)                                -0.222506
irregular                                           -0.210719
Party-Personal                                      -0.205531
Indirect

In [None]:
accuracy = 0.7982860909690178
recall = 0.6904761904761905
precision = 0.09006211180124224
f1 score = 0.15934065934065933
