In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import linear_model
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import explained_variance_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import eli5
from eli5.sklearn import PermutationImportance
import pickle

In [2]:
def loadPickle(name):
    return pickle.load(open('pickles/{}.pickle'.format(name),'rb'))


def reWriteTicks(ticks):
    for i in range(len(ticks)):
        tick = ticks[i].split('_')
        if len(tick)==2:
            supportType = tick[0]
            if supportType == 'ds':
                ticks[i] = tick[1].capitalize()+' ({})'.format('Df')
            elif supportType =='s':
                ticks[i] = tick[1].capitalize()+' ({})'.format('A')
        elif ticks[i]=='domsup':
            ticks[i]='Domestic'
    return ticks


new_headers = loadPickle('new_headers')

In [3]:
fname='regression_v.csv'
headers=open(fname,'r').readline().strip().split(',')
n=len(headers)

df = pd.read_csv(fname,low_memory=False,usecols=[i for i in range(0,n)],header=None)
df = df.replace('',np.nan, regex=True)

df.dropna(how='any', inplace=True)

In [4]:
headers

['0.8666666666666666',
 '1.0',
 '0.8666666666666666',
 '0.8666666666666666',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '0.13333333333333333',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '0.13333333333333333',
 '0.13333333333333333',
 '1.0',
 '1.0',
 '0.29411764705882354',
 '0.47058823529411764',
 '0.2222222222222222',
 '0.2777777777777778',
 '1',
 '9']

In [5]:
X = df.drop(23, axis=1)
X=(X-X.min())/(X.max()-X.min())


y = df[23]
maxYear = y.max()
minYear = y.min()
y=(y-y.min())/(y.max()-y.min())

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [6]:
reg = linear_model.BayesianRidge()
reg.fit(X, y)

BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True,
       fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
       normalize=False, tol=0.001, verbose=False)

In [15]:
perm = PermutationImportance(reg).fit(X, y)

In [17]:
df.columns.tolist()[0:23]

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22]

In [44]:
new_headers = reWriteTicks(new_headers[0:23])
eli5.explain_weights(perm,feature_names=new_headers)

Weight,Feature
0.4928  ± 0.1043,Year from BRD
0.0609  ± 0.0193,Traincamp (Df)
0.0568  ± 0.0176,Objective (De-facto Support)
0.0548  ± 0.0130,Training (Df)
0.0236  ± 0.0125,Safemem (A)
0.0190  ± 0.0070,Safelead (A)
0.0145  ± 0.0060,Domestic
0.0122  ± 0.0121,Finaid (Df)
0.0120  ± 0.0107,Finaid (A)
0.0099  ± 0.0086,Troop (A)


In [45]:
reg

BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True,
       fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
       normalize=False, tol=0.001, verbose=False)

In [43]:
new_headers

['Safemem (A)',
 'Safelead (A)',
 'Headquar (A)',
 'Traincamp (A)',
 'Training (A)',
 'Weaponlog (A)',
 'Finaid (A)',
 'Transport (A)',
 'Troop (A)',
 'Safemem (Df)',
 'Safelead (Df)',
 'Headquar (Df)',
 'Traincamp (Df)',
 'Training (Df)',
 'Weaponlog (Df)',
 'Finaid (Df)',
 'Transport (Df)',
 'Domestic',
 'Ideology (Active Support)',
 'Ideology (De-facto Support)',
 'Objective (Active Support)',
 'Objective (De-facto Support)',
 'Year from BRD']

In [139]:
y_predict = reg.predict(X)

In [142]:
mse = mean_squared_error(y,y_predict)
var_error = explained_variance_score(y,y_predict)

print(mse,var_error)

0.0208892266061 0.531993726023


In [125]:
def predictThrivingYears(x,y,z,a,b,c): 
    activeSupportDeprivation = x
    defactoSupportDeprivation = y
    domesticSupportDeprivation = z
    Support_Ideology = a
    Support_Objective = b
    BRD_After_Years = (c - minYear)/(maxYear-minYear)


    temp = [activeSupportDeprivation]*9 + [defactoSupportDeprivation]*8 + [domesticSupportDeprivation]
    temp = temp + [Support_Ideology]*2 + [Support_Objective]*2 +[BRD_After_Years]
    
    year = reg.predict([temp])*(maxYear-minYear)+minYear
    year = year[0] if year[0]>=0 else 0
    
    print("Predicted thriving years: ",year)

In [126]:
# Test-cases - A

x = 1 # Active Support Deprivation
y = 1 # DeFacto Support Deprivation
z = 1 # Domestic Support Deprivation
a = 0.5 # Probability for receving support based upon ideology
b = 0.5 # Probability for receiving support based upon objective
c = 4 # Years after 25 BRD took place

predictThrivingYears(x,y,z,a,b,c)

Predicted thriving years:  8.51959767909


In [127]:
# Test-cases - B

x = 0 # Active Support Deprivation
y = 0 # DeFacto Support Deprivation
z = 0 # Domestic Support Deprivation
a = 0.5 # Probability for receving support based upon ideology
b = 0.5 # Probability for receiving support based upon objective
c = 4 # Years after 25 BRD took place

predictThrivingYears(x,y,z,a,b,c)

Predicted thriving years:  16.7682170492


In [128]:
# Test-cases - C

x = 1 # Active Support Deprivation
y = 1 # DeFacto Support Deprivation
z = 1 # Domestic Support Deprivation
a = 0 # Probability for receving support based upon ideology
b = 0 # Probability for receiving support based upon objective
c = 4 # Years after 25 BRD took place

predictThrivingYears(x,y,z,a,b,c)

Predicted thriving years:  0.333873342085


In [129]:
# Test-cases - D

x = 1 # Active Support Deprivation
y = 1 # DeFacto Support Deprivation
z = 1 # Domestic Support Deprivation
a = 0 # Probability for receving support based upon ideology
b = 0 # Probability for receiving support based upon objective
c = 0 # Years after 25 BRD took place

predictThrivingYears(x,y,z,a,b,c)

Predicted thriving years:  0


In [131]:
# Test-cases - E

x = 0 # Active Support Deprivation
y = 0 # DeFacto Support Deprivation
z = 0 # Domestic Support Deprivation
a = 0 # Probability for receving support based upon ideology
b = 0 # Probability for receiving support based upon objective
c = 0 # Years after 25 BRD took place

predictThrivingYears(x,y,z,a,b,c)

Predicted thriving years:  6.5423338438


In [132]:
# Test-cases - F

x = 1 # Active Support Deprivation
y = 1 # DeFacto Support Deprivation
z = 1 # Domestic Support Deprivation
a = 0.99 # Probability for receving support based upon ideology
b = 0.99 # Probability for receiving support based upon objective
c = 1 # Years after 25 BRD took place

predictThrivingYears(x,y,z,a,b,c)

Predicted thriving years:  15.0114883781


In [144]:
# Test-cases - G

x = 0 # Active Support Deprivation
y = 0 # DeFacto Support Deprivation
z = 0 # Domestic Support Deprivation
a = 1 # Probability for receving support based upon ideology
b = 1 # Probability for receiving support based upon objective
c = 21 # Years after 25 BRD took place

predictThrivingYears(x,y,z,a,b,c)

Predicted thriving years:  33.6246165768
