In [1]:
import numpy as np

In [2]:
from evaluation import *
from load_data import *

# Training SVM

In [3]:
from sklearn import svm
import matplotlib.pyplot as plt

In [4]:
# Load the train/test splits through load_adult_data in its SVM mode with a fixed seed.
# NOTE(review): xs_tr / xs_te are presumably the sensitive-attribute columns consumed by
# the fairness metrics further down — confirm in load_data.
X_tr, X_te, y_tr, y_te, xs_tr, xs_te = load_adult_data(svm=True,random_state=42)

for svm min:  -1.0 and max:  1.0


In [5]:
# Fit a linear-kernel support vector classifier on the training split.
clf = svm.SVC(kernel='linear')
clf.fit(X_tr,y_tr)

SVC(kernel='linear')

## Principal model parameters

In [68]:
import os
import pickle

In [69]:
# Support vectors retained by the fitted classifier (one row per support vector).
svs = clf.support_vectors_

In [71]:
# Display the support-vector array.
svs

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]])

In [17]:
# Dual coefficients of the support vectors as stored by scikit-learn.
alphas = clf.dual_coef_

In [19]:
# Kernel name the classifier was constructed with ('linear' above).
kern = clf.kernel

'linear'

In [72]:
# Inspect the gamma value stored on the fitted estimator.
# NOTE(review): `_gamma` is a private scikit-learn attribute and may change between versions.
clf._gamma

0.17936778824511002

In [97]:
def save_svm(clf, save_dir = '', filename = 'svm_model'):
    """Pickle the parameters needed to evaluate a fitted SVM without scikit-learn.

    Parameters
    ----------
    clf : fitted estimator
        Object exposing ``support_vectors_``, ``dual_coef_``, ``kernel``,
        ``_gamma``, ``coef0``, ``degree`` and ``intercept_``.
    save_dir : str, optional
        Directory to write into. It is created when it does not exist yet.
        The default empty string yields a path relative to the current
        working directory.
    filename : str, optional
        File name without extension; ``'.sm'`` is appended.

    Returns
    -------
    str
        Path of the file that was written, ready to be passed to ``load_svm``.
    """
    params = {
        'sv': clf.support_vectors_,
        'alpha': clf.dual_coef_,
        'kernel': clf.kernel,
        # NOTE(review): `_gamma` is a private attribute of the estimator.
        'gamma': clf._gamma,
        'coef0': clf.coef0,
        'degree': clf.degree,
        'intercept': clf.intercept_,
    }

    # os.makedirs('') raises, so only create a directory when one was given.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    save_path = os.path.join(save_dir, filename + '.sm')
    with open(save_path, 'wb') as f:
        pickle.dump(params, f)
    return save_path

def load_svm(save_path):
    """Return the object unpickled from ``save_path``.

    Intended for files produced by ``save_svm``, in which case the result is
    the dictionary of SVM parameters that function stored.
    """
    # Unpickling can execute arbitrary code: only open files you trust.
    with open(save_path, 'rb') as handle:
        return pickle.load(handle)

In [98]:
# Persist the fitted model's parameters using the default directory and file name.
save_svm(clf)

In [99]:
# Read the parameters back; the path matches save_svm's default file name plus '.sm'.
sm = load_svm('svm_model.sm')

In [101]:
# Display the reloaded parameter dictionary to check the round trip.
sm

{'sv': array([[1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.]]),
 'alpha': array([[-1., -1., -1., ...,  1.,  1.,  1.]]),
 'kernel': 'linear',
 'gamma': 0.17936778824511002,
 'coef0': 0.0,
 'degree': 3,
 'intercept': array([-3.42748711])}

# Testing the SVM model

## Kernel functions

In [108]:
from kernel_fns import calculate_kernel

In [109]:
# Smoke test of the kernel helper on the first 100 training rows, called with a single
# argument so the helper's own defaults decide the second input and the kernel type.
calculate_kernel(X_tr[:100])

array([[6.98926816, 5.99435584, 3.78842101, ..., 3.83743834, 6.91666771,
        2.70333438],
       [5.99435584, 7.23222283, 2.7795262 , ..., 2.82610404, 5.91997031,
        2.70663698],
       [3.78842101, 2.7795262 , 6.63452922, ..., 4.67653419, 3.73561552,
        2.55783774],
       ...,
       [3.83743834, 2.82610404, 4.67653419, ..., 6.72317124, 3.78557111,
        2.59001556],
       [6.91666771, 5.91997031, 3.73561552, ..., 3.78557111, 6.85917091,
        2.64583758],
       [2.70333438, 2.70663698, 2.55783774, ..., 2.59001556, 2.64583758,
        6.50361536]])

## Prediction phase

In [107]:
# Reference predictions from the scikit-learn SVC fitted above; used below to
# validate the hand-written predict_svm.
pred_ = clf.predict(X_te)

In [102]:
def predict_svm(X_te, Xxv, alpha, intercept, kernel, **kwds):
    """Predict binary labels in {-1, +1} from stored SVM parameters.

    Parameters
    ----------
    X_te : array-like
        Points to classify.
    Xxv : array-like
        Support vectors.
    alpha : array-like
        Dual coefficients; flattened before use, so a ``(1, n_sv)`` array as
        stored by ``save_svm`` is accepted.
    intercept : float or array-like
        Bias term added to the kernel expansion.
    kernel : str
        Kernel name forwarded to ``calculate_kernel``.
    **kwds
        Extra kernel parameters forwarded unchanged to ``calculate_kernel``.

    Returns
    -------
    numpy.ndarray
        Flat array holding ``1.0`` or ``-1.0`` for each test point.
    """
    # NOTE(review): assumes calculate_kernel returns shape (n_sv, n_test) — confirm in kernel_fns.
    Ktest = calculate_kernel(Xxv, X_te, kernel, **kwds)
    dec_eval = Ktest.T.dot(np.asarray(alpha).flatten()) + intercept
    # np.sign would emit 0 for a point lying exactly on the hyperplane, which is
    # not a valid label. Threshold explicitly instead; ties go to +1, which is how
    # libsvm (as wrapped by scikit-learn) resolves a zero decision value.
    return np.where(dec_eval >= 0, 1.0, -1.0).flatten()

In [103]:
# Predict on the test split using only the parameters reloaded from disk.
pred = predict_svm(X_te, sm['sv'],sm['alpha'],sm['intercept'],sm['kernel'])

In [106]:
# Number of test points on which the hand-written prediction agrees with scikit-learn.
# np.count_nonzero counts in C rather than iterating the boolean array in Python.
np.count_nonzero(pred == pred_)

13567

# Evaluation

In [146]:
from fair_eval import calculate_prule_clf, calculate_odds_clf, calculate_parity_reg, calculate_group_loss

In [147]:
# Print disparate impact plus misclassification / false-positive / false-negative
# rate disparities for the hand-written predictions (see the output below).
calculate_prule_clf(pred,y_te,xs_te)

disparate impact:  27.760537158480396
disparate misclassification rate:  35.88339128583593
disparate false positive rate: 19.91285878320219
disparate false negative rate: 81.60141062705613


In [148]:
# Print the equalized-opportunity figure for each label value (see the output below).
calculate_odds_clf(pred,y_te,xs_te)

equalized opportunity for -1.0 : 19.91285878320219
equalized opportunity for 1.0 : 84.69135418448762


In [149]:
# Print the parity figure at the threshold the helper reports (see the output below).
calculate_parity_reg(pred,y_te,xs_te)

disparate parity for threshold -0.5108719687476966: 27.760537158480396


In [150]:
# Print the loss for each group.
# NOTE(review): l2_loss is not imported by name anywhere in this notebook; presumably
# it arrives through one of the star imports at the top — confirm.
calculate_group_loss(l2_loss,pred,y_te,xs_te)

loss function:  l2_loss
loss value for group 0: 0.27490774907749077
loss value for group 1: 0.7661141804788214
