# SVM (regularization)
**Version:** 0.1a1

In [None]:
import numpy as np
import pandas as pd
import shap
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Load SHAP's JavaScript visualisation support into the notebook; the
# interactive force plot rendered later relies on it.
shap.initjs()

In [None]:
# Load the dataset used for feature selection and training below.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
df = pd.read_pickle('cvr_prediction_20201228.pkl')

In [None]:
# Input columns fed to the classifier.
feature_col_names = [
    'ari_class',
    'bormuth_score', 'bormuth_class',
    'coleman_liau_class',
    'flesch_class', 'flesch_kincaid_class',
    'fog_score', 'fog_class',
    'lix_class',
    'rix_score', 'rix_class',
    'smog_class',
    'strain_class',
    'aws', 'pdw', 'pew', 'ppw', 'psw', 'puw',
    'sentences',
]
# Target column, kept as a one-element list so `y` is a column vector (n, 1).
predicted_class_names = ['cvr_class']

x = df.loc[:, feature_col_names].values
y = df.loc[:, predicted_class_names].values

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

### Select `C`

In [None]:
# Pick the regularisation strength C by 5-fold cross-validation on the
# training split only.  Scoring the candidates against x_test / y_test tunes C
# on the very data the "Results" section reports on, which makes the reported
# accuracy optimistically biased.
from sklearn.model_selection import cross_val_score

# SVC expects 1-D labels; y_train is a column vector, so flatten it once.
y_train_flat = y_train.ravel()

best = 0
best_c = None  # the C value that produced `best`
for i in np.linspace(0.01, 1, 100):
    # Mean accuracy across the folds for this candidate C.
    accuracy = cross_val_score(
        SVC(gamma='auto', C=i),
        x_train,
        y_train_flat,
        cv=5,
        scoring='accuracy',
    ).mean()
    # `>=` keeps ties, so the last line printed is the largest C that reaches
    # the best score seen so far.
    if accuracy >= best:
        best = accuracy
        best_c = i
        print(i, best)

## Train

In [None]:
# Fit the final classifier with the chosen regularisation strength (C=0.25)
# and predict the held-out rows.  `fit` returns the estimator itself, so the
# construction and fitting can be chained.
model = SVC(C=0.25, gamma='auto').fit(x_train, y_train.ravel())
svm_predict_test = model.predict(x_test)

## Results

In [None]:
# Confusion matrix with the positive class (1) listed first.
print('Confusion Matrix:')
confusion = metrics.confusion_matrix(y_test, svm_predict_test, labels=[1, 0])
print(f'{confusion}')

In [None]:
# Per-class precision / recall / F1, with class 1 listed before class 0.
report = metrics.classification_report(
    y_test,
    svm_predict_test,
    labels=[1, 0],
)
print(report)

In [None]:
# Test-set accuracy, rounded to six decimals for display.
# The built-in round() is used instead of the `.round()` method because
# accuracy_score is documented to return a float, and a plain Python float has
# no `.round()` method (only NumPy scalars do).
# Previously recorded unrounded value: 0.7808219178082192
round(metrics.accuracy_score(y_test, svm_predict_test), 6)

## SHAP

In [None]:
# Model-agnostic SHAP explainer around the fitted SVM's `predict` function.
# The test rows serve both as the background dataset and as the rows explained.
explainer = shap.KernelExplainer(
    model.predict,
    data=x_test,
    feature_names=feature_col_names,
)
shap_values = explainer.shap_values(x_test)

### Global feature importance plot

In [None]:
# Bar chart of the SHAP values per feature, drawn in grey.
shap.summary_plot(
    shap_values,
    x_test,
    feature_names=feature_col_names,
    plot_type='bar',
    color='#808080',
)

### Dependence plots
Features to inspect (the cell below plots one at a time; pass the name as the first argument): `sentences`, `bormuth_score`, `aws`, `fog_score`

In [None]:
# SHAP dependence plot for a single feature; interaction colouring is
# switched off (interaction_index=None) so every dot uses the same grey.
shap.dependence_plot(
    'sentences',
    shap_values,
    x_test,
    feature_names=feature_col_names,
    dot_size=32,
    interaction_index=None,
    color='#808080',
)

### Force plot
Values: `sentences`, `bormuth_score`, `aws`, `fog_score`

In [None]:
# Interactive force plot over all explained test rows, using a two-tone grey
# colour map.
shap.force_plot(
    explainer.expected_value,
    shap_values,
    x_test,
    feature_names=feature_col_names,
    plot_cmap=['#d3d3d3', '#808080'],
)