# Part 7: Leave One Out Validation with SVM

In [8]:
%%javascript
// Replace OutputArea's _should_scroll hook with a version that ignores `lines`
// and always answers false. Intended to keep long cell outputs from being
// collapsed into a scrolling box (relies on the classic-notebook `IPython` global).
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [9]:
# Run some setup code for this notebook.
import glob
import matplotlib.pyplot as plt
import numpy as np
import os
import ipywidgets as widgets

from ipywidgets import VBox, HBox, Layout
from sklearn import svm
from sklearn import neighbors
from sklearn import preprocessing
from sklearn.utils import shuffle
from sklearn.model_selection import cross_val_score


from utils.checkbox import *
from utils.data_utils import *
#from utils.data_processing import *
from utils.svm_modeling import *
from utils.model_eval import *
from __future__ import print_function

# This is a bit of magic to make matplotlib figures appear inline in the
# notebook rather than in a new window.
%matplotlib inline
# Notebook-wide plotting defaults, applied through matplotlib's rcParams.
plt.rcParams['figure.figsize'] = (15.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Load IPython's autoreload extension and select mode 2, so that edits to the
# imported modules are picked up without restarting the kernel.
# Re-running this cell prints an "already loaded" notice, which is harmless.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
# Load data.
# `path` is passed straight to load_features (not defined in this notebook;
# presumably provided by one of the utils star-imports). It is a relative
# location, so it resolves against the kernel's working directory.
path = 'dataset_new'
# The result is kept in `feature_dict`, which the selection and loading cells below read.
feature_dict = load_features(path, dont_show=True)
#show_feature_details(feature_dict)

Feature dict loaded.



In [19]:
# Select some features.
# `use_all` switches between the hand-picked index list below and the full
# range produced by the np.arange call further down.
use_all = False
#use_all = True
# Candidate index lists; each group repeats the previous one and appends three more.
group_1 = [7, 23, 41]
group_2 = [7, 23, 41, 29, 55, 22]
group_3 = [7, 23, 41, 29, 55, 22, 25, 15, 35]
include_feature_groups = []
include_feature_index = group_3  # the list currently in effect
exclude_feature_index = []

if use_all:
    # NOTE(review): this yields 1 .. len(feature_dict) - 1. Confirm that skipping
    # index 0 and stopping one short of len(feature_dict) matches the feature numbering.
    include_feature_index = np.arange(1, len(feature_dict), 1)

# Turn the include/exclude lists into the structures gen_checkbox expects.
# These helpers are not defined in this notebook; presumably they build
# pre-ticked checkbox widgets -- verify against utils.checkbox.
feature_pre_selected = pre_select_feature(include_feature_groups, include_feature_index, exclude_feature_index, dont_show=True)
precheck_boxes = generate_precheck_boxes(feature_pre_selected, feature_dict, dont_show=True)

hbox = gen_checkbox(precheck_boxes, feature_dict)
# Last expression of the cell, so the widget container is rendered as the cell
# output. `hbox` is read back by review_checkbox in the next cell.
HBox(hbox)

In [20]:
# Turn on / off log.

#use_log = True
use_log = False

# Load the feature matrix for the ticked checkboxes, plus the per-patient
# targets and metadata returned by the load_* helpers.
checked_features = review_checkbox(hbox, dont_show=False, log=use_log)
X = load_using_features(feature_dict, checked_features, dont_show=True)
asm, asm_h2, sarcopenia, gender, height_squared, patient_id = (
    load_asm(), load_asm_over_h2(), load_sarcopenia(),
    load_gender(), load_height_squared(), load_index(),
)

# Random shuffle, applied identically to every array so rows stay aligned.
# A private, seeded generator makes the order reproducible after a kernel
# restart and leaves NumPy's global random state untouched.
# Set SHUFFLE_SEED = None to draw a fresh random order on every run.
SHUFFLE_SEED = 0
n_samples = X.shape[0]
shuffle_index = np.random.RandomState(SHUFFLE_SEED).permutation(n_samples)

X = X[shuffle_index]
asm = asm[shuffle_index]
asm_h2 = asm_h2[shuffle_index]
sarcopenia = sarcopenia[shuffle_index]
gender = gender[shuffle_index]
height_squared = height_squared[shuffle_index]
patient_id = patient_id[shuffle_index]

# Data Rescaling.
scaler = set_scaler()

# Leave-one-out splitting: one fold per sample. The fold count is derived from
# the data instead of being hard-coded, so the split follows the dataset size.
n_fold = n_samples
items_per_fold = 1
mask_cv = np.arange(n_samples)

# masks[i] == [indices before the held-out slot, the held-out slot, indices after it]
masks = [
    np.split(mask_cv, [k * items_per_fold, (k + 1) * items_per_fold])
    for k in range(n_fold)
]
train_fold_mask = [np.concatenate([before, after]) for before, _held, after in masks]
val_fold_mask = [held for _before, held, _after in masks]

Checked features:
  [7, 15, 22, 23, 25, 29, 35, 41, 55]
Loading (9) features, done.


## Leave one out test for SVM

## Method I: SVM Classifier on Sarcopenia

In [24]:
# Train SVC.
kernel_options = {1: "rbf", 2: "linear"}
kernel_SVC = 2

# One predicted label per sample, filled in as each fold is held out.
result_test_SVC = np.zeros([X.shape[0]])

for i in np.arange(0, n_fold):
    train_idx, test_idx = train_fold_mask[i], val_fold_mask[i]

    X_train, X_test = X[train_idx], X[test_idx]
    # The scaler is fitted on the training rows only and then applied to the
    # held-out rows.
    X_train, X_test = scaler.fit_transform(X_train), scaler.transform(X_test)

    # Only the labels and patient ids are needed by the classifier path.
    sarcopenia_train, sarcopenia_test = sarcopenia[train_idx], sarcopenia[test_idx]
    patient_id_train, patient_id_test = patient_id[train_idx], patient_id[test_idx]

    best_clf_SVC = run_SVC_k_fold(X_train, sarcopenia_train, kernel=kernel_options[kernel_SVC], log=use_log, dont_show=True)

    observe_prediction_SVC(best_clf_SVC, X_test, sarcopenia_test, patient_id_test, dont_show=True, log=use_log, setname='Test')
    # Write through the fold's own index array rather than the scalar slot `i`:
    # predict() returns an array, and storing a 1-element array into a scalar
    # position is deprecated in recent NumPy releases.
    result_test_SVC[test_idx] = best_clf_SVC.predict(X_test)

# Score the pooled held-out predictions against the true labels.
eval_classifier(result_test_SVC, sarcopenia, show_detail=True, log=use_log, setname='Test')
ppvs, npvs, sensitivitys, specificitys = eval_classifier_k_fold(result_test_SVC, sarcopenia)

print("------------------------ Overall Statistics --------------------------")
print("ppv: %.3f, \nnpv: %.3f, \nsensitivity: %.3f, \nspecificity: %.3f\n" %
      (np.mean(ppvs), np.mean(npvs), np.mean(sensitivitys), np.mean(specificitys), ))



Evaluating Test set:
Positive: 37, Negative: 95
TP: 23, FP: 10, TN: 85, FN: 14
Correct: 108(132), Precision: 0.697, Recall: 0.622, Specificity: 0.895, F1-Score: 0.657

------------------------ Overall Statistics --------------------------
ppv: 0.697, 
npv: 0.859, 
sensitivity: 0.622, 
specificity: 0.895



## Method II: SVM Regressor on asm/h2

In [22]:
# Train SVR on asm/h2.
kernel_options = {1: "rbf", 2: "linear"}
kernel_SVR = 2

# Slot i receives the sarcopenia call for the sample held out in fold i.
result_test_asm_h2_SVR = np.zeros([X.shape[0]])

for i in range(n_fold):
    train_idx = train_fold_mask[i]
    test_idx = val_fold_mask[i]

    # Features: scaler fitted on the training rows, then applied to the held-out rows.
    X_train = scaler.fit_transform(X[train_idx])
    X_test = scaler.transform(X[test_idx])

    # Targets and metadata, sliced with the same fold indices.
    asm_train = asm[train_idx]
    asm_test = asm[test_idx]
    asm_h2_train = asm_h2[train_idx]
    asm_h2_test = asm_h2[test_idx]
    sarcopenia_train = sarcopenia[train_idx]
    sarcopenia_test = sarcopenia[test_idx]
    gender_train = gender[train_idx]
    gender_test = gender[test_idx]
    height_squared_train = height_squared[train_idx]
    height_squared_test = height_squared[test_idx]
    patient_id_train = patient_id[train_idx]
    patient_id_test = patient_id[test_idx]

    # Fit the regressor on asm/h2 for this fold.
    best_clf_asm_h2_SVR = run_SVR_k_fold(
        X_train, asm_h2_train,
        kernel=kernel_options[kernel_SVR], log=use_log, dont_show=True)

    # Training Set:
    observe_prediction_asm_h2_SVR(
        best_clf_asm_h2_SVR, X_train, asm_h2_train, gender_train,
        sarcopenia_train, patient_id_train,
        dont_show=True, log=use_log, setname='Training')
    result_train_asm_h2_SVR = eval_sarcopenia_asm_h2(
        best_clf_asm_h2_SVR, X_train, gender_train, sarcopenia_train)
    eval_classifier(
        result_train_asm_h2_SVR, sarcopenia_train,
        show_detail=False, log=use_log, setname='Training')

    # Test Set:
    observe_prediction_asm_h2_SVR(
        best_clf_asm_h2_SVR, X_test, asm_h2_test, gender_test,
        sarcopenia_test, patient_id_test,
        dont_show=True, log=use_log, setname='Test')
    held_out_calls = eval_sarcopenia_asm_h2(
        best_clf_asm_h2_SVR, X_test, gender_test, sarcopenia_test)
    result_test_asm_h2_SVR[i] = held_out_calls[0]

# Pooled evaluation over every held-out prediction.
eval_classifier(result_test_asm_h2_SVR, sarcopenia, show_detail=True, log=use_log, setname='Test')
ppvs, npvs, sensitivitys, specificitys = eval_classifier_k_fold(result_test_asm_h2_SVR, sarcopenia)

overall_stats = (np.mean(ppvs), np.mean(npvs), np.mean(sensitivitys), np.mean(specificitys))
print("------------------------ Overall Statistics --------------------------")
print("ppv: %.3f, \nnpv: %.3f, \nsensitivity: %.3f, \nspecificity: %.3f\n" % overall_stats)




Evaluating Test set:
Positive: 37, Negative: 95
TP: 21, FP: 6, TN: 89, FN: 16
Correct: 110(132), Precision: 0.778, Recall: 0.568, Specificity: 0.937, F1-Score: 0.656

------------------------ Overall Statistics --------------------------
ppv: 0.778, 
npv: 0.848, 
sensitivity: 0.568, 
specificity: 0.937



## Method III: SVM Regressor on asm

In [12]:
# Train SVR on asm.
kernel_options = {1: "rbf", 2: "linear"}
kernel_SVR = 2

# Slot i receives the sarcopenia call for the sample held out in fold i.
result_test_asm_SVR = np.zeros([X.shape[0]])

for i in range(n_fold):
    train_idx = train_fold_mask[i]
    test_idx = val_fold_mask[i]

    # Features: scaler fitted on the training rows, then applied to the held-out rows.
    X_train = scaler.fit_transform(X[train_idx])
    X_test = scaler.transform(X[test_idx])

    # Targets and metadata, sliced with the same fold indices.
    asm_train = asm[train_idx]
    asm_test = asm[test_idx]
    asm_h2_train = asm_h2[train_idx]
    asm_h2_test = asm_h2[test_idx]
    sarcopenia_train = sarcopenia[train_idx]
    sarcopenia_test = sarcopenia[test_idx]
    gender_train = gender[train_idx]
    gender_test = gender[test_idx]
    height_squared_train = height_squared[train_idx]
    height_squared_test = height_squared[test_idx]
    patient_id_train = patient_id[train_idx]
    patient_id_test = patient_id[test_idx]

    # Train Regressor
    best_clf_asm_SVR = run_SVR_k_fold(
        X_train, asm_train,
        kernel=kernel_options[kernel_SVR], log=use_log, dont_show=True)

    # Training Set:
    observe_prediction_asm_SVR(
        best_clf_asm_SVR, X_train, asm_train, gender_train,
        height_squared_train, sarcopenia_train, patient_id_train,
        dont_show=True, log=use_log, setname='Training')
    result_train_asm_SVR = eval_sarcopenia_asm(
        best_clf_asm_SVR, X_train, gender_train,
        height_squared_train, sarcopenia_train)
    eval_classifier(
        result_train_asm_SVR, sarcopenia_train,
        show_detail=False, log=use_log, setname='Training')

    # Test Set:
    observe_prediction_asm_SVR(
        best_clf_asm_SVR, X_test, asm_test, gender_test,
        height_squared_test, sarcopenia_test, patient_id_test,
        dont_show=True, log=use_log, setname='Test')
    held_out_calls = eval_sarcopenia_asm(
        best_clf_asm_SVR, X_test, gender_test,
        height_squared_test, sarcopenia_test)
    result_test_asm_SVR[i] = held_out_calls[0]

# Pooled evaluation over every held-out prediction.
eval_classifier(result_test_asm_SVR, sarcopenia, show_detail=True, log=use_log, setname='Test')
ppvs, npvs, sensitivitys, specificitys = eval_classifier_k_fold(result_test_asm_SVR, sarcopenia)

overall_stats = (np.mean(ppvs), np.mean(npvs), np.mean(sensitivitys), np.mean(specificitys))
print("------------------------ Overall Statistics --------------------------")
print("ppv: %.3f, \nnpv: %.3f, \nsensitivity: %.3f, \nspecificity: %.3f\n" % overall_stats)




Evaluating Test set:
Positive: 37, Negative: 95
TP: 16, FP: 11, TN: 84, FN: 21
Correct: 100(132), Precision: 0.593, Recall: 0.432, Specificity: 0.884, F1-Score: 0.500

------------------------ Overall Statistics --------------------------
ppv: 0.593, 
npv: 0.800, 
sensitivity: 0.432, 
specificity: 0.884



## Measurement Index
$$Precision = \frac{True\ Positive}{True\ Positive + False\ Positive}$$
$$Recall = \frac{True\ Positive}{True\ Positive + False\ Negative}$$
$$F1\_Score = \frac{2 \times Precision \times Recall}{Precision + Recall}$$

$$PPV = \frac{True\ Positive}{True\ Positive + False\ Positive}$$
$$NPV = \frac{True\ Negative}{True\ Negative + False\ Negative}$$
$$Sensitivity = \frac{True\ Positive}{True\ Positive + False\ Negative}$$
$$Specificity = \frac{True\ Negative}{True\ Negative + False\ Positive}$$


|       |          | Actual   | Class  |
| :---  | ---      | ---      |    --- |
|       |          | Positive |Negative|
|Predict|Positive  | TP       | FP     |
| Class |Negative  | FN       | TN     |
