# Part II: Predict Sarcopenia with SVM

In [1]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [2]:
# Run some setup code for this notebook.
import glob
import matplotlib.pyplot as plt
import numpy as np
import os
import ipywidgets as widgets

from ipywidgets import VBox, HBox, Layout
from sklearn import svm
from sklearn import neighbors
from sklearn import preprocessing
from sklearn.utils import shuffle

from utils.checkbox import *
from utils.data_utils import *
#from utils.data_processing import *
from utils.svm_modeling import *
from utils.model_eval import *
from __future__ import print_function

# This is a bit of magic to make matplotlib figures appear inline in the
# notebook rather than in a new window.
%matplotlib inline
plt.rcParams['figure.figsize'] = (15.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2

In [3]:
# Load data.
path = 'dataset_new'
feature_dict = load_features(path, dont_show=True)
#show_feature_details(feature_dict)

Feature dict loaded.



In [4]:
# Select some features.
use_all = False
#use_all = True

level_1 = [7, 41, 25, 60, 16, 17, 23, 28, 30, 31, 38, 40, 42, 43, 44, 46, 47, 48, 52, 56, 61, 62, 63, 64, 65, 66]
level_2 = [7, 41, 25]
cui_statistic = [7, 11, 15, 22, 23, 25, 29, 35, 41, 44, 55, 62]
cui_1 = [7, 22, 23, 29, 41, 55]
cui_2 = [7, 12, 15, 22, 23, 25, 35, 41, 44, 55, 62]
cui_3 = [7, 22, 23, 41, 52, 55]
include_feature_groups = []
include_feature_index = cui_2
exclude_feature_index = []

if use_all:
    include_feature_index = np.arange(1, len(feature_dict), 1)

feature_pre_selected = pre_select_feature(include_feature_groups, include_feature_index, exclude_feature_index, dont_show=True)
precheck_boxes = generate_precheck_boxes(feature_pre_selected, feature_dict, dont_show=True)

hbox = gen_checkbox(precheck_boxes, feature_dict)
HBox(hbox)

In [5]:
# Turn on / off log.

#use_log = False
use_log = True

# Load data.
checked_features = review_checkbox(hbox, dont_show=False, log=use_log)
X = load_using_features(feature_dict, checked_features, dont_show=True)
asm, asm_h2, sarcopenia, gender, height_squared, patient_id = load_asm(), load_asm_over_h2(), load_sarcopenia(), load_gender(), load_height_squared(), load_index()

# Random shuffle. Comment this line if you want to keep the shuffling order.
shuffle_index = np.random.permutation(X.shape[0])

# Data Rescaling.
scaler = set_scaler()
#X_normalized = scaler.fit_transform(X)

# Split dataset.
num_train = 80
num_val = 26
num_test = 26
asm_train, asm_val, asm_test = shuffle_feature(asm, shuffle_index, num_train, num_val, num_test)
asm_h2_train, asm_h2_val, asm_h2_test = shuffle_feature(asm_h2, shuffle_index, num_train, num_val, num_test)
sarcopenia_train, sarcopenia_val, sarcopenia_test = shuffle_feature(sarcopenia, shuffle_index, num_train, num_val, num_test)
gender_train, gender_val, gender_test = shuffle_feature(gender, shuffle_index, num_train, num_val, num_test)
height_squared_train, height_squared_val, height_squared_test = shuffle_feature(height_squared, shuffle_index, num_train, num_val, num_test)
patient_id_train, patient_id_val, patient_id_test = shuffle_feature(patient_id, shuffle_index, num_train, num_val, num_test)
X_train, X_val, X_test = shuffle_feature(X, shuffle_index, num_train, num_val, num_test)
X_train, X_val, X_test = scaler.fit_transform(X_train), scaler.transform(X_val), scaler.transform(X_test)

Checked features:
  [7, 12, 15, 22, 23, 25, 35, 41, 44, 55, 62]
Loading (11) features, done.


## Method I: SVM Classifier on Sarcopenia

In [6]:
# Train SVC.
kernel_options = {1: "rbf", 2: "linear"}
kernel_SVC = 2
# Run SVM Classifier.
best_clf_SVC = run_SVC(X_train, X_val, sarcopenia_train, sarcopenia_val, kernel=kernel_options[kernel_SVC], log=use_log)
# Observe Model.
observe_prediction_SVC(best_clf_SVC, X_train, sarcopenia_train, patient_id_train, dont_show=False, log=use_log, setname='Training')
result_train_SVC = best_clf_SVC.predict(X_train)
eval_classifier(result_train_SVC, sarcopenia_train, show_detail=True, log=use_log, setname='Training')

observe_prediction_SVC(best_clf_SVC, X_val, sarcopenia_val, patient_id_val, dont_show=False, log=use_log, setname='Validation')
result_val_SVC = best_clf_SVC.predict(X_val)
eval_classifier(result_val_SVC, sarcopenia_val, show_detail=True, log=use_log, setname='Validation')

observe_prediction_SVC(best_clf_SVC, X_test, sarcopenia_test, patient_id_test, dont_show=False, log=use_log, setname='Test')
result_test_SVC = best_clf_SVC.predict(X_test)
eval_classifier(result_test_SVC, sarcopenia_test, show_detail=True, log=use_log, setname='Test')

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
Model best f1_score: 0.7273, f1_score_training: 0.7917, f1_score_val: 0.7273

Observing Training Set:
Truth:  1, Predicted: -1, Patient id:  28
Truth:  1, Predicted: -1, Patient id:  20
Truth: -1, Predicted:  1, Patient id:  89
Truth:  1, Predicted: -1, Patient id: 107
Truth: -1, Predicted:  1, Patient id: 113
Truth: -1, Predicted:  1, Patient id:  93
Truth:  1, Predicted: -1, Patient id: 106
Truth:  1, Predicted: -1, Patient id:   7
Truth:  1, Predicted: -1, Patient id:  64
Truth:  1, Predicted: -1, Patient id:  47

Evaluating Training set:
Positive: 26, Negative: 54
TP: 19, FP: 3, TN: 51, FN: 7
Correct: 70(80), Precision: 0.864, Recall: 0.731, Specificity: 0.944, F1-Score: 0.792


Observing Validation Set:
Truth: -1, Predicted:  1, Patient id:  44
Tru

## Method II: SVM Regressor on asm/h2 (appendicular skeletal muscle mass / squared height)

In [9]:
# Train SVR on asm/h2.
kernel_options = {1: "rbf", 2: "linear"}
kernel = 2
# Run SVM Classifier.
best_clf_asm_h2_SVR = run_SVR(X_train, X_val, asm_h2_train, asm_h2_val, kernel=kernel_options[kernel], log=use_log)
# Observe Model.
#Training Set:
observe_prediction_asm_h2_SVR(best_clf_asm_h2_SVR, X_train, asm_h2_train, gender_train, sarcopenia_train, patient_id_train, dont_show=False, log=use_log, setname='Training')
result_train_asm_h2_SVR = eval_sarcopenia_asm_h2(best_clf_asm_h2_SVR, X_train, gender_train, sarcopenia_train)
eval_classifier(result_train_asm_h2_SVR, sarcopenia_train, show_detail=True, log=use_log, setname='Training')

#Validation Set:
observe_prediction_asm_h2_SVR(best_clf_asm_h2_SVR, X_val, asm_h2_val, gender_val, sarcopenia_val, patient_id_val, dont_show=False, log=use_log, setname='Validation')
result_val_asm_h2_SVR = eval_sarcopenia_asm_h2(best_clf_asm_h2_SVR, X_val, gender_val, sarcopenia_val)
eval_classifier(result_val_asm_h2_SVR, sarcopenia_val, show_detail=True, log=use_log, setname='Validation')

#Test Set:
observe_prediction_asm_h2_SVR(best_clf_asm_h2_SVR, X_test, asm_h2_test, gender_test, sarcopenia_test, patient_id_test, dont_show=False, log=use_log, setname='Test')
result_test_asm_h2_SVR = eval_sarcopenia_asm_h2(best_clf_asm_h2_SVR, X_test, gender_test, sarcopenia_test)
eval_classifier(result_test_asm_h2_SVR, sarcopenia_test, show_detail=True, log=use_log, setname='Test')


SVR(C=1.024, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)
Model best error: 0.0529, error_training: 0.0672, error_val: 0.0529

Training Set:
Truth: 6.64, Predicted: 5.81, Error: -12.40%, Gender:  2, GT: -1, Pred: -1, Correct:  1, Patient_id: 118
Truth: 6.99, Predicted: 7.12, Error:   1.82%, Gender:  1, GT:  1, Pred: -1, Correct:  0, Patient_id:  28
Truth: 9.48, Predicted: 6.50, Error: -31.47%, Gender:  2, GT: -1, Pred: -1, Correct:  1, Patient_id: 130
Truth: 4.81, Predicted: 5.45, Error:  13.33%, Gender:  2, GT:  1, Pred: -1, Correct:  0, Patient_id: 101
Truth: 5.58, Predicted: 7.71, Error:  38.14%, Gender:  1, GT:  1, Pred: -1, Correct:  0, Patient_id:  20
Truth: 5.36, Predicted: 5.56, Error:   3.71%, Gender:  2, GT:  1, Pred: -1, Correct:  0, Patient_id:  84
Truth: 7.21, Predicted: 6.35, Error: -11.86%, Gender:  2, GT: -1, Pred: -1, Correct:  1, Patient_id:  68
Truth: 5.24, Predi

## Method III: SVM Regressor on asm (appendicular skeletal muscle mass)

In [8]:
# Train SVR on asm.
kernel_options = {1: "rbf", 2: "linear"}
kernel = 1
# Run SVM Classifier.
best_clf_asm_SVR = run_SVR(X_train, X_val, asm_train, asm_val, kernel=kernel_options[kernel], log=use_log)
# Observe Model.
observe_prediction_asm_SVR(best_clf_asm_SVR, X_train, asm_train, gender_train, height_squared_train, sarcopenia_train, patient_id_train, dont_show=False, log=use_log, setname='Training')
result_train_asm_SVR = eval_sarcopenia_asm(best_clf_asm_SVR, X_train, gender_train, height_squared_train, sarcopenia_train)
eval_classifier(result_train_asm_SVR, sarcopenia_train, show_detail=True, log=use_log, setname='Training')

observe_prediction_asm_SVR(best_clf_asm_SVR, X_val, asm_val, gender_val, height_squared_val, sarcopenia_val, patient_id_val, dont_show=False, log=use_log, setname='Validation')
result_val_asm_SVR = eval_sarcopenia_asm(best_clf_asm_SVR, X_val, gender_val, height_squared_val, sarcopenia_val)
eval_classifier(result_val_asm_SVR, sarcopenia_val, show_detail=True, log=use_log, setname='Validation')

observe_prediction_asm_SVR(best_clf_asm_SVR, X_test, asm_test, gender_test, height_squared_test, sarcopenia_test, patient_id_test, dont_show=False, log=use_log, setname='Test')
result_test_asm_SVR = eval_sarcopenia_asm(best_clf_asm_SVR, X_test, gender_test, height_squared_test, sarcopenia_test)
eval_classifier(result_test_asm_SVR, sarcopenia_test, show_detail=True, log=use_log, setname='Test')

SVR(C=51.2, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.032768,
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)
Model best error: 0.0647, error_training: 0.0784, error_val: 0.0647

Training Set:
Truth: 26.13, Pred: 23.50, ASM/h2: 7.25, Error: -10.06%, Gender:  1, GT: -1, Pred: -1, Correct:  1, Patient_id:  57
Truth: 17.20, Pred: 14.47, ASM/h2: 5.58, Error: -15.87%, Gender:  2, GT: -1, Pred: -1, Correct:  1, Patient_id: 118
Truth: 13.99, Pred: 17.18, ASM/h2: 8.06, Error:  22.74%, Gender:  2, GT: -1, Pred: -1, Correct:  1, Patient_id:  88
Truth: 21.16, Pred: 21.26, ASM/h2: 7.02, Error:   0.47%, Gender:  1, GT:  1, Pred: -1, Correct:  0, Patient_id:  28
Truth: 17.86, Pred: 20.24, ASM/h2: 7.44, Error:  13.32%, Gender:  1, GT:  1, Pred: -1, Correct:  0, Patient_id:  17
Truth: 18.82, Pred: 16.63, ASM/h2: 6.18, Error: -11.62%, Gender:  2, GT: -1, Pred: -1, Correct:  1, Patient_id:  79
Truth: 18.63, Pred: 16.20, ASM/h2: 5.95, Error: -13.05%, Gender:  2, GT:

## Measurement Index
$$Precision = \frac{True\ Positive}{True\ Positve + False\ Positive}$$
$$Recall = \frac{True\ Positive}{True\ Positive + False\ Negative}$$
$$Specificity = \frac{True\ Negative}{True\ Negative + False\ Positive}$$
$$F1\_Score = \frac{2 \times Precision \times Recall}{Precision + Recall}$$


|       |          | Actual   | Class  |
| :---  | ---      | ---      |    --- |
|       |          | Positive |Negative|
|Predict|Positive  | TP       | FP     |
| Class |Negative  | FN       | TN     |
