In [1]:
import numpy as np
import pandas as pd
from statistics import mean
from xgboost import XGBClassifier
from sklearn import model_selection, metrics
from sklearn.feature_selection import RFECV

In [2]:
# Load the complete table from disk.
data = pd.read_csv('/Users/ray/Thesis/ShoulderAll.csv')

# Disabled alternative: keep only the columns named in p_val.csv (plus the label).
# colList = pd.read_csv('/Users/ray/Thesis/p_val.csv')
# cList = np.append(np.array(colList['colName']), 'group')
# data = data[data.columns[data.columns.isin(cList)]]

# Integer-encode the two categorical columns (sex: F/M, group: AC/RCT/HC).
data = data.assign(
    sex=data['sex'].replace({'F': 0, 'M': 1}),
    group=data['group'].replace({'AC': 0, 'RCT': 1, 'HC': 2}),
)

# Drop every row whose encoded group is 2 (HC) and renumber the index from 0.
train_data = data[data['group'].ne(2)].reset_index(drop=True)

# Target is the encoded group; features are all remaining columns except 'ID'.
# (Disabled variant that keeps 'ID': X = train_data.drop(columns=['group']))
y = train_data['group']
X = train_data.drop(columns=['ID', 'group'])

# Disabled hold-out split:
# X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size = 0.3, random_state=7)

In [12]:
# Model Evaluation
def evaluation(test, prediction, multi=False):
    """Print a confusion matrix with accuracy, sensitivity and specificity.

    Parameters
    ----------
    test : array-like
        True labels, encoded as 0/1 (binary) or 0/1/2 (multi-class).
    prediction : array-like
        Predicted labels, same length and encoding as ``test``.
    multi : bool, default False
        If falsy, a 2x2 matrix over labels ``[0, 1]`` is built and the
        metrics are scalars with label 1 as the positive class.
        If truthy, a 3x3 matrix over labels ``[0, 1, 2]`` is built and each
        metric is an array of per-class one-vs-rest values.

    Returns
    -------
    None
        Results are printed: the matrix first, then accuracy, sensitivity
        and specificity. A metric whose denominator is zero prints as ``nan``.
    """
    # Fix the label set so the matrix always has the expected shape, even
    # when a class is missing from this particular sample.
    labels = [0, 1, 2] if multi else [0, 1]
    confuse_matrix = metrics.confusion_matrix(test, prediction, labels=labels)
    print(confuse_matrix)

    if multi:
        # One-vs-rest counts per class: diagonal = hits, column sum minus
        # diagonal = false positives, row sum minus diagonal = false negatives.
        tp = np.diag(confuse_matrix).astype(float)
        fp = confuse_matrix.sum(axis=0) - tp
        fn = confuse_matrix.sum(axis=1) - tp
        tn = confuse_matrix.sum() - (fp + fn + tp)
    else:
        # Reuse the fixed-label matrix; it is guaranteed to hold four cells.
        tn, fp, fn, tp = confuse_matrix.ravel()

    # 0/0 (e.g. no positive samples) yields nan without a RuntimeWarning.
    with np.errstate(divide='ignore', invalid='ignore'):
        accuracy = (tp + tn) / (tp + fp + fn + tn)
        sensitivity = tp / (tp + fn)
        specificity = tn / (tn + fp)

    print('Accuracy: ', accuracy)
    print('Sensitivity: ', sensitivity)
    print('Specificity: ', specificity)

In [3]:
# Base classifier: XGBoost with its default settings.
xgboostModel = XGBClassifier()

# Recursive feature elimination scored by 5-fold stratified cross-validation,
# removing one feature per step and keeping at least 150 of them.
selector = RFECV(
    estimator=xgboostModel,
    step=1,
    min_features_to_select=150,
    cv=model_selection.StratifiedKFold(n_splits=5),
    n_jobs=10,
).fit(X, y)

# Names of the features the fitted selector retained.
selectList = selector.get_feature_names_out()

In [10]:
# Reload the raw table so the column filter below starts from the full data.
data = pd.read_csv('/Users/ray/Thesis/ShoulderAll.csv')

# Add the label column to the RFECV-selected feature names. Guarded so that
# re-running this cell does not append 'group' a second time.
if 'group' not in list(selectList):
    selectList = np.append(selectList, 'group')

# Column names listed in p_val.csv, plus the label column.
colList = pd.read_csv('/Users/ray/Thesis/p_val.csv')
cList = np.append(np.array(colList['colName']), 'group')

# NOTE(review): selectList is prepared above, but cList is the filter that is
# actually applied here -- confirm which feature set is intended.
data = data.loc[:, data.columns.isin(cList)].copy()

# Encode categoricals the same way as the first load cell. 'sex' is only
# encoded when it survived the column filter.
if 'sex' in data.columns:
    data['sex'] = data['sex'].replace({'F': 0, 'M': 1})
data['group'] = data['group'].replace({'AC': 0, 'RCT': 1, 'HC': 2})

# Choosing group: keep AC (0) and RCT (1), drop HC (2), renumber the index.
train_data = data.loc[data['group'] != 2].reset_index(drop=True)
X = train_data.drop(columns=['group'])
y = train_data['group']

In [13]:
# 5-fold stratified cross-validation of xgboostModel on X / y.
skf = model_selection.StratifiedKFold(n_splits=5)
acc_stratified = []

for train_index, test_index in skf.split(X, y):
    # split() yields positional row numbers, so select with .iloc; .loc would
    # only work while the index happens to be exactly 0..n-1.
    X_train_fold, X_test_fold = X.iloc[train_index], X.iloc[test_index]
    y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]
    xgboostModel.fit(X_train_fold, y_train_fold)

    # Predict once and reuse the result for both the accuracy figure and
    # the per-fold report.
    y_predicted = xgboostModel.predict(X_test_fold)
    acc_stratified.append(metrics.accuracy_score(y_test_fold, y_predicted))
    evaluation(y_test_fold, y_predicted)

# Spread and mean of the per-fold accuracies.
print('Maximum Accuracy: ',max(acc_stratified)) 
print('Minimum Accuracy: ',min(acc_stratified)) 
print('Overall Accuracy: ',mean(acc_stratified))

[[1 5]
 [1 9]]
Accuracy:  0.625
Sensitivity:  0.9
Specificity:  0.16666666666666666
[[3 2]
 [3 8]]
Accuracy:  0.6875
Sensitivity:  0.7272727272727273
Specificity:  0.6
[[1 4]
 [3 8]]
Accuracy:  0.5625
Sensitivity:  0.7272727272727273
Specificity:  0.2
[[1 4]
 [2 9]]
Accuracy:  0.625
Sensitivity:  0.8181818181818182
Specificity:  0.2
[[4 1]
 [3 8]]
Accuracy:  0.75
Sensitivity:  0.7272727272727273
Specificity:  0.8
Maximum Accuracy:  0.75
Minimum Accuracy:  0.5625
Overall Accuracy:  0.65
