In [68]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
from sklearn import svm
import xgboost as xgb

In [69]:
ds_name = '20220328-or-eng-full.csv'
test_size=0.3

data = pd.read_csv(ds_name)

# removed_cols = ['Postoperative Olanzapine', 'Postoperative Fluphenazine', 'Postoperative Flupentixol']
# data = data.drop(removed_cols, axis=1)

# 将特征和目标分开
features = data.drop('Label', axis=1).values
target = data['Label'].values

print(features.shape)
print(target.shape)

(1809, 101)
(1809,)


In [70]:
# 划分训练集和测试集
train_features, test_features, train_target, test_target = train_test_split(
    features, target, test_size=test_size, random_state=42)

In [71]:
def test_model(outputs, features, target):
    # Test in all data
    outputs = outputs > 0.5

    success = np.sum(outputs == target)
    print('Success: {}/{}'.format(success, len(target)))

    acc = success / len(target)
    print('Accuracy: {:.2f}'.format(acc))

    # Confusion matrix
    TP = np.sum((outputs == 1) & (target == 1))
    TN = np.sum((outputs == 0) & (target == 0))
    FP = np.sum((outputs == 1) & (target == 0))
    FN = np.sum((outputs == 0) & (target == 1))

    print('TP: {}, TN: {}, FP: {}, FN: {}'.format(TP, TN, FP, FN))

    sensitivity = TP / (TP + FN)
    specificity = TN / (TN + FP)
    print('Sensitivity: {:.2f}, Specificity: {:.2f}'.format(sensitivity, specificity))

In [72]:
# 创建SVM分类器对象
svm_classifier = svm.SVC(class_weight={0: 1, 1: 10})

# 训练模型
svm_classifier.fit(train_features, train_target)

# 在测试集上进行预测
svm_predictions = svm_classifier.predict(test_features)

# 计算混淆矩阵
test_model(svm_predictions, test_features, test_target)

Success: 502/543
Accuracy: 0.92
TP: 7, TN: 495, FP: 30, FN: 11
Sensitivity: 0.39, Specificity: 0.94


In [73]:
# 将数据集转换为DMatrix格式
dtrain = xgb.DMatrix(train_features, label=train_target)
dtest = xgb.DMatrix(test_features, label=test_target)

# 设置XGBoost的参数
params = {
    'max_depth': 5,
    'eta': 0.01,
    'objective': 'binary:logistic',
    'eval_metric': 'error',
    'scale_pos_weight': 10
}

# 训练模型
xgb_classifier = xgb.train(params, dtrain)

# 在测试集上进行预测
xgb_predictions = xgb_classifier.predict(dtest)

# 计算混淆矩阵
test_model(xgb_predictions, test_features, test_target)

Success: 477/543
Accuracy: 0.88
TP: 15, TN: 462, FP: 63, FN: 3
Sensitivity: 0.83, Specificity: 0.88
