In [12]:
import matplotlib.pyplot as plt
from sklearn.metrics import brier_score_loss
from sklearn.calibration import CalibrationDisplay
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [13]:
# Read the spreadsheet; the 'death' column is the target and every other
# column is used as a feature.
file_path = 'D:\\ml\\DQ\\0904.xlsx'
data = pd.read_excel(file_path)
y = data['death']
X = data.drop(columns='death')

# Hold out 20% of the rows for testing (fixed seed so the split is repeatable).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features: statistics are learned from the training rows only
# and then applied unchanged to the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Instantiate the four classifiers that are compared in this notebook.
# The SVM is created with probability=True because predict_proba is called
# on it further down.
logreg = LogisticRegression(max_iter=1000)
rf = RandomForestClassifier(random_state=42)
gb = GradientBoostingClassifier(random_state=42)
svm = SVC(probability=True, random_state=42)

# Fit every classifier on the standardized training data.
for _clf in (logreg, rf, gb, svm):
    _clf.fit(X_train_scaled, y_train)

In [2]:
# Self-contained pipeline that produces held-out positive-class probabilities
# for four classifiers: log_reg_probs, rf_probs, svm_probs and gb_probs.

# CalibratedClassifierCV is used below but is not part of the notebook's
# import cell, so it is imported here to keep this cell runnable on a fresh
# kernel.
from sklearn.calibration import CalibratedClassifierCV

# Load the dataset
file_path = 'D:\\ml\\DQ\\0904.xlsx'
data = pd.read_excel(file_path)

# Splitting the data into features and target variable
X = data.drop('death', axis=1)
y = data['death']

# Splitting the dataset into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the features. The scaler is fitted on the training rows only.
# NOTE: X_train / X_test are rebound to the scaled arrays from here on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train the logistic regression model
log_reg = LogisticRegression(random_state=42)
log_reg.fit(X_train, y_train)
log_reg_probs = log_reg.predict_proba(X_test)[:, 1]

# Initialize and train the random forest model
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
rf_probs = rf.predict_proba(X_test)[:, 1]

# Initialize and calibrate the SVM model.
# The uncalibrated estimator is bound to `svm_base` rather than `svm`:
# rebinding `svm` to an unfitted SVC(probability=False) would replace the
# probability-enabled `svm` that other cells call predict_proba on.
svm_base = SVC(probability=False, random_state=42)
calibrated_svm = CalibratedClassifierCV(svm_base, method='sigmoid', cv=5)
calibrated_svm.fit(X_train, y_train)
svm_probs = calibrated_svm.predict_proba(X_test)[:, 1]

# Initialize and train the gradient boosting model
gb = GradientBoostingClassifier(random_state=42)
gb.fit(X_train, y_train)
gb_probs = gb.predict_proba(X_test)[:, 1]

In [14]:
# Map a display name to each classifier so the report below can be produced
# in a single loop.
models = dict(zip(
    ('Logistic Regression', 'Random Forest', 'Gradient Boosting', 'SVM'),
    (logreg, rf, gb, svm),
))

for name, model in models.items():
    # (Re)fit on the standardized training data, then score the held-out set.
    model.fit(X_train_scaled, y_train)
    predictions = model.predict(X_test_scaled)
    probabilities = model.predict_proba(X_test_scaled)[:, 1]

    # Metrics computed from the hard class predictions.
    accuracy, precision, recall, f1 = (
        metric(y_test, predictions)
        for metric in (accuracy_score, precision_score, recall_score, f1_score)
    )
    # Metrics computed from the predicted probability of the positive class.
    auc, brier = (
        metric(y_test, probabilities)
        for metric in (roc_auc_score, brier_score_loss)
    )

    print(f"Model: {name}")
    print(f"Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}, AUC: {auc:.4f}, Brier Score: {brier:.4f}\n")

Model: Logistic Regression
Accuracy: 0.8537, Precision: 1.0000, Recall: 0.6471, F1 Score: 0.7857, AUC: 0.9142, Brier Score: 0.1008

Model: Random Forest
Accuracy: 0.9024, Precision: 0.8824, Recall: 0.8824, F1 Score: 0.8824, AUC: 0.9804, Brier Score: 0.0715

Model: Gradient Boosting
Accuracy: 0.8780, Precision: 0.8750, Recall: 0.8235, F1 Score: 0.8485, AUC: 0.9387, Brier Score: 0.0951

Model: SVM
Accuracy: 0.8780, Precision: 0.9286, Recall: 0.7647, F1 Score: 0.8387, AUC: 0.9608, Brier Score: 0.0737



In [15]:
from sklearn.metrics import matthews_corrcoef

# Hard class predictions on the standardized test set, one array per model.
logreg_preds, rf_preds, gb_preds, svm_preds = (
    clf.predict(X_test_scaled) for clf in (logreg, rf, gb, svm)
)

# Matthews correlation coefficient of each model against the true labels.
mcc_logreg, mcc_rf, mcc_gb, mcc_svm = (
    matthews_corrcoef(y_test, preds)
    for preds in (logreg_preds, rf_preds, gb_preds, svm_preds)
)

# Report the MCC scores, one line per model.
for mcc_label, mcc_value in (
    ("Logistic Regression MCC:", mcc_logreg),
    ("Random Forest MCC:", mcc_rf),
    ("Gradient Boosting MCC:", mcc_gb),
    ("SVM MCC:", mcc_svm),
):
    print(mcc_label, mcc_value)

Logistic Regression MCC: 0.7194769341845015
Random Forest MCC: 0.7990196078431373
Gradient Boosting MCC: 0.747561394947389
SVM MCC: 0.7511834906203398
