###  y标签0，1，2，3

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from tqdm import tqdm

# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei', 'WenQuanYi Micro Hei', 'Heiti TC']
plt.rcParams['axes.unicode_minus'] = False

# 设置随机种子函数
def set_random_seed(seed):
    np.random.seed(seed)
    import random
    random.seed(seed)

# 训练和评估函数
def train_and_evaluate(seed):
    set_random_seed(seed)
    
    # 划分训练集和测试集，使用固定的random_state确保数据划分一致
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.3, random_state=42)
    
    # 使用当前seed
    clf = LogisticRegression(
        random_state=seed     # 控制随机初始化
    )
    
    # 训练模型并显示进度
    print(f"运行{seed} 训练LR模型...")
    
    # 使用tqdm显示训练进度（模拟）
    with tqdm(total=100, desc=f"运行{seed} 训练进度") as pbar:
        clf.fit(X_train, y_train)
        pbar.update(100)
    
    # 预测
    y_pred = clf.predict(X_test)
    y_pred_proba = clf.predict_proba(X_test)
    
    # 计算weighted指标
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
    from sklearn.preprocessing import label_binarize
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    
    # 计算weighted AUC
    y_test_bin = label_binarize(y_test, classes=list(range(num_classes)))
    auc_weighted = roc_auc_score(y_test_bin, y_pred_proba, average='weighted', multi_class='ovr')
    
    
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'auc': auc_weighted,
        'y_pred': y_pred,
        'y_pred_proba': y_pred_proba,
        'y_test': y_test,
        'seed': seed
    }

# 读取数据
X = pd.read_csv("E:/comorbidity/models/multi_features/all_features.csv", encoding='utf-8-sig')  
y = pd.read_excel("E:/comorbidity/models/multi_features/Y.xlsx")

# 特征标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 标签编码
y = y['标签']
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
num_classes = len(label_encoder.classes_)

# 运行10次实验
print("开始10次独立实验...")
results = []
for i in range(10):
    print(f"运行第{i+1}次实验 (seed={42+i})...")
    result = train_and_evaluate(42+i)
    results.append(result)
    print(f"第{i+1}次: 准确率={result['accuracy']:.4f}, F1={result['f1']:.4f}")

# 计算统计结果
metrics = ['accuracy', 'precision', 'recall', 'f1', 'auc']
stats = {}

for metric in metrics:
    values = [r[metric] for r in results]
    stats[metric] = {
        'mean': np.mean(values),
        'std': np.std(values),
        'values': values
    }

# 输出最终结果
print("\n" + "="*50)
print("最终结果 (平均值±标准差)")
print("="*50)
print(f"准确率: {stats['accuracy']['mean']:.4f} ± {stats['accuracy']['std']:.4f}")
print(f"精准率: {stats['precision']['mean']:.4f} ± {stats['precision']['std']:.4f}")
print(f"召回率: {stats['recall']['mean']:.4f} ± {stats['recall']['std']:.4f}")
print(f"F1值: {stats['f1']['mean']:.4f} ± {stats['f1']['std']:.4f}")
print(f"AUC值: {stats['auc']['mean']:.4f} ± {stats['auc']['std']:.4f}")

# 保存最后一次实验的结果用于后续可视化
final_result = results[-1]
y_pred = final_result['y_pred']
y_pred_proba = final_result['y_pred_proba']
y_test = final_result['y_test']

开始10次独立实验...
运行第1次实验 (seed=42)...
运行42 训练LR模型...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
运行42 训练进度: 100%|██████████| 100/100 [00:04<00:00, 20.31it/s]


第1次: 准确率=0.8159, F1=0.8111
运行第2次实验 (seed=43)...
运行43 训练LR模型...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
运行43 训练进度: 100%|██████████| 100/100 [00:04<00:00, 21.46it/s]


第2次: 准确率=0.8159, F1=0.8111
运行第3次实验 (seed=44)...
运行44 训练LR模型...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
运行44 训练进度: 100%|██████████| 100/100 [00:04<00:00, 21.80it/s]


第3次: 准确率=0.8159, F1=0.8111
运行第4次实验 (seed=45)...
运行45 训练LR模型...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
运行45 训练进度: 100%|██████████| 100/100 [00:04<00:00, 23.16it/s]


第4次: 准确率=0.8159, F1=0.8111
运行第5次实验 (seed=46)...
运行46 训练LR模型...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
运行46 训练进度: 100%|██████████| 100/100 [00:04<00:00, 23.23it/s]


第5次: 准确率=0.8159, F1=0.8111
运行第6次实验 (seed=47)...
运行47 训练LR模型...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
运行47 训练进度: 100%|██████████| 100/100 [00:04<00:00, 22.67it/s]


第6次: 准确率=0.8159, F1=0.8111
运行第7次实验 (seed=48)...
运行48 训练LR模型...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
运行48 训练进度: 100%|██████████| 100/100 [00:04<00:00, 23.02it/s]


第7次: 准确率=0.8159, F1=0.8111
运行第8次实验 (seed=49)...
运行49 训练LR模型...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
运行49 训练进度: 100%|██████████| 100/100 [00:04<00:00, 23.05it/s]


第8次: 准确率=0.8159, F1=0.8111
运行第9次实验 (seed=50)...
运行50 训练LR模型...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
运行50 训练进度: 100%|██████████| 100/100 [00:04<00:00, 22.80it/s]


第9次: 准确率=0.8159, F1=0.8111
运行第10次实验 (seed=51)...
运行51 训练LR模型...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
运行51 训练进度: 100%|██████████| 100/100 [00:04<00:00, 22.96it/s]

第10次: 准确率=0.8159, F1=0.8111

最终结果 (平均值±标准差)
准确率: 0.8159 ± 0.0000
精准率: 0.8105 ± 0.0000
召回率: 0.8159 ± 0.0000
F1值: 0.8111 ± 0.0000
AUC值: 0.9242 ± 0.0000





In [2]:
import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from tqdm import tqdm

# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei', 'WenQuanYi Micro Hei', 'Heiti TC']
plt.rcParams['axes.unicode_minus'] = False

# 设置随机种子函数
def set_random_seed(seed):
    np.random.seed(seed)
    import random
    random.seed(seed)

# 训练和评估函数
def train_and_evaluate(seed):
    set_random_seed(seed)
    
    # 划分训练集和测试集，使用固定的random_state确保数据划分一致
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.3, random_state=42)
    
    # 使用当前seed
    clf = tree.DecisionTreeClassifier(random_state=seed)
    
    # 训练模型并显示进度
    print(f"运行{seed} 训练DT模型...")
    
    # 使用tqdm显示训练进度（模拟）
    with tqdm(total=100, desc=f"运行{seed} 训练进度") as pbar:
        clf.fit(X_train, y_train)
        pbar.update(100)
    
    # 预测
    y_pred = clf.predict(X_test)
    y_pred_proba = clf.predict_proba(X_test)
    
    # 计算weighted指标
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
    from sklearn.preprocessing import label_binarize
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    
    # 计算weighted AUC
    y_test_bin = label_binarize(y_test, classes=list(range(num_classes)))
    auc_weighted = roc_auc_score(y_test_bin, y_pred_proba, average='weighted', multi_class='ovr')
    
    
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'auc': auc_weighted,
        'y_pred': y_pred,
        'y_pred_proba': y_pred_proba,
        'y_test': y_test,
        'seed': seed
    }

# 读取数据
X = pd.read_csv("E:/comorbidity/models/multi_features/all_features.csv", encoding='utf-8-sig')  
y = pd.read_excel("E:/comorbidity/models/multi_features/Y.xlsx")

# 特征标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 标签编码
y = y['标签']
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
num_classes = len(label_encoder.classes_)

# 运行10次实验
print("开始10次独立实验...")
results = []
for i in range(10):
    print(f"运行第{i+1}次实验 (seed={42+i})...")
    result = train_and_evaluate(42+i)
    results.append(result)
    print(f"第{i+1}次: 准确率={result['accuracy']:.4f}, F1={result['f1']:.4f}")

# 计算统计结果
metrics = ['accuracy', 'precision', 'recall', 'f1', 'auc']
stats = {}

for metric in metrics:
    values = [r[metric] for r in results]
    stats[metric] = {
        'mean': np.mean(values),
        'std': np.std(values),
        'values': values
    }

# 输出最终结果
print("\n" + "="*50)
print("最终结果 (平均值±标准差)")
print("="*50)
print(f"准确率: {stats['accuracy']['mean']:.4f} ± {stats['accuracy']['std']:.4f}")
print(f"精准率: {stats['precision']['mean']:.4f} ± {stats['precision']['std']:.4f}")
print(f"召回率: {stats['recall']['mean']:.4f} ± {stats['recall']['std']:.4f}")
print(f"F1值: {stats['f1']['mean']:.4f} ± {stats['f1']['std']:.4f}")
print(f"AUC值: {stats['auc']['mean']:.4f} ± {stats['auc']['std']:.4f}")

# 保存最后一次实验的结果用于后续可视化
final_result = results[-1]
y_pred = final_result['y_pred']
y_pred_proba = final_result['y_pred_proba']
y_test = final_result['y_test']

开始10次独立实验...
运行第1次实验 (seed=42)...
运行42 训练DT模型...


运行42 训练进度: 100%|██████████| 100/100 [02:47<00:00,  1.68s/it]


第1次: 准确率=0.6810, F1=0.6822
运行第2次实验 (seed=43)...
运行43 训练DT模型...


运行43 训练进度: 100%|██████████| 100/100 [02:44<00:00,  1.64s/it]


第2次: 准确率=0.6802, F1=0.6816
运行第3次实验 (seed=44)...
运行44 训练DT模型...


运行44 训练进度: 100%|██████████| 100/100 [02:41<00:00,  1.62s/it]


第3次: 准确率=0.6805, F1=0.6820
运行第4次实验 (seed=45)...
运行45 训练DT模型...


运行45 训练进度: 100%|██████████| 100/100 [02:48<00:00,  1.69s/it]


第4次: 准确率=0.6814, F1=0.6828
运行第5次实验 (seed=46)...
运行46 训练DT模型...


运行46 训练进度: 100%|██████████| 100/100 [02:58<00:00,  1.79s/it]


第5次: 准确率=0.6813, F1=0.6827
运行第6次实验 (seed=47)...
运行47 训练DT模型...


运行47 训练进度: 100%|██████████| 100/100 [02:47<00:00,  1.68s/it]


第6次: 准确率=0.6800, F1=0.6816
运行第7次实验 (seed=48)...
运行48 训练DT模型...


运行48 训练进度: 100%|██████████| 100/100 [02:41<00:00,  1.62s/it]


第7次: 准确率=0.6820, F1=0.6834
运行第8次实验 (seed=49)...
运行49 训练DT模型...


运行49 训练进度: 100%|██████████| 100/100 [02:41<00:00,  1.62s/it]


第8次: 准确率=0.6804, F1=0.6824
运行第9次实验 (seed=50)...
运行50 训练DT模型...


运行50 训练进度: 100%|██████████| 100/100 [02:49<00:00,  1.70s/it]


第9次: 准确率=0.6838, F1=0.6851
运行第10次实验 (seed=51)...
运行51 训练DT模型...


运行51 训练进度: 100%|██████████| 100/100 [02:49<00:00,  1.69s/it]

第10次: 准确率=0.6802, F1=0.6815

最终结果 (平均值±标准差)
准确率: 0.6811 ± 0.0011
精准率: 0.6842 ± 0.0010
召回率: 0.6811 ± 0.0011
F1值: 0.6825 ± 0.0010
AUC值: 0.7127 ± 0.0009



