In [None]:
import pandas as pd
import shutil
import os
import numpy as np
import matplotlib.pyplot as plt
import onekey_algo.custom.components as okcomp
from onekey_algo import get_param_in_cwd

# Make sure the folder that receives the saved figures exists.
os.makedirs('img', exist_ok=True)
# Render figures at 300 DPI.
plt.rcParams['figure.dpi'] = 300
# Names of the models to compare; later cells use them as CSV file-name prefixes and column names.
model_names = get_param_in_cwd('compare_model')
# Load the remaining configuration values.
task = get_param_in_cwd('task_column') or 'label'  # falls back to 'label' when the lookup is falsy
labelf = get_param_in_cwd('label_file')
results_dir = get_param_in_cwd('results_dir')
group_info = get_param_in_cwd('dataset_column')

# Read the label file.
labels = [task]
label_data_ = pd.read_csv(labelf)
ids = label_data_['ID']
print(label_data_.columns)
# Keep only the ID column and the task (label) column.
label_data = label_data_[['ID'] + labels]
label_data

# 训练集

In [None]:
import pandas as pd

subset = 'train'

# Merge every model's result file for this cohort into one table keyed on 'ID'.
ALL_results = None
for mn in model_names:
    r = pd.read_csv(os.path.join(results_dir, f'{mn}_{subset}.csv'))
    # The result file must hold exactly three columns (the assignment below raises otherwise).
    # The last column is stored under the model's name; that is the column later cells read.
    # The middle column gets a model-specific name: a shared '-0' name collides across models,
    # and from the fourth model on the automatic '_x'/'_y' merge suffixes would produce
    # duplicate column names, which pandas >= 2.0 rejects with a MergeError.
    r.columns = ['ID', f'{mn}-0', mn]
    if ALL_results is None:
        ALL_results = r
    else:
        ALL_results = pd.merge(ALL_results, r, on='ID', how='inner')

# Attach the label column; the inner joins keep only IDs present in every table.
ALL_results = pd.merge(ALL_results, label_data, on='ID', how='inner')

# Drops every *column* that contains a missing value (rows are never dropped).
# NOTE(review): a model column with any NaN disappears here and later lookups by
# model name would then fail - confirm that dropping columns is intended.
ALL_results = ALL_results.dropna(axis=1)
ALL_results

In [None]:
# Names of the two per-class score columns for the task (not read again in this cell).
pred_column = [f'{task}-{cls}' for cls in (0, 1)]

# One label array and one score array per compared model, in model_names order.
gt = [np.array(ALL_results[task]) for _ in model_names]
pred_train = [np.array(ALL_results[name]) for name in model_names]

# Plot through okcomp.comp1.draw_roc, then save the current figure as SVG.
okcomp.comp1.draw_roc(gt, pred_train, labels=model_names, title="Model AUC")
roc_svg = f'img/compare_{subset}_auc.svg'
plt.savefig(roc_svg)

In [None]:
from onekey_algo.custom.components.metrics import analysis_pred_binary
# Start the metrics table: one row per model for the cohort loaded above.
# `metric` stays a plain list after this cell so that a later cell can append to it.
metric = []
for mname, y, score in zip(model_names, gt, pred_train):
    (acc, auc, ci, tpr, tnr, ppv, npv,
     precision, recall, f1, thres) = analysis_pred_binary(y, score)
    # Turn the two-element interval into display text.
    ci = '{:.4f} - {:.4f}'.format(ci[0], ci[1])
    metric.append((mname, acc, auc, ci, tpr, tnr, ppv, npv, precision, recall, f1, thres, "Train"))

metric_columns = ['Signature', 'Accuracy', 'AUC', '95% CI', 'Sensitivity', 'Specificity',
                  'PPV', 'NPV', 'Precision', 'Recall', 'F1', 'Threshold', 'Cohort']
pd.DataFrame(metric, index=None, columns=metric_columns)

In [None]:
from onekey_algo.custom.components.delong import delong_roc_test
from onekey_algo.custom.components.comp1 import draw_matrix

# Placeholders kept from the original cell; nothing below reads them.
delong = []
delong_columns = []
this_delong = []

plt.figure(figsize=(5, 4))

# Square matrix that is NaN on and above the diagonal. Each entry below the diagonal
# is the [0][0] element of delong_roc_test for the (row model, column model) pair.
n_models = len(model_names)
cm = np.full((n_models, n_models), np.nan)
for i, mni in enumerate(model_names):
    for j, mnj in enumerate(model_names[:i]):
        cm[i][j] = delong_roc_test(ALL_results[task], ALL_results[mni], ALL_results[mnj])[0][0]

# The first row and the last column are entirely NaN, so they are trimmed before plotting.
cm = pd.DataFrame(cm[1:, :-1], index=model_names[1:], columns=model_names[:-1])
draw_matrix(cm, annot=True, cmap='jet_r', cbar=True)
plt.title(f'Cohort {subset} Delong')
delong_svg = f'img/compare_delong_each_cohort_{subset}.svg'
plt.savefig(delong_svg, bbox_inches='tight')
plt.show()

In [None]:
from onekey_algo.custom.components.comp1 import plot_DCA
# One score column per compared model (model_names order), passed with the task labels to plot_DCA.
dca_scores = [ALL_results[name] for name in model_names]
plot_DCA(dca_scores, ALL_results[task], title='Model for DCA', labels=model_names, y_min=-0.15)
dca_svg = f'img/compare_{subset}_dca.svg'
plt.savefig(dca_svg)

In [None]:
from onekey_algo.custom.components.comp1 import draw_calibration
# Calibration plot for every compared model with 5 bins, saved as SVG.
draw_calibration(y_test=gt, pred_scores=pred_train, model_names=model_names, n_bins=5)
cali_svg = f'img/compare_{subset}_cali.svg'
plt.savefig(cali_svg)

In [None]:
from onekey_algo.custom.components import stats

# Start the table with one row for this cohort: one hosmer_lemeshow_test result per model.
# `hosmer` stays a list of rows so that a later cell can append another cohort.
# NOTE(review): bins=15 here while the test-cohort cell uses bins=5 - confirm the difference is intended.
hosmer = [[stats.hosmer_lemeshow_test(y_true, y_pred, bins=15)
           for _, y_true, y_pred in zip(model_names, gt, pred_train)]]
pd.DataFrame(hosmer, columns=model_names)

# 测试集

In [None]:
import pandas as pd

subset = 'test'

# Merge every model's result file for this cohort into one table keyed on 'ID'.
ALL_results = None
for mn in model_names:
    r = pd.read_csv(os.path.join(results_dir, f'{mn}_{subset}.csv'))
    # The result file must hold exactly three columns (the assignment below raises otherwise).
    # The last column is stored under the model's name; that is the column later cells read.
    # The middle column gets a model-specific name: a shared '-0' name collides across models,
    # and from the fourth model on the automatic '_x'/'_y' merge suffixes would produce
    # duplicate column names, which pandas >= 2.0 rejects with a MergeError.
    r.columns = ['ID', f'{mn}-0', mn]
    if ALL_results is None:
        ALL_results = r
    else:
        ALL_results = pd.merge(ALL_results, r, on='ID', how='inner')

# Attach the label column; the inner joins keep only IDs present in every table.
ALL_results = pd.merge(ALL_results, label_data, on='ID', how='inner')

# Drops every *column* that contains a missing value (rows are never dropped).
# NOTE(review): a model column with any NaN disappears here and later lookups by
# model name would then fail - confirm that dropping columns is intended.
ALL_results = ALL_results.dropna(axis=1)
ALL_results

In [None]:
# Names of the two per-class score columns for the task (not read again in this cell).
pred_column = [f'{task}-{cls}' for cls in (0, 1)]

# One label array and one score array per compared model, in model_names order.
# `pred_train` holds the scores of whichever cohort ALL_results currently contains;
# the following cells read it under this name.
gt = [np.array(ALL_results[task]) for _ in model_names]
pred_train = [np.array(ALL_results[name]) for name in model_names]

# Plot through okcomp.comp1.draw_roc, then save the current figure as SVG.
okcomp.comp1.draw_roc(gt, pred_train, labels=model_names, title="Model AUC")
roc_svg = f'img/compare_{subset}_auc.svg'
plt.savefig(roc_svg)

In [None]:
from onekey_algo.custom.components.metrics import analysis_pred_binary
# `metric` arrives as the list of 'Train' rows built by the training-cohort metrics cell.
# This cell finishes by rebinding `metric` to a DataFrame, so a second run used to call
# .append on that DataFrame (or stack duplicate 'Test' rows). Recover the non-'Test' rows
# first so the cell can be re-run safely; on a first run this keeps every existing row.
if isinstance(metric, pd.DataFrame):
    metric = list(metric[metric['Cohort'] != 'Test'].itertuples(index=False, name=None))
else:
    metric = [row for row in metric if row[-1] != 'Test']

for mname, y, score in zip(model_names, gt, pred_train):
    # Compute the metrics of this model on the cohort currently held in gt / pred_train.
    acc, auc, ci, tpr, tnr, ppv, npv, precision, recall, f1, thres = analysis_pred_binary(y, score)
    # Turn the two-element interval into display text.
    ci = f"{ci[0]:.4f} - {ci[1]:.4f}"
    metric.append((mname, acc, auc, ci, tpr, tnr, ppv, npv, precision, recall, f1, thres, "Test"))

# Combined table: the rows recovered above followed by the 'Test' rows.
metric = pd.DataFrame(metric, index=None, columns=['Signature', 'Accuracy', 'AUC', '95% CI',
                                                   'Sensitivity', 'Specificity',
                                                   'PPV', 'NPV', 'Precision', 'Recall', 'F1',
                                                   'Threshold', 'Cohort'])

metric

In [None]:
from onekey_algo.custom.components.delong import delong_roc_test
from onekey_algo.custom.components.comp1 import draw_matrix

# Placeholders kept from the original cell; nothing below reads them.
delong = []
delong_columns = []
this_delong = []

plt.figure(figsize=(5, 4))

# Square matrix that is NaN on and above the diagonal. Each entry below the diagonal
# is the [0][0] element of delong_roc_test for the (row model, column model) pair.
n_models = len(model_names)
cm = np.full((n_models, n_models), np.nan)
for i, mni in enumerate(model_names):
    for j, mnj in enumerate(model_names[:i]):
        cm[i][j] = delong_roc_test(ALL_results[task], ALL_results[mni], ALL_results[mnj])[0][0]

# The first row and the last column are entirely NaN, so they are trimmed before plotting.
cm = pd.DataFrame(cm[1:, :-1], index=model_names[1:], columns=model_names[:-1])
draw_matrix(cm, annot=True, cmap='jet_r', cbar=True)
plt.title(f'Cohort {subset} Delong')
delong_svg = f'img/compare_delong_each_cohort_{subset}.svg'
plt.savefig(delong_svg, bbox_inches='tight')
plt.show()

In [None]:
from onekey_algo.custom.components.comp1 import plot_DCA
# One score column per compared model (model_names order), passed with the task labels to plot_DCA.
dca_scores = [ALL_results[name] for name in model_names]
plot_DCA(dca_scores, ALL_results[task], title='Model for DCA', labels=model_names, y_min=-0.15)
dca_svg = f'img/compare_{subset}_dca.svg'
plt.savefig(dca_svg)

In [None]:
from onekey_algo.custom.components.comp1 import draw_calibration
# Calibration plot for every compared model with 5 bins, saved as SVG.
draw_calibration(y_test=gt, pred_scores=pred_train, model_names=model_names, n_bins=5)
cali_svg = f'img/compare_{subset}_cali.svg'
plt.savefig(cali_svg)

In [None]:
from onekey_algo.custom.components import stats

# hosmer[0] is the training-cohort row created by the earlier Hosmer-Lemeshow cell.
# Discard anything after it so that re-running this cell replaces the test row
# instead of stacking a duplicate on every run (a no-op on the first run).
del hosmer[1:]

# Add this cohort's row: one hosmer_lemeshow_test result per model, using 5 bins.
hosmer.append([stats.hosmer_lemeshow_test(y_true, y_pred, bins=5)
               for _, y_true, y_pred in zip(model_names, gt, pred_train)])
pd.DataFrame(hosmer, columns=model_names)