In [None]:
from causalnex.inference import InferenceEngine
from causalnex.network import BayesianNetwork
from sklearn.model_selection import train_test_split, KFold
import xml_read
import networkx as nx
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.metrics import log_loss
from var_name import dict_CE
import time
import warnings
warnings.filterwarnings('ignore')
plt.rcParams['font.sans-serif']=['SimHei']

In [None]:
def calc_acc(preds, gt):
    """Compute binary classification metrics from predicted class distributions.

    Args:
        preds: Indexable sequence of mappings, one per sample. Only the first
            two values of each mapping (in iteration order) are compared:
            class 0 is predicted when the first value is strictly greater
            than the second, otherwise class 1 (ties go to class 1).
            # assumes the mapping's values are ordered [class 0, class 1] — TODO confirm
        gt: Indexable sequence of ground-truth labels with the same length as
            ``preds``. A truthy label counts as positive, a falsy one as
            negative.

    Returns:
        dict with the keys 'accuracy', 'precision', 'recall', 'specificity',
        'f1', 'TN/N', 'TP' and 'TN'. Any ratio whose denominator is zero
        (e.g. precision when nothing was predicted positive, recall when the
        sample holds no positives) is reported as 0.0 instead of raising
        ZeroDivisionError.

    Raises:
        AssertionError: if ``preds`` and ``gt`` differ in length.
    """
    assert len(preds) == len(gt)

    def _ratio(numerator, denominator):
        # An undefined ratio (zero denominator) is reported as 0.0.
        return numerator / denominator if denominator else 0.0

    P, N, TP, TN, FP, FN = [0] * 6
    for i in range(len(gt)):
        logits = list(preds[i].values())
        pred_idx = 0 if logits[0] > logits[1] else 1
        if gt[i]:
            P += 1
            if pred_idx == gt[i]:
                TP += 1
            else:
                FN += 1
        else:
            N += 1
            if pred_idx == gt[i]:
                TN += 1
            else:
                FP += 1

    acc = _ratio(TP + TN, N + P)
    precision = _ratio(TP, TP + FP)
    recall = _ratio(TP, TP + FN)
    specificity = _ratio(TN, FP + TN)
    # N == TN + FP by construction, so this equals specificity; kept because
    # callers may read the 'TN/N' key.
    TN_N = _ratio(TN, N)
    f1 = _ratio(2 * precision * recall, precision + recall)
    return {'accuracy': acc, 'precision': precision, 'recall': recall, 'specificity': specificity, 'f1': f1, 'TN/N':TN_N, 'TP':TP, 'TN':TN}
def calc_logloss(preds, gt, label):
    """Return the log loss of predicted class distributions against labels.

    Args:
        preds: Sequence of mappings, one per sample; the values of each
            mapping (in iteration order) form one row of probabilities.
            # NOTE(review): log_loss matches columns to `label` by position —
            # confirm the mappings iterate in the same order as `label`.
        gt: Sequence of ground-truth labels, same length as ``preds``.
        label: Label list forwarded to ``log_loss`` as ``labels``.

    Returns:
        dict with a single key 'Loss' holding the value from ``log_loss``.
    """
    assert len(preds) == len(gt)
    prob_rows = []
    for dist in preds:
        prob_rows.append(list(dist.values()))
    return {'Loss': log_loss(gt, prob_rows, labels=label)}

In [None]:
# Base name of the structure file; the same name is reused as the label in
# the summary printout at the end of the notebook.
file_name = 'causal network'
# Parse the graph structure from '<file_name>.xml' via the local xml_read module.
# presumably returns a structure accepted by BayesianNetwork — verify in xml_read
graph_structure = xml_read.parse_xml_sm(f'{file_name}.xml')
# Tabular dataset used for node-state fitting and cross-validation.
data = pd.read_csv('Structural data_coarse-grained.csv')


In [None]:
# Column names of the loaded dataset (not referenced again within this cell).
variables = data.columns.values.tolist()
# Running sums of the per-fold metrics; each is divided by num_folds after the loop.
avg_accuracy = 0
avg_precision = 0
avg_recall = 0
avg_specificity = 0
avg_f1 = 0
avg_loss = 0
# K-fold cross-validation (shuffled, with random_state fixed at 79)
num_folds = 10
kf = KFold(n_splits=num_folds, shuffle=True, random_state=79)
# NOTE(review): start_time is recorded here but no elapsed time is computed in this cell.
start_time = time.perf_counter()
for fold_idx, (train_idx, test_idx) in enumerate(kf.split(data)):
    train, test = data.iloc[train_idx], data.iloc[test_idx]
    # A fresh network is built from the same structure for every fold.
    bn = BayesianNetwork(graph_structure)
    # Node states are learned from the full dataset (training and test rows),
    # whereas the CPDs below are fitted on the training fold only.
    bn = bn.fit_node_states(data)
    bn_cpd = bn.fit_cpds(train, method="BayesianEstimator", bayes_prior="K2")
    ie = InferenceEngine(bn_cpd)
    # All nodes of the fitted network; printed while 'AMI' is still included.
    # assumes bn_cpd.nodes yields a mutable list, since remove() is called on it — TODO confirm
    calc_nodes0 = bn_cpd.nodes
    print(calc_nodes0)
    # Drop the target so 'AMI' is never supplied as evidence.
    calc_nodes0.remove('AMI')
    # Validation A, B, C: three alternative choices of evidence nodes.
    # Markov boundary (node subset used by validations B and C)
    calc_nodes1=['ALB', 'ChestPain','Dyspnea','HGB','Numbness', 'STWA', 'TWA','CVD', 'CD', 'DYS', 'Gender','Lifestyle'];
    # Validation A (active): every node except 'AMI' is used as evidence.
    calc_nodes=calc_nodes0
    # Validation B: only the nodes listed in calc_nodes1 are used as evidence.
    #calc_nodes=calc_nodes1 #validation B
    # Validation C: every node except 'AMI' and those listed in calc_nodes1.
    #calc_nodes=[x for x in calc_nodes0 if x not in calc_nodes1];#validation C
    
    # Evidence columns and ground-truth labels for the test fold.
    data_calc = test.loc[:, calc_nodes]
    gt = list(test.loc[:, 'AMI'])
    # One observation dict ({column name: value}) per test row.
    list_calc = []
    for idx in range(data_calc.shape[0]):
        row = data_calc.iloc[idx]
        list_calc.append(row.to_dict())

    # Query the engine with the whole list of observations; each result is
    # indexed by node name below to pull out the 'AMI' entry.
    marginal_condition = ie.query(list_calc)
    ami_preds = [one_pred['AMI'] for one_pred in marginal_condition]
    loss = calc_logloss(ami_preds, gt, [0,1])
    print(loss)
    avg_loss += loss.get('Loss')
    acc = calc_acc(ami_preds, gt)
    avg_accuracy += acc.get('accuracy')
    avg_precision += acc.get('precision')
    avg_recall += acc.get('recall')
    avg_specificity += acc.get('specificity')
    avg_f1 += acc.get('f1')

    print(f'Fold {fold_idx + 1}: {acc}')

# Report the metrics averaged over all folds.
print(file_name, "total: accuracy", avg_accuracy / num_folds, 'precision', avg_precision / num_folds, 'recall', avg_recall / num_folds, 'specificity', avg_specificity / num_folds,'f1', avg_f1 / num_folds)
print(file_name, "total: log loss", avg_loss / num_folds)

