In [None]:
#This is the code for disease pre-diagnosis using the Ensemble learning-DNN approach.
#This code is only available for Ensemble learning-C(num_labels,K<num_labels-1)
import numpy as np
import pandas as pd
import random
import itertools
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import warnings
matplotlib.rcParams['font.sans-serif']=['SimHei']
matplotlib.rcParams['axes.unicode_minus']= False
matplotlib.rcParams['font.size']= 11
from pandas import DataFrame
from sklearn.ensemble import VotingClassifier
from sklearn import metrics
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.metrics import confusion_matrix
from sklearn.metrics import RocCurveDisplay, roc_curve, auc, roc_auc_score
from sklearn.metrics import hamming_loss, f1_score, precision_score, precision_recall_curve, recall_score
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.model_selection import train_test_split, KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import RFECV
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from sklearn.utils import resample

# Ensemble learning over C(num_labels, K) label subsets, with K < num_labels - 1.
K = 3  # Labels per subset; valid choices are 2, 3, 4, ..., num_labels - 2.

# Load the data set.
filepath = "C:\\Users\\PS\\Desktop\\Open code\\Demo.csv"  # Set the user's own path.
df = pd.read_csv(filepath, header=0, encoding="gbk")

# Target columns. Their order here defines the label indices used further down.
y = df[[
    "Pancreas disease",
    "Intestinal tract disease",
    "liver disease",
    "Biliary tract disease",
    "Gastroduodenal disease",
    "others",
]]
labels = y.columns.tolist()
num_labels = len(labels)

# Every column that is not a target is a feature.
X1 = df.drop(columns=labels)

# All possible K-label subsets, generated with itertools.combinations.
combinations = list(itertools.combinations(labels, K))

# N-fold cross-validation splitter.
N = 5
kf = KFold(n_splits=N, shuffle=True, random_state=3)

# Logistic regression (later passed to RFECV as its estimator).
classifier1 = LogisticRegression(
    solver='lbfgs',
    penalty='l2',
    dual=False,
    tol=1e-3,
    C=1.0,
    fit_intercept=True,
    intercept_scaling=1,
    class_weight='balanced',
    random_state=None,
    max_iter=100,
    verbose=0,
    warm_start=False,
    n_jobs=-1,
)

#weight_Class reflects the class weights.
def weight_Class(y):
    """Return one positive-class weight per label column of ``y``.

    For every column the weight is ``len(y) / (number of rows equal to 1)``.
    A column that contains no positive rows gets weight ``0`` instead of an
    infinite value.

    Args:
        y: pandas DataFrame of labels; each column is cast to ``int`` and
            counted with ``np.bincount``, so values must be non-negative
            integers (0/1 in this pipeline).

    Returns:
        list: positive-class weights, in the column order of ``y``.
    """
    total_samples = len(y)
    pos_weights = []
    for column in y.columns:
        # counts[0] = number of negatives, counts[1] = number of positives.
        counts = np.bincount(y[column].astype(int), minlength=2)
        # Divide only where the class is present: absent classes keep the
        # pre-filled 0 and no divide-by-zero warning/inf is ever produced.
        weights = np.divide(
            total_samples,
            counts,
            out=np.zeros(counts.shape, dtype=float),
            where=counts > 0,
        )
        pos_weights.append(weights[1])

    return pos_weights


#Label Co-occurrence Adjustment Layer
class FeatureAdjustmentLayer(nn.Module):
    """Project hidden features onto the labels and mix them with a fixed matrix.

    The layer owns a trainable linear map ``hidden_size -> num_labels`` and a
    frozen (``requires_grad=False``) copy of ``co_occurrence_matrix``. The
    forward pass right-multiplies the projected features by that matrix.
    """

    def __init__(self, hidden_size, num_labels, co_occurrence_matrix):
        """
        Args:
            hidden_size: width of the incoming hidden representation.
            num_labels: number of labels to project onto.
            co_occurrence_matrix: NumPy array multiplied with the projected
                features; it must be shaped so that
                ``[batch, num_labels] @ matrix`` is valid.
        """
        super().__init__()
        # Stored as a Parameter so it follows the module across devices and
        # is saved in the state dict, but it is never updated by training.
        frozen_matrix = torch.from_numpy(co_occurrence_matrix).to(torch.float32)
        self.co_occurrence_matrix = nn.Parameter(frozen_matrix, requires_grad=False)
        # Trainable projection from the hidden width to one value per label.
        self.map_to_labels = nn.Linear(hidden_size, num_labels)

    def forward(self, x):
        """Return ``map_to_labels(x) @ co_occurrence_matrix``."""
        per_label = self.map_to_labels(x)  # [batch_size, num_labels]
        return per_label @ self.co_occurrence_matrix

#DNN for multi-label classification.
class MultiLabelClassifier(nn.Module):
    """Two ReLU hidden layers, a co-occurrence adjustment layer and a sigmoid head.

    ``forward`` returns per-label probabilities (the sigmoid is already
    applied), not raw logits.
    """

    def __init__(self, input_size, hidden_size, num_labels, co_occurrence_matrix):
        """
        Args:
            input_size: number of input features.
            hidden_size: width of both hidden layers.
            num_labels: number of output labels.
            co_occurrence_matrix: NumPy array handed to FeatureAdjustmentLayer.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.adjustment_layer = FeatureAdjustmentLayer(hidden_size, num_labels, co_occurrence_matrix)
        # The adjustment layer emits num_labels values, so fc3 is num_labels -> num_labels.
        self.fc3 = nn.Linear(num_labels, num_labels)

    def forward(self, x):
        """Return a ``[batch_size, num_labels]`` tensor of probabilities."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        adjusted = self.adjustment_layer(hidden)
        return self.fc3(adjusted).sigmoid()

#This function defines the basic method of model training.
def Fit_MLP(X, y, pos_weights, co_occurrence_matrix, num_epochs, hidden_size=64, learning_rate=0.01):
    """Train a MultiLabelClassifier with a positively-weighted binary cross-entropy.

    Args:
        X: pandas DataFrame of features (read through ``X.values``).
        y: pandas DataFrame of 0/1 labels, one column per label.
        pos_weights: sequence with one weight per label, applied to the loss
            of positive targets (negative targets always have weight 1).
        co_occurrence_matrix: NumPy array forwarded to the model.
        num_epochs: number of passes over the data.
        hidden_size: width of the hidden layers.
        learning_rate: Adam learning rate.

    Returns:
        The trained MultiLabelClassifier, on CUDA when available, else on CPU.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # float32 so the weights match the dtype of the model outputs.
    pos_weights_tensor = torch.tensor(pos_weights, dtype=torch.float32, device=device)
    X_Ten = torch.tensor(X.values, dtype=torch.float32).to(device)
    y_Ten = torch.tensor(y.values, dtype=torch.float32).to(device)
    dataloader = DataLoader(TensorDataset(X_Ten, y_Ten), batch_size=32, shuffle=True)

    model = MultiLabelClassifier(X.shape[1], hidden_size, y.shape[1], co_occurrence_matrix).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    model.train()
    for epoch in range(num_epochs):
        # Batches are already on `device` because the source tensors are.
        for inputs, labels in dataloader:
            optimizer.zero_grad()
            probabilities = model(inputs)
            # The model already applies a sigmoid, so the loss must take
            # probabilities. BCEWithLogitsLoss would squash them through a
            # second sigmoid. The element weights reproduce its `pos_weight`
            # semantics: pos_weight for positive targets, 1 for negatives.
            element_weights = labels * pos_weights_tensor + (1.0 - labels)
            loss = F.binary_cross_entropy(probabilities, labels, weight=element_weights)
            loss.backward()
            optimizer.step()

    return model

#This is a self-defined GRIDSEARCH function
def GRIDSEARCH(X_train, y_train, pos_weights, co_occurrence_matrix):
    """Exhaustively try a fixed hyper-parameter grid and return the best model.

    Each candidate is trained with Fit_MLP and scored on the same data it was
    trained on. The score is the unnormalized Hamming distance (count of
    label cells predicted wrongly), lower is better, and ties keep the
    earliest candidate.

    Args:
        X_train: feature DataFrame.
        y_train: label DataFrame.
        pos_weights: per-label positive weights forwarded to Fit_MLP.
        co_occurrence_matrix: NumPy array forwarded to Fit_MLP.

    Returns:
        The trained model with the fewest mismatched label cells.
    """
    hidden_sizes = [8, 16, 32]
    learning_rates = [0.01, 0.05]
    epoch_counts = [10, 20, 30]

    targets = y_train.values
    winner = None
    fewest_errors = np.inf
    # product() varies the right-most factor fastest, i.e. the same visiting
    # order as three nested loops over hidden size, learning rate, epochs.
    for width, rate, n_epochs in itertools.product(hidden_sizes, learning_rates, epoch_counts):
        candidate = Fit_MLP(X_train, y_train, pos_weights, co_occurrence_matrix, n_epochs, width, rate)
        _, hard_labels = predict_Multilabel(candidate, X_train)
        n_errors = np.sum(targets != hard_labels)
        if n_errors < fewest_errors:
            fewest_errors = n_errors
            winner = candidate

    return winner

#This function defines the basic method for multi-label prediction.
def predict_Multilabel(classifier, X):
    """Run ``classifier`` over ``X`` and return probabilities and 0/1 decisions.

    The classifier is moved to CUDA when available (CPU otherwise) and is put
    into eval mode; it is left in that state on return.

    Args:
        classifier: torch module whose output is read as probabilities.
        X: pandas DataFrame of features (read through ``X.values``).

    Returns:
        tuple: ``(probabilities, binary)`` NumPy arrays with one row per row
        of ``X``; ``binary`` is 1 where the probability is above 0.5.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    classifier = classifier.to(device)
    features = torch.tensor(X.values, dtype=torch.float32).to(device)
    # Fixed order (shuffle=False) so output rows line up with the rows of X.
    loader = DataLoader(TensorDataset(features), batch_size=32, shuffle=False)
    classifier.eval()

    with torch.no_grad():
        chunks = [classifier(batch.to(device)).cpu().numpy() for (batch,) in loader]

    probabilities = np.concatenate(chunks)
    decisions = (probabilities > 0.5).astype(int)
    return probabilities, decisions

#This function defines the basic method for multi-label prediction with voting.
def predict_with_rfecv(K, classifiers, df, num_labels):
    """Combine the sub-models' 0/1 predictions into one vote per label and row.

    Each entry of ``classifiers`` is ``(clf, support, remaining_label_index)``:
    ``clf`` is a model accepted by predict_Multilabel, ``support`` selects the
    feature columns of ``df`` that the model was given (a boolean mask over
    the columns or a list of column labels), and ``remaining_label_index``
    lists the global label positions of the model's outputs, in output order.

    A label is predicted positive for a row when the number of positive votes
    exceeds ``(len(classifiers) - 4) / 2``. The threshold is kept slightly
    below a plain majority to suit multi-label classification.

    Args:
        K: number of labels handled by each sub-model (C(num_labels, K)).
        classifiers: list of ``(clf, support, remaining_label_index)`` tuples.
        df: feature DataFrame, one row per sample.
        num_labels: total number of labels.

    Returns:
        np.ndarray: int array of shape ``(len(df), num_labels)`` with 0/1 votes.
    """
    n_rows = len(df)
    if n_rows == 0:
        # Nothing to predict; predict_Multilabel cannot handle an empty frame.
        return np.zeros((0, num_labels), dtype=int)

    # votes[r, j] = number of sub-models that predicted label j for row r.
    votes = np.zeros((n_rows, num_labels), dtype=int)
    for clf, support, remaining_label_index in classifiers:
        # One batched call per sub-model instead of one call per row.
        _, clf_predictions = predict_Multilabel(clf, df.loc[:, support])
        votes[:, list(remaining_label_index)] += clf_predictions

    # Use the classifiers actually passed in, not a module-level list.
    threshold = (len(classifiers) - 4) / 2
    return (votes > threshold).astype(int)

#Validation function with bootstrap - for recall, specificity, accuracy, precision, and F1
def predict_with_rfecv_bootstrap(K, classifiers, df, y_test, n_bootstrap):
    """Bootstrap the voting ensemble's threshold-based metrics.

    The ensemble prediction of a row does not depend on which other rows are
    in the sample, so predictions are computed once for ``df`` and every
    bootstrap round only resamples (prediction, label) rows with replacement,
    seeded with ``random_state=b`` for round ``b``.

    Calling this function installs a process-wide filter that silences
    ``UserWarning`` (this covers scikit-learn's undefined-metric warnings).

    Args:
        K: number of labels per sub-model, forwarded to predict_with_rfecv.
        classifiers: list of ``(clf, support, remaining_label_index)`` tuples.
        df: feature DataFrame of the evaluation samples.
        y_test: label DataFrame; rows are matched to ``df`` by index label.
        n_bootstrap: number of bootstrap rounds.

    Returns:
        tuple: ``(overall, class_1, ..., class_n)`` with one dict per label
        column of ``y_test`` (seven elements for the six-label data set).
        ``overall`` maps 'recall', 'accuracy', 'precision', 'F1' (micro
        averaged / subset accuracy) and 'han' (Hamming loss) to lists with
        one value per round. Each per-class dict maps 'recall',
        'specificity', 'accuracy', 'precision' and 'F1' to such lists.
    """
    num_rows = len(df)
    n_classes = y_test.shape[1]

    bootstrap_stats = {'recall': [], 'accuracy': [], 'precision': [], 'F1': [], 'han': []}
    class_stats = [
        {'recall': [], 'specificity': [], 'accuracy': [], 'precision': [], 'F1': []}
        for _ in range(n_classes)
    ]

    warnings.filterwarnings('ignore', category=UserWarning)

    # Predict once; label count comes from y_test rather than a global.
    all_predictions = predict_with_rfecv(K, classifiers, df, n_classes)
    # Align the labels with the row order of df (same lookup as y_test.loc[...]).
    y_aligned = y_test.loc[df.index].values

    for b in range(n_bootstrap):
        # resample() draws one set of row positions and applies it to both arrays.
        predictions, y_true = resample(
            all_predictions, y_aligned, n_samples=num_rows, replace=True, random_state=b
        )

        bootstrap_stats['recall'].append(recall_score(y_true, predictions, average='micro'))
        bootstrap_stats['precision'].append(precision_score(y_true, predictions, average='micro'))
        bootstrap_stats['accuracy'].append(accuracy_score(y_true, predictions))
        bootstrap_stats['F1'].append(f1_score(y_true, predictions, average='micro'))
        bootstrap_stats['han'].append(hamming_loss(y_true, predictions))

        # mcm[i] is the 2x2 matrix [[TN, FP], [FN, TP]] of label i.
        mcm = multilabel_confusion_matrix(y_true, predictions)
        precision = precision_score(y_true, predictions, average=None)
        recall = recall_score(y_true, predictions, average=None)
        f1 = f1_score(y_true, predictions, average=None)

        for i, stats in enumerate(class_stats):
            stats['precision'].append(precision[i])
            stats['recall'].append(recall[i])
            stats['F1'].append(f1[i])
            stats['accuracy'].append(accuracy_score(y_true[:, i], predictions[:, i]))
            # Specificity = TN / (TN + FP), taken from the label's own matrix.
            true_negatives = mcm[i, 0, 0]
            false_positives = mcm[i, 0, 1]
            negatives = true_negatives + false_positives
            stats['specificity'].append(true_negatives / negatives if negatives > 0 else 0)

    return (bootstrap_stats, *class_stats)


#Validation function with bootstrap - only for AUROC and AUPRC
def predict_with_rfecv_bootstrap_RPC(K, classifiers, df, y, n_bootstrap):
    """Bootstrap AUROC and AUPRC of the ensemble's averaged probabilities.

    The probability of a label is the mean of the probabilities returned by
    the sub-models that cover that label. With all C(num_labels, K) subsets
    that is C(num_labels - 1, K - 1) models per label (10 for six labels and
    K = 3). Labels covered by no sub-model get probability 0.

    Probabilities are computed once for ``df``; every bootstrap round then
    resamples (probability, label) rows with replacement, seeded with
    ``random_state=b`` for round ``b``.

    Args:
        K: number of labels per sub-model (kept for interface compatibility).
        classifiers: list of ``(clf, support, remaining_label_index)`` tuples;
            ``support`` selects the feature columns of ``df`` for ``clf`` and
            ``remaining_label_index`` gives the global label positions of its
            outputs, in output order.
        df: feature DataFrame of the evaluation samples.
        y: label DataFrame; rows are matched to ``df`` by index label.
        n_bootstrap: number of bootstrap rounds.

    Returns:
        tuple: ``(overall, class_1, ..., class_n)`` with one dict per label
        column of ``y`` (seven elements for the six-label data set). Every
        dict maps 'auc' (AUROC) and 'prc' (AUPRC) to lists with one value per
        round; ``overall`` holds the micro-averaged values.
    """
    num_rows = len(df)
    n_classes = y.shape[1]

    bootstrap_stats = {'auc': [], 'prc': []}
    class_stats = [{'auc': [], 'prc': []} for _ in range(n_classes)]

    # Accumulate in float: an integer buffer would truncate every probability
    # below 1.0 to 0 and make all scores constant.
    proba_sum = np.zeros((num_rows, n_classes), dtype=float)
    coverage = np.zeros(n_classes, dtype=float)  # sub-models voting on each label
    for clf, support, remaining_label_index in classifiers:
        clf_predictions_proba, _ = predict_Multilabel(clf, df.loc[:, support])
        columns = list(remaining_label_index)
        proba_sum[:, columns] += clf_predictions_proba
        coverage[columns] += 1
    # Mean over the covering sub-models; uncovered labels keep probability 0.
    ensemble_proba = np.divide(
        proba_sum, coverage, out=np.zeros_like(proba_sum), where=coverage > 0
    )
    # Align the labels with the row order of df (same lookup as y.loc[...]).
    y_aligned = y.loc[df.index].values

    for b in range(n_bootstrap):
        # resample() draws one set of row positions and applies it to both arrays.
        proba, sample_labels_V = resample(
            ensemble_proba, y_aligned, n_samples=num_rows, replace=True, random_state=b
        )

        # Micro-averaged AUROC / AUPRC over all (row, label) cells.
        micro_auc = roc_auc_score(sample_labels_V.ravel(), proba.ravel(), average='micro')
        precision_curve, recall_curve, _ = precision_recall_curve(sample_labels_V.ravel(), proba.ravel())
        bootstrap_stats['auc'].append(micro_auc)
        bootstrap_stats['prc'].append(auc(recall_curve, precision_curve))

        # Per-label AUROC / AUPRC.
        for i, stats in enumerate(class_stats):
            fpr, tpr, _ = roc_curve(sample_labels_V[:, i], proba[:, i])
            stats['auc'].append(auc(fpr, tpr))
            class_precision, class_recall, _ = precision_recall_curve(sample_labels_V[:, i], proba[:, i])
            stats['prc'].append(auc(class_recall, class_precision))

    return (bootstrap_stats, *class_stats)

# Automatically identify continuous and binary variables.
continuous_vars = []
binary_vars = []
for col in X1.columns:
    # A column is binary when it is numeric and takes exactly the two values
    # 0 and 1; every other column is treated as continuous.
    if X1[col].dtype.kind in 'biufc' and X1[col].nunique() == 2 and set(X1[col].unique()) == {0, 1}:
        binary_vars.append(col)
    else:
        continuous_vars.append(col)
# Variable groups, built once after every column has been classified.
groups = {
    'Continuous': continuous_vars,
    'Binary': binary_vars
}

# Apply the log(1 + x) transformation to the continuous variables.
log_X1 = X1.copy()
if continuous_vars:
    log_X1[continuous_vars] = np.log1p(X1[continuous_vars])

# Split BEFORE fitting the scaler so the held-out rows cannot influence the
# min/max used for scaling (fitting on all rows leaks test-set statistics).
# The split depends only on the row count and random_state, so the rows that
# land in each part are the same as when splitting the scaled frame.
X_train, X_test, y_train, y_test = train_test_split(log_X1, y, test_size=0.20, random_state=5)
X_train, X_test = X_train.copy(), X_test.copy()
X_scaled = log_X1.copy()

# Min-max scale the continuous variables with training-set statistics only.
# Held-out values may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler()
if continuous_vars:
    scaler.fit(X_train[continuous_vars])
    for _frame in (X_train, X_test, X_scaled):
        _frame[continuous_vars] = scaler.transform(_frame[continuous_vars])

# Containers for the trained sub-models and for the per-label RFECV feature masks.
classifiers1, label_supports = [], []

# Run RFECV feature selection once per label column and keep each boolean
# support mask, in label order.
for j in range(len(y_train.columns)):
    rfecv = RFECV(estimator=classifier1, step=1, cv=5, scoring='accuracy', n_jobs=-1)
    # fit() returns the fitted selector, so the mask can be read off directly.
    label_supports.append(rfecv.fit(X_train, y_train.iloc[:, j]).support_)

# Iterate through all combinations of C(num_labels,K) categories and keep,
# for each combination, the fold model with the lowest Hamming loss.
for combo in combinations:
    combo_labels = list(combo)
    # Positions (within `labels`) of the labels belonging to this combination.
    remaining_label_index = [i for i, label in enumerate(labels) if label in combo]

    # Keep only the training rows that carry at least one label of the combination.
    has_combo_label = y_train.loc[:, combo_labels].sum(axis=1) > 0
    X1_train = X_train.loc[has_combo_label]
    y1_train = y_train.loc[has_combo_label, combo_labels]

    # Combine the support features of all labels involved in the current combination.
    combo_support = np.logical_or.reduce([label_supports[j] for j in remaining_label_index])
    # The selected columns do not depend on the fold, so resolve them once.
    selected_columns = X1_train.columns[combo_support]

    best_clf = None
    # Start from +inf so the first fold always becomes the incumbent; the previous
    # sentinel of 1.0 could silently drop a combination whose folds all scored 1.0.
    best_hamming_loss = np.inf
    for train_index, test_index in kf.split(X1_train):
        X_T_train, X_T_test = X1_train.iloc[train_index], X1_train.iloc[test_index]
        y_T_train, y_T_test = y1_train.iloc[train_index], y1_train.iloc[test_index]
        # Use the combined support vector (boolean mask) to select features.
        X_T_train_selected = X_T_train.loc[:, selected_columns]
        X_T_test_selected = X_T_test.loc[:, selected_columns]
        # Train the classifiers.
        pos_weights = weight_Class (y_T_train)
        # Entry (a, b) is the fraction of training rows in which labels a and b are both 1.
        co_occurrence_matrix = np.dot(y_T_train.values.T, y_T_train.values) / y_T_train.values.shape[0]
        model = GRIDSEARCH (X_T_train_selected, y_T_train, pos_weights, co_occurrence_matrix)
        y_pred_proba, y_pred = predict_Multilabel(model, X_T_test_selected)

        hamming_loss_val = hamming_loss(y_T_test, y_pred)
        # If the current model's Hamming loss is smaller, then update the best model.
        if hamming_loss_val < best_hamming_loss:
            best_hamming_loss = hamming_loss_val
            best_clf = (model, combo_support, remaining_label_index)
    # Explicit None check: do not rely on the truthiness of the stored tuple.
    if best_clf is not None:
        classifiers1.append(best_clf)

# Use the prediction functions for validation with bootstrap (600 resamples each).
warnings.filterwarnings('ignore', category=UserWarning)

# predict_with_rfecv_bootstrap on the test split.
(
    bootstrap_stats,
    bootstrap_stats_class1,
    bootstrap_stats_class2,
    bootstrap_stats_class3,
    bootstrap_stats_class4,
    bootstrap_stats_class5,
    bootstrap_stats_class6,
) = predict_with_rfecv_bootstrap(K, classifiers1, X_test, y_test, n_bootstrap=600)

# predict_with_rfecv_bootstrap on the training split.
(
    bootstrap_stat,
    bootstrap_stat_class1,
    bootstrap_stat_class2,
    bootstrap_stat_class3,
    bootstrap_stat_class4,
    bootstrap_stat_class5,
    bootstrap_stat_class6,
) = predict_with_rfecv_bootstrap(K, classifiers1, X_train, y_train, n_bootstrap=600)

# predict_with_rfecv_bootstrap_RPC on the test split (index reset to 0..n-1).
(
    strap_stats,
    strap_stats_class1,
    strap_stats_class2,
    strap_stats_class3,
    strap_stats_class4,
    strap_stats_class5,
    strap_stats_class6,
) = predict_with_rfecv_bootstrap_RPC(
    K, classifiers1, X_test.reset_index(drop=True), y_test.reset_index(drop=True), n_bootstrap=600
)

# predict_with_rfecv_bootstrap_RPC on the training split (index reset to 0..n-1).
(
    strap_stat,
    strap_stat_class1,
    strap_stat_class2,
    strap_stat_class3,
    strap_stat_class4,
    strap_stat_class5,
    strap_stat_class6,
) = predict_with_rfecv_bootstrap_RPC(
    K, classifiers1, X_train.reset_index(drop=True), y_train.reset_index(drop=True), n_bootstrap=600
)

# Print the metric results.
# Every line reports the mean and the standard deviation of one metric over the
# bootstrap resamples. The report is table-driven so that all labels are built
# the same way; this fixes the AUPRC standard deviation being labelled "Std-F1",
# the "accyracy" / "alidation" typos, the stray "cross-validation" wording and
# the inconsistent spacing of the hand-written lines.
def _print_metric(prefix, metric_name, phase, values):
    """Print 'Ave' (mean) and 'Std' (standard deviation) of `values` on one line.

    prefix      -- scope tag such as "micro-", "subset-" or "subclass3-".
    metric_name -- human-readable metric name used in the label.
    phase       -- "external" or "internal".
    values      -- sequence of per-resample metric values.
    """
    print(f"{prefix} Ave-{metric_name} in {phase} validation: {np.mean(values):.4f}",
          f"{prefix} Std-{metric_name} in {phase} validation: {np.std(values):.4f}")


# (prefix, label, dictionary key, read from the *_RPC result?)
_OVERALL_METRICS = [
    ("micro-", "hamming distance", "han", False),
    ("micro-", "sensitivity", "recall", False),
    ("micro-", "AUROC", "auc", True),
    ("micro-", "precision", "precision", False),
    ("subset-", "accuracy", "accuracy", False),
    ("micro-", "F1", "F1", False),
    ("micro-", "AUPRC", "prc", True),
]

# (label, dictionary key, read from the *_RPC result?)
_SUBCLASS_METRICS = [
    ("specificity", "specificity", False),
    ("sensitivity", "recall", False),
    ("AUROC", "auc", True),
    ("precision", "precision", False),
    ("accuracy", "accuracy", False),
    ("F1", "F1", False),
    ("AUPRC", "prc", True),
]

# Overall metrics: external (test split) first, then internal (training split).
for _phase, _stats, _rpc_stats in [
    ("external", bootstrap_stats, strap_stats),
    ("internal", bootstrap_stat, strap_stat),
]:
    for _prefix, _metric_name, _key, _from_rpc in _OVERALL_METRICS:
        _source = _rpc_stats if _from_rpc else _stats
        _print_metric(_prefix, _metric_name, _phase, _source[_key])

# Per-subclass metrics: subclass1..6 external, then subclass1..6 internal.
for _phase, _class_stats, _class_rpc_stats in [
    ("external",
     [bootstrap_stats_class1, bootstrap_stats_class2, bootstrap_stats_class3,
      bootstrap_stats_class4, bootstrap_stats_class5, bootstrap_stats_class6],
     [strap_stats_class1, strap_stats_class2, strap_stats_class3,
      strap_stats_class4, strap_stats_class5, strap_stats_class6]),
    ("internal",
     [bootstrap_stat_class1, bootstrap_stat_class2, bootstrap_stat_class3,
      bootstrap_stat_class4, bootstrap_stat_class5, bootstrap_stat_class6],
     [strap_stat_class1, strap_stat_class2, strap_stat_class3,
      strap_stat_class4, strap_stat_class5, strap_stat_class6]),
]:
    for _class_number, (_stats, _rpc_stats) in enumerate(zip(_class_stats, _class_rpc_stats), start=1):
        for _metric_name, _key, _from_rpc in _SUBCLASS_METRICS:
            _source = _rpc_stats if _from_rpc else _stats
            _print_metric(f"subclass{_class_number}-", _metric_name, _phase, _source[_key])


In [None]:
#This section offers an MLP-based feature selection component akin to RFECV, which can replace the aforementioned RFECV-LR functionality.
#This is only a component
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.feature_selection import SelectKBest, f_regression, mutual_info_classif, f_classif
from sklearn.metrics import RocCurveDisplay, roc_curve, auc, roc_auc_score
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

#Data load
# The metric helpers used below are imported here so this cell does not depend
# on another cell having been run first.
from sklearn.metrics import accuracy_score, confusion_matrix, precision_recall_fscore_support

filepath = "C:\\Users\\PS\\Desktop\\Open code\\Demo.csv" #Set the user's own path
df = pd.read_csv(filepath, header=0, encoding="gbk")
X1 = df.drop(["Pancreas disease", "Biliary tract disease", "Gastroduodenal disease", "Intestinal tract disease", "liver disease", "others"], axis=1)
# Same six label columns as the other cells ("liver disease" was previously
# listed twice and "Pancreas disease" was missing).
y = df[["Pancreas disease", "Intestinal tract disease", "liver disease", "Biliary tract disease", "Gastroduodenal disease", "others"]]
# NOTE(review): the scoring below (mutual_info_classif, average='binary',
# predict_proba(...)[:, 1], roc_curve) appears to assume a single binary target,
# while y holds all six label columns. Presumably y should be narrowed to the
# label of interest before running this component -- TODO confirm.

N=5
kf = KFold(n_splits=N, shuffle=True, random_state=3)
# Per-fold metric buffers, overwritten for every candidate k.
test_acc = np.zeros(N)
train_acc = np.zeros(N)
precision = np.zeros(N)
F1 = np.zeros(N)
recall = np.zeros(N)
specificity = np.zeros(N)
auc1 = np.zeros(N)

# Candidate numbers of selected features: K_MIN, ..., K_MAX - 1.
K_MIN, K_MAX = 40, 88
# One feature mask per candidate k. Named `feature_masks` rather than `list`,
# which shadowed the builtin and broke later `list(...)` calls in the same kernel.
feature_masks = []
accuracy = np.zeros(K_MAX - K_MIN) #Ensure the feature selection scope is consistent.
#MLP
classifier = MLPClassifier(activation='logistic', solver='adam', alpha=1e-2, hidden_layer_sizes=(13,), learning_rate='constant', max_iter=20000)

for k in range(K_MIN, K_MAX):
    selector = SelectKBest(score_func=mutual_info_classif, k=k)
    X_new = selector.fit_transform(X1, y) # Generate new feature columns.
    mask = selector.get_support() # Get the feature mask.
    new_features = X1.columns[mask] # Select the important features.
    X = df[new_features]

    for j, (train_index, test_index) in enumerate(kf.split(X)):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        classifier.fit(X_train, y_train)
        pred_train = classifier.predict(X_train)
        pred_test = classifier.predict(X_test)
        cm = confusion_matrix(y_test, pred_test)
        #Model Evaluation Metrics.
        train_acc[j] = accuracy_score(y_train, pred_train)
        test_acc[j] = accuracy_score(y_test, pred_test)
        precision[j], recall[j], F1[j], _ = precision_recall_fscore_support(np.array(y_test), np.array(pred_test), average='binary')
        # Specificity = TN / (TN + FP), read from this fold's confusion matrix
        # (the original referenced `cm1`, which this cell never defines).
        specificity[j] = cm[0, 0] / (cm[0, 0] + cm[0, 1])

        score_lr = classifier.predict_proba(X_test)[:, 1]
        fpr_lr, tpr_lr, thres_lr = roc_curve(y_test, score_lr, )
        auc1[j] = auc(fpr_lr, tpr_lr)

    # Mean test accuracy over the folds for this k.
    accuracy[k - K_MIN] = np.mean(test_acc)
    feature_masks.append(mask)

max_index = np.argmax(accuracy)#Select the optimal feature subset based on accuracy.
new_features_ = X1.columns[feature_masks[max_index]]
print("Optimal number of features: %d" % len(new_features_))

In [None]:
#This is the code for disease pre-diagnosis using the Ensemble learning-DNN approach.
#This code is only available for Ensemble learning-C(num_labels,K=num_labels-1)
import numpy as np
import pandas as pd
import random
import itertools
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import warnings
matplotlib.rcParams['font.sans-serif']=['SimHei']
matplotlib.rcParams['axes.unicode_minus']= False
matplotlib.rcParams['font.size']= 11
from pandas import DataFrame
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier
from sklearn import metrics
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.metrics import confusion_matrix
from sklearn.metrics import RocCurveDisplay, roc_curve, auc, roc_auc_score
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.model_selection import train_test_split, KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import RFECV
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import hamming_loss, f1_score, precision_score, precision_recall_curve, recall_score
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from sklearn.utils import resample

# Variable initialization
#Ensemble learning-C(num_labels,K=num_labels-1)
K = 5 #K=num_labels-1

# This section imports the data.
filepath = "C:\\Users\\PS\\Desktop\\Open code\\Demo.csv" #Set the user's own path
df = pd.read_csv(filepath, header=0, encoding="gbk")
# The six label columns, in the order used for y.
label_columns = [
    "Pancreas disease",
    "Intestinal tract disease",
    "liver disease",
    "Biliary tract disease",
    "Gastroduodenal disease",
    "others",
]
X1 = df.drop(columns=label_columns)  # everything that is not a label is a feature
y = df[label_columns]

# This section generates all possible C(num_labels,K) combinations.
labels = list(y.columns)
num_labels = len(labels)
# itertools.combinations yields every K-sized tuple of label names.
combinations = list(itertools.combinations(labels, K))

#N-fold cross-validation
N = 5
kf = KFold(n_splits=N, shuffle=True, random_state=3)

# Base estimator for RFECV. Logistic regression is selected here; models such as
# random forests, XGBoost, or a custom neural network can be used instead.
classifier1 = LogisticRegression(
    solver='lbfgs',
    penalty='l2',
    dual=False,
    tol=1e-3,
    C=1.0,
    fit_intercept=True,
    intercept_scaling=1,
    class_weight='balanced',
    random_state=None,
    max_iter=100,
    verbose=0,
    warm_start=False,
    n_jobs=-1,
)

#weight_Class reflects the class weights.
def weight_Class (y):
    """Return one positive-class weight per label column of `y`.

    For every column the weight is ``len(y) / (number of rows equal to 1)``.
    A column with no positive rows gets weight 0; this case is decided before
    dividing, so no divide-by-zero RuntimeWarning is emitted.

    Parameters:
        y: pandas DataFrame of labels; values are cast to int and must be
           non-negative (required by ``np.bincount``).

    Returns:
        list of numpy floats, in the column order of `y`.
    """
    total_samples = len(y)
    pos_weights = []
    for column in y.columns:
        # counts[0] / counts[1]: number of rows equal to 0 / 1 in this column.
        counts = np.bincount(y[column].astype(int), minlength=2)
        positives = counts[1]
        if positives > 0:
            # Python int / numpy int yields a numpy float64, as before.
            pos_weights.append(total_samples / positives)
        else:
            pos_weights.append(np.float64(0))

    return pos_weights

#Label Co-occurrence Adjustment Layer
class FeatureAdjustmentLayer(nn.Module):
    """Label co-occurrence adjustment layer.

    Projects hidden features to one score per label with a linear layer, then
    multiplies the scores by a fixed (non-trainable) co-occurrence matrix.
    """

    def __init__(self, hidden_size, num_labels, co_occurrence_matrix):
        """Store the co-occurrence matrix and build the hidden-to-label projection.

        co_occurrence_matrix must be a NumPy array; it is converted to float32
        and stored with requires_grad=False.
        """
        super().__init__()
        fixed_matrix = torch.from_numpy(co_occurrence_matrix).to(torch.float32)
        self.co_occurrence_matrix = nn.Parameter(fixed_matrix, requires_grad=False)
        # Linear map from hidden_size to num_labels.
        self.map_to_labels = nn.Linear(hidden_size, num_labels)

    def forward(self, x):
        """Return map_to_labels(x) multiplied by the co-occurrence matrix."""
        label_scores = self.map_to_labels(x)  # [batch_size, num_labels]
        # The matrix must have num_labels rows for the product to be defined.
        return label_scores @ self.co_occurrence_matrix

#DNN for MultiLabelClassification。
class MultiLabelClassifier(nn.Module):
    """DNN for multi-label classification.

    Two ReLU-activated fully connected layers, a FeatureAdjustmentLayer, and a
    final linear layer followed by a sigmoid. forward() therefore returns
    per-label values in (0, 1), not raw logits.
    """

    def __init__(self, input_size, hidden_size, num_labels, co_occurrence_matrix):
        """Build the layers; co_occurrence_matrix is passed to the adjustment layer."""
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.adjustment_layer = FeatureAdjustmentLayer(hidden_size, num_labels, co_occurrence_matrix)
        # The adjustment layer emits num_labels values, so fc3 maps num_labels -> num_labels.
        self.fc3 = nn.Linear(num_labels, num_labels)

    def forward(self, x):
        """Return one sigmoid-activated output per label for each input row."""
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        adjusted = self.adjustment_layer(hidden)
        return torch.sigmoid(self.fc3(adjusted))

#This function defines the basic method of model training.
def Fit_MLP(X, y, pos_weights, co_occurrence_matrix, num_epochs, hidden_size=64, learning_rate=0.01):
    """Train a MultiLabelClassifier on (X, y) and return the fitted model.

    Parameters:
        X: pandas DataFrame of features (read through ``X.values``).
        y: pandas DataFrame of labels, one column per label.
        pos_weights: sequence with one positive-class weight per label column.
        co_occurrence_matrix: NumPy array handed to the model's adjustment layer.
        num_epochs: number of passes over the training data.
        hidden_size: width of the hidden layers.
        learning_rate: Adam learning rate.

    Returns:
        The trained MultiLabelClassifier, on CUDA when available, otherwise CPU.

    The model's forward pass already ends with a sigmoid, so the loss used here
    is a weighted binary cross-entropy on probabilities. The previous
    nn.BCEWithLogitsLoss applied a second sigmoid to those probabilities.
    """
    #device set
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # float32 to match the model outputs (a list of numpy floats would give float64).
    pos_weights_tensor = torch.tensor(pos_weights, dtype=torch.float32, device=device)
    X_Ten = torch.tensor(X.values, dtype=torch.float32).to(device)
    y_Ten = torch.tensor(y.values, dtype=torch.float32).to(device)
    dataset = TensorDataset(X_Ten, y_Ten)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

    #Initialize the model; the co-occurrence matrix is converted to a tensor inside it.
    model = MultiLabelClassifier(X.shape[1], hidden_size, y.shape[1], co_occurrence_matrix).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for _ in range(num_epochs):
        # The batches are slices of tensors that already live on `device`.
        for inputs, targets in dataloader:
            optimizer.zero_grad()
            probabilities = model(inputs)
            # Per-element weight: pos_weight where the target is 1, and 1 where it is 0.
            # This gives the same weighting as BCEWithLogitsLoss(pos_weight=...).
            element_weights = 1.0 + (pos_weights_tensor - 1.0) * targets
            loss = F.binary_cross_entropy(probabilities, targets, weight=element_weights)
            loss.backward()
            optimizer.step()

    return model

#This is a self-defined GRIDSEARCH function
def GRIDSEARCH (X_train, y_train, pos_weights, co_occurrence_matrix):
    """Try every setting of a fixed hyper-parameter grid and keep the best model.

    A model is trained with ``Fit_MLP`` for each (hidden_size, learning_rate,
    num_epochs) triple. The score of a model is the number of label cells it
    gets wrong on the data it was trained on; the first model reaching the
    lowest score is returned.
    """
    # The parameter grid, visited in the order hidden_size -> learning_rate -> num_epochs.
    hidden_sizes = (8, 16, 32)
    learning_rates = (0.01, 0.05)
    epoch_counts = (10, 20, 30)
    targets = y_train.values

    winner = None
    fewest_errors = np.inf
    for width, rate, n_epochs in itertools.product(hidden_sizes, learning_rates, epoch_counts):
        candidate = Fit_MLP(X_train, y_train, pos_weights, co_occurrence_matrix, n_epochs, width, rate)
        _, hard_labels = predict_Multilabel(candidate, X_train)
        n_errors = np.sum(targets != hard_labels)
        if n_errors < fewest_errors:
            winner, fewest_errors = candidate, n_errors

    return winner

#This function defines the basic method for multi-label prediction.
def predict_Multilabel(classifier, X):
    """Run ``classifier`` over ``X`` and return its outputs plus 0/1 decisions.

    Args:
        classifier: a torch module; it is moved to the selected device and
            switched to evaluation mode.
        X: table whose ``.values`` are fed to the classifier as float32.

    Returns:
        ``(outputs, decisions)``: the classifier outputs of all rows stacked in
        input order, and an integer array that is 1 where an output is
        strictly greater than 0.5 and 0 elsewhere.
    """
    target_device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    classifier = classifier.to(target_device)
    features = torch.tensor(X.values, dtype=torch.float32).to(target_device)
    # shuffle=False keeps the output rows aligned with the rows of X.
    batches = DataLoader(TensorDataset(features), batch_size=32, shuffle=False)

    classifier.eval()
    with torch.no_grad():
        batch_outputs = [
            classifier(inputs.to(target_device)).cpu().numpy()
            for (inputs,) in batches
        ]

    probabilities = np.concatenate(batch_outputs)
    decisions = (probabilities > 0.5).astype(int)
    return probabilities, decisions

#This function defines the basic method for multi-label prediction with voting. 
#Compared to the approach for C(num_labels,K<num_labels-1), there are slight differences here.
def predict_with_rfecv(K, classifiers, df, num_labels):
    """Predict a 0/1 label vector for every row of ``df`` by ensemble voting.

    Args:
        K: number of labels each base classifier predicts. Kept for interface
            compatibility; the positions a classifier writes to are derived
            from its ``remaining_label_index``.
        classifiers: sequence of ``(model, support, remaining_label_index)``
            tuples. ``support`` selects the model's input features from a row;
            ``remaining_label_index`` lists the positions (in the full label
            vector) of the labels the model does NOT predict.
        df: feature table, one sample per row.
        num_labels: length of the full label vector.

    Returns:
        ``np.ndarray`` with one row per row of ``df`` and ``num_labels``
        columns of 0/1 votes.
    """
    n_classifiers = len(classifiers)
    # Slightly lower the voting threshold to suit multi-label classification.
    vote_threshold = (n_classifiers - 2) / 2

    # Positions of the full label vector that each classifier fills in.
    # Scattering into these positions equals np.insert(pred, idx, 0) when a
    # single label is left out, and stays correct when several are left out
    # (np.insert would then misplace the predictions or raise IndexError).
    label_positions = np.arange(num_labels)
    kept_positions = [
        np.setdiff1d(label_positions, np.asarray(remaining, dtype=int))
        for _, _, remaining in classifiers
    ]

    vote_results_list = []  #Store the voting results for each row.
    for _, row in df.iterrows():  #Iterate through each row of df.
        predictions_R = np.zeros((n_classifiers, num_labels), dtype=int)

        for i, (clf, support, _) in enumerate(classifiers):
            # Apply the features selected by RFECV.
            X_selected_d = pd.DataFrame(row[support].values.reshape(1, -1))
            # Use the corresponding classifier to make predictions for its own labels.
            _, clf_predictions = predict_Multilabel(clf, X_selected_d)
            predictions_R[i, kept_positions[i]] = np.ravel(clf_predictions)

        # Conduct voting to obtain the final results.
        votes = predictions_R.sum(axis=0)
        vote_results_list.append((votes > vote_threshold).astype(int))

    return np.array(vote_results_list)  # Return the voting results for all rows.

#Validation function with bootstrap - for recall, specificity, accuracy, precision, and F1
def predict_with_rfecv_bootstrap(K, classifiers, df, y_test, n_bootstrap):
    """Bootstrap the ensemble's 0/1 predictions and collect classification metrics.

    Args:
        K: forwarded to ``predict_with_rfecv``.
        classifiers: forwarded to ``predict_with_rfecv``.
        df: feature table to evaluate on.
        y_test: true labels, one column per label, sharing ``df``'s index.
        n_bootstrap: number of resamples; resample ``b`` uses ``random_state=b``.

    Returns:
        A 7-tuple of dicts of lists (one entry per resample):
        the overall dict with keys ``recall``, ``accuracy``, ``precision``,
        ``F1`` (all micro-averaged except ``accuracy``, which is
        ``accuracy_score`` on the full label matrix) and ``han`` (Hamming
        loss), followed by six per-class dicts with keys ``recall``,
        ``specificity``, ``accuracy``, ``precision`` and ``F1`` for the first
        six label columns. Per-class dicts beyond the number of label columns
        stay empty.
    """
    n_reported_classes = 6  # the return signature exposes exactly six per-class dicts

    #Storage space for results
    bootstrap_stats = {name: [] for name in ('recall', 'accuracy', 'precision', 'F1', 'han')}
    class_stats = [
        {name: [] for name in ('recall', 'specificity', 'accuracy', 'precision', 'F1')}
        for _ in range(n_reported_classes)
    ]

    num_rows = len(df)
    n_classes = y_test.shape[1]

    # The prediction for a row does not depend on which resample it lands in,
    # so every row is predicted once and the resamples only re-select rows.
    base_predictions = pd.DataFrame(
        predict_with_rfecv(K, classifiers, df, n_classes), index=df.index
    )
    warnings.filterwarnings('ignore', category=UserWarning)

    for b in range(n_bootstrap):
        indices = resample(df.index, n_samples=num_rows, replace=True, random_state=b)
        y_test_bootstrap = y_test.loc[indices]
        predictions = base_predictions.loc[indices].to_numpy()

        #'han' refers to the hamming_loss
        bootstrap_stats['han'].append(hamming_loss(y_test_bootstrap, predictions))
        bootstrap_stats['F1'].append(f1_score(y_test_bootstrap, predictions, average='micro'))
        bootstrap_stats['precision'].append(precision_score(y_test_bootstrap, predictions, average='micro'))
        bootstrap_stats['accuracy'].append(accuracy_score(y_test_bootstrap, predictions))
        bootstrap_stats['recall'].append(recall_score(y_test_bootstrap, predictions, average='micro'))

        #Calculate the multi-label confusion matrix: mcm[i] is [[TN, FP], [FN, TP]] for label i.
        mcm = multilabel_confusion_matrix(y_test_bootstrap, predictions)
        # Compute the precision, recall, and F1 score for each subclass.
        precision = precision_score(y_test_bootstrap, predictions, average=None)
        recall = recall_score(y_test_bootstrap, predictions, average=None)
        f1 = f1_score(y_test_bootstrap, predictions, average=None)

        y_true_values = y_test_bootstrap.values
        for i, stats in enumerate(class_stats[:n_classes]):
            stats['precision'].append(precision[i])
            stats['recall'].append(recall[i])
            stats['F1'].append(f1[i])
            # Accuracy of the i-th label taken on its own.
            stats['accuracy'].append(accuracy_score(y_true_values[:, i], predictions[:, i]))
            # Specificity = TN / (TN + FP), both read from label i's own 2x2 matrix.
            true_negatives = mcm[i, 0, 0]
            false_positives = mcm[i, 0, 1]
            negatives = true_negatives + false_positives
            stats['specificity'].append(true_negatives / negatives if negatives > 0 else 0)

    return (bootstrap_stats, *class_stats)

#Validation function with bootstrap - only for AUROC and AUPRC
def predict_with_rfecv_bootstrap_RPC(K, classifiers, df, y, n_bootstrap):
    """Bootstrap the ensemble's averaged probabilities and collect AUROC / AUPRC.

    For every row, each classifier's outputs are written to the label positions
    it predicts, and a label's score is the mean output of the classifiers that
    predict that label.

    Args:
        K: number of labels per base classifier. Kept for interface
            compatibility; positions are derived from each classifier's
            ``remaining_label_index``.
        classifiers: sequence of ``(model, support, remaining_label_index)``
            tuples, as consumed by ``predict_with_rfecv``.
        df: feature table to evaluate on.
        y: true labels, one column per label, sharing ``df``'s index.
        n_bootstrap: number of resamples; resample ``b`` uses ``random_state=b``.

    Returns:
        A 7-tuple of dicts with keys ``auc`` and ``prc`` (lists, one entry per
        resample): the micro-averaged dict followed by six per-class dicts for
        the first six label columns.
    """
    n_reported_classes = 6  # the return signature exposes exactly six per-class dicts

    #Storage space for results
    bootstrap_stats = {'auc': [], 'prc': []}
    class_stats = [{'auc': [], 'prc': []} for _ in range(n_reported_classes)]

    num_rows = len(df)
    num_labels = y.shape[1]

    # Label positions each classifier fills in, and how many classifiers cover
    # each label. The coverage count replaces the former hard-coded divisor 5,
    # which is only right when every label is covered by exactly five models.
    label_positions = np.arange(num_labels)
    kept_positions = [
        np.setdiff1d(label_positions, np.asarray(remaining, dtype=int))
        for _, _, remaining in classifiers
    ]
    coverage = np.zeros(num_labels, dtype=float)
    for kept in kept_positions:
        coverage[kept] += 1
    divisor = np.where(coverage > 0, coverage, 1.0)  # uncovered labels keep score 0

    # A row's score does not depend on the resample, so it is computed once.
    # The buffer must be float: an int buffer truncates every probability
    # below 1 to 0.
    row_scores = np.zeros((num_rows, num_labels), dtype=float)
    for position, (_, row) in enumerate(df.iterrows()):
        summed = np.zeros(num_labels, dtype=float)
        for (clf, support, _), kept in zip(classifiers, kept_positions):
            # Apply the features selected by RFECV.
            X_selected_d = pd.DataFrame(row[support].values.reshape(1, -1))
            clf_predictions_proba, _ = predict_Multilabel(clf, X_selected_d)
            summed[kept] += np.ravel(clf_predictions_proba)
        row_scores[position, :] = summed / divisor
    row_scores = pd.DataFrame(row_scores, index=df.index)

    for b in range(n_bootstrap):
        # Bootstrap resampling allows for the selection of samples with replacement.
        indices = resample(df.index, n_samples=num_rows, replace=True, random_state=b)
        sample_labels_V = y.loc[indices].values
        all_predictions_proba = row_scores.loc[indices].to_numpy()

        # Calculate Micro-AUROC
        micro_auc = roc_auc_score(sample_labels_V.ravel(), all_predictions_proba.ravel(), average='micro')
        # Calculate Micro-AUPRC
        precisionq, recallq, _ = precision_recall_curve(sample_labels_V.ravel(), all_predictions_proba.ravel())
        micro_prc_auc = auc(recallq, precisionq)
        bootstrap_stats['auc'].append(micro_auc)
        bootstrap_stats['prc'].append(micro_prc_auc)

        #Calculate the AUROC and AUPRC for each class.
        for i, stats in enumerate(class_stats[:num_labels]):
            class_truth = sample_labels_V[:, i]
            class_scores = all_predictions_proba[:, i]
            fpr, tpr, _ = roc_curve(class_truth, class_scores)
            stats['auc'].append(auc(fpr, tpr))
            class_precision, class_recall, _ = precision_recall_curve(class_truth, class_scores)
            stats['prc'].append(auc(class_recall, class_precision))

    return (bootstrap_stats, *class_stats)
    
#Automatically identify continuous and binary variables.
# A column is binary when it is numeric and its distinct values are exactly {0, 1};
# every other column (including non-numeric ones) is placed in the continuous list.
continuous_vars = []
binary_vars = []
for col in X1.columns:
    if X1[col].dtype.kind in 'biufc' and X1[col].nunique() == 2 and set(X1[col].unique()) == {0, 1}:
        binary_vars.append(col)
    else:
        continuous_vars.append(col)

# Define variable groups. Built once after the scan, so the name also exists
# when X1 has no columns.
groups = {
    'Continuous': continuous_vars,
    'Binary': binary_vars
}

# Apply a log(1 + x) transformation and min-max scaling to the columns listed in continuous_vars.
# Columns listed in binary_vars are copied through unchanged.
log_X1 = X1.copy()
log_X1[continuous_vars] = np.log1p(X1[continuous_vars])  # log(1 + x) on the continuous columns
scaler = MinMaxScaler()
X_scaled = log_X1.copy()
# NOTE(review): the scaler is fitted on all rows before the train/test split below,
# so the min/max of the test rows influence the scaled training features - confirm this is intended.
X_scaled[continuous_vars] = scaler.fit_transform(X_scaled[continuous_vars])  # min-max scaling of the continuous columns

# Perform the train/test split: 20% of the rows are held out, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.20, random_state=5)

#Initialize the lists for classifiers and supports for each label.
classifiers1 = []   # filled by the per-combination training loop with (model, support, remaining_label_index)
label_supports = [] # one RFECV feature mask per label column, in the column order of y_train

#Perform RFECV feature selection separately for each category label.
# `classifier1` is the base estimator used by RFECV; it is defined outside this section.
for j in range(y_train.shape[1]):
    rfecv = RFECV(estimator=classifier1, step=1, cv=5, scoring='accuracy', n_jobs=-1)
    rfecv.fit(X_train, y_train.iloc[:, j])
    # support_ is the boolean mask of the X_train columns that RFECV kept for label j.
    label_supports.append(rfecv.support_)

# Iterate through all combinations of C(num_labels,K) categories.
# For each combination one model is kept and appended to classifiers1 together with
# the feature mask it was trained on and the positions of the labels it does not predict.
for combo in combinations:
    # Positions (in `labels`) of the labels that are NOT part of this combination.
    remaining_label_index = [i for i, label in enumerate(labels) if label not in combo]
    subset = pd.concat([X_train, y_train.loc[:, combo]], axis=1)
    # Keep only the training rows that carry at least one label of the combination.
    filtered_subset = subset[y_train.loc[:, combo].sum(axis=1) > 0]
    columns_to_drop = [col for col in combo if col in filtered_subset.columns]
    X1_train = filtered_subset.drop(columns_to_drop, axis=1)
    y1_train = filtered_subset.loc[:, combo]

    #Combine the support features of all labels involved in the current combination.
    # A feature is used when RFECV selected it for at least one label of the combination.
    combo_support = np.logical_or.reduce([label_supports[j] for j in range(y_train.shape[1]) if labels[j] in combo])

    best_clf = None
    # Despite its name, best_accuracy holds the lowest Hamming loss seen so far (lower is better).
    best_accuracy = 1.0
    # `kf` is the cross-validation splitter; it is defined outside this section.
    for train_index, test_index in kf.split(filtered_subset):
        X_T_train, X_T_test = X1_train.iloc[train_index], X1_train.iloc[test_index]
        y_T_train, y_T_test = y1_train.iloc[train_index], y1_train.iloc[test_index]
        # Use the combined support vector(Index vector) to select features.
        X_T_train_selected = X_T_train.loc[:, X_T_train.columns[combo_support]]
        X_T_test_selected = X_T_test.loc[:, X_T_test.columns[combo_support]]
        # Train the classifiers.
        # `weight_Class` is defined outside this section; its result is passed on as the loss weights.
        pos_weights = weight_Class (y_T_train)
        # Entry (a, b) is the fraction of fold-training rows in which labels a and b are both 1.
        co_occurrence_matrix = np.dot(y_T_train.values.T, y_T_train.values) / y_T_train.values.shape[0]
        model = GRIDSEARCH (X_T_train_selected, y_T_train, pos_weights, co_occurrence_matrix)
        y_pred_proba, y_pred = predict_Multilabel(model, X_T_test_selected)
        
        hamming_loss_val = hamming_loss(y_T_test, y_pred)
        #If the current model's Hamming Loss is smaller, then update the best model.
        # The comparison is strict, so a fold whose loss is exactly 1.0 never replaces the initial None.
        if hamming_loss_val < best_accuracy:
            best_accuracy = hamming_loss_val
            best_clf = (model, combo_support, remaining_label_index)
    # A combination for which no fold beat the initial value contributes no classifier.
    if best_clf:
        classifiers1.append(best_clf)

# Use the prediction function for validation with bootstrap
# Naming used below and in the printed report:
#   bootstrap_stats*  / strap_stats*  -> computed on the held-out split (X_test, y_test), printed as "external validation"
#   bootstrap_stat*   / strap_stat*   -> computed on the training split (X_train, y_train), printed as "internal validation"
#   bootstrap_*  hold the thresholded metrics; strap_* hold AUROC / AUPRC.
warnings.filterwarnings('ignore', category=UserWarning)
bootstrap_stats, bootstrap_stats_class1, bootstrap_stats_class2, bootstrap_stats_class3,bootstrap_stats_class4, bootstrap_stats_class5, bootstrap_stats_class6 = predict_with_rfecv_bootstrap(K, classifiers1, X_test, y_test, n_bootstrap=600)
bootstrap_stat, bootstrap_stat_class1, bootstrap_stat_class2, bootstrap_stat_class3,bootstrap_stat_class4, bootstrap_stat_class5, bootstrap_stat_class6 = predict_with_rfecv_bootstrap(K, classifiers1, X_train, y_train, n_bootstrap=600)
# The AUROC/AUPRC routine indexes rows by position, hence the reset_index(drop=True) on both inputs.
strap_stats, strap_stats_class1, strap_stats_class2, strap_stats_class3, strap_stats_class4, strap_stats_class5, strap_stats_class6 = predict_with_rfecv_bootstrap_RPC(K, classifiers1, X_test.reset_index(drop=True), y_test.reset_index(drop=True), n_bootstrap=600)
strap_stat, strap_stat_class1, strap_stat_class2, strap_stat_class3, strap_stat_class4, strap_stat_class5, strap_stat_class6 = predict_with_rfecv_bootstrap_RPC(K, classifiers1, X_train.reset_index(drop=True), y_train.reset_index(drop=True), n_bootstrap=600)

# Print the metric results
# Overall metrics: mean and standard deviation over the bootstrap resamples.
# "external validation" = held-out split (bootstrap_stats / strap_stats).
print(f"micro- Ave-hamming distance in external validation: {np.mean(bootstrap_stats['han']):.4f}", f"micro- Std-hamming distance in external validation: {np.std(bootstrap_stats['han']):.4f}")
print(f"micro- Ave-sensitivity in external validation: {np.mean(bootstrap_stats['recall']):.4f}", f"micro- Std-sensitivity in external validation: {np.std(bootstrap_stats['recall']):.4f}")
print(f"micro- Ave-AUROC in external validation: {np.mean(strap_stats['auc']):.4f}", f"micro- Std-AUROC in external validation: {np.std(strap_stats['auc']):.4f}")
print(f"micro- Ave-precision in external validation: {np.mean(bootstrap_stats['precision']):.4f}", f"micro- Std-precision in external validation: {np.std(bootstrap_stats['precision']):.4f}")
print(f"subset-Ave-accyracy in external validation: {np.mean(bootstrap_stats['accuracy']):.4f}", f"subset- Std-accuracy in external validation: {np.std(bootstrap_stats['accuracy']):.4f}")
print(f"micro- Ave-F1 in external validation: {np.mean(bootstrap_stats['F1']):.4f}", f"micro- Std-F1 in external validation: {np.std(bootstrap_stats['F1']):.4f}")
# The second value is the std of AUPRC, so it is labelled as such (it was labelled "Std-F1").
print(f"micro- Ave-AUPRC in external validation: {np.mean(strap_stats['prc']):.4f}", f"micro- Std-AUPRC in external validation: {np.std(strap_stats['prc']):.4f}")

# "internal validation" = training split (bootstrap_stat / strap_stat).
print(f"micro- Ave-hamming distance in internal validation: {np.mean(bootstrap_stat['han']):.4f}", f"micro- Std-hamming distance in internal validation: {np.std(bootstrap_stat['han']):.4f}")
print(f"micro- Ave-sensitivity in internal validation: {np.mean(bootstrap_stat['recall']):.4f}", f"micro- Std-sensitivity in internal validation: {np.std(bootstrap_stat['recall']):.4f}")
print(f"micro- Ave-AUROC in internal validation: {np.mean(strap_stat['auc']):.4f}", f"micro- Std-AUROC in internal cross-validation: {np.std(strap_stat['auc']):.4f}")
print(f"micro- Ave-precision in internal validation: {np.mean(bootstrap_stat['precision']):.4f}", f"micro- Std-precision in internal validation:  {np.std(bootstrap_stat['precision']):.4f}")
print(f"subset-Ave-accyracy in internal validation: {np.mean(bootstrap_stat['accuracy']):.4f}", f"subset- Std-accuracy in internal validation: {np.std(bootstrap_stat['accuracy']):.4f}")
print(f"micro- Ave-F1 in internal validation: {np.mean(bootstrap_stat['F1']):.4f}", f"micro- Std-F1 in internal validation: {np.std(bootstrap_stat['F1']):.4f}")
# Same labelling fix as above: this is the std of AUPRC.
print(f"micro- Ave-AUPRC in internal validation: {np.mean(strap_stat['prc']):.4f}", f"micro- Std-AUPRC in internal validation: {np.std(strap_stat['prc']):.4f}")

# Per-class metrics on the held-out split ("external validation").
# subclassN refers to the N-th label column of the y passed to the bootstrap functions.
# Each line prints the mean and the standard deviation over the bootstrap resamples.

# subclass1: first label column
print(f"subclass1- Ave-specificity in external validation: {np.mean(bootstrap_stats_class1['specificity']):.4f}", f"subclass1- Std-specificity in external validation: {np.std(bootstrap_stats_class1['specificity']):.4f}")
print(f"subclass1- Ave-sensitivity in external validation: {np.mean(bootstrap_stats_class1['recall']):.4f}", f"subclass1- Std-sensitivity in external validation: {np.std(bootstrap_stats_class1['recall']):.4f}")
print(f"subclass1- Ave-AUROC in external validation: {np.mean(strap_stats_class1['auc']):.4f}", f"subclass1- Std-AUROC in external validation: {np.std(strap_stats_class1['auc']):.4f}")
print(f"subclass1- Ave-precision in external validation: {np.mean(bootstrap_stats_class1['precision']):.4f}", f"subclass1- Std-precision in external validation: {np.std(bootstrap_stats_class1['precision']):.4f}")
print(f"subclass1- Ave-accyracy in external validation: {np.mean(bootstrap_stats_class1['accuracy']):.4f}", f"subclass1- Std-accuracy in external validation: {np.std(bootstrap_stats_class1['accuracy']):.4f}")
print(f"subclass1- Ave-F1 in external validation: {np.mean(bootstrap_stats_class1['F1']):.4f}", f"subclass1- Std-F1 in external validation: {np.std(bootstrap_stats_class1['F1']):.4f}")
print(f"subclass1- Ave-AUPRC in external validation: {np.mean(strap_stats_class1['prc']):.4f}", f"subclass1- Std-AUPRC in external validation: {np.std(strap_stats_class1['prc']):.4f}")

# subclass2: second label column
print(f"subclass2- Ave-specificity in external validation: {np.mean(bootstrap_stats_class2['specificity']):.4f}", f"subclass2- Std-specificity in external validation: {np.std(bootstrap_stats_class2['specificity']):.4f}")
print(f"subclass2- Ave-sensitivity in external validation: {np.mean(bootstrap_stats_class2['recall']):.4f}", f"subclass2- Std-sensitivity in external validation: {np.std(bootstrap_stats_class2['recall']):.4f}")
print(f"subclass2- Ave-AUROC in external validation: {np.mean(strap_stats_class2['auc']):.4f}", f"subclass2- Std-AUROC in external validation: {np.std(strap_stats_class2['auc']):.4f}")
print(f"subclass2- Ave-precision in external validation: {np.mean(bootstrap_stats_class2['precision']):.4f}", f"subclass2- Std-precision in external validation: {np.std(bootstrap_stats_class2['precision']):.4f}")
print(f"subclass2- Ave-accyracy in external validation: {np.mean(bootstrap_stats_class2['accuracy']):.4f}", f"subclass2- Std-accuracy in external validation: {np.std(bootstrap_stats_class2['accuracy']):.4f}")
print(f"subclass2- Ave-F1 in external validation: {np.mean(bootstrap_stats_class2['F1']):.4f}", f"subclass2- Std-F1 in external validation: {np.std(bootstrap_stats_class2['F1']):.4f}")
print(f"subclass2- Ave-AUPRC in external validation: {np.mean(strap_stats_class2['prc']):.4f}", f"subclass2- Std-AUPRC in external validation: {np.std(strap_stats_class2['prc']):.4f}")

# subclass3: third label column
print(f"subclass3- Ave-specificity in external validation: {np.mean(bootstrap_stats_class3['specificity']):.4f}", f"subclass3- Std-specificity in external validation: {np.std(bootstrap_stats_class3['specificity']):.4f}")
print(f"subclass3- Ave-sensitivity in external validation: {np.mean(bootstrap_stats_class3['recall']):.4f}", f"subclass3- Std-sensitivity in external validation: {np.std(bootstrap_stats_class3['recall']):.4f}")
print(f"subclass3- Ave-AUROC in external validation: {np.mean(strap_stats_class3['auc']):.4f}", f"subclass3- Std-AUROC in external validation: {np.std(strap_stats_class3['auc']):.4f}")
print(f"subclass3- Ave-precision in external validation: {np.mean(bootstrap_stats_class3['precision']):.4f}", f"subclass3- Std-precision in external validation: {np.std(bootstrap_stats_class3['precision']):.4f}")
print(f"subclass3- Ave-accyracy in external validation: {np.mean(bootstrap_stats_class3['accuracy']):.4f}", f"subclass3- Std-accuracy in external validation: {np.std(bootstrap_stats_class3['accuracy']):.4f}")
print(f"subclass3- Ave-F1 in external validation: {np.mean(bootstrap_stats_class3['F1']):.4f}", f"subclass3- Std-F1 in external validation: {np.std(bootstrap_stats_class3['F1']):.4f}")
print(f"subclass3- Ave-AUPRC in external validation: {np.mean(strap_stats_class3['prc']):.4f}", f"subclass3- Std-AUPRC in external validation: {np.std(strap_stats_class3['prc']):.4f}")

# subclass4: fourth label column
print(f"subclass4- Ave-specificity in external validation: {np.mean(bootstrap_stats_class4['specificity']):.4f}", f"subclass4- Std-specificity in external validation: {np.std(bootstrap_stats_class4['specificity']):.4f}")
print(f"subclass4- Ave-sensitivity in external validation: {np.mean(bootstrap_stats_class4['recall']):.4f}", f"subclass4- Std-sensitivity in external validation: {np.std(bootstrap_stats_class4['recall']):.4f}")
print(f"subclass4- Ave-AUROC in external validation: {np.mean(strap_stats_class4['auc']):.4f}", f"subclass4- Std-AUROC in external validation: {np.std(strap_stats_class4['auc']):.4f}")
print(f"subclass4- Ave-precision in external validation: {np.mean(bootstrap_stats_class4['precision']):.4f}", f"subclass4- Std-precision in external validation: {np.std(bootstrap_stats_class4['precision']):.4f}")
print(f"subclass4- Ave-accyracy in external validation: {np.mean(bootstrap_stats_class4['accuracy']):.4f}", f"subclass4- Std-accuracy in external validation: {np.std(bootstrap_stats_class4['accuracy']):.4f}")
print(f"subclass4- Ave-F1 in external validation: {np.mean(bootstrap_stats_class4['F1']):.4f}", f"subclass4- Std-F1 in external validation: {np.std(bootstrap_stats_class4['F1']):.4f}")
print(f"subclass4- Ave-AUPRC in external validation: {np.mean(strap_stats_class4['prc']):.4f}", f"subclass4- Std-AUPRC in external validation: {np.std(strap_stats_class4['prc']):.4f}")

# subclass5: fifth label column
print(f"subclass5- Ave-specificity in external validation:{np.mean(bootstrap_stats_class5['specificity']):.4f}", f"subclass5- Std-specificity in external validation: {np.std(bootstrap_stats_class5['specificity']):.4f}")
print(f"subclass5- Ave-sensitivity in external validation: {np.mean(bootstrap_stats_class5['recall']):.4f}", f"subclass5- Std-sensitivity in external validation:{np.std(bootstrap_stats_class5['recall']):.4f}")
print(f"subclass5- Ave-AUROC in external validation:{np.mean(strap_stats_class5['auc']):.4f}", f"subclass5- Std-AUROC in external validation: {np.std(strap_stats_class5['auc']):.4f}")
print(f"subclass5- Ave-precision in external validation: {np.mean(bootstrap_stats_class5['precision']):.4f}", f"subclass5- Std-precision in external validation: {np.std(bootstrap_stats_class5['precision']):.4f}")
print(f"subclass5- Ave-accyracy in external validation: {np.mean(bootstrap_stats_class5['accuracy']):.4f}", f"subclass5- Std-accuracy in external validation: {np.std(bootstrap_stats_class5['accuracy']):.4f}")
print(f"subclass5- Ave-F1 in external validation: {np.mean(bootstrap_stats_class5['F1']):.4f}", f"subclass5- Std-F1 in external validation: {np.std(bootstrap_stats_class5['F1']):.4f}")
print(f"subclass5- Ave-AUPRC in external validation: {np.mean(strap_stats_class5['prc']):.4f}", f"subclass5- Std-AUPRC in external validation: {np.std(strap_stats_class5['prc']):.4f}")

# subclass6: sixth label column
print(f"subclass6- Ave-specificity in external validation: {np.mean(bootstrap_stats_class6['specificity']):.4f}", f"subclass6- Std-specificity in external validation: {np.std(bootstrap_stats_class6['specificity']):.4f}")
print(f"subclass6- Ave-sensitivity in external validation: {np.mean(bootstrap_stats_class6['recall']):.4f}", f"subclass6- Std-sensitivity in external validation: {np.std(bootstrap_stats_class6['recall']):.4f}")
print(f"subclass6- Ave-AUROC in external validation: {np.mean(strap_stats_class6['auc']):.4f}", f"subclass6- Std-AUROC in external validation: {np.std(strap_stats_class6['auc']):.4f}")
print(f"subclass6- Ave-precision in external validation: {np.mean(bootstrap_stats_class6['precision']):.4f}", f"subclass6- Std-precision in external validation: {np.std(bootstrap_stats_class6['precision']):.4f}")
print(f"subclass6- Ave-accyracy in external validation: {np.mean(bootstrap_stats_class6['accuracy']):.4f}", f"subclass6- Std-accuracy in external validation: {np.std(bootstrap_stats_class6['accuracy']):.4f}")
print(f"subclass6- Ave-F1 in external validation: {np.mean(bootstrap_stats_class6['F1']):.4f}", f"subclass6- Std-F1 in external validation: {np.std(bootstrap_stats_class6['F1']):.4f}")
print(f"subclass6- Ave-AUPRC in external validation: {np.mean(strap_stats_class6['prc']):.4f}", f"subclass6- Std-AUPRC in external validation: {np.std(strap_stats_class6['prc']):.4f}")

# Internal-validation report: for every subclass, print the mean and the standard
# deviation of each stored metric, one line per metric.
# AUROC and AUPRC are read from strap_stat_class<k>; every other metric is read from
# bootstrap_stat_class<k>.
# All labels are produced from a single template so that spelling, spacing and
# punctuation are identical for every subclass and metric.
_internal_stat_pairs = [
    (bootstrap_stat_class1, strap_stat_class1),
    (bootstrap_stat_class2, strap_stat_class2),
    (bootstrap_stat_class3, strap_stat_class3),
    (bootstrap_stat_class4, strap_stat_class4),
    (bootstrap_stat_class5, strap_stat_class5),
    (bootstrap_stat_class6, strap_stat_class6),
]
# (printed label, dictionary key, True when the value lives in strap_stat_class<k>)
_internal_metric_specs = [
    ("specificity", "specificity", False),
    ("sensitivity", "recall", False),
    ("AUROC", "auc", True),
    ("precision", "precision", False),
    ("accuracy", "accuracy", False),
    ("F1", "F1", False),
    ("AUPRC", "prc", True),
]
for _subclass_no, (_bootstrap_stat, _strap_stat) in enumerate(_internal_stat_pairs, start=1):
    for _metric_label, _metric_key, _use_strap in _internal_metric_specs:
        _values = (_strap_stat if _use_strap else _bootstrap_stat)[_metric_key]
        print(f"subclass{_subclass_no}- Ave-{_metric_label} in internal validation: {np.mean(_values):.4f}",
              f"subclass{_subclass_no}- Std-{_metric_label} in internal validation: {np.std(_values):.4f}")


In [None]:
#This is the code for disease pre-diagnosis using the Ensemble learning-DNN approach.
#This code is only available for Ensemble learning-C(num_labels,K=num_labels)
import numpy as np
import pandas as pd
import random
import itertools
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import warnings
matplotlib.rcParams['font.sans-serif']=['SimHei']
matplotlib.rcParams['axes.unicode_minus']= False
matplotlib.rcParams['font.size']= 11
from pandas import DataFrame
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.metrics import confusion_matrix
from sklearn.metrics import RocCurveDisplay, roc_curve, auc, roc_auc_score
from sklearn.metrics import hamming_loss, f1_score, precision_score, recall_score
from sklearn.metrics import multilabel_confusion_matrix, f1_score, precision_recall_curve
from sklearn.model_selection import train_test_split, KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import RFECV
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from sklearn.utils import resample

# Load the dataset from a CSV file (GBK-encoded, first row is the header).
filepath = "C:\\Users\\PS\\Desktop\\Open code\\Demo.csv" #Set the user's own path.
df = pd.read_csv(filepath, header=0, encoding="gbk")
# Targets: the six disease indicator columns; their order here is the class order of y.
y = df[["Pancreas disease", "Intestinal tract disease", "liver disease", "Biliary tract disease", "Gastroduodenal disease", "others"]]
# Features: every column that is not one of the six targets.
X1 = df.drop(columns=["Pancreas disease", "Biliary tract disease", "Gastroduodenal disease", "Intestinal tract disease", "liver disease", "others"])

# Build the label combinations. In this variant K equals num_labels, so
# itertools.combinations yields exactly one combination containing every label.
labels = list(y.columns)
num_labels = len(labels)
combinations = list(itertools.combinations(labels, num_labels))

# N-fold cross-validation splitter (shuffled, fixed seed).
N = 5
kf = KFold(shuffle=True, random_state=3, n_splits=N)

# Logistic regression; it is the estimator handed to RFECV for feature selection.
classifier1 = LogisticRegression(
    solver='lbfgs',
    penalty='l2',
    dual=False,
    tol=1e-3,
    C=1.0,
    fit_intercept=True,
    intercept_scaling=1,
    class_weight='balanced',
    random_state=None,
    max_iter=100,
    verbose=0,
    warm_start=False,
    n_jobs=-1,
)


#weight_Class computes one positive-class weight per label column.
def weight_Class(y):
    """Return the positive-class weight of every label column of ``y``.

    For each column the weight is ``len(y) / (number of rows equal to 1)``.
    A column without any positive row gets weight 0 instead of infinity.

    Args:
        y: DataFrame whose columns hold non-negative integer-like labels
           (values are cast with ``astype(int)`` before counting).

    Returns:
        list with one float64 weight per column, in column order.
    """
    total_samples = len(y)
    pos_weights = []
    for column in y.columns:
        # counts[0] = number of 0-rows, counts[1] = number of 1-rows.
        counts = np.bincount(y[column].astype(int), minlength=2)
        # Divide only where the count is non-zero; the other entries keep the
        # pre-filled 0, so no divide-by-zero RuntimeWarning is ever raised.
        weights = np.zeros(counts.shape, dtype=float)
        np.divide(total_samples, counts, out=weights, where=counts > 0)
        pos_weights.append(weights[1])

    return pos_weights

#Label co-occurrence adjustment layer.
class FeatureAdjustmentLayer(nn.Module):
    """Project hidden features onto the label space and mix them with a fixed matrix.

    The co-occurrence matrix is stored as a parameter with ``requires_grad=False``,
    so it receives no gradient; only the linear projection is trainable.
    """

    def __init__(self, hidden_size, num_labels, co_occurrence_matrix):
        super().__init__()
        # NumPy array -> float32 tensor, kept constant during training.
        fixed_matrix = torch.from_numpy(co_occurrence_matrix).float()
        self.co_occurrence_matrix = nn.Parameter(fixed_matrix, requires_grad=False)
        # Linear map from hidden_size features to one value per label.
        self.map_to_labels = nn.Linear(hidden_size, num_labels)

    def forward(self, x):
        # [batch_size, hidden_size] -> [batch_size, num_labels]
        label_features = self.map_to_labels(x)
        # Right-multiply by the co-occurrence matrix; its row count must equal num_labels.
        return label_features @ self.co_occurrence_matrix

#DNN for multi-label classification.
class MultiLabelClassifier(nn.Module):
    """Two ReLU hidden layers, a co-occurrence adjustment layer and a sigmoid output head.

    ``forward`` returns one value in (0, 1) per label, i.e. the sigmoid has
    already been applied to the output.
    """

    def __init__(self, input_size, hidden_size, num_labels, co_occurrence_matrix):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.adjustment_layer = FeatureAdjustmentLayer(hidden_size, num_labels, co_occurrence_matrix)
        # The adjustment layer emits num_labels values, so the head maps num_labels -> num_labels.
        self.fc3 = nn.Linear(num_labels, num_labels)

    def forward(self, x):
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        adjusted = self.adjustment_layer(hidden)
        return torch.sigmoid(self.fc3(adjusted))

#This function defines the basic method of model training.
def Fit_MLP(X, y, pos_weights, co_occurrence_matrix, num_epochs, hidden_size=64, learning_rate=0.01):
    """Train a MultiLabelClassifier with a positively weighted binary cross-entropy.

    MultiLabelClassifier.forward already applies a sigmoid, so the loss is
    computed on probabilities with ``F.binary_cross_entropy``. A logits-based
    loss would apply a second sigmoid to the model output.

    Args:
        X: feature DataFrame (rows = samples).
        y: label DataFrame of 0/1 indicators, one column per label.
        pos_weights: one weight per label, applied to the positive targets.
        co_occurrence_matrix: NumPy array forwarded to the model constructor.
        num_epochs: number of passes over the data.
        hidden_size: width of the hidden layers.
        learning_rate: Adam learning rate.

    Returns:
        The trained model, on CUDA device 0 when available, otherwise on the CPU.
    """
    #device set
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Explicit float32: a list of NumPy float64 scalars would otherwise give a float64 tensor.
    pos_weights_tensor = torch.tensor(pos_weights, dtype=torch.float32, device=device)
    X_Ten = torch.tensor(X.values, dtype=torch.float32).to(device)
    y_Ten = torch.tensor(y.values, dtype=torch.float32).to(device)
    dataset = TensorDataset(X_Ten, y_Ten)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

    model = MultiLabelClassifier(X.shape[1], hidden_size, y.shape[1], co_occurrence_matrix).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            probabilities = model(inputs)
            # Element-wise weight: pos_weight where the target is 1, 1 where it is 0.
            element_weights = 1.0 + (pos_weights_tensor - 1.0) * targets
            loss = F.binary_cross_entropy(probabilities, targets, weight=element_weights)
            loss.backward()
            optimizer.step()

    return model

#This is a self-defined GRIDSEARCH function
def GRIDSEARCH (X_train, y_train, pos_weights, co_occurrence_matrix):
    """Train one model per hyper-parameter combination and return the best one.

    Every combination of hidden size, learning rate and epoch count is trained
    with Fit_MLP and scored by the macro F1 of its predictions on ``X_train``
    (the same data it was fitted on). The first model reaching the highest
    score is returned.

    Args:
        X_train: feature DataFrame.
        y_train: label DataFrame of 0/1 indicators.
        pos_weights: per-label positive weights forwarded to Fit_MLP.
        co_occurrence_matrix: NumPy array forwarded to Fit_MLP.

    Returns:
        The best trained model. A model is always returned, even when every
        candidate scores 0.
    """
    # Define the parameter grid.
    param_grid = {'hidden_size': [8, 16, 32], 'learning_rate': [0.01, 0.05], 'num_epochs': [10, 20, 30]}
    y_true = y_train.values
    best_clf = None
    # Start below any reachable F1 so the first candidate is always kept;
    # starting at 0 with a strict '>' returned None when all scores were 0.
    best_score = float("-inf")
    for hidden_size, lr, epochs in itertools.product(
            param_grid['hidden_size'], param_grid['learning_rate'], param_grid['num_epochs']):
        model = Fit_MLP(X_train, y_train, pos_weights, co_occurrence_matrix, epochs, hidden_size, lr)
        y_pred_proba, y_pred = predict_Multilabel(model, X_train)
        score = f1_score(y_true, y_pred, average='macro')
        if score > best_score:
            best_score = score
            best_clf = model

    return best_clf

#This function defines the basic method for multi-label prediction.
def predict_Multilabel(classifier, X):
    """Run ``classifier`` on ``X`` in batches of 32 and threshold its outputs at 0.5.

    Args:
        classifier: torch module; it is moved to the selected device and put in eval mode.
        X: DataFrame of numeric features (converted to a float32 tensor).

    Returns:
        (outputs, binary): the stacked model outputs as a NumPy array and an
        integer array of the same shape that is 1 where the output exceeds 0.5.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    classifier = classifier.to(device)
    features = torch.tensor(X.values, dtype=torch.float32).to(device)
    # shuffle=False keeps the output rows in the same order as the rows of X.
    loader = DataLoader(TensorDataset(features), batch_size=32, shuffle=False)
    classifier.eval()
    with torch.no_grad():
        batch_outputs = [classifier(batch[0].to(device)).cpu().numpy() for batch in loader]

    y_pred_proba_combined = np.concatenate(batch_outputs)
    binary_vector = (y_pred_proba_combined > 0.5).astype(int)
    return y_pred_proba_combined, binary_vector

#This function defines the basic method for multi-label prediction with voting.
#Compared to the approach for C(num_labels,K<num_labels-1) and C(num_labels,K=num_labels-1), there are slight differences here.
def predict_with_rfecv(classifiers, df, num_labels):
    """Predict every row of ``df`` with each classifier and combine the results by voting.

    Args:
        classifiers: iterable of ``(model, support)`` pairs; ``support`` selects the
            features of a row that are passed to ``model``.
        df: feature DataFrame, processed one row at a time.
        num_labels: number of labels each model predicts.

    Returns:
        (scores, votes): per row, the outputs summed over all classifiers, and a
        0/1 vector that is 1 for every label whose vote count equals the highest
        vote count of that row.
    """
    # NOTE(review): when no classifier votes for any label the highest count is 0,
    # so every label of that row is set to 1 - confirm this is intended.
    n_classifiers = len(classifiers)
    row_votes = []   # final 0/1 vector of each row
    row_scores = []  # summed model outputs of each row

    for _, row in df.iterrows():
        hard = np.zeros((n_classifiers, num_labels), dtype=int)
        soft = np.zeros((n_classifiers, num_labels), dtype=float)

        for k, (clf, support) in enumerate(classifiers):
            # Keep only the selected features and wrap them as a one-row DataFrame.
            single_row = pd.DataFrame(row[support].values.reshape(1, -1))
            proba, decision = predict_Multilabel(clf, single_row)
            hard[k, :] = decision
            soft[k, :] = proba

        # Votes are counts of 0/1 decisions, hence never below the initial value 0.
        label_votes = hard.sum(axis=0)
        top_votes = label_votes.max(initial=0)
        row_votes.append((label_votes == top_votes).astype(int))
        row_scores.append(soft.sum(axis=0))

    return np.array(row_scores), np.array(row_votes)


def predict_with_rfecv_bootstrap(classifiers, df, y_test, n_bootstrap):
    """Bootstrap the metrics of the voting ensemble on an evaluation set.

    In each of ``n_bootstrap`` rounds the index of ``df`` is resampled with
    replacement (seeded with the round number), predictions are obtained from
    ``predict_with_rfecv`` and both micro-averaged and per-class metrics are
    appended to the result dictionaries.

    Args:
        classifiers: ``(model, support)`` pairs, forwarded to predict_with_rfecv.
        df: feature DataFrame; its index labels must also exist in ``y_test``.
        y_test: DataFrame of 0/1 indicators, one column per class.
        n_bootstrap: number of bootstrap rounds.

    Returns:
        A tuple ``(bootstrap_stats, class_1_stats, ..., class_n_stats)`` with one
        per-class dictionary per column of ``y_test`` (seven elements for six
        classes). ``bootstrap_stats`` holds lists under 'recall', 'auc', 'prc',
        'accuracy', 'precision', 'F1' and 'han' (Hamming loss); each per-class
        dictionary holds lists under 'recall', 'specificity', 'auc', 'prc',
        'accuracy', 'precision' and 'F1'.

    Side effects:
        Installs a process-wide filter that ignores ``UserWarning``.
    """
    n_classes = y_test.shape[1]

    #Storage space for results
    bootstrap_stats = {key: [] for key in ('recall', 'auc', 'prc', 'accuracy', 'precision', 'F1', 'han')}
    class_stats = [
        {key: [] for key in ('recall', 'specificity', 'auc', 'prc', 'accuracy', 'precision', 'F1')}
        for _ in range(n_classes)
    ]

    num_rows = len(df)
    warnings.filterwarnings('ignore', category=UserWarning)

    for b in range(n_bootstrap):
        indices = resample(df.index, n_samples=num_rows, replace=True, random_state=b)
        df_bootstrap = df.loc[indices]
        y_test_bootstrap = y_test.loc[indices]
        y_true = y_test_bootstrap.values
        # predictions_R: summed model outputs (used for ranking metrics);
        # predictions: 0/1 votes (used for threshold metrics).
        predictions_R, predictions = predict_with_rfecv(classifiers, df_bootstrap, n_classes)

        # Micro-averaged metrics over all labels.
        bootstrap_stats['recall'].append(recall_score(y_test_bootstrap, predictions, average='micro'))
        bootstrap_stats['precision'].append(precision_score(y_test_bootstrap, predictions, average='micro'))
        bootstrap_stats['accuracy'].append(accuracy_score(y_test_bootstrap, predictions))
        bootstrap_stats['F1'].append(f1_score(y_test_bootstrap, predictions, average='micro'))
        # Micro-AUROC and micro-AUPRC: flatten all (sample, label) pairs.
        bootstrap_stats['auc'].append(roc_auc_score(y_true.ravel(), predictions_R.ravel(), average='micro'))
        precisionq, recallq, _ = precision_recall_curve(y_true.ravel(), predictions_R.ravel())
        bootstrap_stats['prc'].append(auc(recallq, precisionq))
        bootstrap_stats['han'].append(hamming_loss(y_test_bootstrap, predictions))

        # Per-class metrics. mcm[i] is the 2x2 matrix [[TN, FP], [FN, TP]] of class i.
        mcm = multilabel_confusion_matrix(y_test_bootstrap, predictions)
        precision = precision_score(y_test_bootstrap, predictions, average=None)
        recall = recall_score(y_test_bootstrap, predictions, average=None)
        f1 = f1_score(y_test_bootstrap, predictions, average=None)

        for i, stats in enumerate(class_stats):
            true_negatives = mcm[i, 0, 0]
            false_positives = mcm[i, 0, 1]
            negatives = true_negatives + false_positives
            # Specificity = TN / (TN + FP) of this class; 0 when the class has no negatives.
            specificity = true_negatives / negatives if negatives > 0 else 0

            fpr, tpr, _ = roc_curve(y_true[:, i], predictions_R[:, i])
            pr_precision, pr_recall, _ = precision_recall_curve(y_true[:, i], predictions_R[:, i])

            stats['precision'].append(precision[i])
            stats['recall'].append(recall[i])
            stats['F1'].append(f1[i])
            stats['accuracy'].append(accuracy_score(y_true[:, i], predictions[:, i]))
            stats['specificity'].append(specificity)
            stats['auc'].append(auc(fpr, tpr))
            stats['prc'].append(auc(pr_recall, pr_precision))

    return (bootstrap_stats, *class_stats)
    
#Automatically identify continuous and binary variables.
# A column is binary when it is numeric and its unique values are exactly 0 and 1;
# every other column is treated as continuous.
continuous_vars = []
binary_vars = []
for col in X1.columns:
    is_binary = (
        X1[col].dtype.kind in 'biufc'
        and X1[col].nunique() == 2
        and set(X1[col].unique()) == {0, 1}
    )
    if is_binary:
        binary_vars.append(col)
    else:
        continuous_vars.append(col)

# Define the variable groups once, after all columns are classified, so the
# dictionary also exists when X1 has no columns.
groups = {
    'Continuous': continuous_vars,
    'Binary': binary_vars
}

# Apply a log(1 + x) transformation and then min-max scaling to the continuous variables;
# binary variables are left unchanged.
log_X1 = X1.copy()
log_X1[continuous_vars] = np.log1p(X1[continuous_vars])  # log(1 + x) on the continuous columns only.
scaler = MinMaxScaler()
X_scaled = log_X1.copy()
X_scaled[continuous_vars] = scaler.fit_transform(X_scaled[continuous_vars])  # Min-max scaling of the log-transformed continuous columns.
# NOTE(review): the scaler is fitted on all rows before the split below, so the
# held-out rows contribute to the scaling range - confirm this is intended.

# Perform the train-test split: 20% of the rows are held out, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.20, random_state=5)

# Containers for the trained classifiers and for the feature mask selected for each label.
classifiers1, label_supports = [], []

# Run RFECV once per label column (5-fold CV, accuracy scoring, one feature removed
# per step) and keep the boolean mask of the selected features.
for _, label_column in y_train.items():
    rfecv = RFECV(estimator=classifier1, step=1, cv=5, scoring='accuracy', n_jobs=-1)
    rfecv.fit(X_train, label_column)
    label_supports.append(rfecv.support_)

# Train one multi-label model per label combination in `combinations` and keep,
# for each combination, the cross-validation fold model with the lowest Hamming loss.
for combo in combinations:
    subset = pd.concat([X_train, y_train.loc[:, combo]], axis=1)
    # Keep only the rows that carry at least one positive label of this
    # combination. The row sum must be taken over the label columns alone:
    # summing the feature columns as well is > 0 for practically every row,
    # so that filter never removed anything.
    filtered_subset = subset[y_train.loc[:, combo].sum(axis=1) > 0]
    X1_train = filtered_subset.drop(columns=list(combo))
    y1_train = filtered_subset.loc[:, combo]

    # Combine (logical OR) the RFECV feature masks of all labels in this combination.
    combo_support = np.logical_or.reduce(
        [support for label, support in zip(labels, label_supports) if label in combo]
    )
    # The selected column names do not change between folds, so resolve them once.
    selected_columns = X1_train.columns[combo_support]

    best_clf = None
    # Start from +inf so the first fold always becomes the initial best model.
    # Starting from 1.0 with a strict '<' silently dropped a combination
    # whenever every fold had a Hamming loss of 1.0.
    best_hamming_loss = float('inf')
    for train_index, test_index in kf.split(filtered_subset):
        X_T_train, X_T_test = X1_train.iloc[train_index], X1_train.iloc[test_index]
        y_T_train, y_T_test = y1_train.iloc[train_index], y1_train.iloc[test_index]
        # Restrict both folds to the features selected for this combination.
        X_T_train_selected = X_T_train.loc[:, selected_columns]
        X_T_test_selected = X_T_test.loc[:, selected_columns]
        # Label co-occurrence frequencies: (labels x labels) counts divided by
        # the number of training rows in this fold.
        co_occurrence_matrix = np.dot(y_T_train.values.T, y_T_train.values) / y_T_train.values.shape[0]
        pos_weights = weight_Class(y_T_train)
        # Train the classifier for this fold.
        model = GRIDSEARCH(X_T_train_selected, y_T_train, pos_weights, co_occurrence_matrix)
        y_pred_proba, y_pred = predict_Multilabel(model, X_T_test_selected)
        hamming_loss_val = hamming_loss(y_T_test, y_pred)
        # Lower Hamming loss is better: keep the model together with its feature mask.
        if hamming_loss_val < best_hamming_loss:
            best_hamming_loss = hamming_loss_val
            best_clf = (model, combo_support)
    # best_clf stays None only if kf produced no folds at all.
    if best_clf is not None:
        classifiers1.append(best_clf)

# Evaluate the trained ensemble with 500 bootstrap resamples per data split.
# UserWarning messages are silenced first so they do not flood the output.
warnings.filterwarnings('ignore', category=UserWarning)

# Held-out test split: overall statistics plus one statistics object per class.
(
    bootstrap_stats,
    bootstrap_stats_class1,
    bootstrap_stats_class2,
    bootstrap_stats_class3,
    bootstrap_stats_class4,
    bootstrap_stats_class5,
    bootstrap_stats_class6,
) = predict_with_rfecv_bootstrap(classifiers1, X_test, y_test, n_bootstrap=500)

# Training split, evaluated the same way.
# NOTE(review): these are the rows the models were fitted on, so the resulting
# figures are expected to be optimistic.
(
    bootstrap_stat,
    bootstrap_stat_class1,
    bootstrap_stat_class2,
    bootstrap_stat_class3,
    bootstrap_stat_class4,
    bootstrap_stat_class5,
    bootstrap_stat_class6,
) = predict_with_rfecv_bootstrap(classifiers1, X_train, y_train, n_bootstrap=500)

# Print the overall bootstrap metrics (mean and standard deviation) for the
# external split first, then for the internal split.
# Each table row is (key in the statistics dict, label prefix, display name).
# Generating both labels of a line from the same row fixes the AUPRC spread
# being printed under a "Std-F1" label, and removes the "accyracy" /
# "cross-validation" slips of the hand-written lines.
overall_metric_table = [
    ('han', 'micro', 'hamming distance'),
    ('recall', 'micro', 'sensitivity'),
    ('auc', 'micro', 'AUROC'),
    ('precision', 'micro', 'precision'),
    ('accuracy', 'subset', 'accuracy'),
    ('F1', 'micro', 'F1'),
    ('prc', 'micro', 'AUPRC'),
]
for phase_name, phase_stats in (('external', bootstrap_stats), ('internal', bootstrap_stat)):
    for metric_key, metric_scope, metric_name in overall_metric_table:
        metric_values = phase_stats[metric_key]
        mean_text = f"{metric_scope}- Ave-{metric_name} in {phase_name} validation: {np.mean(metric_values):.4f}"
        std_text = f"{metric_scope}- Std-{metric_name} in {phase_name} validation: {np.std(metric_values):.4f}"
        print(mean_text, std_text)

# Print the per-class bootstrap metrics (mean and standard deviation) for the
# external validation split, class 1 through class 6.
# Each table row is (key in the statistics dict, display name). The lines are
# generated in a loop instead of 42 hand-copied print calls, which fixes the
# misspelled "accyracy" label and the lines where the value was glued to the
# label without a space.
per_class_metric_table = [
    ('specificity', 'specificity'),
    ('recall', 'sensitivity'),
    ('auc', 'AUROC'),
    ('precision', 'precision'),
    ('accuracy', 'accuracy'),
    ('F1', 'F1'),
    ('prc', 'AUPRC'),
]
external_class_stats = [
    bootstrap_stats_class1,
    bootstrap_stats_class2,
    bootstrap_stats_class3,
    bootstrap_stats_class4,
    bootstrap_stats_class5,
    bootstrap_stats_class6,
]
for class_number, class_stats in enumerate(external_class_stats, start=1):
    for metric_key, metric_name in per_class_metric_table:
        metric_values = class_stats[metric_key]
        mean_text = f"subclass{class_number}- Ave-{metric_name} in external validation: {np.mean(metric_values):.4f}"
        std_text = f"subclass{class_number}- Std-{metric_name} in external validation: {np.std(metric_values):.4f}"
        print(mean_text, std_text)

# Print the per-class bootstrap metrics (mean and standard deviation) for the
# internal validation split, class 1 through class 6.
# Each table row is (key in the statistics dict, display name). The lines are
# generated in a loop instead of 42 hand-copied print calls, which fixes the
# copy-paste slips in the labels ("accyracy", "cross-validation", "alidation",
# a doubled colon, a missing dash) and the lines where the value was glued to
# the label without a space.
per_class_metric_table = [
    ('specificity', 'specificity'),
    ('recall', 'sensitivity'),
    ('auc', 'AUROC'),
    ('precision', 'precision'),
    ('accuracy', 'accuracy'),
    ('F1', 'F1'),
    ('prc', 'AUPRC'),
]
internal_class_stats = [
    bootstrap_stat_class1,
    bootstrap_stat_class2,
    bootstrap_stat_class3,
    bootstrap_stat_class4,
    bootstrap_stat_class5,
    bootstrap_stat_class6,
]
for class_number, class_stats in enumerate(internal_class_stats, start=1):
    for metric_key, metric_name in per_class_metric_table:
        metric_values = class_stats[metric_key]
        mean_text = f"subclass{class_number}- Ave-{metric_name} in internal validation: {np.mean(metric_values):.4f}"
        std_text = f"subclass{class_number}- Std-{metric_name} in internal validation: {np.std(metric_values):.4f}"
        print(mean_text, std_text)
