In [1]:
import warnings, os
warnings.filterwarnings("ignore")

from copy import copy
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import confusion_matrix, precision_recall_curve, accuracy_score
from sklearn.metrics import roc_auc_score, matthews_corrcoef, precision_score, recall_score, f1_score


import seaborn as sns
import matplotlib.pyplot as plt
from joblib import dump, load

from aggmap import AggMap, loadmap
from aggmap import AggMapNet as AggModel

from aggmap.AggMapNet import load_model, save_model
from aggmap import show


np.random.seed(666) # fixed NumPy seed, just for repeatable results


def score(dfr):
    """Compute and print classification metrics for one set of predictions.

    The metrics are taken from the original paper:
    Meta-Signer: Metagenomic Signature Identifier based on Rank Aggregation of Features
    https://github.com/YDaiLab/Meta-Signer/blob/bd6a1cd98d1035f848ecb6e53d9ee67a85871db2/src/utils/metasigner_io.py#L34

    Parameters
    ----------
    dfr : pandas.DataFrame
        Must expose the columns ``y_true`` (ground-truth labels), ``y_score``
        (the score handed to ``roc_auc_score``) and ``y_pred`` (predicted labels).

    Returns
    -------
    tuple
        ``(acc, auc, mcc, pres, recall, f1)``. ROC-AUC, precision, recall and
        F1 are all requested with ``average='weighted'``.

    Notes
    -----
    Prints a one-line summary of every metric except accuracy.
    """
    y_true = dfr.y_true
    y_score = dfr.y_score
    y_pred = dfr.y_pred

    auc = roc_auc_score(y_true, y_score, average='weighted')
    mcc = matthews_corrcoef(y_true, y_pred)
    pres = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    acc = accuracy_score(y_true, y_pred)

    print('roc-auc: %.3f, mcc: %.3f, pres: %.3f, recall: %.3f, f1: %.3f' % (auc, mcc, pres, recall, f1))

    return acc, auc, mcc, pres, recall, f1

2022-10-03 16:17:55.102175: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


# read data

In [3]:
# Dataset and output locations for this task.
task = 'Cirrhosis'
data_path = '../../../01_data/species_level/%s/' % (task)
save_dir = '%s_results' % task
# exist_ok avoids the check-then-create race and keeps the cell safe to re-run.
os.makedirs(save_dir, exist_ok=True)

# abundance.tsv is read without a header row, using its first column as the index,
# and then transposed. The cells below index dfx and dfy with the same positional
# fold indices, so after the transpose the rows of dfx must pair up with the rows of
# labels.txt (presumably one row per sample -- verify against the data files).
dfa = pd.read_csv(os.path.join(data_path, 'abundance.tsv'),sep='\t', header=None, index_col=0)
dfy = pd.read_csv(os.path.join(data_path, 'labels.txt'),sep='\t', header=None)
dfx = dfa.T
if len(dfx) != len(dfy):
    raise ValueError('abundance.tsv has %d rows after transposing but labels.txt has %d labels'
                     % (len(dfx), len(dfy)))

# Encode the labels: 'n' -> 0, 'cirrhosis' -> 1. Series.map turns any other value
# into NaN, which get_dummies would silently encode as an all-zero row, so fail
# loudly instead of training on mislabeled samples.
labels = dfy[0].map({'cirrhosis':1, 'n':0})
if labels.isna().any():
    unknown = sorted(set(dfy[0][labels.isna()].astype(str)))
    raise ValueError('labels.txt contains labels missing from the mapping: %s' % unknown)

# One-hot encode (column 0 = 'n', column 1 = 'cirrhosis'). The dtype is pinned to
# uint8, which was the get_dummies default before pandas 2.0 switched it to bool.
dfy = pd.get_dummies(labels, dtype=np.uint8)
Y = dfy.values

# 10-fold cross-validation (10FCV), repeated with 10 random seeds

In [4]:
# Project-local hyperparameter search helper; imported once here rather than
# from inside the fold loop.
from tune import finetune_HPs

gpuid = 0

outer_fold = 10
# One StratifiedKFold shuffle seed per repeat: 10 repeats of the 10-fold CV.
repeat_seeds = [8, 16, 32, 64, 128, 256, 1024, 2048, 4096, 8192]

each_fold_results = []  # one metrics dict per (repeat, fold)
run_all_res = []        # one frame of held-out predictions per repeat

for i, repeat_seed in enumerate(repeat_seeds):
    outer = StratifiedKFold(n_splits = outer_fold, shuffle = True, random_state = repeat_seed)
    # Stratify on the class label recovered from the one-hot columns of dfy.
    outer_idx = outer.split(range(len(dfy)), dfy.idxmax(axis=1))
    run_one_res = []
    for j, (train_idx, test_idx) in enumerate(outer_idx):
        fold_num = "fold_%s" % str(j).zfill(2)
        print('#'*50 + ' repeat_seed: %s; %s ' % (repeat_seed, fold_num) + '#'*50 )

        ## get best parameters
        # Hyperparameters are tuned once, on the training part of the very first
        # fold, and then reused for every other fold and repeat.
        if i == 0 and j == 0:
            dfx_train = dfx.iloc[train_idx]
            dfy_train = dfy.iloc[train_idx]
            best_fill, best_scale_method, best_channel_number, best_epochs, batch_size = finetune_HPs(dfx_train, dfy_train, gpuid=gpuid)
            featHPs = {"best_fill":best_fill, "best_scale_method":best_scale_method, "best_channel_number":best_channel_number}

            # Keep the raw abundances in `dfx` untouched: overwriting `dfx` with its
            # log transform made re-running this cell apply the log twice.
            dfx_log = np.log(dfx + best_fill)

            # NOTE(review): the feature map is fitted on, and applied to, the full
            # data matrix, i.e. including the samples later held out as test folds.
            mp = AggMap(dfx_log, metric = 'correlation')
            mp = mp.fit(cluster_channels = best_channel_number, verbose = 0, var_thr = 0)
            X = mp.batch_transform(dfx_log.values, scale = best_scale_method) # NaN values should be the lowest value
            mp.save(os.path.join(save_dir, 'agg.mp'))

        testY = Y[test_idx]
        testX = X[test_idx]

        trainX = X[train_idx]
        trainY = Y[train_idx]

        print("\n input train and test X shape is %s, %s " % (trainX.shape,  testX.shape))

        clf = AggModel.MultiClassEstimator(epochs = best_epochs,  batch_size = batch_size, verbose = 0, gpuid=gpuid)
        clf.fit(trainX, trainY)

        ## save model for explanation
        # Only the models of the first repeat are kept; the feature-importance
        # cells further down load them back by fold name.
        if i == 0:
            clf.save_model(os.path.join(save_dir, '%s.model' % fold_num))
            paras = clf.get_params()
            paras.update({'featHPs':featHPs})
            pd.Series(paras).to_json(os.path.join(save_dir, 'HPs.json'))

        pred_proba = clf.predict_proba(testX)
        y_true = testY[:,1]                      # column 1 of the one-hot labels
        y_score = pred_proba[:,1]                # predicted probability of class 1
        y_pred = np.argmax(pred_proba, axis=1)

        # Build the frame column-wise so each column keeps its own dtype; stacking
        # the three arrays as rows and transposing forces one common dtype.
        dfr = pd.DataFrame({'y_true': y_true, 'y_score': y_score, 'y_pred': y_pred},
                           index = dfy.iloc[test_idx].index)

        acc, auc, mcc, pres, recall, f1  = score(dfr)
        run_one_res.append(dfr)
        ts = pd.Series([acc, auc, mcc, pres, recall, f1, i, repeat_seed]).round(3)
        ts.index = ['acc','auc', 'mcc', 'pres', 'recall', 'f1', 'i', 'repeat_seed']

        print(ts.to_dict())
        each_fold_results.append(ts.to_dict())
    run_all_res.append(pd.concat(run_one_res))

################################################## repeat_seed: 8; fold_00 ##################################################
2022-10-03 16:18:07,767 - [32mINFO[0m - [bidd-aggmap][0m - Calculating distance ...[0m
2022-10-03 16:18:07,776 - [32mINFO[0m - [bidd-aggmap][0m - the number of process is 16[0m


100%|#############################################################################################################################| 146611/146611 [00:10<00:00, 14067.24it/s]
100%|###########################################################################################################################| 146611/146611 [00:00<00:00, 5311799.60it/s]
100%|####################################################################################################################################| 542/542 [00:00<00:00, 6194.41it/s]


2022-10-03 16:18:18,471 - [32mINFO[0m - [bidd-aggmap][0m - applying hierarchical clustering to obtain group information ...[0m
2022-10-03 16:18:20,636 - [32mINFO[0m - [bidd-aggmap][0m - Applying grid assignment of feature points, this may take several minutes(1~30 min)[0m
2022-10-03 16:18:20,967 - [32mINFO[0m - [bidd-aggmap][0m - Finished[0m


100%|#####################################################################################################################################| 208/208 [00:01<00:00, 118.95it/s]
2022-10-03 16:18:22.788322: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-03 16:18:22.804358: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-03 16:18:22.804450: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-03 16:18:22.804770: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) 

MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=30, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)


 node zero
2022-10-03 16:18:22.806309: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-03 16:18:22.806430: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-03 16:18:23.063889: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-03 16:18:23.064022: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-03 16:18:23.064086: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node 

{'best_loss': 0.592, 'best_epoch': 5, 'fill': 0.01, 'fold_num': 'fold_01'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=30, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)
{'best_loss': 0.227, 'best_epoch': 21, 'fill': 0.01, 'fold_num': 'fold_03'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=30, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estima

100%|#############################################################################################################################| 146611/146611 [00:07<00:00, 18921.73it/s]
100%|###########################################################################################################################| 146611/146611 [00:00<00:00, 4494880.41it/s]
100%|####################################################################################################################################| 542/542 [00:00<00:00, 5430.73it/s]


2022-10-03 16:19:49,451 - [32mINFO[0m - [bidd-aggmap][0m - applying hierarchical clustering to obtain group information ...[0m
2022-10-03 16:19:49,886 - [32mINFO[0m - [bidd-aggmap][0m - Applying grid assignment of feature points, this may take several minutes(1~30 min)[0m
2022-10-03 16:19:50,125 - [32mINFO[0m - [bidd-aggmap][0m - Finished[0m


100%|####################################################################################################################################| 208/208 [00:00<00:00, 2466.78it/s]

MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=30, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)





{'best_loss': 0.563, 'best_epoch': 1, 'fill': 1e-05, 'fold_num': 'fold_01'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=30, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)
{'best_loss': 0.252, 'best_epoch': 10, 'fill': 1e-05, 'fold_num': 'fold_03'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=30, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Esti

100%|#############################################################################################################################| 146611/146611 [00:06<00:00, 21806.25it/s]
100%|###########################################################################################################################| 146611/146611 [00:00<00:00, 5025342.85it/s]
100%|####################################################################################################################################| 542/542 [00:00<00:00, 5107.02it/s]


2022-10-03 16:21:26,755 - [32mINFO[0m - [bidd-aggmap][0m - applying hierarchical clustering to obtain group information ...[0m
2022-10-03 16:21:27,186 - [32mINFO[0m - [bidd-aggmap][0m - Applying grid assignment of feature points, this may take several minutes(1~30 min)[0m
2022-10-03 16:21:27,466 - [32mINFO[0m - [bidd-aggmap][0m - Finished[0m


100%|####################################################################################################################################| 208/208 [00:00<00:00, 2434.46it/s]

MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=30, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)





{'best_loss': 0.563, 'best_epoch': 1, 'fill': 1e-08, 'fold_num': 'fold_01'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=30, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)
{'best_loss': 0.273, 'best_epoch': 10, 'fill': 1e-08, 'fold_num': 'fold_03'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=30, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Esti

100%|#############################################################################################################################| 146611/146611 [00:08<00:00, 17237.88it/s]
100%|###########################################################################################################################| 146611/146611 [00:00<00:00, 4923111.63it/s]
100%|####################################################################################################################################| 542/542 [00:00<00:00, 5115.81it/s]


2022-10-03 16:23:02,073 - [32mINFO[0m - [bidd-aggmap][0m - applying hierarchical clustering to obtain group information ...[0m
2022-10-03 16:23:02,548 - [32mINFO[0m - [bidd-aggmap][0m - Applying grid assignment of feature points, this may take several minutes(1~30 min)[0m
2022-10-03 16:23:02,829 - [32mINFO[0m - [bidd-aggmap][0m - Finished[0m


100%|####################################################################################################################################| 208/208 [00:00<00:00, 2344.23it/s]

MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=30, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)





{'best_loss': 0.563, 'best_epoch': 1, 'scale_method': 'minmax', 'fold_num': 'fold_01'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=30, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)
{'best_loss': 0.276, 'best_epoch': 10, 'scale_method': 'minmax', 'fold_num': 'fold_03'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=30, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='

100%|####################################################################################################################################| 208/208 [00:00<00:00, 3426.76it/s]

MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=30, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)





{'best_loss': 0.592, 'best_epoch': 1, 'scale_method': 'standard', 'fold_num': 'fold_01'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=30, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)
{'best_loss': 0.403, 'best_epoch': 6, 'scale_method': 'standard', 'fold_num': 'fold_03'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=30, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    nam

100%|#############################################################################################################################| 146611/146611 [00:08<00:00, 17336.09it/s]
100%|###########################################################################################################################| 146611/146611 [00:00<00:00, 5024070.06it/s]
100%|####################################################################################################################################| 542/542 [00:00<00:00, 5041.16it/s]


2022-10-03 16:26:04,383 - [32mINFO[0m - [bidd-aggmap][0m - applying hierarchical clustering to obtain group information ...[0m
2022-10-03 16:26:04,814 - [32mINFO[0m - [bidd-aggmap][0m - Applying grid assignment of feature points, this may take several minutes(1~30 min)[0m
2022-10-03 16:26:05,090 - [32mINFO[0m - [bidd-aggmap][0m - Finished[0m


100%|####################################################################################################################################| 208/208 [00:00<00:00, 2417.34it/s]

MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=10, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)





{'best_loss': 0.539, 'best_epoch': 5, 'cluster_channels': 1, 'fold_num': 'fold_01'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=10, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)
{'best_loss': 0.325, 'best_epoch': 10, 'cluster_channels': 1, 'fold_num': 'fold_03'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=10, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap

100%|####################################################################################################################################| 208/208 [00:00<00:00, 3845.76it/s]

MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=10, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)





{'best_loss': 0.563, 'best_epoch': 1, 'cluster_channels': 5, 'fold_num': 'fold_01'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=10, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)
{'best_loss': 0.277, 'best_epoch': 10, 'cluster_channels': 5, 'fold_num': 'fold_03'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=10, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap

100%|####################################################################################################################################| 208/208 [00:00<00:00, 3443.14it/s]

MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=10, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)





{'best_loss': 0.555, 'best_epoch': 1, 'cluster_channels': 9, 'fold_num': 'fold_01'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=10, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)
{'best_loss': 0.224, 'best_epoch': 10, 'cluster_channels': 9, 'fold_num': 'fold_03'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=10, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap

100%|####################################################################################################################################| 208/208 [00:00<00:00, 3485.02it/s]

MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=10, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)





{'best_loss': 0.553, 'best_epoch': 1, 'cluster_channels': 13, 'fold_num': 'fold_01'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=10, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)
{'best_loss': 0.2, 'best_epoch': 10, 'cluster_channels': 13, 'fold_num': 'fold_03'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=10, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap

100%|####################################################################################################################################| 208/208 [00:00<00:00, 3485.02it/s]

MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=10, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)





{'best_loss': 0.559, 'best_epoch': 1, 'cluster_channels': 17, 'fold_num': 'fold_01'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=10, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)
{'best_loss': 0.205, 'best_epoch': 10, 'cluster_channels': 17, 'fold_num': 'fold_03'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=10, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggM

100%|####################################################################################################################################| 208/208 [00:00<00:00, 3426.37it/s]

MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=10, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)





{'best_loss': 0.547, 'best_epoch': 1, 'cluster_channels': 21, 'fold_num': 'fold_01'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=10, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=1000,
                    random_state=32, verbose=0)
{'best_loss': 0.214, 'best_epoch': 10, 'cluster_channels': 21, 'fold_num': 'fold_03'}
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=10, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggM

100%|#############################################################################################################################| 146611/146611 [00:07<00:00, 19517.16it/s]
100%|###########################################################################################################################| 146611/146611 [00:00<00:00, 5145910.04it/s]
100%|####################################################################################################################################| 542/542 [00:00<00:00, 5387.05it/s]


2022-10-03 16:29:21,954 - [32mINFO[0m - [bidd-aggmap][0m - applying hierarchical clustering to obtain group information ...[0m
2022-10-03 16:29:22,406 - [32mINFO[0m - [bidd-aggmap][0m - Applying grid assignment of feature points, this may take several minutes(1~30 min)[0m
2022-10-03 16:29:22,789 - [32mINFO[0m - [bidd-aggmap][0m - Finished[0m


100%|####################################################################################################################################| 232/232 [00:00<00:00, 2565.50it/s]


 input train and test X shape is (208, 24, 23, 13), (24, 24, 23, 13) 
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=19, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=10000,
                    random_state=32, verbose=0)





saving model to Cirrhosis_results/fold_00.model
roc-auc: 1.000, mcc: 0.920, pres: 0.962, recall: 0.958, f1: 0.958
{'acc': 0.958, 'auc': 1.0, 'mcc': 0.92, 'pres': 0.962, 'recall': 0.958, 'f1': 0.958, 'i': 0.0, 'repeat_seed': 8.0}
################################################## repeat_seed: 8; fold_01 ##################################################

 input train and test X shape is (208, 24, 23, 13), (24, 24, 23, 13) 
MultiClassEstimator(batch_norm=False, batch_size=2, conv1_kernel_size=13,
                    dense_avf='relu', dense_layers=[128], dropout=0.0,
                    epochs=19, gpuid='0', last_avf='softmax',
                    loss='categorical_crossentropy', lr=0.0001, metric='ACC',
                    monitor='val_loss', n_inception=2,
                    name='AggMap MultiClass Estimator', patience=10000,
                    random_state=32, verbose=0)
saving model to Cirrhosis_results/fold_01.model
roc-auc: 0.847, mcc: 0.676, pres: 0.843, recall: 0.833, f1: 0.832


In [8]:
# Average every column within each repeat (grouped by seed), then across repeats.
per_repeat_mean = pd.DataFrame(each_fold_results).groupby('repeat_seed').mean()
per_repeat_mean.mean()

acc       0.88703
auc       0.94409
mcc       0.78210
pres      0.89539
recall    0.88703
f1        0.88625
i         4.50000
dtype: float64

In [6]:
# Fold-to-fold standard deviation within each repeat, averaged across repeats.
per_repeat_std = pd.DataFrame(each_fold_results).groupby('repeat_seed').std()
per_repeat_std.mean()

acc       0.064574
auc       0.052648
mcc       0.125645
pres      0.061458
recall    0.064574
f1        0.065282
i         0.000000
dtype: float64

In [7]:
# Persist the per-fold metrics table in the results directory.
performance_csv = os.path.join(save_dir, 'performance_results.csv')
pd.DataFrame(each_fold_results).to_csv(performance_csv)

# feature importance

In [None]:
# Per-fold feature importance from the models saved during the first CV repeat.
# Iterating over `outer_fold` (instead of a literal 10) keeps this loop in step
# with the number of models the cross-validation cell actually wrote.
all_imps = []
for i in range(outer_fold):
    fold_num = 'fold_%s' % str(i).zfill(2)  # same naming as the saved model files
    clf = load_model(os.path.join(save_dir, '%s.model' % fold_num))
    dfe = clf.explain_model(mp, clf.X_, clf.y_, binary_task=True, apply_logrithm=False)
    df_imp = dfe.col_1_importance.to_frame(name = '%s_imp' % fold_num)
    all_imps.append(df_imp)
# `dfe` stays bound to the last fold's explanation table; the next cell reads it.

In [None]:
# Start from every column of the last explanation table except its final one.
# The explicit copy makes `dfi` an independent frame, so the column assignments
# below are not chained assignments on a slice of `dfe`.
dfi = dfe[dfe.columns[:-1]].copy()

# Look up each entry of column `v` in the index of the fitted map's embedding table.
dfi['scatter_x'] = dfi.v.map(mp.df_embedding.x)
dfi['scatter_y'] = dfi.v.map(mp.df_embedding.y)

# Append the across-fold average first, then the individual per-fold columns.
dfimp_all = pd.concat(all_imps, axis=1)
dfi = dfi.join(dfimp_all.mean(axis=1).to_frame(name = 'avg_imp'))
dfi = dfi.join(dfimp_all)
dfi.to_csv(os.path.join(save_dir, 'feature_imp_results.csv'))

In [None]:
# Display the combined feature-importance table built in the previous cell.
dfi

In [1]:
# NOTE(review): looks like leftover scratch arithmetic that nothing else in the notebook uses -- confirm and remove.
24*24

576