In [13]:
import numpy as np
import pandas as pd
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina')

from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings(action='ignore')
from scipy import stats
from scipy.stats import boxcox
from sklearn.preprocessing import MinMaxScaler

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import  TensorDataset, DataLoader
from torchvision.utils import save_image
import os
from os import listdir
from os.path import isfile, join
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, auc, roc_auc_score
from sklearn.metrics import balanced_accuracy_score, roc_curve, average_precision_score, precision_recall_curve
from sklearn.metrics import plot_confusion_matrix
pd.options.display.float_format = '{:.2f}'.format

In [14]:
data=pd.read_csv("./data2100205.csv")

columns = data.iloc[:,2:-19].columns
columns =columns.drop(['pleura_tb', 'pleura_bac','blood_positive', 'blood_obscure', 'cancer'])
float_column=data[columns].select_dtypes(include=['float']).columns
object_column = data[columns].select_dtypes(include=['object']).columns
not_object = ['D-dimer', 'BNP', 'procalcitonin', 'ANA titer','ANA 1:40','ionized calcium','E-ANC_blood']

data[not_object[0]] = data[not_object[0]].replace("\<","",regex=True).replace("\>","", regex=True).astype(float)
data[not_object[1]] = data[not_object[1]].replace("\<","",regex=True).replace("\>","", regex=True).astype(float)
data[not_object[2]] = data[not_object[2]].replace("\<","",regex=True).replace("\>","", regex=True).astype(float)
data[not_object[5]] = data[not_object[5]].replace("\<","",regex=True).replace("\>","", regex=True).astype(float)

def floating(x):
    try:
        value = float(x)
    except ValueError :
        value = np.nan
    return value

data[not_object[6]] = data[not_object[6]].replace("\-","",regex=True).apply(floating).astype(float)
data[not_object[4]] = data[not_object[4]].replace({np.nan:0, 
  'Neg(<1:40)':0,
  'Non(<1:40)':0,
  'Pos. Speckled':1,
  'Reactive Speckled':2,
  'Cytoplasmic':3,
  'Reactive Nuclear membrane':4,
  'Positive Speckled':5})

data[not_object[3]] =  data[not_object[3]].replace({np.nan:0,
 '<1:40':0,
 'Speckled 1:1280':1,
 'Speckled 1:40':2,
 'Nucleolar 1:80':3,
 'Homogene.1:640':4,
 'Mixed type(Remark)':5,
 'Nucle.Membrane 1:80':6,
 'Speckled 1:160':7,
 'Homogene.1:40':8,
 'Nucleolar 1:640':9,
 'Homogene.1:160':10,
 'Discr.speckled 1:1280':11,
 'Nucleolar 1:40':12,
 'Homogene.1:80':13,
 'Homogene.1:1280':14,
 'Speckled 1:320':15})

data[['ANA1','ANA2','ANA3','ANA4','ANA5','ANA6','ANA7','ANA8','ANA9','ANA10','ANA11','ANA12','ANA13','ANA14',
    'ANA15','ANA16']]=pd.DataFrame(np.eye(len(data[not_object[3]].unique()))[data[not_object[3]]])

data[['ANA_1','ANA_2','ANA_3','ANA_4','ANA_5','ANA_6']]=pd.DataFrame(np.eye(len(data[not_object[4]].unique()))[data[not_object[4]]])

reallist = float_column.tolist()+['D-dimer', 'BNP', 'procalcitonin','ionized calcium','E-ANC_blood']
analist = ['ANA1','ANA2','ANA3','ANA4','ANA5','ANA6','ANA7','ANA8','ANA9','ANA10','ANA11','ANA12','ANA13','ANA14',
    'ANA15','ANA16','ANA_1','ANA_2','ANA_3','ANA_4','ANA_5','ANA_6']

count_label = pd.DataFrame(data.count()/len(data))

data.loc[data['ph_pleural fluid'].isna(), 'ph_pleural fluid'] = data.loc[data['ph_pleural fluid'].isna(), 'pH_pleural fluid(Qn) ']

list_04 = ['ADA_pleural fluid', 'LD_pleural fluid', 'albumin_pleural fluid', 'ph_pleural fluid', 'Total protein_pleural fluid', 'Glucose_pleural fluid',
'Total amylase_pleural fluid', 'Creatinine', 'AST(SGOT)', 'Alkaline phosphatase', 'total bilirubin', 'glucose', 'Albumin', 'BUN',
'Total protein', 'LD', 'ALT(SGPT)', 'r-GT', 'CRP', 'D-dimer', 'BNP', 'procalcitonin', 'total calcium', 'sodium', 'chloride', 'potassium', 'phosphorus', 'Hb_blood',
'RDW_blood', 'WBC_blood', 'MCHC_blood', 'MCV_blood', 'PDW_blood', 'Hct_blood', 'E-ANC_blood', 'MCH_blood', 'RBC_blood', 'MPV_blood', 'Platelet_blood', "Histiocyte (Qn)[Cytospin,Wright's stain],Pleural fluid",
"Neutrophil (Qn)[Cytospin,Wright's stain],Pleural fluid", "RBC (Qn)[Cytospin,Wright's stain],Pleural fluid", "Eosinophil (Qn)[Cytospin,Wright's stain],Pleural fluid",
"Nucleated cells (Qn)[Cytospin,Wright's stain],Pleural fluid", "Lymphocyte (Qn)[Cytospin,Wright's stain],Pleural fluid", "Mesothelial cell (Qn)[Cytospin,Wright's stain],Pleural fluid"
          ]

xlist =dict()

for i in range(len(list_04)):
    xlist.update({data[list_04[i]].name: stats.mode(data[list_04[i]]).mode.item()})

for i in range(len(list_04)):
    data[list_04[i]] = data[list_04[i]].replace(np.nan, xlist[list_04[i]])

data.loc[data['pleura_bac']>0,'Labeling']=3

data.loc[((data['Labeling'].isin([3,17,"3","17"]))& (data['제외']!=1)),'new_label']="bacterial"
data.loc[((data['Labeling'].isin([1,"1"]))& (data['제외']!=1)),'new_label']="tuberculosis"
data.loc[((data['Labeling'].isin([2,"2"]))& (data['제외']!=1)),'new_label']="malignancy"
data.loc[((data['Labeling'].isin([4,5,6,18,22,26,"4","5","6","18","22","26"]))
          & (data['제외']!=1)),'new_label']="volume"
data.loc[((data['Labeling'].isin([7,10,14,19,25,29,30,"7","10","14","19","25","29","30"]))
          & (data['제외']!=1)),'new_label']="other"
data.loc[((data['Labeling'].isin([8,16,24,23,20,21,27,28,"8","16","24","23","20","21","27","28"]))
          & (data['제외']!=1)),'new_label']="other"
data.loc[((data['Labeling'].isin([9,15,"9","15"]))& (data['제외']!=1))
         ,'new_label']="other"

data.loc[((data['Labeling'].isin([3,17,"3","17"]))& (data['제외']!=1)),'new_int']=0
data.loc[((data['Labeling'].isin([1,"1"]))& (data['제외']!=1)),'new_int']=1
data.loc[((data['Labeling'].isin([2,"2"]))& (data['제외']!=1)),'new_int']=2
data.loc[((data['Labeling'].isin([4,5,6,18,22,26,"4","5","6","18","22","26"]))
          & (data['제외']!=1)),'new_int']=3
data.loc[((data['Labeling'].isin([7,10,14,19,25,29,30,"7","10","14","19","25","29","30"]))
          & (data['제외']!=1)),'new_int']=4
data.loc[((data['Labeling'].isin([8,16,24,23,20,21,27,28,"8","16","24","23","20","21","27","28"]))
          & (data['제외']!=1)),'new_int']=4
data.loc[((data['Labeling'].isin([9,15,"9","15"]))& (data['제외']!=1))
         ,'new_int']=4
data.loc[((data['new_int'].isna())& (data['제외']!=1))
         ,'new_int']=4

In [15]:

### test set process

data.loc[((data['final'].isin([3,17,"3","17"]))& (data['제외']!=1)),'final_label']="bacterial"
data.loc[((data['final'].isin([1,"1"]))& (data['제외']!=1)),'final_label']="tuberculosis"
data.loc[((data['final'].isin([2,"2"]))& (data['제외']!=1)),'final_label']="malignancy"
data.loc[((data['final'].isin([4,5,6,18,22,26,"4","5","6","18","22","26"]))
          & (data['제외']!=1)),'final_label']="volume"
data.loc[((data['final'].isin([7,10,14,19,25,29,30,35,36,"7","10","14","19","25","29","30","35","36"]))
          & (data['제외']!=1)),'final_label']="other"
data.loc[((data['final'].isin([8,16,24,23,20,21,27,28,31,32,"8","16","24","23","20","21","27","28","31","32"]))
          & (data['제외']!=1)),'final_label']="other"
data.loc[((data['final'].isin([9,15,33,34,"9","15","33","34"]))& (data['제외']!=1))
         ,'final_label']="other"

data.loc[((data['final'].isin([3,17,"3","17"]))& (data['제외']!=1)),'final_int']=0
data.loc[((data['final'].isin([1,"1"]))& (data['제외']!=1)),'final_int']=1
data.loc[((data['final'].isin([2,"2"]))& (data['제외']!=1)),'final_int']=2
data.loc[((data['final'].isin([4,5,6,18,22,26,"4","5","6","18","22","26"]))
          & (data['제외']!=1)),'final_int']=3
data.loc[((data['final'].isin([7,10,14,19,25,29,30,35,36,"7","10","14","19","25","29","30","35","36"]))
          & (data['제외']!=1)),'final_int']=4
data.loc[((data['final'].isin([8,16,24,23,20,21,27,28,31,32,"8","16","24","23","20","21","27","28","31","32"]))
          & (data['제외']!=1)),'final_int']=4
data.loc[((data['final'].isin([9,15,33,34,"9","15","33","34"]))& (data['제외']!=1))
         ,'final_int']=4

data['LD_ratio'] = data['LD_pleural fluid']/data['LD']>=0.6
data['LD_raw'] = data['LD_pleural fluid']>= 250*2/3
data['PF_ratio'] = data['Total protein_pleural fluid']/data['Total protein']>=0.6


data['sum_pleural'] = data['LD_ratio'].replace(True, 1).replace(False,0) +data['LD_raw'].replace(True, 1).replace(False,0)+data['PF_ratio'].replace(True, 1).replace(False,0)

data['exudate']= data['sum_pleural']>=1


data['low_pH']=data['ph_pleural fluid']<7.2
data['low_glu']=data['Glucose_pleural fluid']<61
data['high_LDH']=data['LD_pleural fluid']>1000

data['score']=data['low_pH'].replace(True, 1).replace(False,0)+data['low_glu'].replace(True, 1).replace(False,0)+data['high_LDH'].replace(True, 1).replace(False,0)

data['complicated_pleural_effusion']=data['score']>2

data['might_Tb']= data['ADA_pleural fluid']>50
data['lymph_dominant'] = data["Lymphocyte (Qn)[Cytospin,Wright's stain],Pleural fluid"]/data["Neutrophil (Qn)[Cytospin,Wright's stain],Pleural fluid"]>0.75
data.loc[(data['lymph_dominant']==True) & (data['might_Tb']==True),'Tuberculosis']=1

### preprocessing
scaling, transformation, rescaling

In [16]:
data['trans_label']=data['final_label']
data.loc[data['Tuberculosis']==True,'trans_label']='rule_tuberculosis'
data.loc[(data['Tuberculosis']==True)&
         (data['final_label']!='tuberculosis'),'trans_label']='not_tuberculosis'
data['trans_int']=data['final_int']
data.loc[data['Tuberculosis']==True,'trans_int']=5
data.loc[(data['Tuberculosis']==True)&
         (data['final_label']!='tuberculosis'),'trans_int']=7

data['trans_label2']=data['final_label']
data.loc[data['complicated_pleural_effusion']==True,'trans_label2']='rule_complicated_effusion'
data.loc[(data['complicated_pleural_effusion']==True)&
         (data['final_label']!='bacterial'),'trans_label2']='not_bacteria'
data['trans_int2']=data['final_int']
data.loc[data['complicated_pleural_effusion']==True,'trans_int2']=6
data.loc[(data['complicated_pleural_effusion']==True)&
         (data['final_label']!='bacterial'),'trans_int2']=8

In [17]:
data = data.loc[data['제외']!=1]

training = data[list_04+['new_label']+['new_int']]
training_only = training.loc[training['new_label'].isna()==False]
col_name  = training.columns.difference(["new_label",'new_int'])

range_lower = 0.05
range_upper = 0.95
outcome_quantile = training[col_name].quantile([range_lower, range_upper])

for i in col_name:
    training.loc[(training[i]<=outcome_quantile[i][range_lower]),i]=\
        outcome_quantile[i][range_lower]

    training.loc[(training[i]>=outcome_quantile[i][range_upper]),i]=\
        outcome_quantile[i][range_upper]

scaler01 = MinMaxScaler(feature_range=(0.01,1))
scaler01 = scaler01.fit(training[col_name])
training[col_name] = scaler01.transform(training[col_name])

training = training.loc[training['new_label'].isna()==False]

In [47]:
from contrast import small_encoder, added_on_model, parse_option
from contrast import train, validate
from losses import SupConLoss
import torch.backends.cudnn as cudnn

import sys
sys.argv=['']
import os
import argparse
import time
import math
from util import AverageMeter
from util import save_model, accuracy

try:
    import apex
    from apex import amp, optimizers
except ImportError:
    pass

In [19]:
from torch.utils.data import WeightedRandomSampler
batch_size, val_batch_size = 450,223

#weighted sampler 
# test-set의 proportion
weight = 1/np.array([201, 123, 600, 100, 200])

samples_weight = np.array([weight[t] for t in y_train.numpy().astype(int)])
samples_weight = torch.from_numpy(samples_weight)
samples_weight = samples_weight.double()
train_sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

val_samples_weight = np.array([weight[t] for t in y_nottrain.numpy().astype(int)])
val_samples_weight = torch.from_numpy(val_samples_weight)
val_samples_weight = val_samples_weight.double()
val_sampler = WeightedRandomSampler(val_samples_weight, len(val_samples_weight))

In [200]:
def split():
    train_, test_ = train_test_split(training, test_size = 0.3,
                                 stratify=training['new_int']
                                                   )
    X_train, X_notrain, y_train, y_nottrain = train_test_split(train_[col_name],
                                                     train_['new_int'], test_size = 0.3
                                                   , stratify=train_['new_int'])
    X_train = torch.tensor(np.array(X_train), dtype=torch.float32)
    y_train = torch.tensor(y_train.values.astype(int), dtype=torch.long)
    X_notrain = torch.tensor(np.array(X_notrain), dtype=torch.float32)
    y_nottrain = torch.tensor(y_nottrain.values.astype(int), dtype=torch.long)
    X_test_ = torch.tensor(np.array(test_[col_name]), dtype=torch.float32)
    y_test_ = torch.tensor(test_['new_int'].values.astype(int), dtype=torch.long)
    train_data = TensorDataset(X_train, y_train)
    total_val_data = TensorDataset(X_notrain, y_nottrain)

    return X_test_, y_test_, train_data, total_val_data, X_notrain, y_nottrain

### validate로 early stopping적용 학습모델

In [284]:
opt = parse_option()
input_len = 46
dimension = 20

layer_list = [128,256,128,32]

opt.model_path = './revise/{}_{}_models'.format("effu220120_weighted", dimension)
if not os.path.isdir(opt.model_path):
    os.makedirs(opt.model_path)

opt.tb_path = './revise/{}_{}_tensorboard'.format("effu220120_weighted", dimension)
if not os.path.isdir(opt.tb_path):
    os.makedirs(opt.tb_path)

opt.print_freq = 100

min_epoch = 600
n_epochs_stop =100
epoch =5000

In [285]:
def train_1():
    train_loader = torch.utils.data.DataLoader(dataset=train_data, 
                                           batch_size=batch_size, 
                                           sampler = train_sampler)

    val_loader = torch.utils.data.DataLoader(dataset=total_val_data, 
                                               batch_size=val_batch_size, 
                                               sampler = val_sampler)
    model = small_encoder(input_len, layer_list, dimension)
    criterion = SupConLoss()
    model = model.cuda()
    cudnn.benchmark = True
    criterion = criterion.cuda()
    optimizer = optim.Adam(model.parameters(),
                         lr=0.002, weight_decay=1e-5)
    
    opt.epochs = 5000
    best_val_loss = np.inf
        # training routine
    for epoch in range(1, opt.epochs + 1):
        # adjust_learning_rate(opt, optimizer, epoch)
        # train for one epoch
        loss  = train(train_loader, model, criterion, optimizer, epoch, opt)

        
        # evaluation
        val_loss = validate(val_loader, model, criterion, epoch, opt)

        if val_loss <= best_val_loss:
            filename = [f for f in os.listdir(opt.model_path) if f.startswith("model_{layer}".format(layer=layer_list))]
            if filename:
                if os.path.isfile(opt.model_path+"/"+filename[0]):
                    os.remove(opt.model_path+"/"+filename[0])
                    
            save_file = os.path.join(
                opt.model_path, 'model_{layer}_loss_{loss:.3f}.pth'.format(layer=layer_list,
                                                                             loss=val_loss))

            save_model(model, optimizer, opt, epoch, save_file)
            epochs_no_improve = 0
            best_val_loss = val_loss
        else:
            epochs_no_improve += 1
            
        if epoch > min_epoch and epochs_no_improve > n_epochs_stop:
            break
        else:
            continue

In [286]:
add_layer_list = [128,5]


# writer = SummaryWriter('runs/model_classifier')
PATH = './model_classifier221024/'
loss_function = nn.CrossEntropyLoss()

In [287]:
def add_on():
    classifier_ = added_on_model(model_=model, parameter=add_layer_list, dimension = 20 )
    classifier_ = classifier_.cuda()        
    optimizer = optim.Adam(classifier_.parameters(), weight_decay = 1e-4, lr=0.0005)
    min_val_loss = np.Inf
    epochs_no_improve = 20
    
    for num_epoch in range(epoch):

        average_loss = 0
        device = 'cuda'
        classifier_ = classifier_.train()
        for batch_idx, (train_X,train_Y) in enumerate(train_loader):
            train_X = train_X.to(device)
            train_Y = torch.tensor(train_Y, dtype=torch.long).to(device)
            output = classifier_(train_X)
            loss = loss_function(output, train_Y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            average_loss += loss.item()

        average_loss /=len(train_loader)
        aver_val_loss = 0

        classifier_= classifier_.eval()
        for val_X,val_Y in val_loader:
            val_X = val_X.to(device)
            val_Y = torch.tensor(val_Y, dtype=torch.long).to(device)
            val_output = classifier_(val_X)
            loss_val = loss_function(val_output, val_Y)
            aver_val_loss += loss_val.item()

        aver_val_loss /=len(val_loader)

        if aver_val_loss <= min_val_loss:
            directory = PATH
            #directory 이름은 layer 첫번째 및 두번째 그리고 마지막 layer수 
            if not os.path.exists(directory):
                os.makedirs(directory)
            filename = [f for f in os.listdir(directory) if f.startswith("model_{}".format(add_layer_list))]
            if filename:
                if os.path.isfile(directory+filename[0]):
                    os.remove(directory+filename[0])
            torch.save(classifier_, directory+'model_{}_{:.0f}.pt'.format(add_layer_list,aver_val_loss*1000))
            epochs_no_improve = 0
            min_val_loss = aver_val_loss
        else:
            epochs_no_improve += 1

        # If the validation loss is at a minimum
        if num_epoch > min_epoch and epochs_no_improve == n_epochs_stop:
            early_stop = True
            break
        else:
            continue

In [288]:
def eval_model():
    vp_label = classifier_model(X_notrain.cuda())
    v_predict = torch.argmax(vp_label, axis=1)
    val_acc1, val_acc2 = accuracy(vp_label.cpu(), y_nottrain, topk=(1, 2))
    val_acc1 = val_acc1.item()
    val_acc2 = val_acc2.item()
    one_hot_y = nn.functional.one_hot(y_nottrain)
    roc_weight_val = roc_auc_score(one_hot_y, soft(vp_label).cpu().detach().numpy(),  multi_class= 'ovr', average = 'weighted')
    roc_mirco_val = roc_auc_score(one_hot_y, soft(vp_label).cpu().detach().numpy(),  multi_class= 'ovr', average = 'micro')
    
    testp_label = classifier_model(X_test_.cuda())
    testpredict = torch.argmax(testp_label, axis=1)
    test_acc1, test_acc2 = accuracy(testp_label.cpu(), y_test_, topk=(1, 2))
    test_acc1 = test_acc1.item()
    test_acc2 = test_acc2.item()    
    
    one_hot_y_test = nn.functional.one_hot(y_test_)
    roc_weight_test = roc_auc_score(one_hot_y_test, soft(testp_label).cpu().detach().numpy(),  multi_class= 'ovr', average = 'weighted')
    roc_micro_test = roc_auc_score(one_hot_y_test, soft(testp_label).cpu().detach().numpy(),  multi_class= 'ovr', average = 'micro')
    return val_acc1, val_acc2, roc_weight_val, roc_mirco_val, test_acc1, test_acc2, roc_weight_test, roc_micro_test

In [289]:
dataframe = pd.DataFrame()
for i in range(100):

    X_test_, y_test_, train_data, total_val_data, X_notrain, y_nottrain = split()

    train_1()

    #### load constrastive model 

    saved_file = os.listdir("./revise/effu220120_weighted_20_models/")
    loaded_model = torch.load(opt.model_path+f"/{saved_file[0]}")

    model = small_encoder(input_len, [128, 256, 128, 32], dimension)
    model.load_state_dict(loaded_model["model"])
    model = model.cuda()

    for param in model.parameters():
        param.requires_grad = False

    ### classifier training
    add_on()

    clas_mod = os.listdir('./model_classifier221024/')[0]
    classifier_model = torch.load(f'./model_classifier221024/{clas_mod}')
    classifier_model = classifier_model.eval()
    soft = nn.Softmax(dim=1)

    val_acc1, val_acc2, roc_weight_val, roc_mirco_val, test_acc1, test_acc2, roc_weight_test, roc_micro_test = eval_model()
    dataframe = dataframe.append({"val_acc1":val_acc1,
                  "val_acc2":val_acc2, 
                  "roc_weight_val": roc_weight_val, 
                  "roc_mirco_val":roc_mirco_val, 
                  "test_acc1": test_acc1, 
                  "test_acc2":test_acc2, 
                  "roc_weight_test":roc_weight_test, 
                  "roc_micro_test":roc_micro_test}, ignore_index=True)

Train: [99][1/3]	training loss 6.755 (average: 6.755)
Train: [99][2/3]	training loss 6.746 (average: 6.750)
Train: [99][3/3]	training loss 4.890 (average: 6.618)
Test: [0/2]	val Loss: 5.9979 (val loss average: 5.9979)
Test: [1/2]	val Loss: 5.8384 (val loss average: 5.9239)
Train: [199][1/3]	training loss 6.593 (average: 6.593)
Train: [199][2/3]	training loss 6.649 (average: 6.621)
Train: [199][3/3]	training loss 4.699 (average: 6.484)
Test: [0/2]	val Loss: 5.8920 (val loss average: 5.8920)
Test: [1/2]	val Loss: 5.7627 (val loss average: 5.8320)
Train: [299][1/3]	training loss 6.524 (average: 6.524)
Train: [299][2/3]	training loss 6.583 (average: 6.553)
Train: [299][3/3]	training loss 4.693 (average: 6.421)
Test: [0/2]	val Loss: 5.7615 (val loss average: 5.7615)
Test: [1/2]	val Loss: 5.7315 (val loss average: 5.7476)
Train: [399][1/3]	training loss 6.581 (average: 6.581)
Train: [399][2/3]	training loss 6.457 (average: 6.519)
Train: [399][3/3]	training loss 4.651 (average: 6.386)
Test: [

In [292]:
dataframe.to_csv("./final_result.csv")

In [310]:
dataframe.describe().to_csv("./finalsummary.csv")

In [326]:
dataframe.describe()

Unnamed: 0,roc_micro_test,roc_mirco_val,roc_weight_test,roc_weight_val,test_acc1,test_acc2,val_acc1,val_acc2
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.93,0.93,0.89,0.89,75.1,88.47,75.13,88.47
std,0.01,0.01,0.01,0.01,1.49,1.16,1.87,1.38
min,0.91,0.92,0.85,0.86,70.37,84.68,70.67,85.1
25%,0.93,0.93,0.88,0.88,74.41,87.88,74.04,87.44
50%,0.93,0.93,0.89,0.89,75.08,88.38,75.48,88.46
75%,0.94,0.94,0.89,0.9,76.09,89.23,76.44,89.42
max,0.95,0.95,0.91,0.92,79.12,91.25,79.57,91.11


In [325]:
dataframe.sort_values(by="roc_micro_test", ascending=False).head(10)

Unnamed: 0,roc_micro_test,roc_mirco_val,roc_weight_test,roc_weight_val,test_acc1,test_acc2,val_acc1,val_acc2
77,0.95,0.93,0.91,0.89,79.12,90.74,74.04,88.22
1,0.95,0.93,0.91,0.88,76.26,90.24,75.96,91.11
41,0.94,0.93,0.9,0.9,75.08,91.25,76.68,87.5
81,0.94,0.93,0.91,0.89,75.76,88.22,75.96,89.18
25,0.94,0.94,0.9,0.9,77.78,89.56,74.04,90.14
80,0.94,0.93,0.9,0.88,76.6,89.39,75.72,89.18
6,0.94,0.92,0.89,0.86,76.26,90.57,73.56,85.58
55,0.94,0.93,0.9,0.89,75.25,89.39,75.24,87.02
97,0.94,0.93,0.9,0.89,76.43,88.55,76.44,87.98
34,0.94,0.94,0.9,0.89,77.61,90.57,75.48,88.94
