In [None]:
# Lazy algorithms
# - K-nearest Neighbors
# - Local Regression
# - Lazy Naive Bayes (Naive Bayes + Kernel Density Estimation (KDE))
# https://jakevdp.github.io/PythonDataScienceHandbook/05.13-kernel-density-estimation.html

# Imbalance data handling
# - Random under-sampling
# - Random over-sampling
# - NearMiss

# Domains
# - Fraud detection
# - Spam filtering
# - Disease screening
# - SaaS subscription churn
# - Advertising click-throughs

In [5]:
# Load numpy, pandas, and sklern library
# https://numpy.org/devdocs/user/absolute_beginneencoder_rs.html
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors
# https://matplotlib.org/stable/gallery/mplot3d/scatter3d.html
# from mpl_toolkits.mplot3d import Axes3D
import os
import glob
# File processing
import sys
# import scipy.spatial.distance as sc

# KDTree
#from sklearn.neighbors import KDTree
from scipy.spatial import cKDTree

# Distance Calculation
import math, statistics
from scipy.spatial import distance

#from chebyshev import Chebyshev
import numpy.polynomial.chebyshev 

# Data Transformation
# LabelEncoder = dummy coding
# OneHotEncoder = unique integers
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler, normalize
from sklearn.decomposition import PCA

# Baselines (Undersampling)
# https://imbalanced-learn.org/
from imblearn.under_sampling import AllKNN
from imblearn.under_sampling import ClusterCentroids 
from imblearn.under_sampling import CondensedNearestNeighbour
from imblearn.under_sampling import EditedNearestNeighbours
from imblearn.under_sampling import RepeatedEditedNearestNeighbours
from imblearn.under_sampling import InstanceHardnessThreshold
from imblearn.under_sampling import NearMiss 
from imblearn.under_sampling import NeighbourhoodCleaningRule 
from imblearn.under_sampling import OneSidedSelection
from imblearn.under_sampling import RandomUnderSampler 
from imblearn.under_sampling import TomekLinks 
#from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import RandomOverSampler

# Model
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
# from sklearn.neural_network import MLPClassifier
# from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import StratifiedKFold # train_test_split, GridSearchCV, cross_validate, cross_val_score, StratifiedKFold
from sklearn.model_selection import RepeatedStratifiedKFold # train_test_split, GridSearchCV, cross_validate, cross_val_score, StratifiedKFold
from sklearn.model_selection import RepeatedKFold # train_test_split, GridSearchCV, cross_validate, cross_val_score, StratifiedKFold
from sklearn.model_selection import KFold # train_test_split, GridSearchCV, cross_validate, cross_val_score, StratifiedKFold
from sklearn.model_selection import cross_validate

# Performance
#from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, cohen_kappa_score
from sklearn.metrics import make_scorer, cohen_kappa_score

# Complexity
import time
import tracemalloc

# Multiprocessing
import multiprocessing
from joblib import Parallel, delayed
num_cores = multiprocessing.cpu_count()
if (num_cores > 10):
    num_cores -= 4
# num_cores = 20

# Warning
import warnings

# -------------------------------------------------------------------------------------
# DATA REDUCTION FUNCTION
# -------------------------------------------------------------------------------------

# KD-Tree
def custom_distance(x, y):
    if x[-1] == y[-1]:
        return np.inf
    else:
        return np.linalg.norm(x[:-1] - y[:-1])

# https://docs.scipy.org/doc/scipy/reference/spatial.distance.html
d_name = ('Cityblock','Chebyshev','Correlation','Cosine','Euclidean')
def find_minimum_distance(d,i):
    min_dist = 999999999999999.0
    min_vec = -1
    if (d == 0): # Cityblock
        for j in range(tempdata.shape[0]):
            if (templabel[i] != templabel[j]):
#                new_dist = distance.cityblock(tempdata[i],tempdata[j])
                new_dist = distance.minkowski(tempdata[i],tempdata[j],1)
                if (new_dist < min_dist):
                    min_dist = new_dist
                    min_vec = j
    elif (d == 1): # Chebyshev
        for j in range(tempdata.shape[0]):
            if (templabel[i] != templabel[j]):
                new_dist = distance.chebyshev(tempdata[i],tempdata[j])
                if (new_dist < min_dist):
                    min_dist = new_dist
                    min_vec = j
    elif (d == 2): # Correlation
        for j in range(tempdata.shape[0]):
            if (templabel[i] != templabel[j]):
                new_dist = distance.correlation(tempdata[i],tempdata[j])
                if (new_dist < min_dist):
                    min_dist = new_dist
                    min_vec = j
    elif (d == 3): # Cosine
        for j in range(tempdata.shape[0]):
            if (templabel[i] != templabel[j]):
                new_dist = distance.cosine(tempdata[i],tempdata[j])
                if (new_dist < min_dist):
                    min_dist = new_dist
                    min_vec = j
    elif (d == 4): # Euclidean
        for j in range(tempdata.shape[0]):
            if (templabel[i] != templabel[j]):
#                new_dist = distance.euclidean(tempdata[i],tempdata[j]) # fast
                new_dist = distance.minkowski(tempdata[i],tempdata[j],2)
                if (new_dist < min_dist):
                    min_dist = new_dist
                    min_vec = j
#    elif (d == 5): # Hamming
#        for j in range(tempdata.shape[0]):
#            if (templabel[i] != templabel[j]):
#                new_dist = distance.hamming(tempdata[i],tempdata[j])
#                if (new_dist < min_dist):
#                    min_dist = new_dist
#                    min_vec = j
#    elif (d == 5): # Mahalanobis
#        for j in range(tempdata.shape[0]):
#            if (templabel[i] != templabel[j]):
#                new_dist = distance.mahalanobis(tempdata[i],tempdata[j])
#                if (new_dist < min_dist):
#                    min_dist = new_dist
#                    min_vec = j
    elif (d == 5): # Minkowski p=3
        for j in range(tempdata.shape[0]):
            if (templabel[i] != templabel[j]):
                new_dist = distance.minkowski(tempdata[i],tempdata[j],3)
                
                if (new_dist < min_dist):
                    min_dist = new_dist
                    min_vec = j
                    
    return min_vec
            
# -------------------------------------------------------------------------------------
# PARAMETERS
# -------------------------------------------------------------------------------------

start_set = 0
max_level = 4
max_baseline = 14
max_technique = max_baseline+max_level*4 # DIS1-4, IBP1-4, DISKD1-4, IBPKD1-4
max_k = 5
max_distance = 1
reduction_load = 0
reduction_save = 0
encoder = 1 # label = 1, onehot = 2
pca = 1
pca_components = 0.99
normalization = 1
cross_validation = 1
cross_validation_fold = 5
parallel = 1
model = 'knn' # knn, svm
d = 4
training = 1
train_ratio = 0.3
train_ratio_100k = 0.1
train_ratio_500k = 0.01
performance_save = 1
figure_save = 1
label_title = ('Accuracy','Precision','Recall','F1-Score','AUC (One-vs-Rest)','AUC (One-vs-One)','Reduction Rate','Class Balance')
# Subscript = _, Superscript = ^, use {} for multiple letters
#label_traintype = ('ORG','CNN','ENN','RENN','All KNN','TL','OSS','NCL','NM$_1$','NM$_2$','NM$_3$','IHT','CC','RUS','IBP$_1$','IBP$_2$','IBP$_3$','IBP$_4$')
label_traintype = ('ORG','CNN','ENN','RENN','All KNN','TL','OSS','NCL','NM$_1$','NM$_2$','NM$_3$','IHT','CC','RUS','DIS$_1$','DIS$_2$','DIS$_3$','DIS$_4$','IBP$_1$','IBP$_2$','IBP$_3$','IBP$_4$','DISKD$_1$','DISKD$_2$','DISKD$_3$','DISKD$_4$','IBPKD$_1$','IBPKD$_2$','IBPKD$_3$','IBPKD$_4$')

# -------------------------------------------------------------------------------------
# LOADING DATA SETS
# -------------------------------------------------------------------------------------

# Define the location of the dataset
# https://archive.ics.uci.edu/ml/datasets.php
# http://www.timeseriesclassification.com/dataset.php
files = []
file_header = []
file_separator = []
file_label = []
file_big = []

# SYNTHETIC DATA SETS
# pca = 0
# files.append("https://piyabute.com/data/research/syntactic-square.csv"); file_header.append(None); file_separator.append("\t"); file_label.append(-1); file_big.append(0); # 6A4C
# files.append("https://piyabute.com/data/research/syntactic-wave.csv"); file_header.append(None); file_separator.append("\t"); file_label.append(-1); file_big.append(0); # 6A4C

# REAL-WORLD
#pca = 1
pca = 0
files.append("https://www.piyabute.com/data/research/banknote_authentication.csv"); file_header.append(None); file_separator.append(","); file_label.append(-1); file_big.append(0); # 6A4C
files.append("https://www.piyabute.com/data/research/car.data.csv"); file_header.append(None); file_separator.append(","); file_label.append(-1); file_big.append(0); # 6A4C
files.append("https://www.piyabute.com/data/research/crowdsourced_mapping.custom.csv"); file_header.append(1); file_separator.append(","); file_label.append(0); file_big.append(0); # 6A4C
files.append("https://www.piyabute.com/data/research/letter-recognition.data.csv"); file_header.append(None); file_separator.append(","); file_label.append(0); file_big.append(0); # 8A5C
files.append("https://www.piyabute.com/data/research/optdigits.custom.csv"); file_header.append(None); file_separator.append(","); file_label.append(-1); file_big.append(0); # 64A10C
files.append("https://www.piyabute.com/data/research/pendigits.custom.csv"); file_header.append(None); file_separator.append(","); file_label.append(-1); file_big.append(0); # 16A2C
files.append("https://www.piyabute.com/data/research/statlog_landsat.custom.csv"); file_header.append(None); file_separator.append(","); file_label.append(-1); file_big.append(0); # 36A6C
files.append("https://www.piyabute.com/data/research/tic-tac-toe.data.csv"); file_header.append(None); file_separator.append(","); file_label.append(-1); file_big.append(0); # 6A4C
files.append("https://www.piyabute.com/data/research/TUANDROMD.csv"); file_header.append(0); file_separator.append(","); file_label.append(-1); file_big.append(0); # 36A6C
files.append("https://www.piyabute.com/data/research/waveform-+noise.v2.data.csv"); file_header.append(None); file_separator.append(","); file_label.append(-1); file_big.append(0); # 6A4C

max_set = len(files)

In [6]:
# Reset performance files(Cross Validation)
if (performance_save == 1):

    files_removal = glob.glob('*.txt')
    for f in files_removal:
        os.remove(f)    
    files_removal = glob.glob('*.pdf')
    for f in files_removal:
        os.remove(f)    

for dataset in range(start_set,max_set):

    # Define output storage
    # perf_confusion_matrix=np.zeros(10,4,10,10)
    # perf_confusion_matrix = np.zeros((max_set,max_technique,max_k),dtype=np.float64)
    reduction = np.zeros((max_technique),dtype = np.intc)
    perf_fit_time = np.zeros((max_technique,max_k),dtype = np.float64)
    perf_score_time = np.zeros((max_technique,max_k),dtype = np.float64)
    perf_train_accuracy = np.zeros((max_technique,max_k),dtype = np.float64)
    perf_train_precision = np.zeros((max_technique,max_k),dtype = np.float64)
    perf_train_recall = np.zeros((max_technique,max_k),dtype = np.float64)
    perf_train_f1 = np.zeros((max_technique,max_k),dtype = np.float64)
    perf_train_aucovr = np.zeros((max_technique,max_k),dtype = np.float64)
    perf_train_aucovo = np.zeros((max_technique,max_k),dtype = np.float64)
    time_training = np.zeros((max_technique,max_k),dtype = np.float64)
    time_reduction = np.zeros((max_technique),dtype = np.float64)
    mem_training_current = np.zeros((max_technique,max_k),dtype = np.float64)
    mem_training_peak = np.zeros((max_technique,max_k),dtype = np.float64)
    k_array = np.zeros((max_k),dtype = np.intc)
    d_array = np.zeros((max_set),dtype = np.intc)
    l_array = np.zeros((max_technique),dtype = np.intc)
    l_reduction = np.zeros((max_technique),dtype = np.float64)
    
    # Load data file to DataFrame
    inputfile = files[dataset]
    print("Dataset:",dataset)
    print("Data Source:",inputfile)
    if (inputfile[-2:] == "gz"):
        df = pd.read_csv(inputfile, header = file_header[dataset], sep = file_separator[dataset], compression="gzip")
    elif (inputfile[-3:] == ".zip"):
        df = pd.read_csv(inputfile, header = file_header[dataset], sep = file_separator[dataset], compression="zip")
    elif (inputfile[-4:] == ".xls") or (inputfile[-4:] == "xlsx"):
        df = pd.read_excel(inputfile, header = file_header[dataset])      
    else:
        df = pd.read_csv(inputfile, header = file_header[dataset], sep = file_separator[dataset])

        
    # -------------------------------------------------------------------------------------
    # PANDAS DATA PRE-PROCESSING
    # -------------------------------------------------------------------------------------

    # Drop specific rows
    if (inputfile == "https://piyabute.com/data/research/TUANDROMD.csv"): df.drop(df.index[2533], inplace=True)
    if (inputfile == "https://piyabute.com/data/research/yeast.data"): df.drop(labels=None, axis=0, inplace=True)
   
    # Stratified sampling
    if (file_big[dataset] > 0): 
        # Generate column index
        df.columns = df.columns.map(str)
        df = df.groupby(df.columns[file_label[dataset]], group_keys=False).apply(lambda x: x.sample(file_big[dataset]))
    
    # Categorical to Numerical
    
    if (encoder == 1):
        # Initialize the LabelEncoder
        labelencoder = LabelEncoder()
        # Apply LabelEncoder on the specified column
        #label_column_name = df.columns[file_label[dataset]]
        #df[label_column_name] = labelencoder.fit_transform(df[label_column_name])
        # Loop through each column in the DataFrame and apply label encoding
        for column in df.columns:
            df[column] = labelencoder.fit_transform(df[column])
    elif (encoder == 2): # Incomplete!
        # Convert the label column into one hot encoding
        one_hot = pd.get_dummies(df.iloc[:, file_label[dataset]])
        # Drop the label column
        last_column_name = df.columns[file_label[dataset]]
        df.drop(columns=[last_column_name], inplace=True)
        df = df.join(one_hot)
        
    # Fill missing values (slow for big data) with the mode (most frequent value) of the column
    for column in df.columns:
        df[column].fillna(df[column].mode()[0], inplace=True)
        # df = df.fillna(df.mean())
        # df = df.fillna(df.median())
        # df = df.fillna(df.mode())

    # Convert dataframe to numpy
    raw = df.to_numpy()

    # -------------------------------------------------------------------------------------
    # RAW DATA PRE-PROCESSING
    # -------------------------------------------------------------------------------------

    # Categorical to Numerical
#    if (encoder == 1):
#        labelencoder = LabelEncoder()
#        print("Preprocessing: Attribute =",df.shape[1],"[",end="")
#        dflist = df.dtypes.tolist()
#        for i in range(0,len(dflist)):
#            if (dflist[i]=="object"):
#                print(i,"E ",end="",sep="")
#                raw[:,i] = labelencoder.fit_transform(raw[:,i])
#            elif (i==(len(dflist)-1) and file_label[dataset]==-1):
#                print(i,"E ",end="",sep="")
#                raw[:,i] = labelencoder.fit_transform(raw[:,i])
#            elif (i==file_label[dataset]):
#                print(i,"E ",end="",sep="")
#                raw[:,i] = labelencoder.fit_transform(raw[:,i])
#            else:
#                print(i," ",end="",sep="")
#        print("] (LabelEncoding)")
#    elif (encoder == 2): # Incomplete!
#        print("Preprocessing: Attribute =",df.shape[1],"(",end="")
#        dflist = df.dtypes.tolist()
#        for i in range(0,len(dflist)-1):
#            if (dflist[i]=="object"):
#                print(i,"E ",end="",sep="")
#                # creating instance of one-hot-encoder
#                enc = OneHotEncoder(categories='all', drop=None, sparse=True, handle_unknown='ignore')
#                encode_temp = enc.fit_transform(raw[:,i].reshape(-1,1)).toarray()
#                raw = np.concatenate((raw, encode_temp), axis=0, out=None)
#            else:
#                print(i," ",end="",sep="")
#        labelencoder = LabelEncoder()
#        raw[:,len(dflist)-1] = labelencoder.fit_transform(raw[:,len(dflist)-1])
#        print(") (OneHotEncoding)")
        
    # Extract the label attribute
    trainsetlabel = raw[:,file_label[dataset]]
    trainsetlabel = trainsetlabel.astype(int)

    # Remove the label attribute
    trainsetdata = np.delete(raw,file_label[dataset],1)
    
    # Calculate PCA
    if (pca == 1):
        # pca = PCA(n_components=pca_components,svd_solver="full")
        mypca = PCA(n_components=pca_components)
        traindata = mypca.fit_transform(trainsetdata)
        trainlabel = trainsetlabel
    else:
        traindata = trainsetdata
        trainlabel = trainsetlabel
    
    # Normalize normal attributes using MinMacScaler
    if (normalization == 1):
        scaler = MinMaxScaler(feature_range=(0.01, 0.99))
        scaler.fit(traindata)
        traindata = scaler.transform(traindata)
        trainlabel = trainlabel - np.amin(trainlabel)
 
    # Construct the training set and test set
    if (cross_validation == 1):
        testdata = []
        testlabel = []
        trainsize = traindata.shape[0]
        traindimension = traindata.shape[1]
        reduction[0] = trainsize
    else:
        if ((traindata.shape[0]*traindata.shape[1])<100000):
            traindata, testdata, trainlabel, testlabel = train_test_split(traindata, trainlabel, train_size=train_ratio, stratify=trainlabel, random_state=42)
        elif ((traindata.shape[0]*traindata.shape[1])<500000):
            traindata, testdata, trainlabel, testlabel = train_test_split(traindata, trainlabel, train_size=train_ratio_100k, stratify=trainlabel, random_state=42)
        else:
            traindata, testdata, trainlabel, testlabel = train_test_split(traindata, trainlabel, train_size=train_ratio_500k, stratify=trainlabel, random_state=42)
        trainsize = traindata.shape[0]
        traindimension = traindata.shape[1]
        testsize = testdata.shape[0]
        testdimension = testdata.shape[1]
        reduction[0] = trainsize     

    print("Preprocessing: Attribute =",traindimension,"(PCA)" if pca==1 else "(Non-PCA)","(Norm)" if normalization==1 else "(Non-Norm)")
    print("Preprocessing: Sample    =",trainsize)
    print("Preprocessing: Class     =", len(np.unique(trainlabel)), np.unique(trainlabel))
    reduction_class = np.zeros((max_technique,len(np.unique(trainlabel))),dtype = np.float64)

    # Print class balance
    if (cross_validation == 1):
        unique_train, counts_train = np.unique(trainlabel, return_counts=True)
        print("Preprocessing: Train     =",dict(zip(unique_train, counts_train)))
    else:
        unique_test, counts_test = np.unique(testlabel, return_counts=True)
        print("Preprocessing: Test      =",dict(zip(unique_test, counts_test)))

    # -------------------------------------------------------------------------------------
    # DISTANCE-BASED INSTANCE SELECTION
    # -------------------------------------------------------------------------------------

    tempset = raw
    tempdata = traindata
    templabel = trainlabel
    scatter_x = []
    scatter_y = []

    for level in range(0,max_level):

        # Start clock
        time_start = time.perf_counter()

        # Parallel or serialized reduction
        if (parallel == 1):
            if __name__ == "__main__":
                ProcessList = Parallel(n_jobs=num_cores)(delayed(find_minimum_distance)(d,i) for i in range(tempdata.shape[0]))
        else:
            ProcessList = [0]*tempdata.shape[0]
            for i in range(0,tempdata.shape[0]):
                ProcessList[i] = find_minimum_distance(i)
                print(i,templabel[i],ProcessList[i],templabel[ProcessList[i]])

        # Remove duplicates
        dis_vec = np.unique(ProcessList)

        # Stop clock
        time_stop = time.perf_counter()

        # Save execution time
        # print(max_baseline-max_level+level*2)
        # print(max_baseline-max_level+level*2+1)
        time_reduction[max_baseline+level] = time_stop - time_start
        time_reduction[max_baseline+max_level+level] = time_stop - time_start

        # Construct reduced data set
        disdata = tempdata[dis_vec]
        dislabel = templabel[dis_vec]
        
        if level == 0:
            disdata1 = disdata
            dislabel1 = dislabel
            tempsetbefore = tempdata.shape[0]
            tempdata1 = np.delete(tempdata,dis_vec,0)
            templabel1 = np.delete(templabel,dis_vec,0)

#                # Maintain minimum values for n-fold cross-validation
#                unique_templabel, counts_templabel = np.unique(templabel1, return_counts=True)
#                mydict = dict(zip(unique_templabel,counts_templabel))
#                for i in mydict: 
#                    mydict[i]=np.maximum(mydict[i],cross_validation_fold*max_k*2)
#                mymodel = SMOTE(n_jobs=-1,sampling_strategy=mydict)
#                tempdata, templabel = mymodel.fit_resample(tempdata1, templabel1)
#                tempsetafter = tempdata.shape[0]

            tempdata = tempdata1
            templabel = templabel1
            tempsetafter = tempdata1.shape[0]
        elif level == 1:
            disdata2 = disdata
            dislabel2 = dislabel
            tempsetbefore = tempdata.shape[0]
            tempdata2 = np.delete(tempdata,dis_vec,0)
            templabel2 = np.delete(templabel,dis_vec,0)

#                # Maintain minimum values for n-fold cross-validation
#                unique_templabel, counts_templabel = np.unique(templabel2, return_counts=True)
#                mydict = dict(zip(unique_templabel,counts_templabel))IBP
#                for i in mydict: 
#                    mydict[i]=np.maximum(mydict[i],cross_validation_fold*max_k*2)
#                mymodel = SMOTE(n_jobs=-1,sampling_strategy=mydict)
#                tempdata, templabel = mymodel.fit_resample(tempdata2, templabel2)
#                tempsetafter = tempdata.shape[0]

            tempdata = tempdata2
            templabel = templabel2
            tempsetafter = tempdata2.shape[0]
        elif level == 2:
            disdata3 = disdata
            dislabel3 = dislabel
            tempsetbefore = tempdata.shape[0]
            tempdata3 = np.delete(tempdata,dis_vec,0)
            templabel3 = np.delete(templabel,dis_vec,0)

#                # Maintain minimum values for n-fold cross-validation
#                unique_templabel, counts_templabel = np.unique(templabel3, return_counts=True)
#                mydict = dict(zip(unique_templabel,counts_templabel))
#                for i in mydict: 
#                    mydict[i]=np.maximum(mydict[i],cross_validation_fold*max_k*2)
#                mymodel = SMOTE(n_jobs=-1,sampling_strategy=mydict)
#                tempdata, templabel = mymodel.fit_resample(tempdata3, templabel3)
#                tempsetafter = tempdata.shape[0]

            tempdata = tempdata3
            templabel = templabel3
            tempsetafter = tempdata3.shape[0]
        elif level == 3:
            disdata4 = disdata
            dislabel4 = dislabel
            tempsetbefore = tempdata.shape[0]
            tempdata4 = np.delete(tempdata,dis_vec,0)
            templabel4 = np.delete(templabel,dis_vec,0)

#                # Maintain minimum values for n-fold cross-validation
#                unique_templabel, counts_templabel = np.unique(templabel4, return_counts=True)
#                mydict = dict(zip(unique_templabel,counts_templabel))
#                for i in mydict: 
#                    mydict[i]=np.maximum(mydict[i],cross_validation_fold*max_k*2)
#                mymodel = SMOTE(n_jobs=-1,sampling_strategy=mydict)
#                tempdata, templabel = mymodel.fit_resample(tempdata4, templabel4)
#                tempsetafter = tempdata.shape[0]

            tempdata = tempdata4
            templabel = templabel4
            tempsetafter = tempdata4.shape[0]

        print("DIS Reduction: Level ",level+1," is constructed. / Time = ","{:0.2f}".format(time_stop - time_start)," seconds / Size = ",disdata.shape[0]," / Original = ",tempsetbefore," / Remain = ", tempsetafter, sep = "")

        # Save reduced data sets
        if (reduction_save == 1):
            if (pca == 1):
                outputfile = inputfile[:-4]+"_pca_dis_"+str(level+1)+".csv"
            else:
                outputfile = inputfile[:-4]+"_dis_"+str(level+1)+".csv"
            with open(outputfile, "ab") as file:
                if (level == 0):
                    np.savetxt(file, np.c_[disdata1, np.array(dislabel1)], delimiter=",", fmt="%1.4f")            
                elif (level == 1):
                    np.savetxt(file, np.c_[disdata2, np.array(dislabel2)], delimiter=",", fmt="%1.4f")            
                elif (level == 2):
                    np.savetxt(file, np.c_[disdata3, np.array(dislabel3)], delimiter=",", fmt="%1.4f")            
                elif (level == 3):
                    np.savetxt(file, np.c_[disdata4, np.array(dislabel4)], delimiter=",", fmt="%1.4f")            
                print("Saving level "+str(level+1)+" to "+outputfile, sep=" ")

    # -------------------------------------------------------------------------------------
    # FAST DISTANCED-BASED INSTANCE SELECTION (DISKD)
    # -------------------------------------------------------------------------------------
    
    tempset = raw
    tempdata = traindata
    templabel = trainlabel
    scatter_x = []
    scatter_y = []

    for level in range(0,max_level):

        # Initialize arrays to store the nearest instance and its distance for each instance
        diskd_vec = np.zeros(len(tempdata), dtype=np.int32)
        diskd_dis = np.ones(len(tempdata)) * np.inf

        # Start clock
        time_start = time.perf_counter()

        # Create a cKDTree object from the feature data
        tree = cKDTree(tempdata)

        # Loop through each instance in the feature data
        for i in range(len(tempdata)):
            # Find the distance and index of the nearest neighbor
            min_dis, min_vec = tree.query(tempdata[i], k=int(len(trainlabel)/100))

            # Check if the nearest neighbor belongs to a different class
            for j in range(1, len(min_vec)):
                if (templabel[i] != templabel[min_vec[j]]):
                    # Store the index of the nearest neighbor and its distance from the current instance
                    # nearest_neighbors.append((min_vec[j], min_dis[j]))
                    diskd_vec[i] = min_vec[j]
                    diskd_dis[i] = min_dis[j]
                    break

        # Remove duplicates
        diskd_vec = np.unique(diskd_vec)

        # Stop clock
        time_stop = time.perf_counter()

        # Save execution time
        # time_reduction_diskd[level] = time_stop - time_start
        time_reduction[max_baseline+max_level*2+level] = time_stop - time_start
        time_reduction[max_baseline+max_level*2+max_level+level] = time_stop - time_start

        # Construct reduced data set
        diskddata = tempdata[diskd_vec]
        diskdlabel = templabel[diskd_vec]

        if level == 0:
            diskddata1 = diskddata
            diskdlabel1 = diskdlabel
            tempsetbefore = tempdata.shape[0]
            tempdata1 = np.delete(tempdata,diskd_vec,0)
            templabel1 = np.delete(templabel,diskd_vec,0)
            tempdata = tempdata1
            templabel = templabel1
            tempsetafter = tempdata1.shape[0]
        elif level == 1:
            diskddata2 = diskddata
            diskdlabel2 = diskdlabel
            tempsetbefore = tempdata.shape[0]
            tempdata2 = np.delete(tempdata,diskd_vec,0)
            templabel2 = np.delete(templabel,diskd_vec,0)
            tempdata = tempdata2
            templabel = templabel2
            tempsetafter = tempdata2.shape[0]
        elif level == 2:
            diskddata3 = diskddata
            diskdlabel3 = diskdlabel
            tempsetbefore = tempdata.shape[0]
            tempdata3 = np.delete(tempdata,diskd_vec,0)
            templabel3 = np.delete(templabel,diskd_vec,0)
            tempdata = tempdata3
            templabel = templabel3
            tempsetafter = tempdata3.shape[0]
        elif level == 3:
            diskddata4 = diskddata
            diskdlabel4 = diskdlabel
            tempsetbefore = tempdata.shape[0]
            tempdata4 = np.delete(tempdata,diskd_vec,0)
            templabel4 = np.delete(templabel,diskd_vec,0)
            tempdata = tempdata4
            templabel = templabel4
            tempsetafter = tempdata4.shape[0]

        print("DISKD Reduction (k=",int(len(trainlabel)/100),"): Level ",level+1," is constructed. / Time = ","{:0.2f}".format(time_stop - time_start)," seconds / Size = ",diskddata.shape[0]," / Original = ",tempsetbefore," / Remain = ", tempsetafter, sep = "")

        # Save reduced data sets
        if (reduction_save == 1):
            if (pca == 1):
                outputfile = inputfile[:-4]+"_pca_diskd_"+str(level+1)+".csv"
            else:
                outputfile = inputfile[:-4]+"_diskd_"+str(level+1)+".csv"
            with open(outputfile, "ab") as file:
                if (level == 0):
                    np.savetxt(file, np.c_[diskddata1, np.array(diskdlabel1)], delimiter=",", fmt="%1.4f")            
                elif (level == 1):
                    np.savetxt(file, np.c_[diskddata2, np.array(diskdlabel2)], delimiter=",", fmt="%1.4f")            
                elif (level == 2):
                    np.savetxt(file, np.c_[diskddata3, np.array(diskdlabel3)], delimiter=",", fmt="%1.4f")            
                elif (level == 3):
                    np.savetxt(file, np.c_[diskddata4, np.array(diskdlabel4)], delimiter=",", fmt="%1.4f")            
                print("Saving level "+str(level+1)+" to "+outputfile, sep=" ")
  
    # -------------------------------------------------------------------------------------
    # TRAINING SET CONSTRUCTION
    # -------------------------------------------------------------------------------------

    # https://imbalanced-learn.org/
    for traintype in range(0,max_technique):
        if traintype == 0:
            xready = traindata
            tready = trainlabel
            temp, reduction_class_org = np.unique(tready, return_counts=True)
        elif traintype < 14:
            time_start = time.perf_counter()
            if traintype == 1:
                mymodel = CondensedNearestNeighbour(n_jobs=-1,sampling_strategy='all')
            elif traintype == 2:
                mymodel = EditedNearestNeighbours(n_jobs=-1,sampling_strategy='all',kind_sel='mode')
            elif traintype == 3:
                mymodel = RepeatedEditedNearestNeighbours(n_jobs=-1,sampling_strategy='all',kind_sel='mode')
            elif traintype == 4:
                mymodel = AllKNN(n_jobs=-1,sampling_strategy='all',kind_sel='mode')
            elif traintype == 5:
                mymodel = TomekLinks(n_jobs=-1,sampling_strategy='all')
            elif traintype == 6:
                mymodel = OneSidedSelection(n_jobs=-1,sampling_strategy='all')
            elif traintype == 7:
                mymodel = NeighbourhoodCleaningRule(n_jobs=-1,sampling_strategy='all',kind_sel='mode')
            elif traintype == 8:
                mymodel = NearMiss(n_jobs=-1,sampling_strategy='all',version=1)
            elif traintype == 9:
                mymodel = NearMiss(n_jobs=-1,sampling_strategy='all',version=2)
            elif traintype == 10:
                mymodel = NearMiss(n_jobs=-1,sampling_strategy='all',version=3)
            elif traintype == 11:
                mymodel = InstanceHardnessThreshold(n_jobs=-1,sampling_strategy='all')
            elif traintype == 12:
                mymodel = ClusterCentroids(sampling_strategy='all',voting='auto')
            elif traintype == 13:
                mymodel = RandomUnderSampler(sampling_strategy='all',replacement=True)

            xready, tready = mymodel.fit_resample(traindata, trainlabel)
            time_stop = time.perf_counter()
            time_reduction[traintype] = time_stop - time_start
        else: # DIS1,DIS2,DIS3,DIS4
            if traintype == 14: # DIS1
                xready = disdata1
                tready = dislabel1
            elif traintype == 15: # DIS2
                xready = np.vstack((disdata1,disdata2))
                tready = np.hstack((dislabel1,dislabel2))
            elif traintype == 16: # DIS3
                xready = np.vstack((disdata1,disdata2,disdata3))
                tready = np.hstack((dislabel1,dislabel2,dislabel3))
            elif traintype == 17: # DIS4
                xready = np.vstack((disdata1,disdata2,disdata3,disdata4))
                tready = np.hstack((dislabel1,dislabel2,dislabel3,dislabel4))
            elif traintype == 18: # IBP1
                xready = disdata1
                tready = dislabel1
            elif traintype == 19: # IBP2
                xready = np.vstack((disdata1,disdata2))
                tready = np.hstack((dislabel1,dislabel2))
            elif traintype == 20: # IBP3
                xready = np.vstack((disdata1,disdata2,disdata3))
                tready = np.hstack((dislabel1,dislabel2,dislabel3))
            elif traintype == 21: # IBP4
                xready = np.vstack((disdata1,disdata2,disdata3,disdata4))
                tready = np.hstack((dislabel1,dislabel2,dislabel3,dislabel4))
            elif traintype == 22: # DISKD1
                xready = diskddata1
                tready = diskdlabel1
            elif traintype == 23: # DISKD2
                xready = np.vstack((diskddata1,diskddata2))
                tready = np.hstack((diskdlabel1,diskdlabel2))
            elif traintype == 24: # DISKD3
                xready = np.vstack((diskddata1,diskddata2,diskddata3))
                tready = np.hstack((diskdlabel1,diskdlabel2,diskdlabel3))
            elif traintype == 25: # DISKD4
                xready = np.vstack((diskddata1,diskddata2,diskddata3,diskddata4))
                tready = np.hstack((diskdlabel1,diskdlabel2,diskdlabel3,diskdlabel4))
            elif traintype == 26: # IBPKD1
                xready = diskddata1
                tready = diskdlabel1
            elif traintype == 27: # IBPKD2
                xready = np.vstack((diskddata1,diskddata2))
                tready = np.hstack((diskdlabel1,diskdlabel2))
            elif traintype == 28: # IBPKD3
                xready = np.vstack((diskddata1,diskddata2,diskddata3))
                tready = np.hstack((diskdlabel1,diskdlabel2,diskdlabel3))
            elif traintype == 29: # IBPKD4
                xready = np.vstack((diskddata1,diskddata2,diskddata3,diskddata4))
                tready = np.hstack((diskdlabel1,diskdlabel2,diskdlabel3,diskdlabel4))

        unique_tready, counts_tready = np.unique(tready, return_counts=True)

        # OverSampling for DIS and DISKD
        if (traintype >= 18 and traintype <= 21) or (traintype >= 26 and traintype <= 29):

            # Maintain minimum values for n-fold cross-validation
            time_start = time.perf_counter()

            unique_tready, counts_tready = np.unique(tready, return_counts=True)
            mydict = dict(zip(unique_tready,counts_tready))
            print("Oversampling (Pre) : Type ",traintype," ",label_traintype[traintype],"; set=",dataset,"; sample=", len(tready)," ({:0.2f}".format(len(tready)/len(trainlabel)*100), "%) ", dict(zip(unique_tready, counts_tready)), sep = "");
            for i in mydict: 
                mydict[i]=np.maximum(mydict[i],cross_validation_fold*max_k*4)

            # mymodel_smote = SMOTE(n_jobs=-1,sampling_strategy=mydict)
            mymodel_ros = RandomOverSampler(sampling_strategy=mydict)
            xready, tready = mymodel_ros.fit_resample(xready, tready)
            unique_tready, counts_tready = np.unique(tready, return_counts=True)

            time_stop = time.perf_counter()
            time_reduction[traintype] = time_reduction[traintype] + time_stop - time_start

            print("Oversampling (Post): Type ",traintype," ",label_traintype[traintype],"; set=",dataset,"; sample=", len(tready)," ({:0.2f}".format(len(tready)/len(trainlabel)*100), "%) ", dict(zip(unique_tready, counts_tready)), sep = "");
        else:
            print("No Oversampling: Type ",traintype," ",label_traintype[traintype],"; set=",dataset,"; sample=", len(tready)," ({:0.2f}".format(len(tready)/len(trainlabel)*100), "%) ", dict(zip(unique_tready, counts_tready)), sep = "");
            
            
        if (len(np.unique(tready))<len(np.unique(trainlabel))):
            print("Training: Cannot maintain all class labels!")
            # continue

        reduction[traintype] = len(tready)

        # Fixing empty class
        j = 0;
        for i in unique_tready:
            reduction_class[traintype][i] = counts_tready[j]
            j += 1
    
        # Plot training sets
        if (figure_save == 1):
            colors = ['g','b','r','m'];
            if (traindimension == 2):
                scatter_x = xready[:,0]
                scatter_y = xready[:,1]
#                    scatter_x_min = min(scatter_x)
#                    scatter_x_max = max(max(scatter_x),1)
#                    scatter_y_min = min(scatter_y)
#                    scatter_y_max = max(max(scatter_y),1)
                scatter_x_min = 0
                scatter_x_max = 1
                scatter_y_min = 0
                scatter_y_max = 1
                group = tready
                plt.scatter(scatter_x, scatter_y, c=tready, cmap=matplotlib.colors.ListedColormap(colors), s = 1, alpha=0.5);
                plt.xlim([scatter_x_min,scatter_x_max])
                plt.ylim([scatter_y_min,scatter_y_max])
                plt.title(label_traintype[traintype],fontsize=30,fontweight='bold')
                plt.xlabel('X')
                plt.ylabel('Y')
                plt.savefig("instance-"+str(dataset+1).zfill(2)+"-"+str(traintype+1).zfill(2)+".pdf", format="pdf", dpi=None, facecolor="w", edgecolor="w", orientation="portrait", transparent=True, bbox_inches="tight", pad_inches=0.02, metadata=None)
                plt.show()
                plt.close()

            if (traindimension == 3):
                scatter_x = xready[:,0]
                scatter_y = xready[:,1]
                scatter_z = xready[:,2]
#                    scatter_x_min = min(scatter_x)
#                    scatter_x_max = max(max(scatter_x),1)
#                    scatter_y_min = min(scatter_y)
#                    scatter_y_max = max(max(scatter_y),1)
#                    scatter_z_min = min(scatter_z)
#                    scatter_z_max = max(max(scatter_z),1)
                scatter_x_min = 0
                scatter_x_max = 1
                scatter_y_min = 0
                scatter_y_max = 1
                scatter_z_min = 0
                scatter_z_max = 1
                group = tready
                plt.scatter(scatter_x, scatter_y, scatter_z, c=tready, cmap=matplotlib.colors.ListedColormap(colors), s = 1, alpha=0.5);
                plt.title(label_traintype[traintype],fontsize=30,fontweight='bold')
                plt.xlim([scatter_x_min,scatter_x_max])
                plt.ylim([scatter_y_min,scatter_y_max])
                plt.zlim([scatter_z_min,scatter_z_max])
                plt.xlabel('X')
                plt.ylabel('Y')
                plt.zlabel('Z')
                plt.savefig("instance-"+str(dataset+1).zfill(2)+"-"+str(traintype+1).zfill(2)+".pdf", format="pdf", dpi=None, facecolor="w", edgecolor="w", orientation="portrait", transparent=True, bbox_inches="tight", pad_inches=0.02, metadata=None)
                plt.show()
                plt.close()

        # -------------------------------------------------------------------------------------
        # LAZY LEARNING ALGORITHMS
        # -------------------------------------------------------------------------------------

        # k-Nearest Neighbors
        # https://stackabuse.com/k-nearest-neighbors-algorithm-in-python-and-scikit-learn/
        # https://scikit-learn.org/stable/modules/model_evaluation.html
        # Local Regression
        # Lazy naive Bayes
        # SVM RBF

        # Execute loop only once for non-knn
        if (model != 'knn'): max_k = 0

        for k in range(0,max_k,1):

            # Training Cross validation
            # https://towardsdatascience.com/cross-validation-using-knn-6babb6e619c8
            # https://towardsdatascience.com/building-a-k-nearest-neighbors-k-nn-model-with-scikit-learn-51209555453a
            # https://scikit-learn.org/stable/modules/cross_validation.html
            # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_validate.html
            # https://scikit-learn.org/stable/modules/model_evaluation.html
            tracemalloc.start()
            time_start = time.perf_counter()
            if (model == 'knn'):
                clf = KNeighborsClassifier(n_neighbors=(2*k)+1,n_jobs=-1)
            elif (model == 'svm'):
                clf = svm.SVC(kernel='linear', C=1, random_state=0,probability=True)

            # Set cross validation to sample size if too small
            if (min(counts_tready)>cross_validation_fold):
                cross_validation_fold_final = cross_validation_fold
            elif (min(counts_tready)>2):
                cross_validation_fold_final = min(counts_tready)
            else:
                cross_validation_fold_final = 2

            # Early break if class members are less than k
            # if (min(counts_tready)/cross_validation_fold_final<k+1): break

            # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html
            # https://sciksit-learn.org/stable/modules/model_evaluation.html
            # scores = cross_val_score(clf, xready, tready, cv=cross_validation_fold)
            scoring = {"accuracy":"accuracy",
                       "precision":"precision_weighted",
                       "recall":"recall_weighted",
                       "f1":"f1_weighted",
                       "aucovr":"roc_auc_ovr_weighted",
                       "aucovo":"roc_auc_ovo_weighted"}
            skf = StratifiedKFold(n_splits=cross_validation_fold_final, random_state=0, shuffle=True)
#                skf = KFold(n_splits=cross_validation_fold_final, random_state=0, shuffle=True)
            scores = cross_validate(clf, xready, tready, scoring=scoring, cv=skf, return_train_score=True, n_jobs=-1)

            time_stop = time.perf_counter()
            current, peak = tracemalloc.get_traced_memory()
            tracemalloc.stop()
            mem_training_current[traintype,k] = current
            mem_training_peak[traintype,k] = peak
            time_training[traintype,k] = time_stop - time_start

#                global_fit_time[dataset,traintype,k] = 
            perf_fit_time[traintype,k] = np.mean(scores["fit_time"])
#                global_score_time[dataset,traintype,k] = 
            perf_score_time[traintype,k] = np.mean(scores["score_time"])
#                global_train_accuracy[dataset,traintype,k] = 
            perf_train_accuracy[traintype,k] = np.mean(scores["train_accuracy"])
#                global_train_precision[dataset,traintype,k] = 
            perf_train_precision[traintype,k] = np.mean(scores["train_precision"])
#                global_train_recall[dataset,traintype,k] = 
            perf_train_recall[traintype,k] = np.mean(scores["train_recall"])
#                global_train_f1[dataset,traintype,k] = 
            perf_train_f1[traintype,k] = np.mean(scores["train_f1"])
#                global_train_aucovr[dataset,traintype,k] = 
            perf_train_aucovr[traintype,k] = np.mean(scores["train_aucovr"])
#                global_train_aucovo[dataset,traintype,k] = dflist
            perf_train_aucovo[traintype,k] = np.mean(scores["train_aucovo"])
#                global_train_kappa[dataset,traintype,k] = 
#                perf_train_kappa[traintype,k] = statistics.mean(scores["train_kappa"])
#                global_test_accuracy[dataset,traintype,k] = 
#             perf_test_accuracy[traintype,k] = statistics.mean(scores["test_accuracy"])
#                global_test_precision[dataset,traintype,k] = 
#             perf_test_precision[traintype,k] = statistics.mean(scores["test_precision"])
#                global_test_recall[dataset,traintype,k] = 
#             perf_test_recall[traintype,k] = statistics.mean(scores["test_recall"])
#                global_test_f1[dataset,traintype,k] = 
#             perf_test_f1[traintype,k] = statistics.mean(scores["test_f1"])
#                global_test_aucovr[dataset,traintype,k] = 
#             perf_test_aucovr[traintype,k] = statistics.mean(scores["test_aucovr"])
#                global_test_aucovo[dataset,traintype,k] = 
#             perf_test_aucovo[traintype,k] = statistics.mean(scores["test_aucovo"])

            if (model == 'knn'):
                print("Training: Type ",label_traintype[traintype],
#                          "; MEM=","{:5.0f}".format(mem_training_current[k]/1000000),"{:5.0f}".format(mem_training_peak[k]/1000000),                     
                      "; cv=",cross_validation_fold_final,
                      "; d=",d,
                      "; k=",(2*k)+1,
                      "; Train=",reduction[traintype],
                      "; Acc=","{:0.2f}".format(perf_train_accuracy[traintype,k]),
                      "; Pre=","{:0.2f}".format(perf_train_precision[traintype,k]),
                      "; Recall=","{:0.2f}".format(perf_train_recall[traintype,k]),
                      "; F1=","{:0.2f}".format(perf_train_f1[traintype,k]),
                      "; AUC OVR=","{:0.2f}".format(perf_train_aucovr[traintype,k]),
                      "; AUC OVO=","{:0.2f}".format(perf_train_aucovo[traintype,k]),sep = "")
            elif (model == 'svm2'):
                print("Training: Type ",label_traintype[traintype],
#                                   "; MEM=","{:5.0f}".format(mem_training_current[k]/1000000),"{:5.0f}".format(mem_training_peak[k]/1000000),                     
                      "; cv=",cross_validation_fold_final,
                      "; d=",d,
                      "; Train=",reduction[traintype],
                      "; Acc=","{:0.2f}".format(sum(perf_train_accuracy[traintype])/len(perf_train_accuracy[traintype])),
                      "; Pre=","{:0.2f}".format(sum(perf_train_precision[traintype]/len(perf_train_precision[traintype]))),
                      "; Recall=","{:0.2f}".format(sum(perf_train_recall[traintype]/len(perf_train_recall[traintype]))),
                      "; F1=","{:0.2f}".format(sum(perf_train_f1[traintype]/len(perf_train_f1[traintype]))),
                      "; AUC OVR=","{:0.2f}".format(sum(perf_train_aucovr[traintype]/len(perf_train_aucovr[traintype]))),
                      "; AUC OVO=","{:0.2f}".format(sum(perf_train_aucovo[traintype]/len(perf_train_aucovo[traintype]))), sep = "")

        # Save every train type
        if (performance_save == 1):
            with open("cross_fit_time.txt", "ab") as file:
                np.savetxt(file, perf_fit_time[traintype].reshape(1,max_k), delimiter="\t", fmt="%1.8f")
            with open("cross_score_time.txt", "ab") as file:
                np.savetxt(file, perf_score_time[traintype].reshape(1,max_k), delimiter="\t", fmt="%1.8f")
            with open("cross_train_accuracy.txt", "ab") as file:
                np.savetxt(file, perf_train_accuracy[traintype].reshape(1,max_k), delimiter="\t", fmt="%1.4f")
            with open("cross_train_precision.txt", "ab") as file:
                np.savetxt(file, perf_train_precision[traintype].reshape(1,max_k), delimiter="\t", fmt="%1.4f")        
            with open("cross_train_recall.txt", "ab") as file:
                np.savetxt(file, perf_train_recall[traintype].reshape(1,max_k), delimiter="\t", fmt="%1.4f")        
            with open("cross_train_f1.txt", "ab") as file:
                np.savetxt(file, perf_train_f1[traintype].reshape(1,max_k), delimiter="\t", fmt="%1.4f")        
            with open("cross_train_aucovr.txt", "ab") as file:
                np.savetxt(file, perf_train_aucovr[traintype].reshape(1,max_k), delimiter="\t", fmt="%1.4f")        
            with open("cross_train_aucovo.txt", "ab") as file:
                np.savetxt(file, perf_train_aucovo[traintype].reshape(1,max_k), delimiter="\t", fmt="%1.4f")        
#             with open("cross_test_accuracy.txt", "ab") as file:
#                 np.savetxt(file, perf_test_accuracy[traintype].reshape(1,max_k), delimiter="\t", fmt="%1.4f")
#             with open("cross_test_precision.txt", "ab") as file:
#                 np.savetxt(file, perf_test_precision[traintype].reshape(1,max_k), delimiter="\t", fmt="%1.4f")        
#             with open("cross_test_recall.txt", "ab") as file:
#                 np.savetxt(file, perf_test_recall[traintype].reshape(1,max_k), delimiter="\t", fmt="%1.4f")        
#             with open("cross_test_f1.txt", "ab") as file:
#                 np.savetxt(file, perf_test_f1[traintype].reshape(1,max_k), delimiter="\t", fmt="%1.4f")        
#             with open("cross_test_aucovr.txt", "ab") as file:
#                 np.savetxt(file, perf_test_aucovr[traintype].reshape(1,max_k), delimiter="\t", fmt="%1.4f")        
#             with open("cross_test_aucovo.txt", "ab") as file:
#                 np.savetxt(file, perf_test_aucovo[traintype].reshape(1,max_k), delimiter="\t", fmt="%1.4f")        
            with open("time_training.txt", "ab") as file:
                np.savetxt(file, time_training[traintype].reshape(1,max_k), delimiter="\t", fmt="%1.4f")        
            with open("mem_training_current.txt", "ab") as file:
                np.savetxt(file, mem_training_current[traintype].reshape(1,max_k), delimiter="\t", fmt="%0.0f")        
            with open("mem_training_peak.txt", "ab") as file:
                np.savetxt(file, mem_training_peak[traintype].reshape(1,max_k), delimiter="\t", fmt="%0.0f")        

        # Print average scores
        if (model == 'knn2'):
            print("Training: Type ",label_traintype[traintype],
#                      "; MEM=","{:5.0f}".format(mem_training_current[k]/1000000),"{:5.0f}".format(mem_training_peak[k]/1000000),                     
                  "; cv=",cross_validation_fold_final,
                  "; d=",d,
                  "; k=[1-",(2*k)+1,"]",
                  "; Train=",reduction[traintype],
                  "; Acc=","{:0.2f}".format(np.mean(perf_train_accuracy[traintype],axis=0)),
                  "; Pre=","{:0.2f}".format(np.mean(perf_train_precision[traintype],axis=0)),
                  "; Recall=","{:0.2f}".format(np.mean(perf_train_recall[traintype],axis=0)),
                  "; F1=","{:0.2f}".format(np.mean(perf_train_f1[traintype],axis=0)),
                  "; AUC OVR=","{:0.2f}".format(np.mean(perf_train_aucovr[traintype],axis=0)),
                  "; AUC OVO=","{:0.2f}".format(np.mean(perf_train_aucovo[traintype],axis=0)),sep = "")
        elif (model == 'svm'):
            print("Training: Type ",label_traintype[traintype],
#                      "; MEM=","{:5.0f}".format(mem_training_current[k]/1000000),"{:5.0f}".format(mem_training_peak[k]/1000000),                     
                  "; cv=",cross_validation_fold_final,
                  "; d=",d,
                  "; Train=",reduction[traintype],
                  "; Acc=","{:0.2f}".format(np.mean(perf_train_accuracy[traintype],axis=0)),
                  "; Pre=","{:0.2f}".format(np.mean(perf_train_precision[traintype],axis=0)),
                  "; Recall=","{:0.2f}".format(np.mean(perf_train_recall[traintype],axis=0)),
                  "; F1=","{:0.2f}".format(np.mean(perf_train_f1[traintype],axis=0)),
                  "; AUC OVR=","{:0.2f}".format(np.mean(perf_train_aucovr[traintype],axis=0)),
                  "; AUC OVO=","{:0.2f}".format(np.mean(perf_train_aucovo[traintype],axis=0)),sep = "")
        print()

    # Save every data set
    if (performance_save == 1):
        with open("class_balance.txt", "ab") as file:
            np.savetxt(file, counts_train.reshape(1,counts_train.shape[0]), delimiter="\t", fmt="%0.0f")
        with open("class_balance_percentage.txt", "ab") as file:
            np.savetxt(file, (counts_train/traindata.shape[0]).reshape(1,counts_train.shape[0]), delimiter="\t", fmt="%0.4f")
        with open("reduction_size.txt", "ab") as file:
            np.savetxt(file, reduction.reshape(1,reduction.shape[0]), delimiter="\t", fmt="%0.0f")
        with open("reduction_class.txt", "ab") as file:
#                np.savetxt(file, reduction_class.reshape(len(reduction_class),reduction.shape[0]), delimiter="\t", fmt="%0.0f")
            np.savetxt(file, np.transpose(reduction_class), delimiter="\t", fmt="%0.0f")
        with open("time_reduction.txt", "ab") as file:
            np.savetxt(file, time_reduction.reshape(1,max_technique), delimiter="\t", fmt="%1.4f")        

print("Evaluation: Dataset",dataset,"is done!")
print("------------------------------------------------------------------------------\n\r")

# Plot in Plotter.ipynb

Dataset: 0
Data Source: https://www.piyabute.com/data/research/banknote_authentication.csv


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Preprocessing: Attribute = 4 (Non-PCA) (Norm)
Preprocessing: Sample    = 1372
Preprocessing: Class     = 2 [0 1]
Preprocessing: Train     = {0: 762, 1: 610}
DIS Reduction: Level 1 is constructed. / Time = 1.08 seconds / Size = 116 / Original = 1372 / Remain = 1256
DIS Reduction: Level 2 is constructed. / Time = 0.40 seconds / Size = 111 / Original = 1256 / Remain = 1145
DIS Reduction: Level 3 is constructed. / Time = 0.31 seconds / Size = 79 / Original = 1145 / Remain = 1066
DIS Reduction: Level 4 is constructed. / Time = 0.25 seconds / Size = 77 / Original = 1066 / Remain = 989
DISKD Reduction (k=13): Level 1 is constructed. / Time = 0.03 seconds / Size = 16 / Original = 1372 / Remain = 1356
DISKD Reduction (k=13): Level 2 is constructed. / Time = 0.03 seconds / Size = 18 / Original = 1356 / Remain = 1338
DISKD Reduction (k=13): Level 3 is constructed. / Time = 0.02 seconds / Size = 15 / Original = 1338 / Remain = 1323
DISKD Reduction (k=13): Level 4 is constructed. / Time = 0.02 seco



No Oversampling: Type 2 ENN; set=0; sample=1369 (99.78%) {0: 759, 1: 610}
Training: Type ENN; cv=5; d=4; k=1; Train=1369; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=3; Train=1369; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=5; Train=1369; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=7; Train=1369; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=9; Train=1369; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 3 RENN; set=0; sample=1369 (99.78%) {0: 759, 1: 610}
Training: Type RENN; cv=5; d=4; k=1; Train=1369; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type RENN; cv=5; d=4; k=3; Train=1369; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type RENN; cv=5; d=



Training: Type OSS; cv=2; d=4; k=5; Train=613; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=nan; AUC OVO=nan
Training: Type OSS; cv=2; d=4; k=7; Train=613; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=nan; AUC OVO=nan
Training: Type OSS; cv=2; d=4; k=9; Train=613; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=nan; AUC OVO=nan

No Oversampling: Type 7 NCL; set=0; sample=1369 (99.78%) {0: 759, 1: 610}
Training: Type NCL; cv=5; d=4; k=1; Train=1369; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=3; Train=1369; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=5; Train=1369; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=7; Train=1369; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=9; Train=1369; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.0



Training: Type NM$_3$; cv=5; d=4; k=1; Train=702; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=3; Train=702; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=5; Train=702; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=7; Train=702; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=9; Train=702; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 11 IHT; set=0; sample=1242 (90.52%) {0: 632, 1: 610}
Training: Type IHT; cv=5; d=4; k=1; Train=1242; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IHT; cv=5; d=4; k=3; Train=1242; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IHT; cv=5; d=4; k=5; Train=1242; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC O



No Oversampling: Type 12 CC; set=0; sample=1220 (88.92%) {0: 610, 1: 610}
Training: Type CC; cv=5; d=4; k=1; Train=1220; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=3; Train=1220; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=5; Train=1220; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=7; Train=1220; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=9; Train=1220; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 13 RUS; set=0; sample=1220 (88.92%) {0: 610, 1: 610}
Training: Type RUS; cv=5; d=4; k=1; Train=1220; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type RUS; cv=5; d=4; k=3; Train=1220; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type RUS; cv=5; d=4; k=5; 



Training: Type IBP$_1$; cv=5; d=4; k=5; Train=200; Acc=0.97; Pre=0.98; Recall=0.97; F1=0.97; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBP$_1$; cv=5; d=4; k=7; Train=200; Acc=0.95; Pre=0.95; Recall=0.95; F1=0.95; AUC OVR=0.99; AUC OVO=0.99
Training: Type IBP$_1$; cv=5; d=4; k=9; Train=200; Acc=0.92; Pre=0.92; Recall=0.92; F1=0.92; AUC OVR=0.99; AUC OVO=0.99

Oversampling (Pre) : Type 19 IBP$_2$; set=0; sample=227 (16.55%) {0: 78, 1: 149}
Oversampling (Post): Type 19 IBP$_2$; set=0; sample=249 (18.15%) {0: 100, 1: 149}
Training: Type IBP$_2$; cv=5; d=4; k=1; Train=249; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBP$_2$; cv=5; d=4; k=3; Train=249; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBP$_2$; cv=5; d=4; k=5; Train=249; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBP$_2$; cv=5; d=4; k=7; Train=249; Acc=0.96; Pre=0.97; Recall=0.96; F1=0.96; AUC OVR=1.00; AUC OVO=1



Training: Type DISKD$_2$; cv=4; d=4; k=3; Train=34; Acc=0.92; Pre=0.87; Recall=0.92; F1=0.89; AUC OVR=0.97; AUC OVO=0.97
Training: Type DISKD$_2$; cv=4; d=4; k=5; Train=34; Acc=0.88; Pre=0.78; Recall=0.88; F1=0.83; AUC OVR=0.91; AUC OVO=0.91
Training: Type DISKD$_2$; cv=4; d=4; k=7; Train=34; Acc=0.88; Pre=0.78; Recall=0.88; F1=0.83; AUC OVR=0.88; AUC OVO=0.88
Training: Type DISKD$_2$; cv=4; d=4; k=9; Train=34; Acc=0.88; Pre=0.78; Recall=0.88; F1=0.83; AUC OVR=0.84; AUC OVO=0.84

No Oversampling: Type 24 DISKD$_3$; set=0; sample=49 (3.57%) {0: 5, 1: 44}
Training: Type DISKD$_3$; cv=5; d=4; k=1; Train=49; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type DISKD$_3$; cv=5; d=4; k=3; Train=49; Acc=0.94; Pre=0.95; Recall=0.94; F1=0.93; AUC OVR=0.99; AUC OVO=0.99
Training: Type DISKD$_3$; cv=5; d=4; k=5; Train=49; Acc=0.90; Pre=0.81; Recall=0.90; F1=0.85; AUC OVR=0.98; AUC OVO=0.98
Training: Type DISKD$_3$; cv=5; d=4; k=7; Train=49; Acc=0.90; Pre=0.81; Recal



Training: Type IBPKD$_1$; cv=5; d=4; k=7; Train=200; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBPKD$_1$; cv=5; d=4; k=9; Train=200; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00

Oversampling (Pre) : Type 27 IBPKD$_2$; set=0; sample=34 (2.48%) {0: 4, 1: 30}
Oversampling (Post): Type 27 IBPKD$_2$; set=0; sample=200 (14.58%) {0: 100, 1: 100}
Training: Type IBPKD$_2$; cv=5; d=4; k=1; Train=200; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBPKD$_2$; cv=5; d=4; k=3; Train=200; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBPKD$_2$; cv=5; d=4; k=5; Train=200; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00




Training: Type IBPKD$_2$; cv=5; d=4; k=7; Train=200; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBPKD$_2$; cv=5; d=4; k=9; Train=200; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00

Oversampling (Pre) : Type 28 IBPKD$_3$; set=0; sample=49 (3.57%) {0: 5, 1: 44}
Oversampling (Post): Type 28 IBPKD$_3$; set=0; sample=200 (14.58%) {0: 100, 1: 100}
Training: Type IBPKD$_3$; cv=5; d=4; k=1; Train=200; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBPKD$_3$; cv=5; d=4; k=3; Train=200; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBPKD$_3$; cv=5; d=4; k=5; Train=200; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00




Training: Type IBPKD$_3$; cv=5; d=4; k=7; Train=200; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBPKD$_3$; cv=5; d=4; k=9; Train=200; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00

Oversampling (Pre) : Type 29 IBPKD$_4$; set=0; sample=60 (4.37%) {0: 6, 1: 54}
Oversampling (Post): Type 29 IBPKD$_4$; set=0; sample=200 (14.58%) {0: 100, 1: 100}
Training: Type IBPKD$_4$; cv=5; d=4; k=1; Train=200; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBPKD$_4$; cv=5; d=4; k=3; Train=200; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBPKD$_4$; cv=5; d=4; k=5; Train=200; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00




Training: Type IBPKD$_4$; cv=5; d=4; k=7; Train=200; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBPKD$_4$; cv=5; d=4; k=9; Train=200; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00

Dataset: 1
Data Source: https://www.piyabute.com/data/research/car.data.csv


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Preprocessing: Attribute = 6 (Non-PCA) (Norm)
Preprocessing: Sample    = 1728
Preprocessing: Class     = 4 [0 1 2 3]
Preprocessing: Train     = {0: 384, 1: 69, 2: 1210, 3: 65}
DIS Reduction: Level 1 is constructed. / Time = 0.93 seconds / Size = 629 / Original = 1728 / Remain = 1099
DIS Reduction: Level 2 is constructed. / Time = 0.17 seconds / Size = 53 / Original = 1099 / Remain = 1046
DIS Reduction: Level 3 is constructed. / Time = 0.12 seconds / Size = 1 / Original = 1046 / Remain = 1045
DIS Reduction: Level 4 is constructed. / Time = 0.11 seconds / Size = 1 / Original = 1045 / Remain = 1044
DISKD Reduction (k=17): Level 1 is constructed. / Time = 0.04 seconds / Size = 613 / Original = 1728 / Remain = 1115
DISKD Reduction (k=17): Level 2 is constructed. / Time = 0.03 seconds / Size = 69 / Original = 1115 / Remain = 1046
DISKD Reduction (k=17): Level 3 is constructed. / Time = 0.02 seconds / Size = 15 / Original = 1046 / Remain = 1031
DISKD Reduction (k=17): Level 4 is constructed. 



Training: Type CNN; cv=2; d=4; k=1; Train=195; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00




Training: Type CNN; cv=2; d=4; k=3; Train=195; Acc=0.77; Pre=0.80; Recall=0.77; F1=0.76; AUC OVR=0.89; AUC OVO=0.90




Training: Type CNN; cv=2; d=4; k=5; Train=195; Acc=0.72; Pre=0.75; Recall=0.72; F1=0.70; AUC OVR=0.88; AUC OVO=0.89
Training: Type CNN; cv=2; d=4; k=7; Train=195; Acc=0.68; Pre=0.70; Recall=0.68; F1=0.65; AUC OVR=0.86; AUC OVO=0.88
Training: Type CNN; cv=2; d=4; k=9; Train=195; Acc=0.66; Pre=0.70; Recall=0.66; F1=0.62; AUC OVR=0.85; AUC OVO=0.87

No Oversampling: Type 2 ENN; set=1; sample=1458 (84.38%) {0: 340, 2: 1094, 3: 24}
Training: Cannot maintain all class labels!




Training: Type ENN; cv=5; d=4; k=1; Train=1458; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=3; Train=1458; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=5; Train=1458; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=7; Train=1458; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=9; Train=1458; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=0.99

No Oversampling: Type 3 RENN; set=1; sample=1458 (84.38%) {0: 340, 2: 1094, 3: 24}
Training: Cannot maintain all class labels!
Training: Type RENN; cv=5; d=4; k=1; Train=1458; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type RENN; cv=5; d=4; k=3; Train=1458; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type RENN; cv=5; d=4; k=5; Train=1458; Ac



Training: Type OSS; cv=2; d=4; k=1; Train=1424; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type OSS; cv=2; d=4; k=3; Train=1424; Acc=0.95; Pre=0.95; Recall=0.95; F1=0.95; AUC OVR=0.99; AUC OVO=0.98
Training: Type OSS; cv=2; d=4; k=5; Train=1424; Acc=0.93; Pre=0.93; Recall=0.93; F1=0.92; AUC OVR=0.98; AUC OVO=0.97
Training: Type OSS; cv=2; d=4; k=7; Train=1424; Acc=0.93; Pre=0.93; Recall=0.93; F1=0.92; AUC OVR=0.98; AUC OVO=0.95




Training: Type OSS; cv=2; d=4; k=9; Train=1424; Acc=0.92; Pre=0.92; Recall=0.92; F1=0.92; AUC OVR=0.98; AUC OVO=0.96

No Oversampling: Type 7 NCL; set=1; sample=1458 (84.38%) {0: 340, 2: 1094, 3: 24}
Training: Cannot maintain all class labels!
Training: Type NCL; cv=5; d=4; k=1; Train=1458; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=3; Train=1458; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=5; Train=1458; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=7; Train=1458; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=9; Train=1458; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 8 NM$_1$; set=1; sample=260 (15.05%) {0: 65, 1: 65, 2: 65, 3: 65}
Training: Type NM$_1$; cv=5; d=4; k=1; Train=260; Acc=1.00; Pre=1.00; Recall=1.00;



Training: Type NM$_3$; cv=5; d=4; k=1; Train=223; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=3; Train=223; Acc=0.90; Pre=0.92; Recall=0.90; F1=0.90; AUC OVR=0.98; AUC OVO=0.98
Training: Type NM$_3$; cv=5; d=4; k=5; Train=223; Acc=0.86; Pre=0.88; Recall=0.86; F1=0.86; AUC OVR=0.98; AUC OVO=0.98
Training: Type NM$_3$; cv=5; d=4; k=7; Train=223; Acc=0.86; Pre=0.88; Recall=0.86; F1=0.86; AUC OVR=0.97; AUC OVO=0.97
Training: Type NM$_3$; cv=5; d=4; k=9; Train=223; Acc=0.83; Pre=0.85; Recall=0.83; F1=0.82; AUC OVR=0.97; AUC OVO=0.97

No Oversampling: Type 11 IHT; set=1; sample=657 (38.02%) {0: 70, 1: 65, 2: 457, 3: 65}
Training: Type IHT; cv=5; d=4; k=1; Train=657; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IHT; cv=5; d=4; k=3; Train=657; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type IHT; cv=5; d=4; k=5; Train=657; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.



No Oversampling: Type 12 CC; set=1; sample=260 (15.05%) {0: 65, 1: 65, 2: 65, 3: 65}
Training: Type CC; cv=5; d=4; k=1; Train=260; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=3; Train=260; Acc=0.72; Pre=0.73; Recall=0.72; F1=0.69; AUC OVR=0.96; AUC OVO=0.96
Training: Type CC; cv=5; d=4; k=5; Train=260; Acc=0.71; Pre=0.75; Recall=0.71; F1=0.69; AUC OVR=0.93; AUC OVO=0.93
Training: Type CC; cv=5; d=4; k=7; Train=260; Acc=0.71; Pre=0.75; Recall=0.71; F1=0.68; AUC OVR=0.93; AUC OVO=0.93
Training: Type CC; cv=5; d=4; k=9; Train=260; Acc=0.68; Pre=0.71; Recall=0.68; F1=0.64; AUC OVR=0.93; AUC OVO=0.93

No Oversampling: Type 13 RUS; set=1; sample=260 (15.05%) {0: 65, 1: 65, 2: 65, 3: 65}
Training: Type RUS; cv=5; d=4; k=1; Train=260; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type RUS; cv=5; d=4; k=3; Train=260; Acc=0.86; Pre=0.87; Recall=0.86; F1=0.86; AUC OVR=0.98; AUC OVO=0.98
Training: Type RUS; c

Training: Type DISKD$_2$; cv=5; d=4; k=5; Train=682; Acc=0.80; Pre=0.84; Recall=0.80; F1=0.78; AUC OVR=0.90; AUC OVO=0.94
Training: Type DISKD$_2$; cv=5; d=4; k=7; Train=682; Acc=0.70; Pre=0.77; Recall=0.70; F1=0.65; AUC OVR=0.85; AUC OVO=0.91
Training: Type DISKD$_2$; cv=5; d=4; k=9; Train=682; Acc=0.67; Pre=0.76; Recall=0.67; F1=0.61; AUC OVR=0.84; AUC OVO=0.89

No Oversampling: Type 24 DISKD$_3$; set=1; sample=697 (40.34%) {0: 383, 1: 69, 2: 183, 3: 62}
Training: Type DISKD$_3$; cv=5; d=4; k=1; Train=697; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type DISKD$_3$; cv=5; d=4; k=3; Train=697; Acc=0.87; Pre=0.88; Recall=0.87; F1=0.86; AUC OVR=0.91; AUC OVO=0.94
Training: Type DISKD$_3$; cv=5; d=4; k=5; Train=697; Acc=0.80; Pre=0.84; Recall=0.80; F1=0.78; AUC OVR=0.91; AUC OVO=0.95
Training: Type DISKD$_3$; cv=5; d=4; k=7; Train=697; Acc=0.72; Pre=0.79; Recall=0.72; F1=0.68; AUC OVR=0.86; AUC OVO=0.92
Training: Type DISKD$_3$; cv=5; d=4; k=9; Train=697

  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is

Preprocessing: Attribute = 28 (Non-PCA) (Norm)
Preprocessing: Sample    = 10844
Preprocessing: Class     = 6 [0 1 2 3 4 5]
Preprocessing: Train     = {0: 1494, 1: 7509, 2: 482, 3: 1009, 4: 100, 5: 250}
DIS Reduction: Level 1 is constructed. / Time = 52.19 seconds / Size = 1644 / Original = 10844 / Remain = 9200
DIS Reduction: Level 2 is constructed. / Time = 28.19 seconds / Size = 1161 / Original = 9200 / Remain = 8039
DIS Reduction: Level 3 is constructed. / Time = 14.70 seconds / Size = 729 / Original = 8039 / Remain = 7310
DIS Reduction: Level 4 is constructed. / Time = 7.18 seconds / Size = 353 / Original = 7310 / Remain = 6957
DISKD Reduction (k=108): Level 1 is constructed. / Time = 5.96 seconds / Size = 1567 / Original = 10844 / Remain = 9277
DISKD Reduction (k=108): Level 2 is constructed. / Time = 4.52 seconds / Size = 997 / Original = 9277 / Remain = 8280
DISKD Reduction (k=108): Level 3 is constructed. / Time = 3.59 seconds / Size = 595 / Original = 8280 / Remain = 7685
DISK



Training: Type CNN; cv=2; d=4; k=9; Train=402; Acc=0.64; Pre=0.63; Recall=0.64; F1=0.62; AUC OVR=0.87; AUC OVO=0.87

No Oversampling: Type 2 ENN; set=2; sample=10481 (96.65%) {0: 1421, 1: 7435, 2: 421, 3: 932, 4: 71, 5: 201}
Training: Type ENN; cv=5; d=4; k=1; Train=10481; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=3; Train=10481; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=5; Train=10481; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=7; Train=10481; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=9; Train=10481; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 3 RENN; set=2; sample=10429 (96.17%) {0: 1406, 1: 7428, 2: 408, 3: 920, 4: 69, 5: 198}
Training: Type RENN; cv=5; d=4; k=1; Train=10429; Acc=1.00; Pre=1.00; Recal



Training: Type OSS; cv=2; d=4; k=1; Train=8371; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00




Training: Type OSS; cv=2; d=4; k=3; Train=8371; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00




Training: Type OSS; cv=2; d=4; k=5; Train=8371; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.97; AUC OVR=1.00; AUC OVO=1.00




Training: Type OSS; cv=2; d=4; k=7; Train=8371; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.97; AUC OVR=1.00; AUC OVO=1.00




Training: Type OSS; cv=2; d=4; k=9; Train=8371; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.97; AUC OVR=1.00; AUC OVO=0.99

No Oversampling: Type 7 NCL; set=2; sample=10457 (96.43%) {0: 1421, 1: 7411, 2: 421, 3: 932, 4: 71, 5: 201}
Training: Type NCL; cv=5; d=4; k=1; Train=10457; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=3; Train=10457; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=5; Train=10457; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=7; Train=10457; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=9; Train=10457; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 8 NM$_1$; set=2; sample=600 (5.53%) {0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 100}
Training: Type NM$_1$; cv=5; d=4; k=1; Train=600; Acc=1.00; Pre=1.00; Recall



No Oversampling: Type 10 NM$_3$; set=2; sample=501 (4.62%) {0: 100, 1: 100, 2: 94, 3: 69, 4: 100, 5: 38}
Training: Type NM$_3$; cv=5; d=4; k=1; Train=501; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=3; Train=501; Acc=0.85; Pre=0.85; Recall=0.85; F1=0.85; AUC OVR=0.98; AUC OVO=0.98
Training: Type NM$_3$; cv=5; d=4; k=5; Train=501; Acc=0.76; Pre=0.77; Recall=0.76; F1=0.76; AUC OVR=0.97; AUC OVO=0.97
Training: Type NM$_3$; cv=5; d=4; k=7; Train=501; Acc=0.71; Pre=0.74; Recall=0.71; F1=0.71; AUC OVR=0.96; AUC OVO=0.96
Training: Type NM$_3$; cv=5; d=4; k=9; Train=501; Acc=0.67; Pre=0.71; Recall=0.67; F1=0.67; AUC OVR=0.95; AUC OVO=0.95

No Oversampling: Type 11 IHT; set=2; sample=1862 (17.17%) {0: 123, 1: 1306, 2: 104, 3: 125, 4: 100, 5: 104}
Training: Type IHT; cv=5; d=4; k=1; Train=1862; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IHT; cv=5; d=4; k=3; Train=1862; Acc=0.99; Pre=0.99; Recall



No Oversampling: Type 12 CC; set=2; sample=600 (5.53%) {0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 100}
Training: Type CC; cv=5; d=4; k=1; Train=600; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=3; Train=600; Acc=0.83; Pre=0.85; Recall=0.83; F1=0.84; AUC OVR=0.99; AUC OVO=0.99
Training: Type CC; cv=5; d=4; k=5; Train=600; Acc=0.81; Pre=0.82; Recall=0.81; F1=0.81; AUC OVR=0.98; AUC OVO=0.98
Training: Type CC; cv=5; d=4; k=7; Train=600; Acc=0.79; Pre=0.81; Recall=0.79; F1=0.79; AUC OVR=0.97; AUC OVO=0.97
Training: Type CC; cv=5; d=4; k=9; Train=600; Acc=0.77; Pre=0.80; Recall=0.77; F1=0.78; AUC OVR=0.97; AUC OVO=0.97

No Oversampling: Type 13 RUS; set=2; sample=600 (5.53%) {0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 100}
Training: Type RUS; cv=5; d=4; k=1; Train=600; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type RUS; cv=5; d=4; k=3; Train=600; Acc=0.90; Pre=0.90; Recall=0.90; F1=0.90; AUC OVR=0.

Training: Type DISKD$_2$; cv=5; d=4; k=1; Train=2564; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type DISKD$_2$; cv=5; d=4; k=3; Train=2564; Acc=0.90; Pre=0.90; Recall=0.90; F1=0.90; AUC OVR=0.99; AUC OVO=0.99
Training: Type DISKD$_2$; cv=5; d=4; k=5; Train=2564; Acc=0.87; Pre=0.87; Recall=0.87; F1=0.87; AUC OVR=0.99; AUC OVO=0.99
Training: Type DISKD$_2$; cv=5; d=4; k=7; Train=2564; Acc=0.85; Pre=0.85; Recall=0.85; F1=0.85; AUC OVR=0.98; AUC OVO=0.98
Training: Type DISKD$_2$; cv=5; d=4; k=9; Train=2564; Acc=0.83; Pre=0.84; Recall=0.83; F1=0.83; AUC OVR=0.98; AUC OVO=0.98

No Oversampling: Type 24 DISKD$_3$; set=2; sample=3159 (29.13%) {0: 1099, 1: 671, 2: 466, 3: 669, 4: 92, 5: 162}
Training: Type DISKD$_3$; cv=5; d=4; k=1; Train=3159; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type DISKD$_3$; cv=5; d=4; k=3; Train=3159; Acc=0.92; Pre=0.92; Recall=0.92; F1=0.92; AUC OVR=1.00; AUC OVO=1.00
Training: Type DISKD$_3$;

  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is

Preprocessing: Attribute = 16 (Non-PCA) (Norm)
Preprocessing: Sample    = 20000
Preprocessing: Class     = 26 [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
Preprocessing: Train     = {0: 789, 1: 766, 2: 736, 3: 805, 4: 768, 5: 775, 6: 773, 7: 734, 8: 755, 9: 747, 10: 739, 11: 761, 12: 792, 13: 783, 14: 753, 15: 803, 16: 783, 17: 758, 18: 748, 19: 796, 20: 813, 21: 764, 22: 752, 23: 787, 24: 786, 25: 734}
DIS Reduction: Level 1 is constructed. / Time = 351.45 seconds / Size = 6101 / Original = 20000 / Remain = 13899
DIS Reduction: Level 2 is constructed. / Time = 157.64 seconds / Size = 4064 / Original = 13899 / Remain = 9835
DIS Reduction: Level 3 is constructed. / Time = 79.38 seconds / Size = 2554 / Original = 9835 / Remain = 7281
DIS Reduction: Level 4 is constructed. / Time = 24.50 seconds / Size = 1725 / Original = 7281 / Remain = 5556
DISKD Reduction (k=200): Level 1 is constructed. / Time = 7.27 seconds / Size = 6223 / Original = 20000 / Remain



Training: Type CNN; cv=2; d=4; k=3; Train=1423; Acc=0.61; Pre=0.66; Recall=0.61; F1=0.59; AUC OVR=0.98; AUC OVO=0.98
Training: Type CNN; cv=2; d=4; k=5; Train=1423; Acc=0.58; Pre=0.62; Recall=0.58; F1=0.56; AUC OVR=0.97; AUC OVO=0.98




Training: Type CNN; cv=2; d=4; k=7; Train=1423; Acc=0.54; Pre=0.58; Recall=0.54; F1=0.52; AUC OVR=0.97; AUC OVO=0.97
Training: Type CNN; cv=2; d=4; k=9; Train=1423; Acc=0.51; Pre=0.55; Recall=0.51; F1=0.48; AUC OVR=0.96; AUC OVO=0.96





No Oversampling: Type 2 ENN; set=3; sample=19180 (95.90%) {0: 786, 1: 737, 2: 719, 3: 785, 4: 727, 5: 724, 6: 731, 7: 645, 8: 724, 9: 712, 10: 669, 11: 746, 12: 770, 13: 755, 14: 729, 15: 747, 16: 756, 17: 704, 18: 729, 19: 771, 20: 796, 21: 735, 22: 738, 23: 756, 24: 766, 25: 723}
Training: Type ENN; cv=5; d=4; k=1; Train=19180; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=3; Train=19180; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=5; Train=19180; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=7; Train=19180; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=9; Train=19180; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 3 RENN; set=3; sample=19180 (95.90%) {0: 786, 1: 737, 2: 719, 3: 785, 4: 727, 5: 724, 6: 731, 7: 645, 8:



Training: Type OSS; cv=2; d=4; k=1; Train=16361; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00




Training: Type OSS; cv=2; d=4; k=3; Train=16361; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00




Training: Type OSS; cv=2; d=4; k=5; Train=16361; Acc=0.96; Pre=0.96; Recall=0.96; F1=0.96; AUC OVR=1.00; AUC OVO=1.00




Training: Type OSS; cv=2; d=4; k=7; Train=16361; Acc=0.96; Pre=0.96; Recall=0.96; F1=0.96; AUC OVR=1.00; AUC OVO=1.00




Training: Type OSS; cv=2; d=4; k=9; Train=16361; Acc=0.95; Pre=0.95; Recall=0.95; F1=0.95; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 7 NCL; set=3; sample=19180 (95.90%) {0: 786, 1: 737, 2: 719, 3: 785, 4: 727, 5: 724, 6: 731, 7: 645, 8: 724, 9: 712, 10: 669, 11: 746, 12: 770, 13: 755, 14: 729, 15: 747, 16: 756, 17: 704, 18: 729, 19: 771, 20: 796, 21: 735, 22: 738, 23: 756, 24: 766, 25: 723}
Training: Type NCL; cv=5; d=4; k=1; Train=19180; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=3; Train=19180; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=5; Train=19180; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=7; Train=19180; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=9; Train=19180; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00

No Over



No Oversampling: Type 10 NM$_3$; set=3; sample=6620 (33.10%) {0: 128, 1: 351, 2: 146, 3: 336, 4: 263, 5: 169, 6: 279, 7: 733, 8: 180, 9: 141, 10: 368, 11: 139, 12: 299, 13: 352, 14: 310, 15: 200, 16: 245, 17: 340, 18: 245, 19: 123, 20: 246, 21: 151, 22: 147, 23: 347, 24: 161, 25: 221}
Training: Type NM$_3$; cv=5; d=4; k=1; Train=6620; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=3; Train=6620; Acc=0.95; Pre=0.95; Recall=0.95; F1=0.95; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=5; Train=6620; Acc=0.93; Pre=0.93; Recall=0.93; F1=0.93; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=7; Train=6620; Acc=0.91; Pre=0.92; Recall=0.91; F1=0.91; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=9; Train=6620; Acc=0.90; Pre=0.90; Recall=0.90; F1=0.90; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 11 IHT; set=3; sample=19115 (95.58%) {0: 734, 1: 735, 2: 734, 3: 740, 4: 734, 5: 734, 6: 73

  self.estimator_.fit(_safe_indexing(X, target_class_indices))
  self.estimator_.fit(_safe_indexing(X, target_class_indices))
  self.estimator_.fit(_safe_indexing(X, target_class_indices))
  self.estimator_.fit(_safe_indexing(X, target_class_indices))
  self.estimator_.fit(_safe_indexing(X, target_class_indices))
  self.estimator_.fit(_safe_indexing(X, target_class_indices))
  self.estimator_.fit(_safe_indexing(X, target_class_indices))


  self.estimator_.fit(_safe_indexing(X, target_class_indices))
  self.estimator_.fit(_safe_indexing(X, target_class_indices))
  self.estimator_.fit(_safe_indexing(X, target_class_indices))
  self.estimator_.fit(_safe_indexing(X, target_class_indices))
  self.estimator_.fit(_safe_indexing(X, target_class_indices))
  self.estimator_.fit(_safe_indexing(X, target_class_indices))
  self.estimator_.fit(_safe_indexing(X, target_class_indices))


  self.estimator_.fit(_safe_indexing(X, target_class_indices))
  self.estimator_.fit(_safe_indexing(X, target_class_indices))


No Oversampling: Type 12 CC; set=3; sample=19084 (95.42%) {0: 734, 1: 734, 2: 734, 3: 734, 4: 734, 5: 734, 6: 734, 7: 734, 8: 734, 9: 734, 10: 734, 11: 734, 12: 734, 13: 734, 14: 734, 15: 734, 16: 734, 17: 734, 18: 734, 19: 734, 20: 734, 21: 734, 22: 734, 23: 734, 24: 734, 25: 734}
Training: Type CC; cv=5; d=4; k=1; Train=19084; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=3; Train=19084; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=5; Train=19084; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.97; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=7; Train=19084; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.97; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=9; Train=19084; Acc=0.96; Pre=0.96; Recall=0.96; F1=0.96; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 13 RUS; set=3; sample=19084 (95.42%) {0: 734, 1: 734, 2: 734, 3: 734, 4: 734, 5: 734, 6: 734, 7: 734, 8: 734,

Training: Type IBP$_3$; cv=5; d=4; k=1; Train=12719; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBP$_3$; cv=5; d=4; k=3; Train=12719; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.97; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBP$_3$; cv=5; d=4; k=5; Train=12719; Acc=0.96; Pre=0.96; Recall=0.96; F1=0.96; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBP$_3$; cv=5; d=4; k=7; Train=12719; Acc=0.95; Pre=0.95; Recall=0.95; F1=0.95; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBP$_3$; cv=5; d=4; k=9; Train=12719; Acc=0.94; Pre=0.94; Recall=0.94; F1=0.94; AUC OVR=1.00; AUC OVO=1.00

Oversampling (Pre) : Type 21 IBP$_4$; set=3; sample=14444 (72.22%) {0: 357, 1: 716, 2: 559, 3: 675, 4: 658, 5: 634, 6: 737, 7: 682, 8: 417, 9: 376, 10: 603, 11: 284, 12: 334, 13: 686, 14: 732, 15: 463, 16: 518, 17: 649, 18: 609, 19: 555, 20: 411, 21: 643, 22: 309, 23: 696, 24: 678, 25: 463}
Oversampling (Post): Type 21 IBP$_4$; set=3; sample=14444 (72.22%) {0: 357, 1: 716, 2: 559, 3: 67

Training: Type IBPKD$_3$; cv=5; d=4; k=1; Train=12970; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBPKD$_3$; cv=5; d=4; k=3; Train=12970; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.97; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBPKD$_3$; cv=5; d=4; k=5; Train=12970; Acc=0.96; Pre=0.96; Recall=0.96; F1=0.96; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBPKD$_3$; cv=5; d=4; k=7; Train=12970; Acc=0.95; Pre=0.95; Recall=0.95; F1=0.95; AUC OVR=1.00; AUC OVO=1.00
Training: Type IBPKD$_3$; cv=5; d=4; k=9; Train=12970; Acc=0.94; Pre=0.94; Recall=0.94; F1=0.94; AUC OVR=1.00; AUC OVO=1.00

Oversampling (Pre) : Type 29 IBPKD$_4$; set=3; sample=14646 (73.23%) {0: 343, 1: 730, 2: 572, 3: 676, 4: 669, 5: 639, 6: 743, 7: 685, 8: 459, 9: 359, 10: 606, 11: 263, 12: 332, 13: 727, 14: 731, 15: 469, 16: 545, 17: 654, 18: 617, 19: 563, 20: 412, 21: 653, 22: 320, 23: 721, 24: 690, 25: 468}
Oversampling (Post): Type 29 IBPKD$_4$; set=3; sample=14646 (73.23%) {0: 343, 1: 730,

  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is

Preprocessing: Attribute = 64 (Non-PCA) (Norm)
Preprocessing: Sample    = 5620
Preprocessing: Class     = 10 [0 1 2 3 4 5 6 7 8 9]
Preprocessing: Train     = {0: 554, 1: 571, 2: 557, 3: 572, 4: 568, 5: 558, 6: 558, 7: 566, 8: 554, 9: 562}
DIS Reduction: Level 1 is constructed. / Time = 26.94 seconds / Size = 1247 / Original = 5620 / Remain = 4373
DIS Reduction: Level 2 is constructed. / Time = 14.96 seconds / Size = 1157 / Original = 4373 / Remain = 3216
DIS Reduction: Level 3 is constructed. / Time = 7.92 seconds / Size = 848 / Original = 3216 / Remain = 2368
DIS Reduction: Level 4 is constructed. / Time = 4.31 seconds / Size = 613 / Original = 2368 / Remain = 1755
DISKD Reduction (k=56): Level 1 is constructed. / Time = 3.42 seconds / Size = 895 / Original = 5620 / Remain = 4725
DISKD Reduction (k=56): Level 2 is constructed. / Time = 2.43 seconds / Size = 618 / Original = 4725 / Remain = 4107
DISKD Reduction (k=56): Level 3 is constructed. / Time = 1.86 seconds / Size = 497 / Origin



Training: Type CNN; cv=2; d=4; k=9; Train=108; Acc=0.61; Pre=0.55; Recall=0.61; F1=0.56; AUC OVR=0.92; AUC OVO=0.91

No Oversampling: Type 2 ENN; set=4; sample=5556 (98.86%) {0: 552, 1: 568, 2: 556, 3: 565, 4: 562, 5: 549, 6: 556, 7: 563, 8: 537, 9: 548}
Training: Type ENN; cv=5; d=4; k=1; Train=5556; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=3; Train=5556; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=5; Train=5556; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=7; Train=5556; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=9; Train=5556; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 3 RENN; set=4; sample=5556 (98.86%) {0: 552, 1: 568, 2: 556, 3: 565, 4: 562, 5: 549, 6: 556, 7: 563, 8: 537, 9: 548}
Training: Type RENN;



Training: Type OSS; cv=2; d=4; k=3; Train=1969; Acc=0.99; Pre=0.98; Recall=0.99; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type OSS; cv=2; d=4; k=5; Train=1969; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type OSS; cv=2; d=4; k=7; Train=1969; Acc=0.98; Pre=0.97; Recall=0.98; F1=0.97; AUC OVR=1.00; AUC OVO=1.00




Training: Type OSS; cv=2; d=4; k=9; Train=1969; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.97; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 7 NCL; set=4; sample=5556 (98.86%) {0: 552, 1: 568, 2: 556, 3: 565, 4: 562, 5: 549, 6: 556, 7: 563, 8: 537, 9: 548}
Training: Type NCL; cv=5; d=4; k=1; Train=5556; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=3; Train=5556; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=5; Train=5556; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=7; Train=5556; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=9; Train=5556; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 8 NM$_1$; set=4; sample=5540 (98.58%) {0: 554, 1: 554, 2: 554, 3: 554, 4: 554, 5: 554, 6: 554, 7: 554, 8: 554, 9: 554}
Training: Type NM



No Oversampling: Type 10 NM$_3$; set=4; sample=1303 (23.19%) {0: 554, 1: 71, 2: 65, 3: 74, 4: 63, 5: 109, 6: 81, 7: 61, 8: 97, 9: 128}
Training: Type NM$_3$; cv=5; d=4; k=1; Train=1303; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=3; Train=1303; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=5; Train=1303; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=7; Train=1303; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=9; Train=1303; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.97; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 11 IHT; set=4; sample=5543 (98.63%) {0: 554, 1: 555, 2: 554, 3: 555, 4: 554, 5: 554, 6: 554, 7: 554, 8: 554, 9: 555}
Training: Type IHT; cv=5; d=4; k=1; Train=5543; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training



No Oversampling: Type 12 CC; set=4; sample=5540 (98.58%) {0: 554, 1: 554, 2: 554, 3: 554, 4: 554, 5: 554, 6: 554, 7: 554, 8: 554, 9: 554}
Training: Type CC; cv=5; d=4; k=1; Train=5540; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=3; Train=5540; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=5; Train=5540; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=7; Train=5540; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=9; Train=5540; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 13 RUS; set=4; sample=5540 (98.58%) {0: 554, 1: 554, 2: 554, 3: 554, 4: 554, 5: 554, 6: 554, 7: 554, 8: 554, 9: 554}
Training: Type RUS; cv=5; d=4; k=1; Train=5540; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type RUS; cv=5;

Training: Type DISKD$_1$; cv=5; d=4; k=5; Train=895; Acc=0.91; Pre=0.91; Recall=0.91; F1=0.91; AUC OVR=1.00; AUC OVO=1.00
Training: Type DISKD$_1$; cv=5; d=4; k=7; Train=895; Acc=0.89; Pre=0.89; Recall=0.89; F1=0.89; AUC OVR=0.99; AUC OVO=0.99
Training: Type DISKD$_1$; cv=5; d=4; k=9; Train=895; Acc=0.88; Pre=0.89; Recall=0.88; F1=0.88; AUC OVR=0.99; AUC OVO=0.99

No Oversampling: Type 23 DISKD$_2$; set=4; sample=1513 (26.92%) {0: 37, 1: 219, 2: 91, 3: 238, 4: 74, 5: 136, 6: 83, 7: 91, 8: 227, 9: 317}
Training: Type DISKD$_2$; cv=5; d=4; k=1; Train=1513; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type DISKD$_2$; cv=5; d=4; k=3; Train=1513; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.97; AUC OVR=1.00; AUC OVO=1.00
Training: Type DISKD$_2$; cv=5; d=4; k=5; Train=1513; Acc=0.96; Pre=0.96; Recall=0.96; F1=0.96; AUC OVR=1.00; AUC OVO=1.00
Training: Type DISKD$_2$; cv=5; d=4; k=7; Train=1513; Acc=0.95; Pre=0.96; Recall=0.95; F1=0.95; AUC OVR=1.00; AUC OVO=1.00
T

  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is

Preprocessing: Attribute = 16 (Non-PCA) (Norm)
Preprocessing: Sample    = 10992
Preprocessing: Class     = 10 [0 1 2 3 4 5 6 7 8 9]
Preprocessing: Train     = {0: 1143, 1: 1143, 2: 1144, 3: 1055, 4: 1144, 5: 1055, 6: 1056, 7: 1142, 8: 1055, 9: 1055}
DIS Reduction: Level 1 is constructed. / Time = 100.82 seconds / Size = 1357 / Original = 10992 / Remain = 9635
DIS Reduction: Level 2 is constructed. / Time = 72.10 seconds / Size = 1332 / Original = 9635 / Remain = 8303
DIS Reduction: Level 3 is constructed. / Time = 52.83 seconds / Size = 1151 / Original = 8303 / Remain = 7152
DIS Reduction: Level 4 is constructed. / Time = 22.32 seconds / Size = 1036 / Original = 7152 / Remain = 6116
DISKD Reduction (k=109): Level 1 is constructed. / Time = 1.39 seconds / Size = 1103 / Original = 10992 / Remain = 9889
DISKD Reduction (k=109): Level 2 is constructed. / Time = 1.21 seconds / Size = 860 / Original = 9889 / Remain = 9029
DISKD Reduction (k=109): Level 3 is constructed. / Time = 1.08 seconds



Training: Type CNN; cv=2; d=4; k=9; Train=102; Acc=0.65; Pre=0.64; Recall=0.65; F1=0.61; AUC OVR=0.93; AUC OVO=0.92

No Oversampling: Type 2 ENN; set=5; sample=10925 (99.39%) {0: 1136, 1: 1128, 2: 1143, 3: 1047, 4: 1141, 5: 1048, 6: 1054, 7: 1134, 8: 1049, 9: 1045}
Training: Type ENN; cv=5; d=4; k=1; Train=10925; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=3; Train=10925; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=5; Train=10925; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=7; Train=10925; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=9; Train=10925; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 3 RENN; set=5; sample=10925 (99.39%) {0: 1136, 1: 1128, 2: 1143, 3: 1047, 4: 1141, 5: 1048, 6: 1054, 7: 1134, 8: 1049, 9:



Training: Type OSS; cv=2; d=4; k=3; Train=3390; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type OSS; cv=2; d=4; k=5; Train=3390; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00




Training: Type OSS; cv=2; d=4; k=7; Train=3390; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type OSS; cv=2; d=4; k=9; Train=3390; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00





No Oversampling: Type 7 NCL; set=5; sample=10925 (99.39%) {0: 1136, 1: 1128, 2: 1143, 3: 1047, 4: 1141, 5: 1048, 6: 1054, 7: 1134, 8: 1049, 9: 1045}
Training: Type NCL; cv=5; d=4; k=1; Train=10925; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=3; Train=10925; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=5; Train=10925; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=7; Train=10925; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=9; Train=10925; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 8 NM$_1$; set=5; sample=10550 (95.98%) {0: 1055, 1: 1055, 2: 1055, 3: 1055, 4: 1055, 5: 1055, 6: 1055, 7: 1055, 8: 1055, 9: 1055}
Training: Type NM$_1$; cv=5; d=4; k=1; Train=10550; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; 



No Oversampling: Type 10 NM$_3$; set=5; sample=2591 (23.57%) {0: 228, 1: 173, 2: 138, 3: 136, 4: 161, 5: 199, 6: 138, 7: 165, 8: 1055, 9: 198}
Training: Type NM$_3$; cv=5; d=4; k=1; Train=2591; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=3; Train=2591; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=5; Train=2591; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=7; Train=2591; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=9; Train=2591; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 11 IHT; set=5; sample=10556 (96.03%) {0: 1057, 1: 1056, 2: 1055, 3: 1055, 4: 1057, 5: 1055, 6: 1055, 7: 1056, 8: 1055, 9: 1055}
Training: Type IHT; cv=5; d=4; k=1; Train=10556; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; A



No Oversampling: Type 12 CC; set=5; sample=10550 (95.98%) {0: 1055, 1: 1055, 2: 1055, 3: 1055, 4: 1055, 5: 1055, 6: 1055, 7: 1055, 8: 1055, 9: 1055}
Training: Type CC; cv=5; d=4; k=1; Train=10550; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=3; Train=10550; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=5; Train=10550; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=7; Train=10550; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=9; Train=10550; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 13 RUS; set=5; sample=10550 (95.98%) {0: 1055, 1: 1055, 2: 1055, 3: 1055, 4: 1055, 5: 1055, 6: 1055, 7: 1055, 8: 1055, 9: 1055}
Training: Type RUS; cv=5; d=4; k=1; Train=10550; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.

Training: Type DISKD$_1$; cv=5; d=4; k=3; Train=1103; Acc=0.93; Pre=0.94; Recall=0.93; F1=0.93; AUC OVR=1.00; AUC OVO=1.00
Training: Type DISKD$_1$; cv=5; d=4; k=5; Train=1103; Acc=0.90; Pre=0.90; Recall=0.90; F1=0.90; AUC OVR=1.00; AUC OVO=1.00
Training: Type DISKD$_1$; cv=5; d=4; k=7; Train=1103; Acc=0.87; Pre=0.87; Recall=0.87; F1=0.87; AUC OVR=0.99; AUC OVO=0.99
Training: Type DISKD$_1$; cv=5; d=4; k=9; Train=1103; Acc=0.86; Pre=0.86; Recall=0.86; F1=0.85; AUC OVR=0.99; AUC OVO=0.99

No Oversampling: Type 23 DISKD$_2$; set=5; sample=1963 (17.86%) {0: 85, 1: 354, 2: 310, 3: 225, 4: 169, 5: 294, 6: 104, 7: 187, 8: 52, 9: 183}
Training: Type DISKD$_2$; cv=5; d=4; k=1; Train=1963; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type DISKD$_2$; cv=5; d=4; k=3; Train=1963; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.97; AUC OVR=1.00; AUC OVO=1.00
Training: Type DISKD$_2$; cv=5; d=4; k=5; Train=1963; Acc=0.96; Pre=0.96; Recall=0.96; F1=0.96; AUC OVR=1.00; AUC OVO=

  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is

Preprocessing: Attribute = 36 (Non-PCA) (Norm)
Preprocessing: Sample    = 6435
Preprocessing: Class     = 6 [0 1 2 3 4 5]
Preprocessing: Train     = {0: 1533, 1: 703, 2: 1358, 3: 626, 4: 707, 5: 1508}
DIS Reduction: Level 1 is constructed. / Time = 31.75 seconds / Size = 1185 / Original = 6435 / Remain = 5250
DIS Reduction: Level 2 is constructed. / Time = 19.06 seconds / Size = 588 / Original = 5250 / Remain = 4662
DIS Reduction: Level 3 is constructed. / Time = 14.87 seconds / Size = 393 / Original = 4662 / Remain = 4269
DIS Reduction: Level 4 is constructed. / Time = 12.33 seconds / Size = 287 / Original = 4269 / Remain = 3982
DISKD Reduction (k=64): Level 1 is constructed. / Time = 1.42 seconds / Size = 1143 / Original = 6435 / Remain = 5292
DISKD Reduction (k=64): Level 2 is constructed. / Time = 0.97 seconds / Size = 518 / Original = 5292 / Remain = 4774
DISKD Reduction (k=64): Level 3 is constructed. / Time = 0.78 seconds / Size = 328 / Original = 4774 / Remain = 4446
DISKD Redu



Training: Type CNN; cv=2; d=4; k=7; Train=772; Acc=0.84; Pre=0.83; Recall=0.84; F1=0.83; AUC OVR=0.98; AUC OVO=0.97
Training: Type CNN; cv=2; d=4; k=9; Train=772; Acc=0.84; Pre=0.82; Recall=0.84; F1=0.82; AUC OVR=0.98; AUC OVO=0.96





No Oversampling: Type 2 ENN; set=6; sample=5874 (91.28%) {0: 1503, 1: 679, 2: 1265, 3: 455, 4: 636, 5: 1336}
Training: Type ENN; cv=5; d=4; k=1; Train=5874; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=3; Train=5874; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=5; Train=5874; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=7; Train=5874; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=9; Train=5874; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.97; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 3 RENN; set=6; sample=5789 (89.96%) {0: 1501, 1: 675, 2: 1250, 3: 428, 4: 624, 5: 1311}
Training: Type RENN; cv=5; d=4; k=1; Train=5789; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type RENN; cv=5; d=4; k=3; Train=5789; Acc=1.00; Pre=1.00; Recall=1



Training: Type OSS; cv=2; d=4; k=1; Train=4699; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00




Training: Type OSS; cv=2; d=4; k=3; Train=4699; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.97; AUC OVR=1.00; AUC OVO=1.00




Training: Type OSS; cv=2; d=4; k=5; Train=4699; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.97; AUC OVR=1.00; AUC OVO=1.00




Training: Type OSS; cv=2; d=4; k=7; Train=4699; Acc=0.96; Pre=0.96; Recall=0.96; F1=0.96; AUC OVR=1.00; AUC OVO=1.00




Training: Type OSS; cv=2; d=4; k=9; Train=4699; Acc=0.95; Pre=0.95; Recall=0.95; F1=0.95; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 7 NCL; set=6; sample=5874 (91.28%) {0: 1503, 1: 679, 2: 1265, 3: 455, 4: 636, 5: 1336}
Training: Type NCL; cv=5; d=4; k=1; Train=5874; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=3; Train=5874; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=5; Train=5874; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=7; Train=5874; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=9; Train=5874; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.97; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 8 NM$_1$; set=6; sample=3756 (58.37%) {0: 626, 1: 626, 2: 626, 3: 626, 4: 626, 5: 626}
Training: Type NM$_1$; cv=5; d=4; k=1; Train=3756; Acc=1.00; Pre=1.00; Recall=



No Oversampling: Type 10 NM$_3$; set=6; sample=1573 (24.44%) {0: 141, 1: 33, 2: 288, 3: 626, 4: 120, 5: 365}
Training: Type NM$_3$; cv=5; d=4; k=1; Train=1573; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_3$; cv=5; d=4; k=3; Train=1573; Acc=0.84; Pre=0.84; Recall=0.84; F1=0.84; AUC OVR=0.97; AUC OVO=0.98
Training: Type NM$_3$; cv=5; d=4; k=5; Train=1573; Acc=0.79; Pre=0.79; Recall=0.79; F1=0.79; AUC OVR=0.95; AUC OVO=0.97
Training: Type NM$_3$; cv=5; d=4; k=7; Train=1573; Acc=0.76; Pre=0.76; Recall=0.76; F1=0.76; AUC OVR=0.94; AUC OVO=0.96
Training: Type NM$_3$; cv=5; d=4; k=9; Train=1573; Acc=0.74; Pre=0.74; Recall=0.74; F1=0.74; AUC OVR=0.93; AUC OVO=0.95

No Oversampling: Type 11 IHT; set=6; sample=3812 (59.24%) {0: 661, 1: 628, 2: 642, 3: 626, 4: 626, 5: 629}
Training: Type IHT; cv=5; d=4; k=1; Train=3812; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type IHT; cv=5; d=4; k=3; Train=3812; Acc=0.99; Pre=0.99



No Oversampling: Type 12 CC; set=6; sample=3756 (58.37%) {0: 626, 1: 626, 2: 626, 3: 626, 4: 626, 5: 626}
Training: Type CC; cv=5; d=4; k=1; Train=3756; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=3; Train=3756; Acc=0.92; Pre=0.93; Recall=0.92; F1=0.92; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=5; Train=3756; Acc=0.90; Pre=0.90; Recall=0.90; F1=0.90; AUC OVR=0.99; AUC OVO=0.99
Training: Type CC; cv=5; d=4; k=7; Train=3756; Acc=0.89; Pre=0.89; Recall=0.89; F1=0.89; AUC OVR=0.99; AUC OVO=0.99
Training: Type CC; cv=5; d=4; k=9; Train=3756; Acc=0.88; Pre=0.89; Recall=0.88; F1=0.88; AUC OVR=0.99; AUC OVO=0.99

No Oversampling: Type 13 RUS; set=6; sample=3756 (58.37%) {0: 626, 1: 626, 2: 626, 3: 626, 4: 626, 5: 626}
Training: Type RUS; cv=5; d=4; k=1; Train=3756; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type RUS; cv=5; d=4; k=3; Train=3756; Acc=0.96; Pre=0.96; Recall=0.96; F1=0.96;

Training: Type DISKD$_2$; cv=5; d=4; k=3; Train=1661; Acc=0.82; Pre=0.82; Recall=0.82; F1=0.82; AUC OVR=0.97; AUC OVO=0.97
Training: Type DISKD$_2$; cv=5; d=4; k=5; Train=1661; Acc=0.77; Pre=0.77; Recall=0.77; F1=0.77; AUC OVR=0.95; AUC OVO=0.96
Training: Type DISKD$_2$; cv=5; d=4; k=7; Train=1661; Acc=0.74; Pre=0.74; Recall=0.74; F1=0.74; AUC OVR=0.94; AUC OVO=0.95
Training: Type DISKD$_2$; cv=5; d=4; k=9; Train=1661; Acc=0.71; Pre=0.72; Recall=0.71; F1=0.71; AUC OVR=0.93; AUC OVO=0.95

No Oversampling: Type 24 DISKD$_3$; set=6; sample=1989 (30.91%) {0: 231, 1: 48, 2: 288, 3: 550, 4: 285, 5: 587}
Training: Type DISKD$_3$; cv=5; d=4; k=1; Train=1989; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type DISKD$_3$; cv=5; d=4; k=3; Train=1989; Acc=0.85; Pre=0.85; Recall=0.85; F1=0.85; AUC OVR=0.98; AUC OVO=0.98
Training: Type DISKD$_3$; cv=5; d=4; k=5; Train=1989; Acc=0.80; Pre=0.80; Recall=0.80; F1=0.80; AUC OVR=0.96; AUC OVO=0.97
Training: Type DISKD$_3$; 

  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Preprocessing: Attribute = 9 (Non-PCA) (Norm)
Preprocessing: Sample    = 958
Preprocessing: Class     = 2 [0 1]
Preprocessing: Train     = {0: 332, 1: 626}
DIS Reduction: Level 1 is constructed. / Time = 0.55 seconds / Size = 387 / Original = 958 / Remain = 571
DIS Reduction: Level 2 is constructed. / Time = 0.15 seconds / Size = 167 / Original = 571 / Remain = 404
DIS Reduction: Level 3 is constructed. / Time = 0.11 seconds / Size = 64 / Original = 404 / Remain = 340
DIS Reduction: Level 4 is constructed. / Time = 0.12 seconds / Size = 18 / Original = 340 / Remain = 322
DISKD Reduction (k=9): Level 1 is constructed. / Time = 0.03 seconds / Size = 336 / Original = 958 / Remain = 622
DISKD Reduction (k=9): Level 2 is constructed. / Time = 0.02 seconds / Size = 138 / Original = 622 / Remain = 484
DISKD Reduction (k=9): Level 3 is constructed. / Time = 0.01 seconds / Size = 128 / Original = 484 / Remain = 356
DISKD Reduction (k=9): Level 4 is constructed. / Time = 0.01 seconds / Size = 70



Training: Type CNN; cv=2; d=4; k=3; Train=388; Acc=1.00; Pre=0.99; Recall=1.00; F1=1.00; AUC OVR=nan; AUC OVO=nan




Training: Type CNN; cv=2; d=4; k=5; Train=388; Acc=1.00; Pre=0.99; Recall=1.00; F1=1.00; AUC OVR=nan; AUC OVO=nan




Training: Type CNN; cv=2; d=4; k=7; Train=388; Acc=1.00; Pre=0.99; Recall=1.00; F1=1.00; AUC OVR=nan; AUC OVO=nan




Training: Type CNN; cv=2; d=4; k=9; Train=388; Acc=1.00; Pre=0.99; Recall=1.00; F1=1.00; AUC OVR=nan; AUC OVO=nan

No Oversampling: Type 2 ENN; set=7; sample=759 (79.23%) {0: 146, 1: 613}
Training: Type ENN; cv=5; d=4; k=1; Train=759; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=3; Train=759; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=5; Train=759; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=7; Train=759; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type ENN; cv=5; d=4; k=9; Train=759; Acc=0.95; Pre=0.96; Recall=0.95; F1=0.95; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 3 RENN; set=7; sample=743 (77.56%) {0: 143, 1: 600}
Training: Type RENN; cv=5; d=4; k=1; Train=743; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type RENN; cv=5; d=4; k=3; Tra



Training: Type OSS; cv=2; d=4; k=9; Train=624; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=nan; AUC OVO=nan

No Oversampling: Type 7 NCL; set=7; sample=427 (44.57%) {0: 146, 1: 281}
Training: Type NCL; cv=5; d=4; k=1; Train=427; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=3; Train=427; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=5; Train=427; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.97; AUC OVR=1.00; AUC OVO=1.00
Training: Type NCL; cv=5; d=4; k=7; Train=427; Acc=0.96; Pre=0.96; Recall=0.96; F1=0.96; AUC OVR=0.99; AUC OVO=0.99
Training: Type NCL; cv=5; d=4; k=9; Train=427; Acc=0.94; Pre=0.94; Recall=0.94; F1=0.94; AUC OVR=0.98; AUC OVO=0.98

No Oversampling: Type 8 NM$_1$; set=7; sample=664 (69.31%) {0: 332, 1: 332}
Training: Type NM$_1$; cv=5; d=4; k=1; Train=664; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type NM$_1$; cv=5; d=4; k=



No Oversampling: Type 12 CC; set=7; sample=664 (69.31%) {0: 332, 1: 332}
Training: Type CC; cv=5; d=4; k=1; Train=664; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type CC; cv=5; d=4; k=3; Train=664; Acc=0.82; Pre=0.84; Recall=0.82; F1=0.81; AUC OVR=0.93; AUC OVO=0.93
Training: Type CC; cv=5; d=4; k=5; Train=664; Acc=0.83; Pre=0.85; Recall=0.83; F1=0.83; AUC OVR=0.92; AUC OVO=0.92
Training: Type CC; cv=5; d=4; k=7; Train=664; Acc=0.84; Pre=0.85; Recall=0.84; F1=0.84; AUC OVR=0.92; AUC OVO=0.92
Training: Type CC; cv=5; d=4; k=9; Train=664; Acc=0.84; Pre=0.85; Recall=0.84; F1=0.84; AUC OVR=0.91; AUC OVO=0.91

No Oversampling: Type 13 RUS; set=7; sample=664 (69.31%) {0: 332, 1: 332}
Training: Type RUS; cv=5; d=4; k=1; Train=664; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type RUS; cv=5; d=4; k=3; Train=664; Acc=0.88; Pre=0.88; Recall=0.88; F1=0.88; AUC OVR=0.96; AUC OVO=0.96
Training: Type RUS; cv=5; d=4; k=5; Train=664

Training: Type DISKD$_3$; cv=5; d=4; k=1; Train=602; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type DISKD$_3$; cv=5; d=4; k=3; Train=602; Acc=0.78; Pre=0.82; Recall=0.78; F1=0.77; AUC OVR=0.89; AUC OVO=0.89
Training: Type DISKD$_3$; cv=5; d=4; k=5; Train=602; Acc=0.85; Pre=0.86; Recall=0.85; F1=0.84; AUC OVR=0.94; AUC OVO=0.94
Training: Type DISKD$_3$; cv=5; d=4; k=7; Train=602; Acc=0.83; Pre=0.85; Recall=0.83; F1=0.83; AUC OVR=0.94; AUC OVO=0.94
Training: Type DISKD$_3$; cv=5; d=4; k=9; Train=602; Acc=0.78; Pre=0.81; Recall=0.78; F1=0.77; AUC OVR=0.91; AUC OVO=0.91

No Oversampling: Type 25 DISKD$_4$; set=7; sample=672 (70.15%) {0: 301, 1: 371}
Training: Type DISKD$_4$; cv=5; d=4; k=1; Train=672; Acc=1.00; Pre=1.00; Recall=1.00; F1=1.00; AUC OVR=1.00; AUC OVO=1.00
Training: Type DISKD$_4$; cv=5; d=4; k=3; Train=672; Acc=0.79; Pre=0.83; Recall=0.79; F1=0.78; AUC OVR=0.90; AUC OVO=0.90
Training: Type DISKD$_4$; cv=5; d=4; k=5; Train=672; Acc=0.86; Pr

  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is

Preprocessing: Attribute = 241 (Non-PCA) (Norm)
Preprocessing: Sample    = 4465
Preprocessing: Class     = 3 [0 1 2]
Preprocessing: Train     = {0: 899, 1: 3565, 2: 1}
DIS Reduction: Level 1 is constructed. / Time = 7.67 seconds / Size = 111 / Original = 4465 / Remain = 4354
DIS Reduction: Level 2 is constructed. / Time = 5.92 seconds / Size = 98 / Original = 4354 / Remain = 4256
DIS Reduction: Level 3 is constructed. / Time = 5.51 seconds / Size = 73 / Original = 4256 / Remain = 4183
DIS Reduction: Level 4 is constructed. / Time = 5.23 seconds / Size = 74 / Original = 4183 / Remain = 4109
DISKD Reduction (k=44): Level 1 is constructed. / Time = 2.75 seconds / Size = 150 / Original = 4465 / Remain = 4315
DISKD Reduction (k=44): Level 2 is constructed. / Time = 2.59 seconds / Size = 105 / Original = 4315 / Remain = 4210
DISKD Reduction (k=44): Level 3 is constructed. / Time = 2.46 seconds / Size = 88 / Original = 4210 / Remain = 4122
DISKD Reduction (k=44): Level 4 is constructed. / Tim



Training: Type ORG; cv=2; d=4; k=1; Train=4465; Acc=0.99; Pre=0.99; Recall=0.99; F1=0.99; AUC OVR=0.97; AUC OVO=0.98




Training: Type ORG; cv=2; d=4; k=3; Train=4465; Acc=0.98; Pre=0.98; Recall=0.98; F1=0.98; AUC OVR=0.97; AUC OVO=0.98




Training: Type ORG; cv=2; d=4; k=5; Train=4465; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.97; AUC OVR=0.98; AUC OVO=0.99




Training: Type ORG; cv=2; d=4; k=7; Train=4465; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.97; AUC OVR=1.00; AUC OVO=1.00




Training: Type ORG; cv=2; d=4; k=9; Train=4465; Acc=0.97; Pre=0.97; Recall=0.97; F1=0.97; AUC OVR=1.00; AUC OVO=1.00

No Oversampling: Type 1 CNN; set=8; sample=3 (0.07%) {0: 1, 1: 1, 2: 1}


ValueError: n_splits=2 cannot be greater than the number of members in each class.

In [None]:
# Plot data set performance comparison
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

files2 = [];
files2_header = [];
files2_separator = [];
files2.append("cross_train_accuracy.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_precision.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_recall.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_f1.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_aucovr.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_aucovo.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_accuracy.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_precision.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_recall.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_f1.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_aucovr.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_aucovo.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C

#max_k = 10;
#max_level = 3;
#max_set = 7;
k_array = []
for k in range(0,max_k): k_array.append((2*k)+1); 
s_array = []
for s in range(0,max_set): s_array.append(s+1); 

perf_title = ('(a) Best Accuracy','(b) Best Precision','(c) Best Recall','(d) Best F1-Score','(e) Best Area Under the Curve (One-vs-Rest)','(f) Best Area Under the Curve (One-vs-One)');
label_traintype = ('Original','L1','L1+L2','L1+L2+L3');
plot_row = len(files2)//2;
plot_col = 2;
plot_alpha = 0.7;
plot_linewidth = 1;

raw_max = np.zeros((max_set,max_level+1),dtype = np.float64);
figure, axis = plt.subplots(plot_row, plot_col, figsize=(plot_col*10, plot_row*6))
for measure in range(0,len(files2)):
    inputfile = files2[measure];
    print("Measure:",measure);
    print("Data Source:",inputfile);
    df = pd.read_csv(inputfile, header = files2_header[measure], sep = files2_separator[measure]);
    raw = df.to_numpy();
    #print(raw)
    for s in range(0,max_set):
        for l in range(0,max_level):
            raw_max[s][l] = max(raw[s*(max_level)+l]);
#            print(raw[s*(max_level+1)+l])

    for l in range(0,max_level):
        axis[measure//2,measure%2].plot(s_array,raw_max.T[l],marker='o',label=label_traintype[l],linewidth=plot_linewidth,alpha=plot_alpha)
    #    print(measure//2,measure%2)

#    for d in range(0,max_set): d_array[d] = d+1; 
#        max_test_accuracy[s][l]=max(raw[][l])

#    for l in range(0,max_level+1):
#        axis[measure//2,measure%2].plot(k_array,raw[l],marker='o',label=label_traintype[l],linewidth=plot_linewidth,alpha=plot_alpha)
    for x in range(0,plot_row):
        for y in range(0,plot_col):
            axis[x,y].set_title(perf_title[x*2+y],fontsize=16)
            axis[x,y].set_xlabel('Data Set',fontsize=13)
            axis[x,y].set_ylabel('Performance (Larger is better)',fontsize=13)
            axis[x,y].set_ylim([0,1])
            axis[x,y].legend(loc='lower right')
figure.savefig("performance-best.pdf", format="pdf", dpi=None, facecolor="w", edgecolor="w", orientation="portrait", transparent=True, bbox_inches="tight", pad_inches=0.1, metadata=None);

print("Task completed.");   


## Plots

In [None]:
# Plot data set performance comparison
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

max_set = 10;

files2 = [];
files2_header = [];
files2_separator = [];
files2.append("cross_train_accuracy.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_precision.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_recall.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_f1.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_aucovr.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_aucovo.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_accuracy.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_precision.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_recall.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_f1.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_aucovr.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_aucovo.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C

#max_k = 10;
#max_level = 3;
#max_set = 7;
k_array = []
for k in range(0,max_k): k_array.append((2*k)+1); 
s_array = []
for s in range(0,max_set): s_array.append(s+1); 

perf_title = ('(a) Best Accuracy','(b) Best Precision','(c) Best Recall','(d) Best F1-Score','(e) Best Area Under the Curve (One-vs-Rest)','(f) Best Area Under the Curve (One-vs-One)');
label_traintype = ('ORG','CNN','ENN','RENN','All KNN','TL','OSS','NCL','NM$_1$','NM$_2$','NM$_3$','IHT','CC','RUS','DIS$_1$','DIS$_2$','DIS$_3$','DIS$_4$','IBP$_1$','IBP$_2$','IBP$_3$','IBP$_4$','DISKD$_1$','DISKD$_2$','DISKD$_3$','DISKD$_4$','IBPKD$_1$','IBPKD$_2$','IBPKD$_3$','IBPKD$_4$')
plot_row = len(files2)//2;
plot_col = 2;
plot_alpha = 0.7;
plot_linewidth = 0;

# Define a list of 30 distinct colors
colors = plt.cm.viridis(np.linspace(0, 0.70, 30))

for measure in range(0,len(files2)):
    inputfile = files2[measure];
    print("Measure:",measure);
    print("Data Source:",inputfile);
    df = pd.read_csv(inputfile, header = files2_header[measure], sep = files2_separator[measure]);
    raw = df.to_numpy();
    # Compute the average of each row
    raw_avg = np.nanmean(raw, axis=1)
    raw_avg_reshaped = raw_avg.reshape(10, 30)
    # Create subplots
    fig, axs = plt.subplots(5, 2, figsize=(15, 20))
    axs = axs.ravel()

    # Plot each of the 10 data sets (each row of the reshaped array)
    for i in range(10):
        bars = axs[i].bar(range(1,31), raw_avg_reshaped[i], color=colors)
        axs[i].set_title(f"Dataset {i+1}")
        axs[i].set_xlabel("Data Reduction Algorithm")
        axs[i].set_ylabel("Reduction Rate (Higher is better)")

        # Set text labels on x-axis
        axs[i].set_xticks(range(1, 31))
        axs[i].set_xticklabels(label_traintype, rotation=45)

        # Add data labels at the bottom of each bar
        for bar in bars:
            yval = bar.get_height()
            # axs[i].text(bar.get_x() + bar.get_width() / 2.0, 0.01, '{:.4f}'.format(yval), va='bottom', ha='center', rotation=90, color='white')
            axs[i].text(bar.get_x() + bar.get_width() / 2.0, yval, '{:.4f}'.format(yval), va='top', ha='center', rotation=90, color='white')

    plt.tight_layout()
    plt.show()
    fig.savefig(files2[measure].replace(".txt", ".pdf"), format="pdf", dpi=None, facecolor="w", edgecolor="w", orientation="portrait", transparent=True, bbox_inches="tight", pad_inches=0.1, metadata=None);

print("Task completed.");   


## LaTex Tables

In [None]:
# Create LaTex tables to show the performance comparison
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from tabulate import tabulate

def bold_max_value(row):
    max_val = max(row)
    return [f"\\textbf{{{x:.3f}}}" if x == max_val else f"{x:.3f}" for x in row]

def color_max_value(row):
    max_val = max(row)
    return [f"\\textcolor{{blue}}{{{x:.3f}}}" if x == max_val else f"{x:.3f}" for x in row]

def bold_max_value_column(matrix):
    max_vals = np.max(matrix, axis=0)  # Find the max value in each column
    new_matrix = []

    for row in matrix:
        new_row = []
        for i, val in enumerate(row):
            max_val = max_vals[i]
            new_row.append(f"\\textbf{{{val:.4f}}}" if val == max_val else f"{val:.4f}")
        new_matrix.append(new_row)

    return new_matrix

def color_max_value_column(matrix):
    max_vals = np.max(matrix, axis=0)  # Find the max value in each column
    new_matrix = []

    for row in matrix:
        new_row = []
        for i, val in enumerate(row):
            max_val = max_vals[i]
            new_row.append(f"\\textcolor{{blue}}{{{val:.4f}}}" if val == max_val else f"{val:.4f}")
        new_matrix.append(new_row)

    return new_matrix

files2 = [];
files2_header = [];
files2_separator = [];
files2.append("cross_train_accuracy.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_precision.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_recall.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_f1.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_aucovr.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_aucovo.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_accuracy.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_precision.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_recall.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_f1.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_aucovr.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_aucovo.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C

file_removal = glob.glob("latex_performance.txt")
for f in file_removal:
    os.remove(f)    

max_k = 10;
max_level = 5;
max_set = 7;
k_array = []
for k in range(0,max_k): k_array.append((2*k)+1); 
s_array = []
for s in range(0,max_set): s_array.append(s+1); 

perf_title = ('(a) Best Accuracy','(b) Best Precision','(c) Best Recall','(d) Best F1-Score','(e) Best Area Under the Curve (One-vs-Rest)','(f) Best Area Under the Curve (One-vs-One)');
label_traintype = ('ORG','CNN','ENN','RENN','All KNN','TL','OSS','NCL','NM$_1$','NM$_2$','NM$_3$','IHT','CC','RUS','DIS$_1$','DIS$_2$','DIS$_3$','DIS$_4$','IBP$_1$','IBP$_2$','IBP$_3$','IBP$_4$','DISKD$_1$','DISKD$_2$','DISKD$_3$','DISKD$_4$','IBPKD$_1$','IBPKD$_2$','IBPKD$_3$','IBPKD$_4$')
label_dataset = ('Type','D1','D2','D3','D4','D5','D6','D7','D8','D9','D10')
label_measure = ('accuracy','precision','recall','F1-Score','AUC (One-vs-Rest)','AUC (One-vs-One)')
plot_row = len(files2)//2;
plot_col = 2;
plot_alpha = 0.7;
plot_linewidth = 0;

#raw_max = np.zeros((max_set,max_level+1),dtype = np.float64);
#figure, axis = plt.subplots(plot_row, plot_col, figsize=(plot_col*10, plot_row*6))

# Define a list of 30 distinct colors
colors = plt.cm.viridis(np.linspace(0, 0.70, 30))

for measure in range(0,len(files2)):
    inputfile = files2[measure];
    #print("Measure:",measure);
    #print("Data Source:",inputfile);
    df = pd.read_csv(inputfile, header = files2_header[measure], sep = files2_separator[measure]);
    raw = df.to_numpy();
    # Compute the average of each row - average of five k
    raw_avg = np.nanmean(raw, axis=1)
    # Reshape the result to 10 rows (data sets) of 30 columns (training types)
    raw_avg_reshaped = raw_avg.reshape(10, 30)
    # Transpose the array to reshape the result to 30 rows (training types) of 10 columns (data sets)
    raw_avg_reshaped_transposed = np.transpose(raw_avg_reshaped)
    # Bold the max value in each row
    #raw_final = [color_max_value(column) for row in raw_avg_reshaped]
    raw_final = color_max_value_column(raw_avg_reshaped_transposed)
    # Create a table
    # latex_table = tabulate(raw_avg_reshaped, headers="firstrow", tablefmt="latex", floatfmt=(".4f",".4f",""))
    #latex_table = tabulate(raw_avg_reshaped_bold, tablefmt="latex", floatfmt=(".4f",) * len(raw_avg_reshaped[0]))
    #latex_table = tabulate(raw_final, tablefmt="latex_raw", headers=label_traintype, showindex=range(1, len(raw_final) + 1), colalign=("right",)*len(raw_final[0]), floatfmt=(".2f",)*len(raw_final[0]))
    latex_table = tabulate(raw_final, tablefmt="latex_raw", headers=label_dataset, showindex=label_traintype, colalign=("left",)+("right",)*len(raw_final[0]), floatfmt=(".3f",)*len(raw_final[0]))
    #latex_table = tabulate(raw_final, tablefmt="latex_raw", headers=label_dataset, showindex=label_traintype, colalign=("right",)*len(raw_final[0]), floatfmt=(".4f",)*len(raw_final[0]))
    #print("\\begin{table}")
    #print("\\caption{Classification "+label_measure[measure]+" of the based lines and the proposed techniques}")
    #print("\\label{table:classification-accuracy}")
    #print("\\setlength{\\tabcolsep{4pt}}")
    #print("\\tiny")
    #print(latex_table)
    #print("\\end{table}")
    #print()

    with open("latex_performance.txt", "a") as file:
        file.write("\\begin{table}\n")
        file.write("\\caption{Classification "+label_measure[measure]+" of the based lines and the proposed techniques}\n")
        file.write("\\label{table:classification-accuracy}\n")
        file.write("\\setlength{\\tabcolsep}{4pt}\n")
        file.write("\\scriptsize\n")
        file.write(latex_table)
        file.write("\n\\end{table}\n")
        file.write("\n")
                  
print("Task completed.");   

## Backup

In [None]:
# Create LaTex tables to show the performance comparison
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tabulate import tabulate

def bold_max_value(row):
    max_val = max(row)
    return [f"\\textbf{{{x:.3f}}}" if x == max_val else f"{x:.3f}" for x in row]

def color_max_value(row):
    max_val = max(row)
    return [f"\\textcolor{{blue}}{{{x:.3f}}}" if x == max_val else f"{x:.3f}" for x in row]

files2 = [];
files2_header = [];
files2_separator = [];
files2.append("cross_train_accuracy.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_precision.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_recall.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_f1.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_aucovr.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
files2.append("cross_train_aucovo.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_accuracy.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_precision.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_recall.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_f1.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_aucovr.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C
#files2.append("cross_test_aucovo.txt"); files2_header.append(None); files2_separator.append("\t"); # 21A4C

max_k = 10;
max_level = 5;
max_set = 7;
k_array = []
for k in range(0,max_k): k_array.append((2*k)+1); 
s_array = []
for s in range(0,max_set): s_array.append(s+1); 

perf_title = ('(a) Best Accuracy','(b) Best Precision','(c) Best Recall','(d) Best F1-Score','(e) Best Area Under the Curve (One-vs-Rest)','(f) Best Area Under the Curve (One-vs-One)');
label_traintype = ('Data Set','ORG','CNN','ENN','RENN','All KNN','TL','OSS','NCL','NM$_1$','NM$_2$','NM$_3$','IHT','CC','RUS','DIS$_1$','DIS$_2$','DIS$_3$','DIS$_4$','IBP$_1$','IBP$_2$','IBP$_3$','IBP$_4$','DISKD$_1$','DISKD$_2$','DISKD$_3$','DISKD$_4$','IBPKD$_1$','IBPKD$_2$','IBPKD$_3$','IBPKD$_4$')
#label_dataset = ('Type','D1','D2','D3','D4','D5','D6','D7','D8','D9','D10')
plot_row = len(files2)//2;
plot_col = 2;
plot_alpha = 0.7;
plot_linewidth = 0;

#raw_max = np.zeros((max_set,max_level+1),dtype = np.float64);
#figure, axis = plt.subplots(plot_row, plot_col, figsize=(plot_col*10, plot_row*6))

# Define a list of 30 distinct colors
colors = plt.cm.viridis(np.linspace(0, 0.70, 30))

for measure in range(0,len(files2)):
    inputfile = files2[measure];
    print("Measure:",measure);
    print("Data Source:",inputfile);
    df = pd.read_csv(inputfile, header = files2_header[measure], sep = files2_separator[measure]);
    raw = df.to_numpy();
    # Compute the average of each row - average of five k
    raw_avg = np.nanmean(raw, axis=1)
    # Reshape the result to 10 rows (data sets) of 30 columns (training types)
    raw_avg_reshaped = raw_avg.reshape(10, 30)
    # Transpose the array to reshape the result to 30 rows (training types) of 10 columns (data sets)
    #raw_avg_reshaped_transpose = np.transpose(raw_avg_reshaped)
    # Bold the max value in each row
    raw_final = [color_max_value(row) for row in raw_avg_reshaped]
    # Create a table
    # latex_table = tabulate(raw_avg_reshaped, headers="firstrow", tablefmt="latex", floatfmt=(".4f",".4f",""))
    #latex_table = tabulate(raw_avg_reshaped_bold, tablefmt="latex", floatfmt=(".4f",) * len(raw_avg_reshaped[0]))
    #latex_table = tabulate(raw_final, tablefmt="latex_raw", headers=label_traintype, showindex=range(1, len(raw_final) + 1), colalign=("right",)*len(raw_final[0]), floatfmt=(".2f",)*len(raw_final[0]))
    latex_table = tabulate(raw_final, tablefmt="latex_raw", headers=label_traintype, showindex=range(1, len(raw_final) + 1), colalign=("right",)*len(raw_final[0]), floatfmt=(".3f",)*len(raw_final[0]))
    #latex_table = tabulate(raw_final, tablefmt="latex_raw", headers=label_dataset, showindex=label_traintype, colalign=("right",)*len(raw_final[0]), floatfmt=(".4f",)*len(raw_final[0]))
    print(latex_table)

print("Task completed.");   