In [None]:
import warnings

import numpy as np
import pandas as pd

from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
from sklearn.linear_model import Perceptron
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import BaggingClassifier

from imblearn.over_sampling import SMOTE

from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import auc
from sklearn.metrics import roc_curve
from sklearn.metrics import confusion_matrix

from deslib.util.instance_hardness import kdn_score
from deslib.des.knora_u import KNORAU
from deslib.des.knora_e import KNORAE
from deslib.dcs.ola import OLA
from deslib.dcs.mcb import MCB
from deslib.dcs.lca import LCA
from deslib.static.static_selection import StaticSelection

In [None]:
# Folder holding the CSV datasets; the trailing slash lets file names be appended directly.
dataset_dir = 'datasets/'
# The file is read with header=None, so columns get integer labels 0..n-1.
df_cm1 = pd.read_csv(f'{dataset_dir}cm1.csv', sep=',', header=None)

In [None]:
# Bare last expression: renders the CM1 frame as this cell's output.
df_cm1

In [None]:
# Load KC1 the same way as CM1 (no header row, integer column labels).
df_kc1 = pd.read_csv(f'{dataset_dir}kc1.csv', sep=',', header=None)
# Bare last expression: renders the frame as this cell's output.
df_kc1

In [None]:
# Number of base classifiers in the bagging pool.
n_estimators = 100
# Base learner for the pool: a Perceptron wrapped in CalibratedClassifierCV.
base_classifier = CalibratedClassifierCV(Perceptron())

In [None]:
# Unfitted bagging ensemble built from the base learner configured above.
pool = BaggingClassifier(
    base_classifier,
    n_estimators=n_estimators,
)
# Bare last expression: shows the estimator's repr as this cell's output.
pool

In [None]:
def get_X_y(df, random_state=None):
    """Split a dataset into features and target, then balance it with SMOTE.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataset whose first column (by position) is the class label and whose
        remaining columns are the features.
    random_state : int, RandomState instance or None, default=None
        Seed forwarded to ``SMOTE``. The default keeps the previous unseeded
        behaviour; pass an integer to make the resampling reproducible.

    Returns
    -------
    X, y
        The oversampled features and target, exactly as returned by
        ``SMOTE.fit_resample``.

    Notes
    -----
    NOTE(review): the whole dataset is oversampled here, and ``rum_experiments``
    only splits it into folds afterwards, so synthetic samples are generated
    before the train/test split. Confirm this is intended.
    """
    # Select both target and features by position. The original picked the
    # target by label (``df[[0]]``) but dropped ``df.columns[0]``, which only
    # agree when the first column happens to be labelled 0.
    y = df.iloc[:, [0]]
    X = df.iloc[:, 1:]

    oversample = SMOTE(random_state=random_state)
    X, y = oversample.fit_resample(X, y)

    return X, y

def g_mean(y_test, y_pred):
    """Return the geometric mean of sensitivity and specificity.

    The confusion matrix is indexed as a 2x2 table in which index 1 is the
    positive class and index 0 the negative class (rows: true labels,
    columns: predictions).
    """
    cm = confusion_matrix(y_test, y_pred)

    true_neg, false_pos = cm[0][0], cm[0][1]
    false_neg, true_pos = cm[1][0], cm[1][1]

    # Sensitivity: share of actual positives that were predicted positive.
    sensitivity = true_pos / float(true_pos + false_neg)
    # Specificity: share of actual negatives that were predicted negative.
    specificity = true_neg / float(true_neg + false_pos)

    return np.sqrt(sensitivity * specificity)


def auc_score(y_true, y_pred):
    """Return the area under the ROC curve, treating label 1 as positive."""
    # The thresholds returned by roc_curve are not needed for the area.
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, y_pred, pos_label=1)
    return auc(false_pos_rate, true_pos_rate)

def get_metrics(y_true, y_pred):
    """Return ``(accuracy, auc, f1, g_mean)`` for one set of predictions."""
    return (
        accuracy_score(y_true, y_pred),
        auc_score(y_true, y_pred),
        f1_score(y_true, y_pred),
        g_mean(y_true, y_pred),
    )



        
def get_validation_sets(X_val, y_val, k=5, hardness_threshold=0.5):
    """Partition a validation set into hard and easy instances by kDN score.

    Parameters
    ----------
    X_val : pandas.DataFrame
        Validation features; converted with ``to_numpy()``.
    y_val : pandas.DataFrame or pandas.Series
        Validation labels; converted with ``to_numpy().ravel()``.
    k : int, default=5
        Neighbourhood size passed to ``kdn_score``.
    hardness_threshold : float, default=0.5
        Instances whose kDN score is strictly greater than this value are
        "hard"; all others are "easy".

    Returns
    -------
    X_val, y_val : numpy.ndarray
        The full validation set as arrays.
    X_hard, y_hard : list
        Rows and labels of the hard instances.
    X_easy, y_easy : list
        Rows and labels of the easy instances.
    """
    y_val = y_val.to_numpy().ravel()
    X_val = X_val.to_numpy()

    # kdn_score also returns the neighbour indices, which are not needed here.
    kdn_scores, _ = kdn_score(X_val, y_val, k)
    is_hard = np.asarray(kdn_scores) > hardness_threshold

    # Lists are returned (rather than arrays) to keep the original return types.
    X_hard, y_hard = list(X_val[is_hard]), list(y_val[is_hard])
    X_easy, y_easy = list(X_val[~is_hard]), list(y_val[~is_hard])

    return X_val, y_val, X_hard, y_hard, X_easy, y_easy
    

#fold,acc, auc, f1_score, g_mean

In [None]:
np.warnings.filterwarnings('ignore')
# Dynamic selection techniques imported from DESlib.
des_models = [
    KNORAU,
    KNORAE,
    OLA,
    MCB,
    LCA,
]

def _score_with_dsel(model, pool, X_dsel, y_dsel, X_test, y_test):
    """Fit ``model`` on one selection set and return ``get_metrics`` on the test fold."""
    # Every selector is built the same way. The hard-set branch used to pass
    # ``pct_classifiers=1.0``, a keyword only StaticSelection accepts, so the
    # dynamic selectors raised TypeError there.
    selector = model(pool)
    selector.fit(X_dsel, y_dsel)
    return get_metrics(y_test, selector.predict(X_test))


def _print_summary(prefix, model_name, fold_scores):
    """Print the mean and standard deviation of each metric across folds."""
    print(prefix + "name: ", model_name)
    # fold_scores holds one (acc, auc, f1, g_mean) tuple per fold; transpose it
    # so each metric's values are grouped together.
    for label, values in zip(("acc", "auc", "f1", "gm"), zip(*fold_scores)):
        print(label + ": ", np.mean(values), np.std(values))
    print("\n")


def rum_experiments(df, model, random_state=42):
    """Cross-validate one selection technique with three validation-set variants.

    For each of 10 stratified folds, a bagging pool of calibrated Perceptrons
    is trained, and ``model`` is fitted three times: on the whole validation
    set, on its hard instances only, and on its easy instances only (as
    partitioned by ``get_validation_sets``). Each fitted selector is scored on
    the fold's test split with ``get_metrics``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw dataset, passed to ``get_X_y``.
    model : class
        Selector class instantiated as ``model(pool)``, e.g. ``KNORAU`` or
        ``StaticSelection``.
    random_state : int or None, default=42
        Seed for the fold shuffling, the train/validation split and the
        bagging pool. The resampling inside ``get_X_y`` is not seeded here.

    Returns
    -------
    model_name : str
        ``model.__name__``.
    acc, auc, f1, g_mean : list of float
        Mean score over the folds for each metric, ordered
        ``[all, hard, easy]``.

    Notes
    -----
    NOTE(review): ``get_metrics`` receives the hard predictions from
    ``predict``, so the AUC is computed from class labels rather than scores.
    """
    base_classifier = CalibratedClassifierCV(Perceptron())
    n_estimators = 100
    X, y = get_X_y(df)

    kf = StratifiedKFold(n_splits=10, random_state=random_state, shuffle=True)
    model_name = model.__name__

    variants = ('all', 'hard', 'easy')
    scores = {variant: [] for variant in variants}

    for train_index, test_index in kf.split(X, y):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        # 0.2222 of the 90% training portion is roughly 20% of the full data.
        X_train, X_val, y_train, y_val = train_test_split(
            X_train, y_train, test_size=0.2222, random_state=random_state)

        pool = BaggingClassifier(base_classifier, n_estimators=n_estimators,
                                 random_state=random_state)
        pool.fit(X_train, y_train)

        X_val, y_val, X_hard, y_hard, X_easy, y_easy = get_validation_sets(X_val, y_val)
        selection_sets = {
            'all': (X_val, y_val),
            'hard': (X_hard, y_hard),
            'easy': (X_easy, y_easy),
        }

        for variant in variants:
            X_dsel, y_dsel = selection_sets[variant]
            scores[variant].append(
                _score_with_dsel(model, pool, X_dsel, y_dsel, X_test, y_test))

    for prefix, variant in (("", 'all'), ("HARD ", 'hard'), ("EASY ", 'easy')):
        _print_summary(prefix, model_name, scores[variant])

    # Rows: validation-set variant (all, hard, easy); columns: acc, auc, f1, g_mean.
    means = np.array([np.mean(scores[variant], axis=0) for variant in variants])
    acc_means, auc_means, f1_means, g_m_means = (list(column) for column in means.T)

    return model_name, acc_means, auc_means, f1_means, g_m_means

# One result table per metric. Each starts with the row labels and gains one
# column per model as experiments run. The generator builds a fresh dict (and
# list) for every name, so the four tables never share state.
accs, aucs, f1s, gm1 = (
    {'validation_set': ['all', 'hard', 'easy']} for _ in range(4)
)


def run(dataset, model):
    """Run the experiment for ``model`` and store its scores in the result tables.

    The per-metric score lists returned by ``rum_experiments`` are written to
    the module-level ``accs``, ``aucs``, ``f1s`` and ``gm1`` dicts under the
    model's name.
    """
    model_name, *per_metric = rum_experiments(dataset, model)
    for table, values in zip((accs, aucs, f1s, gm1), per_metric):
        table[model_name] = values
    
# Evaluate every selection technique on KC1, in the original order.
for _selector in (KNORAU, KNORAE, OLA, MCB, StaticSelection, LCA):
    run(df_kc1, _selector)

# Write one file per metric with a fixed column order.
_result_columns = ['validation_set', 'KNORAU', 'KNORAE', 'OLA', 'MCB', 'LCA', 'StaticSelection']
for _table, _path in ((accs, 'accs'), (aucs, 'aucs'), (f1s, 'f1s'), (gm1, 'gm1')):
    df = pd.DataFrame(_table, columns=_result_columns)
    df.to_csv(_path, index=False)