In [11]:
import pandas as pd
import numpy as np
import os
from os import listdir
from os.path import isfile, join
from sklearn.metrics import accuracy_score,cohen_kappa_score
from collections import defaultdict

In [14]:
# Root directory of the experiment; it holds one sub-directory per dataset.
folder = './results_minmax/Experiment1'

# Dataset names are the immediate sub-directories of `folder`.
# os.listdir returns entries in arbitrary order, so the names are sorted to keep
# the row order of every generated table reproducible across runs and machines.
bases = sorted(
    name for name in os.listdir(folder)
    if os.path.isdir(os.path.join(folder, name))
)

In [15]:
def get_metrics(results_dir=None, base_names=None, n_folds=5):
    """Write per-fold accuracy and Cohen's kappa tables for every dataset.

    For each dataset directory ``<results_dir>/<base>`` the function discovers
    the techniques from the prediction files named ``fold_<k>_<technique>.csv``,
    scores every fold of every technique, and writes two CSV files:

    * ``<results_dir>/<base>/Metrics/accuracy_<base.lower()>.csv``
    * ``<results_dir>/<base>/Metrics/kappa_<base.lower()>.csv``

    Each output row holds the technique name, one column per fold
    (``Fold1`` .. ``Fold<n_folds>``), and the ``mean`` and ``std`` over the folds
    (``np.std`` default, i.e. population standard deviation).

    Parameters
    ----------
    results_dir : str, optional
        Experiment root directory. Defaults to the notebook-level ``folder``.
    base_names : iterable of str, optional
        Dataset sub-directories to process. Defaults to the notebook-level
        ``bases``.
    n_folds : int, optional
        Number of folds expected per technique (files ``fold_1_*`` ..
        ``fold_<n_folds>_*``). Defaults to 5.

    Raises
    ------
    FileNotFoundError
        If a technique is missing the prediction file of one of the folds.
    """
    results_dir = folder if results_dir is None else results_dir
    base_names = bases if base_names is None else list(base_names)

    fold_columns = ["Fold" + str(fold) for fold in range(1, n_folds + 1)]
    columns = ["Technique"] + fold_columns + ["mean", "std"]

    for base in base_names:
        base_dir = '{0}/{1}'.format(results_dir, base)

        # Only files shaped like fold_<k>_<technique>.csv are prediction files;
        # anything else in the directory (notes, OS metadata, ...) is ignored
        # instead of being turned into a bogus technique name.
        found = set()
        for name in listdir(base_dir):
            if not isfile(join(base_dir, name)):
                continue
            if not (name.startswith("fold_") and name.endswith(".csv")):
                continue
            fold_and_tec = name[len("fold_"):-len(".csv")].split("_", 1)
            if len(fold_and_tec) == 2 and fold_and_tec[0].isdigit():
                found.add(fold_and_tec[1])
        # Sorted so the rows of the output tables have a stable order.
        techniques = sorted(found)

        acc_rows = []
        kappa_rows = []
        for tec in techniques:
            accs = []
            kappas = []
            for fold in range(1, n_folds + 1):
                # Files have no header row; columns 0 and 1 are the two label
                # vectors being compared (column 0 is passed in sklearn's first
                # positional slot -- presumably the ground truth; confirm against
                # the code that writes these files).
                df = pd.read_csv('{0}/fold_{1}_{2}.csv'.format(base_dir, fold, tec), header=None)
                first, second = df[0].values, df[1].values
                accs.append(accuracy_score(first, second))
                kappas.append(cohen_kappa_score(first, second))

            acc_rows.append([tec] + accs + [np.mean(accs), np.std(accs)])
            kappa_rows.append([tec] + kappas + [np.mean(kappas), np.std(kappas)])

        # Create the output directory on first run instead of failing in to_csv.
        metrics_dir = '{0}/Metrics'.format(base_dir)
        os.makedirs(metrics_dir, exist_ok=True)

        df_acc = pd.DataFrame(acc_rows, columns=columns)
        df_acc.to_csv('{0}/accuracy_{1}.csv'.format(metrics_dir, base.lower()), index=False)

        df_kappa = pd.DataFrame(kappa_rows, columns=columns)
        df_kappa.to_csv('{0}/kappa_{1}.csv'.format(metrics_dir, base.lower()), index=False)
    
#get_metrics()    

In [6]:
def get_tables(metric, results_dir=None, base_names=None):
    """Build one summary table per aggregation method from the per-dataset metrics.

    Reads ``<results_dir>/<base>/Metrics/<metric>_<base.lower()>.csv`` for every
    dataset (columns ``Technique`` and ``mean`` are used) and, for each
    aggregation in ``max``, ``ecoc`` and ``decision_templates``, writes a
    tab-separated file ``<results_dir>/<agg>_<metric>.csv`` with one row per
    dataset and one column per technique family.

    A technique belongs to an aggregation when its name contains ``_<agg>_``;
    its column label is the name with ``_<agg>_agg`` removed and underscores
    replaced by spaces. Cell values are the mean score times 100, formatted with
    two decimals. A dataset that has no result for a column gets an empty cell.

    Parameters
    ----------
    metric : str
        Metric file prefix, e.g. ``"accuracy"`` or ``"kappa"``.
    results_dir : str, optional
        Experiment root directory. Defaults to the notebook-level ``folder``.
    base_names : iterable of str, optional
        Dataset sub-directories to include, in output row order. Defaults to the
        notebook-level ``bases``.
    """
    results_dir = folder if results_dir is None else results_dir
    base_names = bases if base_names is None else list(base_names)

    aggs = ['max', 'ecoc', 'decision_templates']
    columns = ["Base", "kmeans", "kmeans des", "kmeans desthr", "parzen", "parzen des", "parzen desthr", "svdd", "svdd des", "svdd desthr"]

    # Mean score of every technique, per dataset.
    mean_by_base = {}
    for base in base_names:
        metrics_csv = '{0}/{1}/Metrics/{2}_{3}.csv'.format(results_dir, base, metric, base.lower())
        df = pd.read_csv(metrics_csv)
        mean_by_base[base] = dict(zip(df['Technique'], df['mean']))

    for agg in aggs:
        marker = "_" + agg + "_"
        suffix = "_" + agg + "_agg"

        # One record per dataset. Building the table row-wise keeps every value
        # on its own dataset's row even when a dataset lacks some technique;
        # appending to per-column lists would misalign or fail in that case.
        rows = []
        for base in base_names:
            row = {"Base": base}
            for technique, mean in mean_by_base[base].items():
                if marker not in technique:
                    continue
                label = technique.replace(suffix, '').replace("_", ' ')
                row[label] = '{:0.2f}'.format(mean * 100)
            rows.append(row)

        # `columns` fixes the column order; labels not listed there are dropped
        # and listed labels with no data are written as empty cells.
        df_agg = pd.DataFrame(rows, columns=columns)
        df_agg.to_csv('{0}/{1}_{2}.csv'.format(results_dir, agg, metric), index=False, sep='\t')

#get_tables("accuracy")
#get_tables("kappa")