In [69]:
from utils import prepare_jupyter
prepare_jupyter()

import os
import pandas as pd
import numpy as np

from thesis.io import read_csv, NAMES

# Root directory that holds all experiment results (absolute, machine-specific path).
DATA_DIR_BASE = '/Users/tomek/University/MgrThesis/EnsembleDiversityResults'
# Result set used by this notebook; get_avg_table() reads its CSV from here.
base_path = os.path.join(DATA_DIR_BASE, 'diversity-22-09')

# Measure columns that get_table() correlates with 'accuracy'.
# The order of this list is the column order of the resulting table.
COLUMNS = [
    # STRUCT. -- presumably structural measures; TODO confirm the abbreviation
    'node_diversity',    
    'used_attributes_ratio',
    'avg_node_count',
    'avg_attributes_used',
    # BEHAV. -- presumably behavioural measures; TODO confirm the abbreviation
    'corr',
    'df',
    'entropy',
    'kw',
    'q',
    'coverage_minmax',
    'coverage_std'
]

In [70]:
def save_table_csv(table_df, name, pretty=False):
    """Write a table as ``<name>.csv`` into the ``tables-15-09`` results directory.

    Parameters
    ----------
    table_df : pandas.DataFrame
        Table to save. A copy is written, so the caller's frame is not modified.
    name : str
        File name without the ``.csv`` extension.
    pretty : bool, default False
        When True, the known measure columns and the ``dataset_name`` index
        name are replaced with human-readable labels before writing. Columns
        and index names without an entry in the maps are left unchanged.
    """
    table_path = f'/Users/tomek/University/MgrThesis/EnsembleDiversityResults/tables-15-09/{name}.csv'
    table_to_save = table_df.copy()

    if pretty:
        index_name_map = {
            'dataset_name': 'Dataset'
        }

        column_name_map = {
            'used_attributes_ratio': 'Used attr. ratio',
            'node_diversity': 'Node diversity',
            'corr': 'Correlation',
            'entropy': 'Entropy',
            'kw': 'Kohavi-Wolpert variance',
            'q': 'Q-statistic',
            'coverage_minmax': 'Coverage (minmax)',
            'coverage_std': 'Coverage (std. dev.)'
        }

        # `rename(index=...)` maps row *labels*, so it never changed the index
        # *name* ('dataset_name'); `rename_axis` is the call that renames it.
        table_to_save = (
            table_to_save
            .rename(columns=column_name_map)
            .rename_axis(index=index_name_map)
        )

    table_to_save.to_csv(table_path)

def get_table(df, ensemble_name, columns=None):
    """Correlate each measure with accuracy, per dataset, for one ensemble.

    Parameters
    ----------
    df : pandas.DataFrame
        Results with at least the columns ``name``, ``dataset_name``,
        ``accuracy`` and every requested measure column.
    ensemble_name : str
        Only rows whose ``name`` equals this value are used.
    columns : sequence of str, optional
        Measure columns to correlate with ``accuracy``. Defaults to the
        module-level ``COLUMNS`` list.

    Returns
    -------
    pandas.DataFrame
        One row per dataset (index named ``dataset_name``), one ``float32``
        column per measure, holding the correlation coefficient rounded to
        three decimals. The frame is empty when no row matches
        ``ensemble_name``.
    """
    if columns is None:
        columns = COLUMNS
    columns = list(columns)

    ensemble_df = df[df['name'] == ensemble_name]

    # Group by a scalar key rather than a one-element list: with a list key,
    # pandas >= 2.0 yields 1-tuples such as ('SPECT',) as group ids.
    correlations = {}
    for dataset_name, group_df in ensemble_df.groupby('dataset_name'):
        correlations[dataset_name] = [
            # 2x2 correlation matrix; the off-diagonal entry is the coefficient.
            group_df[[column, 'accuracy']].corr().iat[0, 1]
            for column in columns
        ]

    # Build the frame straight from the numbers instead of going through a
    # mixed string/float NumPy array, which turned every value into text.
    table_df = pd.DataFrame(
        list(correlations.values()),
        index=pd.Index(list(correlations), name='dataset_name'),
        columns=columns,
    )

    return table_df.astype(np.float32).round(3)

## Tabele (średnie)

1. Średnie wyniki dla każdej miary (wiersz to zbiór danych, kolumna to nazwa klasyfikatora).
2. ...

In [71]:
def df_from_groups(groups, with_avg=True):
    """Pivot ``{(dataset_name, name): value}`` into a dataset-by-name table.

    Parameters
    ----------
    groups : dict
        Maps ``(dataset_name, name)`` pairs to a numeric value.
    with_avg : bool, default True
        When True (and ``groups`` is not empty), a final row labelled
        ``'AVERAGE'`` holding the mean of every column is appended.

    Returns
    -------
    pandas.DataFrame
        A text column ``'index'`` with the dataset names followed by one float
        column per distinct ``name``. Rows and columns keep the order in which
        they first appear in ``groups``. A (dataset, name) pair that is absent
        from ``groups`` becomes NaN and is ignored by the average.
    """
    column_order = []
    per_dataset = {}

    for (dataset_name, name), value in groups.items():
        if name not in column_order:
            column_order.append(name)
        # Store by name, not by position, so a missing pair cannot shift the
        # values of later columns to the left.
        per_dataset.setdefault(dataset_name, {})[name] = value

    row_labels = list(per_dataset)
    table = pd.DataFrame(
        [[per_dataset[label].get(name, np.nan) for name in column_order]
         for label in row_labels],
        columns=column_order,
        dtype=float,
    )

    if with_avg and row_labels:
        # Values are kept numeric, so no cast through the removed `np.float`
        # alias (NumPy >= 1.24) is needed to average them.
        average_row = table.mean(axis=0).to_frame().T
        table = pd.concat([table, average_row], ignore_index=True)
        row_labels.append('AVERAGE')

    table.insert(0, 'index', row_labels)
    return table


def get_avg_table(measure):
    """Build the table of mean ``measure`` values per dataset and ensemble.

    Reads ``allexperiment-ensemble.csv`` from ``base_path``, averages
    ``measure`` within every ``(dataset_name, name)`` group and hands the
    result to ``df_from_groups``.

    Parameters
    ----------
    measure : str
        Name of the column to average.

    Returns
    -------
    pandas.DataFrame
        The table produced by ``df_from_groups``.

    Raises
    ------
    KeyError
        If ``measure`` is not a column of the results file.
    """
    df_path = os.path.join(base_path, 'allexperiment-ensemble.csv')
    all_df = pd.read_csv(df_path, index_col=0)

    if measure not in all_df.columns:
        raise KeyError(f'Unknown measure {measure!r} in {df_path}')

    # np.mean over the raw values (rather than Series.mean) is kept on
    # purpose: a NaN inside a group still propagates to that group's mean.
    group_means = {
        group_key: np.mean(group_df[measure].values)
        for group_key, group_df in all_df.groupby(by=['dataset_name', 'name'])
    }

    return df_from_groups(group_means)

In [72]:
# Mean `node_diversity` for each dataset (rows) and each ensemble `name` (columns).
get_avg_table('node_diversity')

Unnamed: 0,index,Adaboost,Bagging,CatboostEnsemble,LightGBM,RandomForest,XGBoostEnsemble
0,SPECT,6.705776423704727,4.534673144740658,13.029224771973004,1.190220073003889,4.893634637072217,3.1774274731125605
1,boston,6.14710119515433,4.137565972484585,0.8830720033424524,3.818990024633795,5.140086232543834,5.110744124972201
2,cmc,7.714195013533241,4.709819881488884,2.817987025960352,3.830279488071824,7.419626898959596,6.218113430634265
3,flags,7.247158154581975,5.256902613743185,2.8903229547834437,0.8184039271416983,6.774610358779737,3.3278104642073782
4,glass,5.5996130820675925,3.96716795721864,0.664822998006925,1.0905561810096067,4.8239531665074855,3.2719249913059003
5,ionosphere,6.186434061579411,3.7207108371457314,0.0,1.9910277672351853,5.986341781702702,3.3326352444911294
6,isolet,0.0,1.751915856308536,0.3339817970707536,2.839737188445424,5.750324560316694,1.4665012864301863
7,lymph,4.881995514979682,3.766367265648041,11.404581724794562,0.940017570847456,5.2952969972552255,2.7904387444559604
8,oil_spill,4.642181203252159,3.533263480428248,0.0,3.692821176281792,5.065862001083143,4.624994878667559
9,pollution,0.0,2.15209132921398,5.163451618620398,0.0,2.929255818514028,1.6184070516204945


In [73]:
# Mean `used_attributes_ratio` for each dataset (rows) and each ensemble `name` (columns).
get_avg_table('used_attributes_ratio')

Unnamed: 0,index,Adaboost,Bagging,CatboostEnsemble,LightGBM,RandomForest,XGBoostEnsemble
0,SPECT,0.993939393939394,0.9915151515151516,0.9724799010513296,0.8484848484848484,0.9990909090909093,0.9
1,boston,0.981025641025641,0.9861538461538464,0.9784830554061325,0.8923076923076922,0.9948717948717948,0.923076923076923
2,cmc,1.0,0.9977777777777778,1.0,0.962962962962963,1.0,0.9851851851851852
3,flags,0.974047619047619,0.9623809523809522,0.9014084507042256,0.6142857142857142,0.9840476190476192,0.8238095238095238
4,glass,0.9992592592592592,0.9955555555555554,1.0,0.9555555555555556,0.9992592592592592,0.9407407407407408
5,ionosphere,0.942156862745098,0.892549019607843,0.8939575830332133,0.8411764705882353,0.9533333333333333,0.8019607843137256
6,isolet,0.0076715289032955,0.1181739600216099,0.2304518819915902,0.1736358725013506,0.3566828741220962,0.0448406266882766
7,lymph,0.9785185185185185,0.972222222222222,0.8918717375093214,0.6481481481481481,0.9925925925925928,0.7888888888888889
8,oil_spill,0.8968707482993195,0.846938775510204,0.8371511870054145,0.7551020408163265,0.9190476190476188,0.6761904761904762
9,pollution,0.3479999999999999,0.9586666666666668,0.9687645687645684,0.4488888888888888,0.9946666666666666,0.72


In [74]:
# Mean `corr` (correlation measure) for each dataset (rows) and each ensemble `name` (columns).
get_avg_table('corr')

Unnamed: 0,index,Adaboost,Bagging,CatboostEnsemble,LightGBM,RandomForest,XGBoostEnsemble
0,SPECT,0.2370716355139751,0.4610211501624622,0.1554838430745032,0.3022003885968626,0.4367145396163208,0.4063373688403712
1,boston,0.1544952755258015,0.4526140800778277,0.2107225152920194,0.2614148265959742,0.3302368934522918,0.3186428989897578
2,cmc,0.0447336855579296,0.5385472668907901,0.1132609066208035,0.2847927991729281,0.4298318073714657,0.3373476289549552
3,flags,0.1250265127757344,0.2889407392858981,0.0580790459524144,0.2240397117069165,0.2187713101885316,0.2971422903302971
4,glass,0.1539052854084596,0.3825998610774361,0.1667075686195846,0.1777801963523967,0.3026220362688421,0.31631344457485
5,ionosphere,0.1811594637561135,0.4983745703121576,0.1183285607596729,0.2447938250032784,0.3250131594106754,0.3583747680441128
6,isolet,0.0,0.4861819233749171,0.0695551792438308,0.4107645812607309,0.2428188774298788,0.4091343219726154
7,lymph,0.1693764714245069,0.3120709336117052,0.1819695904841867,0.1965062177893173,0.2122678570596307,0.2782320518477246
8,oil_spill,0.2785403227517349,0.5960026651497565,0.1303179510187209,0.2587339536624295,0.5009820466187567,0.4850629584175046
9,pollution,0.0,0.2598779303210567,0.1757574507691779,0.2276471655497441,0.1921451396459247,0.3912851096370245


In [75]:
# Mean `df` measure for each dataset (rows) and each ensemble `name` (columns).
get_avg_table('df')

Unnamed: 0,index,Adaboost,Bagging,CatboostEnsemble,LightGBM,RandomForest,XGBoostEnsemble
0,SPECT,0.152479523098371,0.1711141061158032,0.1638594224511226,0.1689179085309663,0.169339619376257,0.1690819837157064
1,boston,0.1186574402324852,0.10822673585219,0.1087596766403806,0.1250584711398978,0.1103163080667144,0.1273797626794825
2,cmc,0.2175205081474443,0.2859866362206538,0.2227147076616118,0.2195149083393849,0.2957999106936815,0.2478337535731483
3,flags,0.1949235283977138,0.1859353055187641,0.1962337545573805,0.2041438190510872,0.1855750717860993,0.2102108683111189
4,glass,0.119554687730546,0.1413084328797348,0.1405039950269476,0.1616416816556035,0.1317312132938457,0.1458377046087851
5,ionosphere,0.0729983845584489,0.0744457542865936,0.0892808124407399,0.0808267258190799,0.0643557237605527,0.0809729460940728
6,isolet,0.0,0.0192709475709475,0.057928699006621,0.0184293204150347,0.0159581607457797,0.0269439886392267
7,lymph,0.0962907110653387,0.1011730734509059,0.1032527219018441,0.1395178271099453,0.1059835451394072,0.104862791966897
8,oil_spill,0.0485204001239605,0.0428908599893467,0.0779804560044536,0.096978405976618,0.0429847438535815,0.0524721041651132
9,pollution,0.0,0.13787659818136,0.1399458766341883,0.1563702787512311,0.139100300052681,0.189956458004077


In [76]:
# Mean `entropy` for each dataset (rows) and each ensemble `name` (columns).
get_avg_table('entropy')

Unnamed: 0,index,Adaboost,Bagging,CatboostEnsemble,LightGBM,RandomForest,XGBoostEnsemble
0,SPECT,0.5067135336594455,0.2948647099930118,0.5656981227151936,0.4544523643139996,0.2974913580246913,0.358932215234102
1,boston,0.5131855044327962,0.2503599948230117,0.4554344393715389,0.4534934316961108,0.3345296317867081,0.4003318449492007
2,cmc,0.8255647457627118,0.2764811222568124,0.6534722188463109,0.5104009070294785,0.3520075022099235,0.4572916868442291
3,flags,0.6398687359424201,0.4724199730094467,0.7236849708235921,0.5586900584795322,0.5326940170940172,0.4939208277103014
4,glass,0.5024826135105205,0.329162200073828,0.5313505906238465,0.5564584717607973,0.3789111111111111,0.4152587670727206
5,ionosphere,0.3578431015723974,0.1606830315224681,0.4758739101274313,0.3601196512407781,0.2250423608316566,0.2910331321260899
6,isolet,0.0,0.0441122222222222,0.4586678321678321,0.0784888888888888,0.09366,0.1312333333333333
7,lymph,0.4412895469912102,0.3291445210727969,0.4534151045282727,0.51832030651341,0.4256375478927203,0.3830988505747126
8,oil_spill,0.2590577921351977,0.0945807562483407,0.5042202567680845,0.392473508552357,0.1459188606970834,0.191936663253309
9,pollution,0.0,0.4424444444444444,0.5307925407925408,0.5177777777777777,0.5009222222222222,0.3897777777777777


In [77]:
# Mean `kw` (Kohavi-Wolpert variance) for each dataset (rows) and each ensemble `name` (columns).
get_avg_table('kw')

Unnamed: 0,index,Adaboost,Bagging,CatboostEnsemble,LightGBM,RandomForest,XGBoostEnsemble
0,SPECT,0.1599717935996111,0.1037200731351624,0.1890913842693835,0.1831846880568582,0.10721649189855,0.1611777674901189
1,boston,0.0975920057353412,0.0467985707583856,0.0903021251257702,0.0969204197287727,0.0639697984367852,0.0891183504900217
2,cmc,0.0430782520986236,0.0177497901964137,0.0392748538156092,0.0356963920711481,0.0224831257042837,0.0338235826445766
3,flags,0.2912408065780216,0.2182562873844356,0.3099702494572813,0.277011643697985,0.245428557656642,0.263612740500339
4,glass,0.2236670961648285,0.1467429857490739,0.2238051561990499,0.2514042292477354,0.1688943885624678,0.2116627238600506
5,ionosphere,0.0954710598389022,0.0447898926867711,0.1282569856070958,0.1148465508004431,0.0637701661450932,0.1034827325859921
6,isolet,0.0,0.0079908935185185,0.0778317647630147,0.0191334259259259,0.0172137407407407,0.0360354166666666
7,lymph,0.2876755093372853,0.2114047183687849,0.2824202039225701,0.3430357739903995,0.2704616334170079,0.2857709481657638
8,oil_spill,0.0298545067576045,0.0099604692791493,0.0530074492942928,0.0447804591479056,0.0166265586658804,0.0284949781535576
9,pollution,0.0,0.6661842592592592,0.7909916472416472,0.8052777777777778,0.7592087962962963,0.7192037037037037


In [78]:
# Mean `coverage_minmax` for each dataset (rows) and each ensemble `name` (columns).
get_avg_table('coverage_minmax')

Unnamed: 0,index,Adaboost,Bagging,CatboostEnsemble,LightGBM,RandomForest,XGBoostEnsemble
0,SPECT,0.0654911085579523,0.0472898395203418,0.0736912270685783,0.0327169620801376,0.0476974278320926,0.0748471762617936
1,boston,0.0795160716244479,0.0587526765460612,0.122813591104388,0.0987427807237736,0.0792368999820726,0.0639337518110934
2,cmc,0.2089787876700442,0.0718260869933711,0.1438258084331926,0.1368348629938433,0.0938882948117366,0.1502997175025009
3,flags,0.0677096406919101,0.0521625007396148,0.1264146856934143,0.0175567129563018,0.0734475943041327,0.0374326230897082
4,glass,0.0734779598908217,0.0879539397132825,0.1104479390892179,0.0280535366904089,0.0857792131213399,0.0764779353098544
5,ionosphere,0.0996723813855167,0.0508513093762756,0.1024961313438416,0.0810166010891939,0.1011905197314808,0.0971494197725312
6,isolet,0.0,0.0197173138317034,0.1699849875578292,0.0373984659775788,0.0367802497311432,0.0716590101299852
7,lymph,0.0636535800069113,0.0478089634634553,0.0867527121099684,0.0281300409109351,0.0634067388510041,0.0624856734402276
8,oil_spill,0.1439637527356607,0.0537621104835212,0.1452793386440492,0.1072477133535873,0.138722419271338,0.1157167792807293
9,pollution,0.0,0.0601434414675506,0.0848148313799385,0.0487002665472517,0.0730787781409774,0.0677064636361307


In [79]:
# Mean `coverage_std` for each dataset (rows) and each ensemble `name` (columns).
get_avg_table('coverage_std')

Unnamed: 0,index,Adaboost,Bagging,CatboostEnsemble,LightGBM,RandomForest,XGBoostEnsemble
0,SPECT,0.0177326396505286,0.0134270076233083,0.028662765519259,0.0109108382014343,0.0129816080633515,0.0253138993248192
1,boston,0.0165965110965541,0.013734562111249,0.0188917129607593,0.0321700902680999,0.0165859082542645,0.0171633169629023
2,cmc,0.054191318506984,0.0143875201927495,0.025670098535559,0.0386778629046811,0.0182991814797661,0.0381737904735995
3,flags,0.0162078549718504,0.0143258500749669,0.0250461311870165,0.007214815837071,0.0207991713999067,0.0134289351940396
4,glass,0.015233657191885,0.0190846186647949,0.0162780233178213,0.0105175506493792,0.020157347176319,0.0192335457760943
5,ionosphere,0.0259388869464331,0.0239802923860672,0.0146789953829384,0.028543874221646,0.0372676067659969,0.0309185779596681
6,isolet,0.0,0.0125612598778097,0.0271256975443437,0.0223131330757021,0.0324965082561913,0.0369991063759089
7,lymph,0.0137932738222591,0.0133410831027254,0.025291020071178,0.0128595600263253,0.0162402864876494,0.019338898555331
8,oil_spill,0.0301439667038234,0.0225062667188294,0.0208826260577413,0.0313111049265427,0.0359197076347288,0.0395264497736952
9,pollution,0.0,0.0257695137741732,0.0143063246679364,0.0243501332736258,0.0283390390212489,0.0282244489596076


In [81]:
# Mean `q` (Q-statistic) for each dataset (rows) and each ensemble `name` (columns).
get_avg_table('q')

Unnamed: 0,index,Adaboost,Bagging,CatboostEnsemble,LightGBM,RandomForest,XGBoostEnsemble
0,SPECT,0.3673305334842701,0.7695914030729523,0.2782074168294673,0.4677788381698912,0.7107276108660457,0.604620692484301
1,boston,0.314901607372291,0.7817238615608364,0.3827041996445249,0.4224434613155889,0.6130991728422809,0.5394049557927716
2,cmc,0.085442245295662,0.8369429969630011,0.2071949743464251,0.4707274096221905,0.7263169310706608,0.5333924011416888
3,flags,0.2328975035120903,0.5112606108466289,0.1028646466176554,0.3527944116340086,0.3974841581125743,0.4723538785732323
4,glass,0.2969922381803129,0.6668704717110607,0.2982158272858107,0.2588957979695125,0.5543521973256504,0.5162845806921242
5,ionosphere,0.3807123272205548,0.8533095906735609,0.2257450481104514,0.4366866031876583,0.640489400840688,0.58317590450929
6,isolet,0.0,0.7725948888609842,0.0861778399415212,0.5601247803498475,0.4089215886081486,0.4808212826843237
7,lymph,0.3011783782537182,0.5549259151167515,0.3038755767184311,0.2579037414490966,0.3877007245897594,0.4142832361420523
8,oil_spill,0.6010492559199861,0.9094986908196476,0.2687120779340799,0.480525892281775,0.8129177958901631,0.7631052636596193
9,pollution,0.0,0.3335482054968222,0.2436884692169159,0.1866560938318722,0.2552468428504131,0.4785894858759661


In [82]:
# Mean `accuracy` for each dataset (rows) and each ensemble `name` (columns).
get_avg_table('accuracy')

Unnamed: 0,index,Adaboost,Bagging,CatboostEnsemble,LightGBM,RandomForest,XGBoostEnsemble
0,SPECT,0.7812834847426042,0.7607337526205451,0.7930375504499494,0.7783135336594456,0.7406335895644071,0.7868623340321453
1,boston,0.8495521905131689,0.8512062382708857,0.8573527035961513,0.8458228175758751,0.8406736556008542,0.8491555037856726
2,cmc,0.623061147622891,0.636987509127945,0.6365461622120219,0.6407317729351628,0.6162404396786962,0.6549913524731926
3,flags,0.666378767431399,0.7038956365272154,0.6820959495162608,0.6615384615384615,0.6917318938371569,0.6631129104813315
4,glass,0.8301365817644886,0.7890143964562568,0.8163913498215822,0.8133997785160576,0.8143816906607604,0.8008121077888518
5,ionosphere,0.9110301810865192,0.9044225352112674,0.909365033739854,0.9127967806841047,0.9189577464788734,0.9013816230717638
6,isolet,0.9681666666666668,0.9745555555555556,0.9782051282051282,0.9799999999999998,0.9789444444444444,0.977222222222222
7,lymph,0.8811877394636015,0.8443908045977011,0.8407004551415568,0.837624521072797,0.8475402298850575,0.8734099616858239
8,oil_spill,0.936077103955702,0.9407511283043196,0.942913722548981,0.9420127432017296,0.9495399552470892,0.9341942579739828
9,pollution,0.681111111111111,0.7861111111111111,0.793123543123543,0.75,0.7794444444444444,0.7555555555555554
