# Experimentos 18/10/23 - BayesFL

- Probar con todas las combinaciones de BN_FusionUnion y BN_FusionIntersection. 
- Experimentos en portátil MSI.
- Base más grande: `andes`.
- 4 clientes.
- 5000 instancias para cada cliente.
- Sin interleaving.

In [1]:
import warnings
# Suppress every Python warning for the rest of the session
# (presumably to keep the rendered cell outputs free of library notices).
warnings.filterwarnings('ignore')

from IPython.display import display

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

import pingouin as pg

import glob
import os
import shutil

# Pandas display limits used when rendering frames in the notebook
pd.set_option('display.max_info_rows', 30000000)
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 100)

# Seaborn theme; applied before the rcParams overrides below so they are not reset
sns.set_theme(style='darkgrid')

# Figure size and resolution
plt.rcParams.update({
    "figure.figsize": (20, 8),
    "figure.dpi": 600,
})

# Turn off pandas' chained-assignment warning
pd.options.mode.chained_assignment = None

# Colour palettes: a five-colour one and a six-colour variant with an extra leading colour
color = ['#2a9d8f', '#e9c46a', '#f4a261', '#e76f51', '#b13c47']

color2 = ['#264653', '#2a9d8f', '#e9c46a', '#f4a261', '#e76f51', '#b13c47']

In [2]:
# Load every CSV file found in ../results/Client/ and stack them into a single
# DataFrame with a fresh 0..n-1 index (each file carries its own header row).
path = r'../results/Client/'  # adjust to the local results folder

# glob.glob returns its matches in arbitrary order; sorting keeps the row order
# of the concatenated frame reproducible across runs and machines.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))
if not all_files:
    # Fail with an explicit message instead of pandas' generic
    # "No objects to concatenate" error.
    raise FileNotFoundError(f"No CSV result files found in {path!r}")

# Lazily read each file; pd.concat consumes the generator.
df_from_each_file = (pd.read_csv(f, sep=',') for f in all_files)
df = pd.concat(df_from_each_file, ignore_index=True)

df

Unnamed: 0,bbdd,algorithm,id,iteration,instances,threads,bdeu,SMHD,time(s)
0,alarm_50001,GES_BN_FusionIntersection_GES_BN_FusionInterse...,0,1,5000,8,-54570.524663,10,0.154
1,alarm_50001,GES_BN_FusionIntersection_GES_BN_FusionInterse...,0,2,5000,8,-53557.711947,7,0.039
2,alarm_50001,GES_BN_FusionIntersection_GES_BN_FusionInterse...,0,3,5000,8,-53557.711947,7,0.031
3,alarm_50001,GES_BN_FusionIntersection_GES_BN_FusionInterse...,0,4,5000,8,-53557.711947,7,0.037
4,alarm_50001,GES_BN_FusionIntersection_GES_BN_FusionInterse...,0,5,5000,8,-53557.711947,7,0.036
...,...,...,...,...,...,...,...,...,...
8023,win95pts_50004,pGES_BN_FusionUnion_None_BN_FusionUnion_fusion,3,1,5000,8,-47436.404946,64,0.014
8024,win95pts_50004,pGES_BN_FusionUnion_None_BN_FusionUnion_fusion,3,2,5000,8,-47460.319854,65,0.034
8025,win95pts_50004,pGES_BN_FusionUnion_None_BN_FusionUnion_fusion,3,3,5000,8,-47460.319854,65,0.025
8026,win95pts_50004,pGES_BN_FusionUnion_None_BN_FusionUnion_fusion,3,4,5000,8,-47460.319854,65,0.035


In [3]:
# Split the compound `algorithm` label (e.g.
# "pGES_BN_FusionUnion_None_BN_FusionUnion_fusion") on "_" into five columns:
# algorithm, fusionClient, refinement, fusionServer and epoch.
# Tokens 1 and 4 are the "BN" prefixes of the fusion names and are discarded.
df[['algorithm','fusionClient','refinement','fusionServer','epoch']] = df.algorithm.str.split("_",expand=True,).drop(columns=[1,4])

# Split `bbdd` (e.g. "alarm_50001") into the network name and the sample tag.
# Splitting once from the right keeps network names that themselves contain
# an underscore intact.
df[['bbdd','sample']] = df.bbdd.str.rsplit("_", n=1, expand=True)

# Exclude the diabetes and link networks from the analysis
df = df[~df.bbdd.isin(['diabetes', 'link'])]

df

Unnamed: 0,bbdd,algorithm,id,iteration,instances,threads,bdeu,SMHD,time(s),fusionClient,refinement,fusionServer,epoch,sample
0,alarm,GES,0,1,5000,8,-54570.524663,10,0.154,FusionIntersection,GES,FusionIntersection,build,50001
1,alarm,GES,0,2,5000,8,-53557.711947,7,0.039,FusionIntersection,GES,FusionIntersection,build,50001
2,alarm,GES,0,3,5000,8,-53557.711947,7,0.031,FusionIntersection,GES,FusionIntersection,build,50001
3,alarm,GES,0,4,5000,8,-53557.711947,7,0.037,FusionIntersection,GES,FusionIntersection,build,50001
4,alarm,GES,0,5,5000,8,-53557.711947,7,0.036,FusionIntersection,GES,FusionIntersection,build,50001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8023,win95pts,pGES,3,1,5000,8,-47436.404946,64,0.014,FusionUnion,,FusionUnion,fusion,50004
8024,win95pts,pGES,3,2,5000,8,-47460.319854,65,0.034,FusionUnion,,FusionUnion,fusion,50004
8025,win95pts,pGES,3,3,5000,8,-47460.319854,65,0.025,FusionUnion,,FusionUnion,fusion,50004
8026,win95pts,pGES,3,4,5000,8,-47460.319854,65,0.035,FusionUnion,,FusionUnion,fusion,50004


### BDeu

In [4]:
# Keep only the rows whose epoch is 'build'
df2 = df.loc[df['epoch'] == 'build']

# Keep the first and the fifth iteration only
df2 = df2.loc[df2['iteration'].isin([1, 5])]

# Mean `bdeu` per algorithm / fusion / refinement configuration and iteration,
# averaged over the dropped dimensions (e.g. client id and sample),
# with one output column per network (bbdd).
df_unstack = df2.drop(
    columns=["sample", "id", "threads", "instances", "epoch", "SMHD", "time(s)"]
)
(
    df_unstack
    .groupby(
        ["bbdd", "algorithm", "fusionClient", "fusionServer", "refinement", "iteration"],
        observed=True,
    )
    .mean()
    .unstack("bbdd")
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,bdeu,bdeu,bdeu,bdeu,bdeu,bdeu,bdeu,bdeu,bdeu,bdeu
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,bbdd,alarm,andes,barley,child,hailfinder,hepar2,insurance,mildew,water,win95pts
algorithm,fusionClient,fusionServer,refinement,iteration,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
GES,FusionIntersection,FusionIntersection,GES,1,-53993.785619,-471382.420224,-265927.284293,-62160.32755,-252158.044952,-164689.532991,-67574.976828,-235357.914167,-65913.33004,-46896.283478
GES,FusionIntersection,FusionIntersection,GES,5,-53653.577956,-471524.811981,-263628.101976,-61935.472062,-252862.896665,-164698.202809,-67830.648523,-235357.914167,-65932.425939,-46936.77733
GES,FusionIntersection,FusionIntersection,,1,-53993.785619,-471382.420224,-265927.284293,-62160.32755,-252158.044952,-164689.532991,-67574.976828,-235357.914167,-65913.33004,-46896.283478
GES,FusionIntersection,FusionIntersection,,5,-53596.153387,-471377.105068,-265790.052812,-62059.52751,-252158.044952,-164689.532991,-67530.324599,-235357.914167,-65913.33004,-46670.635656
GES,FusionIntersection,FusionUnion,GES,1,-53993.785619,-471382.420224,-265927.284293,-62160.32755,-252158.044952,-164689.532991,-67574.976828,-235357.914167,-65913.33004,-46896.283478
GES,FusionIntersection,FusionUnion,GES,5,-53648.982757,-471377.937926,-263924.992633,-61935.472062,-252158.044952,-164687.95242,-67524.632755,-235357.914167,-65867.71386,-46803.571855
GES,FusionIntersection,FusionUnion,,1,-53993.785619,-471382.420224,-265927.284293,-62160.32755,-252158.044952,-164689.532991,-67574.976828,-235357.914167,-65913.33004,-46896.283478
GES,FusionIntersection,FusionUnion,,5,-53605.576683,-471377.937926,-263924.992633,-61935.472062,-252158.044952,-164689.532991,-67543.293585,-235357.914167,-65913.33004,-46803.571855
GES,FusionUnion,FusionIntersection,GES,1,-53993.785619,-471382.420224,-265927.284293,-62160.32755,-252158.044952,-164689.532991,-67574.976828,-235357.914167,-65913.33004,-46896.283478
GES,FusionUnion,FusionIntersection,GES,5,-53671.57051,-471524.811981,-263933.426299,-61935.472062,-252862.896665,-164698.202809,-67830.648523,-235357.914167,-65932.425939,-46967.087562


In [39]:
# Mean `bdeu` per algorithm and iteration, pooled over all fusion/refinement
# configurations, with one output column per network (bbdd).
df_unstack = df2.drop(
    columns=[
        "sample", "id", "threads", "instances", "epoch", "SMHD", "time(s)",
        "fusionClient", "fusionServer", "refinement",
    ]
)
(
    df_unstack
    .groupby(["bbdd", "algorithm", "iteration"], observed=True)
    .mean()
    .unstack("bbdd")
)

Unnamed: 0_level_0,Unnamed: 1_level_0,bdeu,bdeu,bdeu,bdeu,bdeu,bdeu,bdeu,bdeu,bdeu,bdeu
Unnamed: 0_level_1,bbdd,alarm,andes,barley,child,hailfinder,hepar2,insurance,mildew,water,win95pts
algorithm,iteration,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
GES,1,-53993.785619,-471382.420224,-265927.284293,-62160.32755,-252158.044952,-164689.532991,-67574.976828,-235357.914167,-65913.33004,-46896.283478
GES,5,-53688.588764,-471409.213226,-264157.178848,-61958.728871,-252337.690528,-164695.823762,-67650.498646,-235357.914167,-65903.45959,-46879.985591
pGES,1,-53669.747329,-471377.937926,-264117.900969,-62041.499597,-252174.47247,-164689.532991,-67626.908305,-235616.652494,-66773.993382,-46800.386186
pGES,5,-53648.223078,-471372.494713,-263889.756592,-61979.345719,-252184.178104,-164691.729687,-67662.672891,-235551.967913,-65908.551927,-46834.118806


In [40]:
# Mean `bdeu` per iteration and configuration, pooled over every network
# (the `bbdd` column is dropped before grouping).
df_unstack = df2.drop(
    columns=["sample", "id", "threads", "instances", "epoch", "SMHD", "time(s)", "bbdd"]
)
(
    df_unstack
    .groupby(
        ["iteration", "algorithm", "fusionClient", "fusionServer", "refinement"],
        observed=True,
    )
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,bdeu
iteration,algorithm,fusionClient,fusionServer,refinement,Unnamed: 5_level_1
1,GES,FusionIntersection,FusionIntersection,GES,-168605.390014
1,GES,FusionIntersection,FusionIntersection,,-168605.390014
1,GES,FusionIntersection,FusionUnion,GES,-168605.390014
1,GES,FusionIntersection,FusionUnion,,-168605.390014
1,GES,FusionUnion,FusionIntersection,GES,-168605.390014
1,GES,FusionUnion,FusionIntersection,,-168605.390014
1,GES,FusionUnion,FusionUnion,GES,-168605.390014
1,GES,FusionUnion,FusionUnion,,-168605.390014
1,pGES,FusionIntersection,FusionIntersection,GES,-168489.160193
1,pGES,FusionIntersection,FusionIntersection,,-168487.103969


### SMHD

In [41]:
# Mean `SMHD` per algorithm / fusion / refinement configuration and iteration,
# with one output column per network (bbdd).
df_unstack = df2.drop(
    columns=["sample", "id", "threads", "instances", "epoch", "bdeu", "time(s)"]
)
(
    df_unstack
    .groupby(
        ["bbdd", "algorithm", "fusionClient", "fusionServer", "refinement", "iteration"],
        observed=True,
    )
    .mean()
    .unstack("bbdd")
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,SMHD,SMHD,SMHD,SMHD,SMHD,SMHD,SMHD,SMHD,SMHD,SMHD
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,bbdd,alarm,andes,barley,child,hailfinder,hepar2,insurance,mildew,water,win95pts
algorithm,fusionClient,fusionServer,refinement,iteration,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
GES,FusionIntersection,FusionIntersection,GES,1,9.25,148.5,60.5,8.25,34.75,76.25,22.25,41.0,90.0,56.0
GES,FusionIntersection,FusionIntersection,GES,5,7.0,148.0,55.25,0.75,36.0,77.25,22.25,41.0,90.75,51.25
GES,FusionIntersection,FusionIntersection,,1,9.25,148.5,60.5,8.25,34.75,76.25,22.25,41.0,90.0,56.0
GES,FusionIntersection,FusionIntersection,,5,7.0,145.0,58.5,6.25,34.75,76.25,19.75,41.0,90.0,38.5
GES,FusionIntersection,FusionUnion,GES,1,9.25,148.5,60.5,8.25,34.75,76.25,22.25,41.0,90.0,56.0
GES,FusionIntersection,FusionUnion,GES,5,7.25,148.0,55.5,0.75,34.75,75.5,20.75,41.0,86.0,51.5
GES,FusionIntersection,FusionUnion,,1,9.25,148.5,60.5,8.25,34.75,76.25,22.25,41.0,90.0,56.0
GES,FusionIntersection,FusionUnion,,5,7.0,148.0,55.5,0.75,34.75,76.25,21.75,41.0,90.0,51.5
GES,FusionUnion,FusionIntersection,GES,1,9.25,148.5,60.5,8.25,34.75,76.25,22.25,41.0,90.0,56.0
GES,FusionUnion,FusionIntersection,GES,5,7.25,148.0,56.0,0.75,36.0,77.25,22.25,41.0,90.75,52.75


In [42]:
# Mean `SMHD` per algorithm and iteration, pooled over all fusion/refinement
# configurations, with one output column per network (bbdd).
df_unstack = df2.drop(
    columns=[
        "sample", "id", "threads", "instances", "epoch", "bdeu", "time(s)",
        "fusionClient", "fusionServer", "refinement",
    ]
)
(
    df_unstack
    .groupby(["bbdd", "algorithm", "iteration"], observed=True)
    .mean()
    .unstack("bbdd")
)

Unnamed: 0_level_0,Unnamed: 1_level_0,SMHD,SMHD,SMHD,SMHD,SMHD,SMHD,SMHD,SMHD,SMHD,SMHD
Unnamed: 0_level_1,bbdd,alarm,andes,barley,child,hailfinder,hepar2,insurance,mildew,water,win95pts
algorithm,iteration,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
GES,1,9.25,148.5,60.5,8.25,34.75,76.25,22.25,41.0,90.0,56.0
GES,5,7.59375,145.84375,56.0,1.71875,35.5625,76.3125,21.1875,41.0,88.5,49.59375
pGES,1,7.46875,148.0,58.5,3.75,34.5,76.25,25.25,44.0,91.75,49.0
pGES,5,7.15625,145.84375,57.5,2.0625,33.875,76.125,25.125,43.25,88.125,48.75


In [43]:
# Mean `SMHD` per iteration and configuration, pooled over every network
# (the `bbdd` column is dropped before grouping).
df_unstack = df2.drop(
    columns=["sample", "id", "threads", "instances", "epoch", "bdeu", "time(s)", "bbdd"]
)
(
    df_unstack
    .groupby(
        ["iteration", "algorithm", "fusionClient", "fusionServer", "refinement"],
        observed=True,
    )
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,SMHD
iteration,algorithm,fusionClient,fusionServer,refinement,Unnamed: 5_level_1
1,GES,FusionIntersection,FusionIntersection,GES,54.675
1,GES,FusionIntersection,FusionIntersection,,54.675
1,GES,FusionIntersection,FusionUnion,GES,54.675
1,GES,FusionIntersection,FusionUnion,,54.675
1,GES,FusionUnion,FusionIntersection,GES,54.675
1,GES,FusionUnion,FusionIntersection,,54.675
1,GES,FusionUnion,FusionUnion,GES,54.675
1,GES,FusionUnion,FusionUnion,,54.675
1,pGES,FusionIntersection,FusionIntersection,GES,53.85
1,pGES,FusionIntersection,FusionIntersection,,53.825


### Time(s)

In [44]:
# Mean `time(s)` per algorithm / fusion / refinement configuration and
# iteration, with one output column per network (bbdd).
df_unstack = df2.drop(
    columns=["sample", "id", "threads", "instances", "epoch", "SMHD", "bdeu"]
)
(
    df_unstack
    .groupby(
        ["bbdd", "algorithm", "fusionClient", "fusionServer", "refinement", "iteration"],
        observed=True,
    )
    .mean()
    .unstack("bbdd")
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,time(s),time(s),time(s),time(s),time(s),time(s),time(s),time(s),time(s),time(s)
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,bbdd,alarm,andes,barley,child,hailfinder,hepar2,insurance,mildew,water,win95pts
algorithm,fusionClient,fusionServer,refinement,iteration,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
GES,FusionIntersection,FusionIntersection,GES,1,0.16825,15.76825,0.86925,0.02425,0.321,0.44175,0.064,0.7185,0.06325,0.91575
GES,FusionIntersection,FusionIntersection,GES,5,0.03575,0.63575,0.34925,0.01325,0.07825,0.0695,0.01625,0.51675,0.017,0.11925
GES,FusionIntersection,FusionIntersection,,1,0.14125,15.69675,0.85575,0.024,0.39225,0.5275,0.062,0.7595,0.0545,0.919
GES,FusionIntersection,FusionIntersection,,5,0.03925,1.5255,0.29725,0.0155,0.14025,0.20775,0.03,0.5665,0.043,0.167
GES,FusionIntersection,FusionUnion,GES,1,0.14525,17.091,0.904,0.02475,0.3375,0.49275,0.0655,0.72425,0.05825,0.95325
GES,FusionIntersection,FusionUnion,GES,5,0.0405,0.62675,0.27875,0.01575,0.07325,0.07675,0.01125,0.5365,0.0195,0.0995
GES,FusionIntersection,FusionUnion,,1,0.15175,15.407,0.9005,0.0245,0.3265,0.618,0.0675,0.86775,0.0685,0.927
GES,FusionIntersection,FusionUnion,,5,0.03225,0.7145,0.26075,0.01175,0.11525,0.12725,0.02525,0.5575,0.0235,0.1615
GES,FusionUnion,FusionIntersection,GES,1,0.19125,15.821,0.881,0.03025,0.33975,0.54775,0.07525,0.839,0.056,0.91225
GES,FusionUnion,FusionIntersection,GES,5,0.043,0.73725,0.263,0.0125,0.07575,0.076,0.019,0.51875,0.025,0.12275


In [45]:
# Mean `time(s)` per algorithm and iteration, pooled over all fusion/refinement
# configurations, with one output column per network (bbdd).
df_unstack = df2.drop(
    columns=[
        "sample", "id", "threads", "instances", "epoch", "bdeu", "SMHD",
        "fusionClient", "fusionServer", "refinement",
    ]
)
(
    df_unstack
    .groupby(["bbdd", "algorithm", "iteration"], observed=True)
    .mean()
    .unstack("bbdd")
)

Unnamed: 0_level_0,Unnamed: 1_level_0,time(s),time(s),time(s),time(s),time(s),time(s),time(s),time(s),time(s),time(s)
Unnamed: 0_level_1,bbdd,alarm,andes,barley,child,hailfinder,hepar2,insurance,mildew,water,win95pts
algorithm,iteration,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
GES,1,0.150406,15.818812,0.911781,0.025719,0.346,0.531969,0.069781,0.781219,0.060062,0.920875
GES,5,0.038594,0.808406,0.348406,0.013219,0.090656,0.103063,0.019031,0.554906,0.024594,0.129531
pGES,1,0.195437,3.666656,0.832562,0.062437,0.245031,0.316437,0.093406,0.663219,0.12075,0.727031
pGES,5,0.084656,1.928656,0.419156,0.032219,0.188219,0.234563,0.053344,0.519219,0.070625,0.263688
