# Additional scripts to generate figures and tables


In [1]:
import pandas as pd
from matplotlib import pyplot as plt

_algos = ['ensemble', 'tree', 'forest', 'neighbors']
_bases = ['chess', 'rbf', 'sea', 'squares', 'poker', 'weather']
_metrics = ['Accuracy', 'Precision', 'Recall', 'F1']


The script below iterate bases, metrics, and algorithms. After all, it produces the figures in pdf format.

In [2]:

for b in _bases:
    fig, axs = plt.subplots(4, 1)
    fig.tight_layout(h_pad=None, w_pad=None, rect=None)
    fig.set_figheight(7)
    fig.set_figwidth(12)
    fig.subplots_adjust(hspace=0)
    fig.legend(labels=_algos)
    df = pd.read_csv(f'results/{b}.csv')
    for i, ax in enumerate(fig.axes):
        for a in _algos:
            ax.plot('instance', _metrics[i], data=df[df.model == a])
            ax.set
            ax.set_ylim(0, 1)
            ax.set_xlim(0, df['instance'].max())
            ax.set_yticklabels(['', 0.2, 0.4, 0.6, 0.8])
            ax.set_ylabel(f'{_metrics[i].upper()}')
            if i < 3:
                ax.set_xticklabels('')
            else:
                ax.legend(_algos, loc='lower right')
    fig.savefig(f'results/images/{b}.pdf')
    fig.clf()

  ax.set_yticklabels(['', 0.2, 0.4, 0.6, 0.8])
  ax.set_yticklabels(['', 0.2, 0.4, 0.6, 0.8])
  ax.set_yticklabels(['', 0.2, 0.4, 0.6, 0.8])
  ax.set_yticklabels(['', 0.2, 0.4, 0.6, 0.8])
  ax.set_yticklabels(['', 0.2, 0.4, 0.6, 0.8])
  ax.set_yticklabels(['', 0.2, 0.4, 0.6, 0.8])


<Figure size 1200x700 with 0 Axes>

<Figure size 1200x700 with 0 Axes>

<Figure size 1200x700 with 0 Axes>

<Figure size 1200x700 with 0 Axes>

<Figure size 1200x700 with 0 Axes>

<Figure size 1200x700 with 0 Axes>

This is the first version script to generate a result table. Moreover it's difficult to compare

In [5]:
dictii = {}
for a in _algos:
    dictii.update({a: {}})
    for m in _metrics:
        dictii[a].update({m: {}})

for b in _bases:
    df = pd.read_csv(f'results/{b}.csv')
    for a in _algos:
        for m in _metrics:
            r = df[df.model == a][m].mean()
            dictii[a][m].update({b: r})

# dictii

table = []
for a in _algos:
    for m in _metrics:
        row = [a, m]
        for b in _bases:
            r = dictii[a][m][b]
            row.append(r)
        table.append(row)

columns = ['Classifiers', 'Results'] + _bases
df = pd.DataFrame(table, columns=columns)
df2 = df.set_index(['Classifiers', 'Results'], append=True).swaplevel(0,1).droplevel(1)
# df2


print(df2.to_latex(float_format="{:0.4f}".format))

\begin{tabular}{llrrrrrr}
\toprule
 &  & chess & rbf & sea & squares & poker & weather \\
Classifiers & Results &  &  &  &  &  &  \\
\midrule
\multirow[t]{4}{*}{ensemble} & Accuracy & 0.9488 & 0.5743 & 0.8752 & NaN & 0.7816 & 0.7759 \\
 & Precision & 0.9348 & 0.5803 & 0.8712 & NaN & 0.7904 & 0.6699 \\
 & Recall & 0.9293 & 0.4661 & 0.7743 & NaN & 0.7086 & 0.5123 \\
 & F1 & 0.9319 & 0.5168 & 0.8198 & NaN & 0.7469 & 0.5792 \\
\cline{1-8}
\multirow[t]{4}{*}{tree} & Accuracy & 0.7655 & 0.4560 & 0.8457 & NaN & 0.6295 & 0.7359 \\
 & Precision & 0.8040 & 0.4367 & 0.8474 & NaN & 0.7016 & 0.5840 \\
 & Recall & 0.7426 & 0.3452 & 0.7075 & NaN & 0.4127 & 0.4493 \\
 & F1 & 0.7703 & 0.3853 & 0.7708 & NaN & 0.5175 & 0.5020 \\
\cline{1-8}
\multirow[t]{4}{*}{forest} & Accuracy & 0.7779 & 0.6498 & 0.8753 & NaN & 0.7928 & 0.7751 \\
 & Precision & 0.7609 & 0.6860 & 0.8619 & NaN & 0.7866 & 0.6927 \\
 & Recall & 0.7696 & 0.4888 & 0.7865 & NaN & 0.7399 & 0.4525 \\
 & F1 & 0.7652 & 0.5699 & 0.8224 & NaN & 0.76

The second version is more efficient and more understandable than first one.

In [57]:
import pandas as pd

_algos = ['ensemble', 'tree', 'forest', 'neighbors']
_bases = ['chess', 'rbf', 'sea', 'squares', 'poker', 'weather']
_metrics = ['Accuracy', 'Precision', 'Recall', 'F1']

table = []
for b in _bases:
    df = pd.read_csv(f'results/{b}.csv')
    for m in _metrics:
        row = [b, m]
        for a in _algos:
            r = df[df.model == a][m].mean()
            row.append(r)
        table.append(row)
columns = ['Bases', 'Results'] + _algos
df = pd.DataFrame(table, columns=columns)
df2 = df.set_index(['Bases', 'Results'], append=True).swaplevel(0,1).droplevel(1)
print(df2.to_latex(float_format="{:0.4f}".format))

\begin{tabular}{llrrrr}
\toprule
 &  & ensemble & tree & forest & neighbors \\
Bases & Results &  &  &  &  \\
\midrule
\multirow[t]{4}{*}{chess} & Accuracy & 0.9488 & 0.7655 & 0.7779 & 0.9345 \\
 & Precision & 0.9348 & 0.8040 & 0.7609 & 0.9243 \\
 & Recall & 0.9293 & 0.7426 & 0.7696 & 0.9283 \\
 & F1 & 0.9319 & 0.7703 & 0.7652 & 0.9263 \\
\cline{1-6}
\multirow[t]{4}{*}{rbf} & Accuracy & 0.5743 & 0.4560 & 0.6498 & 0.8438 \\
 & Precision & 0.5803 & 0.4367 & 0.6860 & 0.8406 \\
 & Recall & 0.4661 & 0.3452 & 0.4888 & 0.7552 \\
 & F1 & 0.5168 & 0.3853 & 0.5699 & 0.7955 \\
\cline{1-6}
\multirow[t]{4}{*}{sea} & Accuracy & 0.8752 & 0.8457 & 0.8753 & 0.8873 \\
 & Precision & 0.8712 & 0.8474 & 0.8619 & 0.8814 \\
 & Recall & 0.7743 & 0.7075 & 0.7865 & 0.8003 \\
 & F1 & 0.8198 & 0.7708 & 0.8224 & 0.8388 \\
\cline{1-6}
\multirow[t]{4}{*}{squares} & Accuracy & 0.8194 & 0.4058 & 0.5524 & 0.9728 \\
 & Precision & 0.7210 & 0.3994 & 0.4363 & 0.9637 \\
 & Recall & 0.8076 & 0.2644 & 0.4303 & 0.9631 \\
 & F

In [39]:
import pandas as pd

df1 = pd.read_csv('results/squares_others.csv')
df2 = pd.read_csv('results/squares_tree.csv')
df3 = pd.concat([df1,df2], ignore_index=True)
df3.sort_values(by='instance', ignore_index=True, inplace=True)
df3.to_csv('results/squares.csv', index=False)
