In [2]:
import matplotlib
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
import os
import seaborn as sns
import copy

from itertools import combinations

# Record the versions of the main libraries this notebook was executed with.
for label, module in (("Matplotlib", matplotlib), ("Pandas", pd), ("seaborn", sns)):
    print(f"{label} version: {module.__version__}")

Matplotlib version: 3.5.1
Pandas version: 1.3.4
seaborn version: 0.11.2


# LaTeX Tables

In [13]:
# Read data and rename models
data = pd.read_excel(os.path.join(os.getcwd(), "AllResults.xlsx"))
data = data.rename(columns={"Acc": "RST-M", "Gini": "RST-G", "mcart": "CART-M", "benders": "BendersOCT",
                           "flow": "FlowOCT", "binoct": "BinOCT"})

# HybridRST takes the RST-M score, except for depths above 5 where it takes RST-G.
data["HybridRST"] = data["RST-M"]
data.loc[data["depth"] > 5, "HybridRST"] = data.loc[data["depth"] > 5, "RST-G"]

model_list = ["FlowOCT", "BinOCT", "BendersOCT", "RST-M", "RST-G", "HybridRST", 'CART']
metrics = ["Training Accuracy", "Test Accuracy"]
oct_models = ["BendersOCT", "BinOCT", "FlowOCT"]

# Keep only the accuracy rows. The explicit .copy() makes `df` an independent
# frame, so the column assignments below do not raise SettingWithCopyWarning.
df = data[["data", "depth", "fold", "metric", *model_list]]
df = df.loc[df["metric"].isin(metrics)].copy()

# Forward-fill missing OCT results, first within each (data, fold, metric)
# group and then within each (data, metric) group.
# NOTE(review): this relies on the row order of the spreadsheet (presumably
# increasing depth inside each group) -- confirm against AllResults.xlsx.
for group_keys in (["data", "fold", "metric"], ["data", "metric"]):
    df[oct_models] = df.groupby(group_keys)[oct_models].ffill()

# Scale every model column by 100 and round the OCT columns to one decimal.
df[model_list] = df[model_list].multiply(100)
df[oct_models] = df[oct_models].astype(float).round(1)


In [10]:
def bold_extreme_values(data, data_max=-1):
    """Wrap ``data`` in a LaTeX ``\\bfseries`` prefix when it equals ``data_max``.

    Parameters
    ----------
    data : value of one table cell.
    data_max : value to compare against; defaults to ``-1``.

    Returns
    -------
    The string ``"\\bfseries <data>"`` when ``data == data_max``; otherwise
    ``data`` itself, unchanged and not converted to a string.
    """
    return f"\\bfseries {data}" if data == data_max else data

In [14]:
# Display the prepared accuracy frame (the notebook elides the middle rows).
df

Unnamed: 0,data,depth,fold,metric,FlowOCT,BinOCT,BendersOCT,RST-M,RST-G,HybridRST,CART
0,adult,2,1,Test Accuracy,,,76.1,80.4,79.1,80.4,79.1
1,adult,3,1,Test Accuracy,,,76.1,81.7,81.5,81.7,81.5
2,adult,4,1,Test Accuracy,,,76.1,82.5,81.7,82.5,81.7
3,adult,5,1,Test Accuracy,,,76.1,82.6,81.8,82.6,82.3
4,adult,6,1,Test Accuracy,,,76.1,82.6,82.7,82.7,82.5
...,...,...,...,...,...,...,...,...,...,...,...
2655,wine,4,10,Training Accuracy,78.9,78.9,78.9,77.6,76.4,77.6,75.8
2656,wine,5,10,Training Accuracy,87.0,82.6,87.0,83.2,83.2,83.2,84.5
2657,wine,6,10,Training Accuracy,93.2,72.7,93.2,88.2,87.6,87.6,90.7
2658,wine,7,10,Training Accuracy,97.5,93.2,95.7,92.6,91.3,91.3,94.4


## LaTeX Tables: Average over 10 Folds and All Depths
### The highest value in each row is set in bold

In [15]:
# One LaTeX table per metric: per-dataset mean accuracy over all folds and
# depths, with the best value(s) in each row set in bold.
title = {"Training Accuracy": "In-Sample Accuracy", "Test Accuracy": "Out-of-Sample Accuracy"}
table_columns = ["BinOCT", "FlowOCT", "BendersOCT", "CART", "RST-M", "RST-G", "HybridRST"]

for metric in metrics:
    metric_str = metric.replace(" ", "_").lower()
    metric_title = title[metric]
    df1 = (
        df.loc[df["metric"] == metric]
        .groupby("data", as_index=False)
        .agg(dict.fromkeys(table_columns, "mean"))
        .round(1)
    )
    df1 = df1[["data", *table_columns]]

    # as_index=False leaves a 0..n-1 index, so the label `idx` yielded by
    # iterrows() is also the row position expected by .iloc.
    for idx, col in df1.iterrows():
        scores = pd.Series(list(col.values[1:]))
        row_best = scores.max()  # computed once per row; max() skips NaN
        bolded = scores.apply(
            lambda value: bold_extreme_values(value, data_max=row_best))
        # Assign through .iloc rather than writing into `col.values`, which
        # only works while `.values` happens to be a writable view.
        col.iloc[1:] = bolded.values
        df1.iloc[idx] = col

    # escape=False keeps the backslash of "\bfseries" intact.
    a = df1.to_latex(index=False, escape=False)
    # Wrap the tabular in a table float. The wrapper is inserted in front of
    # the first "\begin{tabular}" instead of slicing a fixed-width column spec
    # out of the string, so it keeps working if the column count changes.
    # \caption comes before \label so that \ref resolves to the table number.
    a = a.replace(
        "\\begin{tabular}",
        f"\\begin{{table}}\n\\caption{{Avg. {metric_title} for 10-Fold}}\n\\label{{fig:metric={metric_str}}}\n\\begin{{tabular}}",
        1)
    a = a.replace("\\end{tabular}\n",
              "\\end{tabular}\n\\end{table}\n")
    print(a)

\begin{table}
\label{fig:metric=training_accuracy}
\caption{Avg. In-Sample Accuracy for 10-Fold}
\begin{tabular}{lrllllll}
\toprule
                   data &  BinOCT &        FlowOCT &     BendersOCT &           CART &          RST-M &          RST-G &      HybridRST \\
\midrule
                  adult &     NaN &            NaN &           76.3 &           82.3 & \bfseries 83.0 &           82.4 &           82.8 \\
       agaricus-lepiota &     NaN &            NaN &           96.9 &           99.0 & \bfseries 99.4 & \bfseries 99.4 & \bfseries 99.4 \\
          balance-scale &    71.1 &           78.1 &           78.5 &           78.2 &           80.9 &           79.7 & \bfseries 81.0 \\
banknote-authentication &    75.7 &           86.8 &           88.7 &           86.1 & \bfseries 89.6 &           89.5 &           89.5 \\
         car-evaluation &    75.7 &            NaN &           82.8 &           86.2 &           87.3 &           87.1 & \bfseries 87.4 \\
               diabetes &

## LaTeX Tables: Average over 10 Folds for Each Depth
### The highest value in each row is set in bold

In [16]:
# One LaTeX table per (depth, metric): per-dataset mean accuracy over the
# folds at that depth, with the best value(s) in each row set in bold.
title = {"Training Accuracy": "In-Sample Accuracy", "Test Accuracy": "Out-of-Sample Accuracy"}
table_columns = ["BinOCT", "FlowOCT", "BendersOCT", "CART", "RST-M", "RST-G", "HybridRST"]

for depth in range(2, 9):
    for metric in metrics:
        metric_str = metric.replace(" ", "_").lower()
        metric_title = title[metric]
        df1 = (
            df.loc[(df["depth"] == depth) & (df["metric"] == metric)]
            .groupby("data", as_index=False)
            .agg(dict.fromkeys(table_columns, "mean"))
            .round(1)
        )
        df1 = df1[["data", *table_columns]]

        # as_index=False leaves a 0..n-1 index, so the label `idx` yielded by
        # iterrows() is also the row position expected by .iloc.
        for idx, col in df1.iterrows():
            scores = pd.Series(list(col.values[1:]))
            row_best = scores.max()  # computed once per row; max() skips NaN
            bolded = scores.apply(
                lambda value: bold_extreme_values(value, data_max=row_best))
            # Assign through .iloc rather than writing into `col.values`,
            # which only works while `.values` happens to be a writable view.
            col.iloc[1:] = bolded.values
            df1.iloc[idx] = col

        # escape=False keeps the backslash of "\bfseries" intact.
        a = df1.to_latex(index=False, escape=False)
        # Wrap the tabular in a table float. The wrapper is inserted in front
        # of the first "\begin{tabular}" instead of slicing a fixed-width
        # column spec out of the string, so it survives column-count changes.
        # \caption comes before \label so that \ref resolves to the table number.
        a = a.replace(
            "\\begin{tabular}",
            f"\\begin{{table}}\n\\caption{{Avg. {metric_title} of Depth {depth} for 10-Fold}}\n\\label{{fig:depth={depth}_metric={metric_str}}}\n\\begin{{tabular}}",
            1)
        a = a.replace("\\end{tabular}\n",
                  "\\end{tabular}\n\\end{table}\n")
        print(a)

\begin{table}
\label{fig:depth=2_metric=training_accuracy}
\caption{Avg. In-Sample Accuracy of Depth 2 for 10-Fold}
\begin{tabular}{llllllll}
\toprule
                   data &         BinOCT &        FlowOCT &     BendersOCT &           CART &          RST-M &          RST-G &      HybridRST \\
\midrule
                  adult &            NaN &            NaN &           76.8 &           79.1 & \bfseries 80.9 &           79.1 & \bfseries 80.9 \\
       agaricus-lepiota &            NaN &            NaN & \bfseries 96.9 &           95.4 & \bfseries 96.9 & \bfseries 96.9 & \bfseries 96.9 \\
          balance-scale & \bfseries 68.7 & \bfseries 68.7 & \bfseries 68.7 &           68.4 & \bfseries 68.7 & \bfseries 68.7 & \bfseries 68.7 \\
banknote-authentication & \bfseries 73.9 & \bfseries 73.9 & \bfseries 73.9 &           72.3 & \bfseries 73.9 & \bfseries 73.9 & \bfseries 73.9 \\
         car-evaluation & \bfseries 77.8 &            NaN & \bfseries 77.8 & \bfseries 77.8 & \bfseries 77.8 &


\begin{table}
\label{fig:depth=6_metric=test_accuracy}
\caption{Avg. Out-of-Sample Accuracy of Depth 6 for 10-Fold}
\begin{tabular}{llllllll}
\toprule
                   data &         BinOCT &         FlowOCT &      BendersOCT &            CART &           RST-M &           RST-G &       HybridRST \\
\midrule
                  adult &            NaN &             NaN &            76.3 &            82.8 &  \bfseries 83.0 &            82.8 &            82.8 \\
       agaricus-lepiota &            NaN &             NaN &            91.3 & \bfseries 100.0 & \bfseries 100.0 & \bfseries 100.0 & \bfseries 100.0 \\
          balance-scale &           63.9 &            73.8 &  \bfseries 75.7 &            73.0 &            73.1 &            73.1 &            73.1 \\
banknote-authentication &           72.6 &            91.3 &            94.0 &            89.1 &  \bfseries 95.0 &            93.1 &            93.1 \\
         car-evaluation &           71.2 &             NaN &            82.9 & 