In [84]:
from figure_utils import save_table
import pandas as pd
# https://docs.google.com/spreadsheets/d/1irZHhFP2NfIXyPqcMDqKStY_DY6nldomHGwXuCSBl20/edit#gid=1226257673

In [86]:
# Load the tab-separated feature table, then order its rows by dimension.
df = pd.read_csv(
    "tables/feature-engineering/table.tsv",
    sep="\t",
)
df = df.sort_values(by="Dimension")

def col_to_math(col):
    """Wrap each value of a column in ``$...$`` so LaTeX typesets it as inline math.

    Missing values (NaN/None) are mapped to an empty string rather than being
    formatted into the literal ``$nan$``, so no post-hoc clean-up of the
    rendered table is needed for them.

    Parameters
    ----------
    col : pandas.Series
        Column whose values should be rendered in math mode.

    Returns
    -------
    pandas.Series
        One string per input value: ``"$<value>$"``, or ``""`` when missing.
    """
    return col.apply(lambda x: "" if pd.isna(x) else f"${x}$")
# Rows without an explicit type get the default code "N".
df['Type'] = df['Type'].fillna("N")
# Symbols and formulas are typeset as inline math.
df['Symbol'] = col_to_math(df['Symbol'])
df['Formula'] = col_to_math(df['Formula'])

# Every column is moved into the index before rendering, so to_latex lays the
# table out from the (Dimension, Feature, ...) MultiIndex.
# LaTeX snippets are raw strings: sequences such as "\l", "\m" and "\c" are
# invalid escapes in ordinary literals and trigger a SyntaxWarning on
# Python >= 3.12. The runtime text is unchanged.
tab = df.sort_values(["Dimension", "Feature"]).set_index(list(df.columns)).to_latex(
    index=True,
    escape=False,
    column_format=r"lp{0.23\linewidth}p{0.10\linewidth}>{\footnotesize}p{0.15\linewidth}p{0.08\linewidth}l",
    na_rep="",
)
tab = (
    # Drop placeholders produced when a missing value was wrapped in "$...$".
    tab.replace("$$nan$$", "")
    .replace("$nan$", "")
    # Double rule under the header row.
    .replace(r"\midrule", r"\midrule\midrule")
    # Strip the partial \cline separators emitted between index groups.
    .replace(r"\cline{1-6} \cline{2-6} \cline{3-6} \cline{4-6} \cline{5-6}", "")
    # The trailing newline must stay a real newline, hence the concatenation.
    .replace(r"\cline{2-6} \cline{3-6} \cline{4-6} \cline{5-6}" + "\n", "")
)
save_table(tab, "chapters/05_cost_estimation/auto-generated/feature-table.tex")

\begin{tabular}{lp{0.23\linewidth}p{0.10\linewidth}>{\footnotesize}p{0.15\linewidth}p{0.08\linewidth}l}
\toprule
Dimension & Feature & Symbol & Formula & Type & Notes \\
\midrule\midrule
\multirow[t]{12}{*}{Data} & Complexity  & $O_M$, $O_F$ &  & N &  \\
 & Complexity ratio &  & $\frac{O_M}{ O_F}$ & N &  \\
 & Dataset size (rows, columns) & $r_T, c_T$ &  & N &  \\
 & Feature ratio & $\rho$ & $\frac{n_S}{\sum_{k=1}^p n_k} $ & N &  \\
 & Join type & $j_t$ &  & C &  \\
 & Selectivity & $\sigma$ & $\frac{\sum_{k=1}^{n}r_{S_k}}{r_T}$ & N &  \\
 & Sparsity & $e_T$ & $\frac{nnz(T)}{r_T\times c_T}$ & N &  \\
 & Sparsity ratio &  & $\frac{e_T}{e_S}$ & N &  \\
 & Tuple ratio & $\tau$ & $\frac{\sum_{k=1}^p d_k}{d_S}$ & N &  \\
 & \# Base tables & $n$ &  & N &  \\
 & \# Non-zero values & $nnz(T)$ & $nnz(S) = \sum_{k=1}^{n}nnz(S_k)$ & N &  \\
 & \# Sparse base tables ($e < 0.05$) & $q$ & $|\{S_k \in S| e_{S_k} < 0.05\}|$ & N & From \cite{MorpheusFI} \\

\multirow[t]{9}{*}{Hardware} & Arithmetic int