In [None]:
import re
from textwrap import indent
import pandas as pd
from typing import Literal
from collections import Counter

In [2]:
def raw_to_dataframe(
                        raw: str, 
                        col_names: list[str] = ["pipeline", "method", "config", "mmlu", "hella", "arc", "tqa", "avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu", "batch"],
                        return_cols: list[str]|None = None,
                        numeric_cols: list[str] = ["mmlu", "hella", "arc", "tqa", "avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu", "batch"],
                        ) -> pd.DataFrame:
    """
    Parse the raw (CSV-like/tab/space separated) table text into a pandas DataFrame.
    Uses existing parse_raw_table. Optionally select a subset of columns.
    Coerces numeric-looking fields to numeric.
    """
    # Simple TSV parse with header skip: drop lines before "Base Pretrained Model"
    raw_lines = raw.splitlines()
    start_idx = 0
    for i, ln in enumerate(raw_lines):
        if "Base Pretrained Model" in ln:
            start_idx = i
            break
    lines = [ln for ln in raw_lines[start_idx:] if ln.strip()]
    rows = [ln.split('\t') for ln in lines]
    max_len = max((len(r) for r in rows), default=0)
    for r in rows:
        if len(r) < max_len:
            r.extend([""] * (max_len - len(r)))
    df = pd.DataFrame(rows)

    # Assign column names to a fixed schema
    # Trim or pad columns to match the schema length
    df = df.iloc[:, :len(col_names)]
    if df.shape[1] < len(col_names):
        df = pd.concat([df, pd.DataFrame([[""] * (len(col_names) - df.shape[1])] * len(df))], axis=1)
    df.columns = col_names

    # Convert numeric columns (skip text fields) with cleaning for %, commas, dashes
    for i, c in enumerate(numeric_cols):
        s = (
            df[c]
            .astype(str)
            .str.replace('%', '', regex=False)
            .str.replace(',', '', regex=False)
            .str.strip()
        )
        s = s.replace({'-': None, '--': None, '—': None, '–': None, '': None})

        # Coerce to numeric for comparison
        df[c] = pd.to_numeric(s, errors="coerce")

    if return_cols is not None:
        df = df[return_cols]

    return df

# Example: build a compact dataframe from the provided input_string
# try:
#     demo_df = raw_to_dataframe(input_string, \
#         # return_cols=["pipeline","method","avg_acc","freeze","throughput","tp_gain","mfu"]
#     )
#     display(demo_df)
# except NameError:
#     pass

In [3]:
def df_to_rows(df: pd.DataFrame, columns: list[str]|None = None, blank_nan: bool = True) -> list:
    """
    Convert a DataFrame to a list of row dictionaries.
    - columns: optional list to select/ordering of columns
    - blank_nan: when True, NaN/None are converted to "" in output
    """
    sub = df[columns].copy() if columns is not None else df.copy()
    if blank_nan:
        sub = sub.where(~sub.isna(), "")
    return sub.to_dict(orient="records")

# Example (only runs if demo_df exists)
# try:
#     demo_rows_from_df = df_to_rows(demo_df, \
#         columns=["pipeline","method","avg_acc","freeze","throughput","tp_gain","mfu"])
#     display(demo_rows_from_df[:2])
# except NameError:
#     pass

In [4]:
def style_cell(cell, cellcolor:str=''):
    cell = str(cell)
    if len(cell) == 0:
        cell = "--"
    if cell.endswith('.0'):
        cell = cell[:-2]
    elif cell.endswith('.0}'):
        cell = cell[:-3] + '}'
    return f"\cellcolor{{{cellcolor}}}{cell}" if len(cellcolor) > 0 else cell


def style_method(method, cellcolor:str=''):
    """Return LaTeX method cell with styling"""
    methodstyle = {
        "TimelyFreeze": "\TimelyFreeze{}",
        "TimelyFreeze+APF": "\scriptsize\quad +APF",
        "TimelyFreeze+AutoFreeze": "\scriptsize\quad +AutoFreeze"
    }
    return style_cell(methodstyle[method] if method in methodstyle.keys() else method, cellcolor)

def latex_datarow(r: dict,
                cellcolor:str='',
                columns:list[str] = ["avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu"],
                ):  
    latex = []
    for c in columns:
        latex.append(style_cell(r.get(c, ''), cellcolor))
    return " & ".join(latex) + r" \\"

In [5]:
def latex_pipeline(rows:list[dict], 
                    pipe: Literal["GPipe", "1F1B", "Interleaved 1F1B"],
                    cellcolordict: dict[str,str]={"TimelyFreeze": "gray!15", "TimelyFreeze+APF": "lightgray!10", "TimelyFreeze+AutoFreeze": "lightgray!10"},
                    datarow_cols: list[str] = ["avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu"],
                    ascending: dict[str,bool] = {"avg_acc": True, "acc_gain": True, "freeze": False, "throughput": True, "tp_gain": True, "mfu": True},
                    bold: dict[str,bool] = {"avg_acc": True, "acc_gain": True, "freeze": True, "throughput": True, "tp_gain": True, "mfu": True},
                    underline: dict[str,bool] = {"avg_acc": True, "acc_gain": True, "freeze": True, "throughput": True, "tp_gain": True, "mfu": True},
    )->str:
    """
    Convert rows of a pipeline into LaTeX table code.
    """
    assert rows[0]["pipeline"] == pipe, f"pipeline mismatch: {rows[0]['pipeline']} != {pipe}"

    l_pad, m_pad, r_pad = 30-(len(pipe)+2)//2, len(pipe) + 2, 30-(len(pipe)+3)//2
    latex = [
        "\n" + "%"*(l_pad + m_pad + r_pad),
        "%"*l_pad + " " + pipe + " " + "%"*r_pad,
        "%"*(l_pad + m_pad + r_pad),
    ]

    columns = set(rows[0].keys()) & set(datarow_cols)
    for c in columns:
        ascending_flag, bold_flag, underline_flag = ascending.get(c, True), bold.get(c, True), underline.get(c, True)
        if bold_flag or underline_flag:
            values = {i:r[c] for i, r in enumerate(rows) \
                if (r['method'] != '' and r['method'] != 'No Freezing') \
                        and (isinstance(r[c], (int, float)) or r[c].isnumeric())}
            if len(values) > 1:
                best, second_best = sorted(values.values(), reverse=ascending_flag)[:2]
            is_best = {k:v == best for k,v in values.items()}
            is_second_best = {k:v == second_best for k,v in values.items()}
            print('c:', c, 'values:', values.values(), 'best:', best, 'second_best:', second_best)
            for k,v in values.items():
                if bold_flag and is_best[k]:
                    rows[k][c] = r"\textbf{"+str(v)+"}"
                elif underline_flag and is_second_best[k]:
                    rows[k][c] = r"\underline{"+str(v)+"}"

    for r in rows:
        if r["pipeline"] == pipe:
            pcell = r"\multirow{" + str(len(rows)) + r"}{*}{\makecell[c]{" + pipe.replace(' ', r'\\') + r"}}"
        else:
            pcell = ""
        method = r['method']
        cellcolor = cellcolordict.get(method, '')
        latex.append(f"{pcell} & {style_method(method, cellcolor)} & {latex_datarow(r, cellcolor, datarow_cols)}")            

        # Add a line to separate No Freezing baseline from other methods
        if method == "No Freezing":
            num_cols = latex[-1].count("&")+1
            latex.append(r"\thincline{2-" + str(num_cols) + "}")

    print(f"pipe: {pipe}, latex: {latex}")
    return '\n'.join(latex) + "\n\hline"

In [6]:
def latex_table(rows,
                datarow_cols: list[str] = ["avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu"],
                header:str="",
                footer:str="",
                ):
    """
    Convert parsed rows into LaTeX table code.
    """
    latex = [header, '\hline']
    pipelines = {}
    curr_pipe = None
    for r in rows:
        if r["pipeline"] != '' and r["pipeline"] != curr_pipe:
            print(f"curr_pipe: {curr_pipe}, r: {r}")
            if curr_pipe in ["GPipe", "1F1B", "Interleaved 1F1B"]:
                latex.append(latex_pipeline(pipelines[curr_pipe], curr_pipe, datarow_cols=datarow_cols))
            elif curr_pipe == 'Base Pretrained Model':
                latex.append('\multicolumn{2}{c|}{Base Pretrained Model} & ' + latex_datarow(pipelines[curr_pipe][0], columns=datarow_cols))
                latex.append(r'\hline')
            curr_pipe = r["pipeline"]
            pipelines[curr_pipe] = [r]
        else:
            pipelines[curr_pipe].append(r)
    if curr_pipe in ["GPipe", "1F1B", "Interleaved 1F1B"]:
        latex.append(latex_pipeline(pipelines[curr_pipe], curr_pipe, datarow_cols=datarow_cols) + "\n")
    latex.append(footer)
    print("final latex: ", latex)
    return "\n".join(latex)

In [7]:
def raw_to_latex_table(raw, 
                        col_names: list[str] = ["pipeline", "method", "config", "mmlu", "hella", "arc", "tqa", "avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu", "batch"],
                        numeric_cols: list[str] = ["mmlu", "hella", "arc", "tqa", "avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu", "batch"],
                        columns:list[str] = ["pipeline","method","avg_acc","acc_gain","freeze","throughput","tp_gain","mfu"],
                        datarow_cols: list[str] = ["avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu"],
                        header:str="",
                        footer:str=""
                        ):
    df = raw_to_dataframe(raw, col_names=col_names, numeric_cols=numeric_cols)
    rows = df_to_rows(df, columns=columns)
    return latex_table(rows, datarow_cols=datarow_cols, header=header, footer=footer)

## 1B Acc <-> Thp

In [8]:

def main_result_header(caption:str="Some Fancy Caption.") -> str:
    return r"""
\begin{table*}[t]
\renewcommand{\arraystretch}{1}
\centering
\caption{""" + caption + r"""}
\resizebox{\textwidth}{!}{
\begin{tabular}{cl|ccc|ccc}

%%%%% header %%%%%
\hline
\multirow{2}{*}{\makecell[c]{Pipeline \\ Schedule}} &
\multicolumn{1}{c|}{\multirow{2}{*}{\makecell[c]{Freeze \\ Method}}} &
\multicolumn{3}{c|}{Accuracy Preservation} &
\multicolumn{3}{c}{Time Efficiency} \\
\cline{3-5} \cline{6-8}
& & Avg. Acc.↑ & Acc. Gain↑ & Freeze Ratio↓ & Throughput↑ & Th. Gain↑ & MFU↑ \\

"""

def main_result_footer(label:str="some_fancy_label") -> str:
    return r"""
\end{tabular}}
\label{tab:""" + label.replace(" ", "_") + r"""}
\end{table*}
"""

In [9]:
caption = "Comparison of freezing methods under different pipeline schedules (LLaMA-1B)."
label = "llama1b"
output_file = label + "_table.txt"

input_string = """
"Pipeline 
Schedule"	Freeze Method		Accuracy Preservation							Time Efficiency			
			"MMLU
(5 shots)
 ↑"	"HellaSwag
 ↑"	"ARC-C
(10 shots)
 ↑"	"TruthfulQA
 ↑"	"Average
Acc.↑"	"Accuracy
Gain (%) ↑"	"Avg. Freeze 
Ratio (%) ↓"	"Avg. 
Throughput 
(token/s) ↑"	"Throughput
Gain (%) ↑"	Avg. mfu (%) ↑	"Avg. Batch
Time (s) ↓"
Base Pretrained Model			31.99%	47.75%	36.95%	23.01%	34.93%		-				
GPipe	No Freezing		32.54%	49.28%	37.37%	28.76%	36.99%	2.06%	0.00%	7022 	-	16.20 	4.67 
	APF	th=0.05	32.27%	48.27%	37.20%	24.72%	35.62%	0.69%	78.97%	9466 	34.79 	21.77 	3.46 
	AutoFreeze	p=80%	30.96%	48.32%	36.69%	26.56%	35.63%	0.71%	60.37%	7586 	8.03 	17.45 	4.32 
	TimelyFreeze	mfr=0.8	32.46%	48.22%	36.86%	25.83%	35.84%	0.92%	62.39%	9855 	40.34 	22.80 	3.32 
	TimelyFreeze+APF	mfr=0.8, th=0.05	32.33%	48.16%	37.12%	25.09%	35.68%	0.75%	60.91%	9916 	41.21 	22.90 	3.30 
	TimelyFreeze+AutoFreeze	mfr=0.8, p=80%	32.07%	48.14%	37.12%	25.58%	35.73%	0.80%	61.38%	9806 	39.64 	22.77 	3.34 
1F1B	No Freezing		31.23%	49.88%	37.71%	27.54%	36.59%	1.67%	0.00%	7121 	-	16.43 	4.60 
	APF	th=0.01	32.59%	48.37%	37.03%	25.46%	35.86%	0.94%	68.89%	8004 	12.40 	18.43 	4.09 
	AutoFreeze	p=80%	30.69%	47.80%	37.63%	25.34%	35.37%	0.44%	46.79%	7397 	3.87 	17.03 	4.43 
	TimelyFreeze	mfr=0.8	32.27%	49.06%	37.97%	27.54%	36.71%	1.79%	69.30%	8592 	20.66 	19.75 	3.81 
	TimelyFreeze+APF	mfr=0.8, th=0.01	32.43%	49.28%	38.14%	28.27%	37.03%	2.11%	69.53%	8416 	18.18 	19.38 	3.89 
	TimelyFreeze+AutoFreeze	mfr=0.8, p=80%	31.21%	49.22%	37.54%	26.81%	36.20%	1.27%	69.38%	8695 	22.10 	20.00 	3.77 
Interleaved 1F1B	No Freezing		30.44%	49.38%	38.40%	25.95%	36.04%	1.12%	0.00%	6815 	-	15.71 	4.81 
	APF	th=0.03	32.32%	48.21%	36.86%	24.60%	35.50%	0.57%	75.20%	8359 	22.66 	19.26 	3.92 
	AutoFreeze	p=80%	29.29%	48.76%	37.71%	26.07%	35.46%	0.53%	48.17%	7116 	4.42 	16.35 	4.60 
	TimelyFreeze	mfr=0.8	30.64%	48.41%	37.03%	25.95%	35.51%	0.58%	63.65%	8062 	18.29 	18.57 	4.06 
	TimelyFreeze+APF	mfr=0.8, th=0.03	31.42%	48.20%	37.12%	26.56%	35.83%	0.90%	62.10%	8167 	19.84 	18.78 	4.01 
	TimelyFreeze+AutoFreeze	mfr=0.8, p=80%	29.86%	48.43%	36.86%	25.70%	35.21%	0.29%	62.09%	8120 	19.16 	18.68 	4.04 

"""

latex_str = raw_to_latex_table(input_string,
	col_names=["pipeline", "method", "config", "mmlu", "hella", "arc", "tqa", "avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu", "batch"],
	numeric_cols=["mmlu", "hella", "arc", "tqa", "avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu", "batch"],
	columns=["pipeline","method","avg_acc","acc_gain","freeze","throughput","tp_gain","mfu"],
	datarow_cols=["avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu"],
	header=main_result_header(caption),
	footer=main_result_footer(label),
)
print(latex_str)
print(latex_str, file=open(output_file, "w"))

curr_pipe: None, r: {'pipeline': 'Base Pretrained Model', 'method': '', 'avg_acc': 34.93, 'acc_gain': '', 'freeze': '', 'throughput': '', 'tp_gain': '', 'mfu': ''}
curr_pipe: Base Pretrained Model, r: {'pipeline': 'GPipe', 'method': 'No Freezing', 'avg_acc': 36.99, 'acc_gain': 2.06, 'freeze': 0.0, 'throughput': 7022.0, 'tp_gain': '', 'mfu': 16.2}
curr_pipe: GPipe, r: {'pipeline': '1F1B', 'method': 'No Freezing', 'avg_acc': 36.59, 'acc_gain': 1.67, 'freeze': 0.0, 'throughput': 7121.0, 'tp_gain': '', 'mfu': 16.43}
c: freeze values: dict_values([78.97, 60.37, 62.39, 60.91, 61.38]) best: 60.37 second_best: 60.91
c: acc_gain values: dict_values([0.69, 0.71, 0.92, 0.75, 0.8]) best: 0.92 second_best: 0.8
c: tp_gain values: dict_values([34.79, 8.03, 40.34, 41.21, 39.64]) best: 41.21 second_best: 40.34
c: avg_acc values: dict_values([35.62, 35.63, 35.84, 35.68, 35.73]) best: 35.84 second_best: 35.73
c: mfu values: dict_values([21.77, 17.45, 22.8, 22.9, 22.77]) best: 22.9 second_best: 22.8
c: th

## 1B Benchmark

In [10]:

def benchmark_header(caption:str="Some Fancy Caption.") -> str:
    return r"""
\begin{table*}[t]
\renewcommand{\arraystretch}{1}
\centering
\caption{""" + caption + r"""}\resizebox{0.85\linewidth}{!}{
\begin{tabular}{cl|cccc|c}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%% Header %%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\hline
\multirow{3}{*}{\makecell[c]{Pipeline \\Schedule }} &
\multicolumn{1}{c|}{\multirow{3}{*}{\makecell[c]{Freeze \\Method }}} &
\multicolumn{4}{c|}{Benchmark Scores (\%) ↑} &
\multirow{3}{*}{\makecell[c]{Average \\Accuracy↑ }} \\
\cline{3-6}
& & \makecell[c]{MMLU\\[-5pt]\scriptsize{(5 shots)}} &
\makecell[c]{HellaSwag\\[-5pt]\scriptsize{(0 shots)}} &
\makecell[c]{ARC-C\\[-5pt]\scriptsize{(10 shots)}} &
\makecell[c]{TruthfulQA\\[-5pt]\scriptsize{(0 shots)}} \\

"""

def benchmark_footer(label:str="some_fancy_label") -> str:
    return r"""
\end{tabular}}
\label{tab:""" + label.replace(" ", "_") + r"""}
\end{table*}
"""

In [11]:
# Benchmark score
caption = "Detailed Benchmark Scores under different pipeline schedules (LLaMA-1B)."
label = "llama1b_benchmark"
output_file = label + "_table.txt"


latex_str = raw_to_latex_table(input_string,
	col_names=["pipeline", "method", "config", "mmlu", "hella", "arc", "tqa", "avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu", "batch"],
	numeric_cols=["mmlu", "hella", "arc", "tqa", "avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu", "batch"],
	columns=["pipeline","method","mmlu","hella","arc","tqa","avg_acc"],
	datarow_cols=["mmlu", "hella", "arc", "tqa", "avg_acc"],
	header=benchmark_header(caption),
	footer=benchmark_footer(label),
)
print(latex_str)
print(latex_str, file=open(output_file, "w"))

curr_pipe: None, r: {'pipeline': 'Base Pretrained Model', 'method': '', 'mmlu': 31.99, 'hella': 47.75, 'arc': 36.95, 'tqa': 23.01, 'avg_acc': 34.93}
curr_pipe: Base Pretrained Model, r: {'pipeline': 'GPipe', 'method': 'No Freezing', 'mmlu': 32.54, 'hella': 49.28, 'arc': 37.37, 'tqa': 28.76, 'avg_acc': 36.99}
curr_pipe: GPipe, r: {'pipeline': '1F1B', 'method': 'No Freezing', 'mmlu': 31.23, 'hella': 49.88, 'arc': 37.71, 'tqa': 27.54, 'avg_acc': 36.59}
c: arc values: dict_values([37.2, 36.69, 36.86, 37.12, 37.12]) best: 37.2 second_best: 37.12
c: hella values: dict_values([48.27, 48.32, 48.22, 48.16, 48.14]) best: 48.32 second_best: 48.27
c: mmlu values: dict_values([32.27, 30.96, 32.46, 32.33, 32.07]) best: 32.46 second_best: 32.33
c: avg_acc values: dict_values([35.62, 35.63, 35.84, 35.68, 35.73]) best: 35.84 second_best: 35.73
c: tqa values: dict_values([24.72, 26.56, 25.83, 25.09, 25.58]) best: 26.56 second_best: 25.83
pipe: GPipe, latex: ['\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

In [12]:
caption = "Comparison of freezing methods under different pipeline schedules (LLaMA-8B)."
label = "llama8b"
output_file = label + "_table.txt"

input_string = """
"Pipeline 
Schedule"	Freeze Method		Accuracy Preservation							Time Efficiency			
			"MMLU
(5 shots)
 ↑"	"HellaSwag
 ↑"	"ARC challenge
 ↑"	"TruthfulQA
 ↑"	"Average
Acc.↑"	"Accuracy
Gain (%) ↑"	"Avg. Freeze 
Ratio (%) ↓"	"Avg. 
Throughput 
(token/s) ↑"	"Throughput
Gain (%) ↑"	Avg. mfu (%) ↑	"Avg. Batch
Time (s) ↓"
Base Pretrained Model			63.55%	60.00%	51.28%	28.40%	50.81%		-				
GPipe	No Freezing		64.24%	60.43%	56.06%	39.05%	54.95%	4.14%	0.00%	6261 	-	28.30 	2.62 
	APF	th=0.005	63.69%	61.34%	55.12%	37.82%	54.49%	3.69%	70.53%	8360 	33.51 	38.13 	1.96 
	AutoFreeze	p=80%	64.61%	60.86%	54.86%	37.82%	54.54%	3.73%	81.95%	8052 	28.59 	36.72 	2.03 
	TimelyFreeze	mfr=0.7	63.93%	61.06%	54.44%	36.47%	53.98%	3.17%	38.84%	7412 	18.37 	33.58 	2.21 
	TimelyFreeze+APF	mfr=0.7, th=0.005	64.10%	61.00%	53.67%	37.58%	54.09%	3.28%	39.19%	7432 	18.69 	33.67 	2.20 
	TimelyFreeze+AutoFreeze	mfr=0.7, p=80%	64.20%	61.04%	54.01%	37.58%	54.21%	3.40%	43.27%	8440 	34.80 	38.49 	1.94 
1F1B	No Freezing		64.15%	60.49%	55.55%	39.05%	54.81%	4.00%	0.00%	6262 	-	28.31 	2.62 
	APF	th=0.003	64.25%	61.27%	54.78%	36.47%	54.19%	3.39%	69.92%	8344 	33.24 	38.04 	1.96 
	AutoFreeze	p=80%	64.18%	60.70%	54.10%	37.58%	54.14%	3.33%	78.02%	7925 	26.54 	36.14 	2.07 
	TimelyFreeze	mfr=0.7	64.54%	60.56%	55.72%	39.90%	55.18%	4.37%	38.63%	8133 	29.87 	37.00 	2.01 
	TimelyFreeze+APF	mfr=0.7, th=0.003	64.29%	60.91%	55.20%	40.51%	55.23%	4.42%	37.69%	8201 	30.95 	37.31 	2.00 
	TimelyFreeze+AutoFreeze	mfr=0.7, p=80%	64.37%	60.83%	54.44%	37.33%	54.24%	3.44%	38.36%	8213 	31.14 	37.39 	1.99 
Interleaved 1F1B	No Freezing		64.38%	60.35%	55.63%	38.92%	54.82%	4.01%	0.00%	6088 	-	27.92 	2.69 
	APF	th=0.05	64.04%	61.17%	54.86%	36.35%	54.11%	3.30%	74.88%	8350 	37.15 	38.16 	1.96 
	AutoFreeze	p=80%	64.24%	61.00%	54.18%	35.74%	53.79%	2.98%	79.33%	7494 	23.09 	34.54 	2.19 
	TimelyFreeze	mfr=0.7	64.61%	60.90%	54.61%	36.60%	54.18%	3.37%	45.69%	7796 	28.05 	35.89 	2.10 
	TimelyFreeze+APF	mfr=0.7, th=0.01	64.71%	60.91%	54.44%	37.33%	54.35%	3.54%	44.30%	7738 	27.09 	35.16 	2.12 
	TimelyFreeze+AutoFreeze	mfr=0.7, p=80%	64.91%	60.98%	54.10%	37.33%	54.33%	3.52%	44.66%	7797 	28.07 	35.44 	2.10 

"""

latex_str = raw_to_latex_table(input_string,
	col_names=["pipeline", "method", "config", "mmlu", "hella", "arc", "tqa", "avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu", "batch"],
	numeric_cols=["mmlu", "hella", "arc", "tqa", "avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu", "batch"],
	columns=["pipeline","method","avg_acc","acc_gain","freeze","throughput","tp_gain","mfu"],
	header=main_result_header(caption),
	footer=main_result_footer(label),
)
print(latex_str)
print(latex_str, file=open(output_file, "w"))

curr_pipe: None, r: {'pipeline': 'Base Pretrained Model', 'method': '', 'avg_acc': 50.81, 'acc_gain': '', 'freeze': '', 'throughput': '', 'tp_gain': '', 'mfu': ''}
curr_pipe: Base Pretrained Model, r: {'pipeline': 'GPipe', 'method': 'No Freezing', 'avg_acc': 54.95, 'acc_gain': 4.14, 'freeze': 0.0, 'throughput': 6261.0, 'tp_gain': '', 'mfu': 28.3}
curr_pipe: GPipe, r: {'pipeline': '1F1B', 'method': 'No Freezing', 'avg_acc': 54.81, 'acc_gain': 4.0, 'freeze': 0.0, 'throughput': 6262.0, 'tp_gain': '', 'mfu': 28.31}
c: freeze values: dict_values([70.53, 81.95, 38.84, 39.19, 43.27]) best: 38.84 second_best: 39.19
c: acc_gain values: dict_values([3.69, 3.73, 3.17, 3.28, 3.4]) best: 3.73 second_best: 3.69
c: tp_gain values: dict_values([33.51, 28.59, 18.37, 18.69, 34.8]) best: 34.8 second_best: 33.51
c: avg_acc values: dict_values([54.49, 54.54, 53.98, 54.09, 54.21]) best: 54.54 second_best: 54.49
c: mfu values: dict_values([38.13, 36.72, 33.58, 33.67, 38.49]) best: 38.49 second_best: 38.13
c:

In [13]:
# Benchmark score
caption = "Detailed Benchmark Scores under different pipeline schedules (LLaMA-8B)."
label = "llama8b_benchmark"
output_file = label + "_table.txt"

latex_str = raw_to_latex_table(input_string,
	col_names=["pipeline", "method", "config", "mmlu", "hella", "arc", "tqa", "avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu", "batch"],
	numeric_cols=["mmlu", "hella", "arc", "tqa", "avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu", "batch"],
	columns=["pipeline","method","mmlu","hella","arc","tqa","avg_acc"],
	datarow_cols=["mmlu", "hella", "arc", "tqa", "avg_acc"],
	header=benchmark_header(caption),
	footer=benchmark_footer(label),
)
print(latex_str)
print(latex_str, file=open(output_file, "w"))

curr_pipe: None, r: {'pipeline': 'Base Pretrained Model', 'method': '', 'mmlu': 63.55, 'hella': 60.0, 'arc': 51.28, 'tqa': 28.4, 'avg_acc': 50.81}
curr_pipe: Base Pretrained Model, r: {'pipeline': 'GPipe', 'method': 'No Freezing', 'mmlu': 64.24, 'hella': 60.43, 'arc': 56.06, 'tqa': 39.05, 'avg_acc': 54.95}
curr_pipe: GPipe, r: {'pipeline': '1F1B', 'method': 'No Freezing', 'mmlu': 64.15, 'hella': 60.49, 'arc': 55.55, 'tqa': 39.05, 'avg_acc': 54.81}
c: arc values: dict_values([55.12, 54.86, 54.44, 53.67, 54.01]) best: 55.12 second_best: 54.86
c: hella values: dict_values([61.34, 60.86, 61.06, 61.0, 61.04]) best: 61.34 second_best: 61.06
c: mmlu values: dict_values([63.69, 64.61, 63.93, 64.1, 64.2]) best: 64.61 second_best: 64.2
c: avg_acc values: dict_values([54.49, 54.54, 53.98, 54.09, 54.21]) best: 54.54 second_best: 54.49
c: tqa values: dict_values([37.82, 37.82, 36.47, 37.58, 37.58]) best: 37.82 second_best: 37.82
pipe: GPipe, latex: ['\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

In [16]:
caption = "Comparison of freezing methods under different pipeline schedules (LLaMA-1B)."
label = "llama1b"
output_file = label + "_table_1227.txt"

input_string = """
"Pipeline 
Schedule"	Freeze Method		Accuracy Preservation							Time Efficiency			
			"MMLU
(5 shots)
 ↑"	"HellaSwag
 ↑"	"ARC-C
(10 shots)
 ↑"	"TruthfulQA
 ↑"	"Average
Acc.↑"	"Accuracy
Gain (%p) ↑"	"Avg. Freeze 
Ratio (%) ↓"	"Avg. 
Throughput 
(token/s) ↑"	"Throughput
Gain (%) ↑"	Avg. mfu (%) ↑	"Avg. Batch
Time (s) ↓"
Base Pretrained Model			31.99%	47.75%	36.95%	23.01%	34.93%		-				
GPipe	No Freezing		33.07%	49.37%	37.54%	28.40%	37.10%	36.35%	0.00%	6937.35 	-	16.00 	4.72 
	APF	th=0.01	32.79%	48.53%	37.03%	25.21%	35.89%	35.14%	70.35%	8159.60 	17.62 	18.75 	4.02 
	AutoFreeze	p=80%	30.64%	48.80%	38.05%	26.56%	36.01%	35.26%	55.93%	7679.16 	10.69 	17.66 	4.27 
	TimelyFreeze	mfr=0.8	32.13%	49.33%	37.97%	28.27%	36.93%	36.18%	39.86%	8819.64 	27.13 	20.27 	3.72 
	TimelyFreeze+APF	mfr=0.8, th=0.01	31.26%	48.83%	37.37%	26.56%	36.01%	35.26%	40.50%	8818.68 	27.12 	20.31 	3.72 
	TimelyFreeze+AutoFreeze	mfr=0.8, p=80%	30.55%	48.97%	37.46%	26.56%	35.89%	35.14%	36.95%	8799.47 	26.84 	20.27 	3.72 
1F1B	No Freezing		31.51%	49.58%	37.71%	25.95%	36.19%	35.44%	0.00%	7140.05 	-	16.43 	4.59 
	APF	th=0.01	32.52%	48.38%	36.86%	25.09%	35.71%	34.96%	70.03%	8217.45 	15.09 	18.90 	3.99 
	AutoFreeze	p=80%	28.65%	42.07%	37.54%	26.81%	33.77%	33.02%	50.32%	7605.43 	6.52 	17.48 	4.31 
	TimelyFreeze	mfr=0.8	29.80%	49.04%	37.03%	26.56%	35.61%	34.86%	31.14%	9224.38 	29.19 	21.24 	3.55 
	TimelyFreeze+APF	mfr=0.8, th=0.01	28.47%	49.08%	36.77%	26.44%	35.19%	34.44%	31.07%	9146.56 	28.10 	21.06 	3.58 
	TimelyFreeze+AutoFreeze	mfr=0.8, p=80%	26.93%	48.97%	37.46%	25.95%	34.83%	34.08%	31.06%	9176.37 	28.52 	21.15 	3.57 
Interleaved 1F1B	No Freezing		30.64%	49.71%	37.80%	27.05%	36.30%	35.55%	0.00%	6776.70 	-	15.63 	4.84 
	APF	th=0.01	32.64%	48.40%	36.95%	25.21%	35.80%	35.05%	69.05%	7442.39 	9.82 	17.13 	4.40 
	AutoFreeze	p=80%	28.80%	42.12%	37.71%	26.68%	33.83%	33.08%	39.18%	6931.66 	2.29 	15.98 	4.73 
	TimelyFreeze	mfr=0.8	27.13%	48.93%	38.05%	25.83%	34.99%	34.24%	36.63%	8256.26 	21.83 	19.00 	3.97 
	TimelyFreeze+APF	mfr=0.8, th=0.01	28.36%	48.90%	37.88%	25.46%	35.15%	34.40%	38.56%	8138.26 	20.09 	18.73 	4.03 
	TimelyFreeze+AutoFreeze	mfr=0.8, p=80%	30.68%	49.36%	37.88%	26.07%	36.00%	35.25%	38.63%	8245.69 	21.68 	18.98 	3.97 
ZeroBubbleZBV	No Freezing		31.31%	49.79%	38.23%	27.17%	36.63%	35.88%	0.00%	7475.52 	-	17.16 	4.38 
	APF	th=0.01	32.35%	48.20%	37.12%	24.60%	35.57%	34.82%	65.51%	9638.23 	28.93 	22.12 	3.40 
	AutoFreeze	p=80%	28.95%	47.35%	37.63%	26.56%	35.12%	34.37%	26.56%	7928.64 	6.06 	18.17 	4.13 
	TimelyFreeze	mfr=0.8	31.98%	49.38%	38.65%	27.29%	36.83%	36.08%	40.28%	9705.36 	29.83 	22.26 	3.38 
	TimelyFreeze+APF	mfr=0.8, th=0.01	29.49%	49.09%	38.91%	25.21%	35.68%	34.93%	40.15%	9542.33 	27.65 	21.92 	3.43 
	TimelyFreeze+AutoFreeze	mfr=0.8, p=80%	28.26%	48.75%	38.31%	25.46%	35.20%	34.45%	40.22%	9663.72 	29.27 	22.19 	3.39 
 
"""

latex_str = raw_to_latex_table(input_string,
	col_names=["pipeline", "method", "config", "mmlu", "hella", "arc", "tqa", "avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu", "batch"],
	numeric_cols=["mmlu", "hella", "arc", "tqa", "avg_acc", "acc_gain", "freeze", "throughput", "tp_gain", "mfu", "batch"],
	columns=["pipeline","method","avg_acc","acc_gain","freeze","throughput","tp_gain","mfu"],
	header=main_result_header(caption),
	footer=main_result_footer(label),
)
print(latex_str)
print(latex_str, file=open(output_file, "w"))

curr_pipe: None, r: {'pipeline': 'Base Pretrained Model', 'method': '', 'avg_acc': 34.93, 'acc_gain': '', 'freeze': '', 'throughput': '', 'tp_gain': '', 'mfu': ''}
curr_pipe: Base Pretrained Model, r: {'pipeline': 'GPipe', 'method': 'No Freezing', 'avg_acc': 37.1, 'acc_gain': 36.35, 'freeze': 0.0, 'throughput': 6937.35, 'tp_gain': '', 'mfu': 16.0}
curr_pipe: GPipe, r: {'pipeline': '1F1B', 'method': 'No Freezing', 'avg_acc': 36.19, 'acc_gain': 35.44, 'freeze': 0.0, 'throughput': 7140.05, 'tp_gain': '', 'mfu': 16.43}
c: freeze values: dict_values([70.35, 55.93, 39.86, 40.5, 36.95]) best: 36.95 second_best: 39.86
c: acc_gain values: dict_values([35.14, 35.26, 36.18, 35.26, 35.14]) best: 36.18 second_best: 35.26
c: tp_gain values: dict_values([17.62, 10.69, 27.13, 27.12, 26.84]) best: 27.13 second_best: 27.12
c: avg_acc values: dict_values([35.89, 36.01, 36.93, 36.01, 35.89]) best: 36.93 second_best: 36.01
c: mfu values: dict_values([18.75, 17.66, 20.27, 20.31, 20.27]) best: 20.31 second_b