In [17]:
import os
import json

# --- Experiment selection: edit these values to point at a different run ---
evaluation_llm_dir, evaluation_rouge_dir = "evaluations_llm", "evaluations_rouge"
exp_name = "seq-1B"
# Kept as a string: the table generators join it into the results path
# as a directory name.
communication_rounds = "1"

# --- Domain abbreviation -> category key, shared by both table generators ---
# Insertion order matters: the generators enumerate this mapping, so the
# position of an entry is both the client index used in the result file name
# and the column position in the rendered table.
domains = dict(
    BS='brainstorming',
    CL='classification',
    CQ='closed_qa',
    CW='creative_writing',
    GQ='general_qa',
    IE='information_extraction',
    OQ='open_qa',
    SM='summarization',
)

In [18]:
def compute_statistics(results):
    """Compute per-row averages, per-column averages and an overall average.

    ``None`` entries are treated as missing and are left out of every mean.

    Args:
        results: List of rows (lists) holding numbers or ``None``. The matrix
            does not have to be square; if rows differ in length, a position
            that a row lacks is treated as missing for that column.

    Returns:
        A tuple ``(row_avgs, col_avgs, overall_avg)`` where

        * ``row_avgs[i]`` is the mean of the non-missing values in row ``i``,
          or ``None`` if the row has none;
        * ``col_avgs[j]`` is the mean of the non-missing values in column
          ``j``, or ``None`` if the column has none;
        * ``overall_avg`` is the mean of the non-``None`` row averages (each
          row counts once, regardless of how many values it holds), or
          ``None`` when no row has any value.
    """
    def _mean(values):
        # Mean of the non-None items, or None when nothing is left to average.
        present = [v for v in values if v is not None]
        return sum(present) / len(present) if present else None

    row_avgs = [_mean(row) for row in results]

    # Derive the column count from the rows themselves rather than from
    # len(results), so non-square input is neither truncated nor out of range.
    n_cols = max((len(row) for row in results), default=0)
    col_avgs = [
        _mean(row[j] for row in results if j < len(row))
        for j in range(n_cols)
    ]

    # _mean returns None instead of dividing by zero when every row is empty.
    overall_avg = _mean(row_avgs)

    return row_avgs, col_avgs, overall_avg

def generate_latex_table_llm(evaluation_dir, exp_name, communication_rounds):
    """Generate LaTeX table for LLM evaluation (min is better).

    For every index ``i`` of the module-level ``domains`` mapping, reads
    ``<evaluation_dir>/<exp_name>/<communication_rounds>/client_<i>_results.json``
    and uses ``data['categories'][<domain>]['ratio']`` as the score of row
    ``i`` in that domain's column. A file that cannot be read or parsed only
    produces a printed warning; its missing cells are rendered as ``--``.

    The smallest value of each column and the smallest row average are
    wrapped in ``\\textbf{}``.

    Args:
        evaluation_dir: Root directory of the LLM evaluation results.
        exp_name: Experiment sub-directory name.
        communication_rounds: Round sub-directory name; converted with
            ``str()`` so an integer is accepted as well.

    Returns:
        The ``tabular`` environment as a single newline-joined string.
    """
    abbrs = list(domains)
    n = len(abbrs)

    # Load results into an n×n matrix (row = client index, column = domain).
    results = [[None] * n for _ in range(n)]
    base_path = os.path.join(evaluation_dir, exp_name, str(communication_rounds))
    for i in range(n):
        filepath = os.path.join(base_path, f"client_{i}_results.json")
        try:
            # Context manager so the handle is closed even if parsing fails.
            with open(filepath) as fh:
                data = json.load(fh)
            for j, abbr in enumerate(abbrs):
                key = domains[abbr]
                if key in data['categories']:
                    results[i][j] = data['categories'][key]['ratio']
        except Exception as e:
            # Deliberately best-effort: one bad file must not abort the table.
            print(f"Warning for client {i}: {e}")

    # Compute statistics
    row_avgs, col_avgs, overall_avg = compute_statistics(results)

    # Identify minimum values for bolding. Only None is filtered out: a score
    # of exactly 0.0 is a valid (and best possible) minimum. Columns without
    # any data get None instead of raising on an empty min().
    col_min = []
    for j in range(n):
        present = [results[i][j] for i in range(n) if results[i][j] is not None]
        col_min.append(min(present) if present else None)
    present_avgs = [a for a in row_avgs if a is not None]
    min_row_avg = min(present_avgs) if present_avgs else None

    # Build LaTeX table; the header is derived from `domains` so that it
    # always has the same number of columns as the data rows.
    lines = []
    lines.append(r"\begin{tabular}{l l|" + "c" * n + "|c}")
    lines.append(r"\hline")
    lines.append(r"\textbf{Method} & \textbf{Training} & \multicolumn{%d}{c|}{\textbf{Test Domain}} & \textbf{Avg.} \\" % n)
    lines.append(
        r" & \textbf{Domain} & "
        + " & ".join(r"\textbf{%s}" % abbr for abbr in abbrs)
        + r" & \\"
    )
    lines.append(r"\hline")
    lines.append(r"\multirow{%d}{*}{Individual}" % n)

    # Data rows
    for i in range(n):
        # The leading empty cell sits under the \multirow "Method" label.
        row = ["", abbrs[i]]
        for j in range(n):
            v = results[i][j]
            if v is None:
                cell = "--"
            else:
                text = f"{v:.3f}"
                if v == col_min[j]:
                    text = r"\textbf{" + text + "}"
                cell = text
            row.append(cell)

        avg = row_avgs[i]
        avg_text = f"{avg:.3f}" if avg is not None else "--"
        if avg is not None and avg == min_row_avg:
            avg_text = r"\textbf{" + avg_text + "}"
        row.append(avg_text)
        lines.append(" & ".join(row) + r" \\")

    # Column-average row; missing averages are shown as "--" like data cells.
    lines.append(r"\hline")
    avg_cells = [
        f"{v:.3f}" if v is not None else "--"
        for v in (col_avgs + [overall_avg])
    ]
    lines.append(
        r"\multicolumn{2}{c|}{\textbf{Average}} & "
        + " & ".join(avg_cells)
        + r" \\"
    )

    lines.append(r"\hline")
    lines.append(r"\end{tabular}")
    return "\n".join(lines)

def generate_latex_table_rouge(evaluation_dir, exp_name, communication_rounds, rouge_metric="rougeL"):
    """Generate LaTeX table for ROUGE evaluation (max is better).

    For every index ``i`` of the module-level ``domains`` mapping, reads
    ``<evaluation_dir>/<exp_name>/<communication_rounds>/client_<i>_output_rouge.json``
    and uses ``data[<domain>][rouge_metric]`` as the score of row ``i`` in
    that domain's column. A file that cannot be read or parsed only produces
    a printed warning; its missing cells are rendered as ``--``.

    The largest value of each column and the largest row average are wrapped
    in ``\\textbf{}``.

    Args:
        evaluation_dir: Root directory of the ROUGE evaluation results.
        exp_name: Experiment sub-directory name.
        communication_rounds: Round sub-directory name; converted with
            ``str()`` so an integer is accepted as well.
        rouge_metric: Key of the metric to read from each domain entry.

    Returns:
        The ``tabular`` environment as a single newline-joined string.
    """
    abbrs = list(domains)
    n = len(abbrs)

    # Load results into an n×n matrix (row = client index, column = domain).
    results = [[None] * n for _ in range(n)]
    base_path = os.path.join(evaluation_dir, exp_name, str(communication_rounds))

    for i in range(n):
        filepath = os.path.join(base_path, f"client_{i}_output_rouge.json")
        try:
            # Context manager so the handle is closed even if parsing fails.
            with open(filepath) as fh:
                data = json.load(fh)
            for j, abbr in enumerate(abbrs):
                key = domains[abbr]
                if key in data:
                    results[i][j] = data[key][rouge_metric]
        except Exception as e:
            # Deliberately best-effort: one bad file must not abort the table.
            print(f"Warning for client {i}: {e}")

    # Compute statistics
    row_avgs, col_avgs, overall_avg = compute_statistics(results)

    # Identify maximum values for bolding. Only None is filtered out, so the
    # real maximum is used whatever its sign. Columns without any data get
    # None instead of raising on an empty max().
    col_max = []
    for j in range(n):
        present = [results[i][j] for i in range(n) if results[i][j] is not None]
        col_max.append(max(present) if present else None)
    present_avgs = [a for a in row_avgs if a is not None]
    max_row_avg = max(present_avgs) if present_avgs else None

    # Build LaTeX table; the header is derived from `domains` so that it
    # always has the same number of columns as the data rows.
    lines = []
    lines.append(r"\begin{tabular}{l l|" + "c" * n + "|c}")
    lines.append(r"\hline")
    lines.append(r"\textbf{Method} & \textbf{Training} & \multicolumn{%d}{c|}{\textbf{Test Domain}} & \textbf{Avg.} \\" % n)
    lines.append(
        r" & \textbf{Domain} & "
        + " & ".join(r"\textbf{%s}" % abbr for abbr in abbrs)
        + r" & \\"
    )
    lines.append(r"\hline")
    lines.append(r"\multirow{%d}{*}{Individual}" % n)

    # Data rows
    for i in range(n):
        # The leading empty cell sits under the \multirow "Method" label.
        row = ["", abbrs[i]]
        for j in range(n):
            v = results[i][j]
            if v is None:
                cell = "--"
            else:
                text = f"{v:.3f}"
                if v == col_max[j]:  # Bold the maximum value in each column
                    text = r"\textbf{" + text + "}"
                cell = text
            row.append(cell)

        avg = row_avgs[i]
        avg_text = f"{avg:.3f}" if avg is not None else "--"
        if avg is not None and avg == max_row_avg:  # Bold the maximum row average
            avg_text = r"\textbf{" + avg_text + "}"
        row.append(avg_text)
        lines.append(" & ".join(row) + r" \\")

    # Column-average row; missing averages are shown as "--" like data cells.
    lines.append(r"\hline")
    avg_cells = [
        f"{v:.3f}" if v is not None else "--"
        for v in (col_avgs + [overall_avg])
    ]
    lines.append(
        r"\multicolumn{2}{c|}{\textbf{Average}} & "
        + " & ".join(avg_cells)
        + r" \\"
    )

    lines.append(r"\hline")
    lines.append(r"\end{tabular}")
    return "\n".join(lines)

In [19]:
# Render the LLM-score table first, then the ROUGE table. Each table is
# built immediately before it is printed so that any warnings emitted while
# loading its files appear directly above it.
latex_code_llm = generate_latex_table_llm(
    evaluation_dir=evaluation_llm_dir,
    exp_name=exp_name,
    communication_rounds=communication_rounds,
)
# The extra "\n" item reproduces the two blank lines separating the tables.
print(latex_code_llm, "\n", sep="\n")

latex_code_rouge = generate_latex_table_rouge(
    evaluation_dir=evaluation_rouge_dir,
    exp_name=exp_name,
    communication_rounds=communication_rounds,
)
print(latex_code_rouge)



\begin{tabular}{l l|cccccccc|c}
\hline
\textbf{Method} & \textbf{Training} & \multicolumn{8}{c|}{\textbf{Test Domain}} & \textbf{Avg.} \\
 & \textbf{Domain} & \textbf{BS} & \textbf{CL} & \textbf{CQ} & \textbf{CW} & \textbf{GQ} & \textbf{IE} & \textbf{OQ} & \textbf{SM} & \\
\hline
\multirow{8}{*}{Individual}
 & BS & 0.136 & 0.347 & 0.242 & 0.127 & 0.159 & 0.233 & 0.196 & 0.247 & 0.211 \\
 & CL & 0.142 & 0.465 & 0.295 & 0.121 & 0.159 & 0.276 & 0.220 & 0.273 & 0.244 \\
 & CQ & 0.129 & \textbf{0.467} & \textbf{0.357} & 0.112 & 0.139 & 0.310 & \textbf{0.227} & 0.264 & 0.250 \\
 & CW & 0.146 & 0.326 & 0.296 & \textbf{0.142} & 0.172 & 0.275 & 0.204 & 0.282 & 0.230 \\
 & GQ & 0.147 & 0.293 & 0.293 & 0.131 & \textbf{0.173} & 0.255 & 0.207 & 0.273 & 0.221 \\
 & IE & 0.142 & 0.411 & 0.313 & 0.119 & 0.157 & \textbf{0.330} & 0.216 & 0.239 & 0.241 \\
 & OQ & 0.151 & 0.412 & 0.283 & 0.124 & 0.161 & 0.329 & 0.213 & 0.221 & 0.237 \\
 & SM & \textbf{0.152} & 0.371 & 0.318 & 0.135 & 0.170 & 0.312 & 0.2