# Benchmark Metric Table
- Default reward settings across different algorithms (3 datasets): 3 tables
- Different reward settings (3 datasets * 7 algorithms): 21 tables

# Benchmark Metric Table

In [None]:
# "models/training_log/with_action_penalty_with_action_stable_penalty_weaning_reward_10_lr_1e-5"
import os
import sys
# List the entries of the training-log directory (path is relative to the
# current working directory) to see which experiment folders are available.
os.listdir("../models/training_log")

In [None]:
import pandas as pd
import os
from collections import defaultdict

# Prefix shared by every experiment folder name used below.
preprocess_prefix = "preprocess_aggregate_update_all_use_unseen_"

# Experiment folders included in the tables, in column order.
EXP_FOLDER_PREFIX_list = [
    preprocess_prefix + suffix
    for suffix in (
        'without_action_penalty_with_action_stable_penalty_weaning_reward_10_lr_1e-5',
        'without_action_penalty_without_action_stable_penalty_weaning_reward_10_lr_1e-5',
        'without_internal_reward_weaning_reward_10_lr_1e-5',
        'without_action_penalty_with_action_stable_penalty_weaning_reward_100_lr_1e-5',
        'with_internal_reward_without_weaning_reward_lr_1e-5',
        # Currently disabled:
        # 'without_action_penalty_with_action_stable_penalty_weaning_reward_10_lr_1e-6',
        # 'BC_1e-6_other_1e-5',
    )
]

# Full folder name -> short label used in table headers.
FOLDER_NAME_MAPPING = {
    preprocess_prefix + suffix: short_label
    for suffix, short_label in (
        ('without_action_penalty_with_action_stable_penalty_weaning_reward_10_lr_1e-5', 'default'),
        ('without_action_penalty_without_action_stable_penalty_weaning_reward_10_lr_1e-5', 'no_action_stable_penalty'),
        ('without_internal_reward_weaning_reward_10_lr_1e-5', 'no_internal_reward'),
        ('without_action_penalty_with_action_stable_penalty_weaning_reward_100_lr_1e-5', 'large_extub_reward'),
        ('with_internal_reward_without_weaning_reward_lr_1e-5', 'no_extub_reward'),
        ('without_action_penalty_with_action_stable_penalty_weaning_reward_10_lr_1e-6', 'small_lr'),
        ('BC_1e-6_other_1e-5', 'BC_1e-6_other_1e-5'),
    )
}

algo_list = [
    "DiscreteBC",
    "NFQ",
    "DQN",
    "DoubleDQN",
    "DiscreteSAC",
    "DiscreteBCQ",
    "DiscreteCQL",
]
data_source_list = ["train", "test", "eICU"]
reward_types = ["small_reward", "median_reward", "large_reward"]

# Reward-type category -> severity label used for display
# (small reward is shown as high severity, large reward as low severity).
REWARD_TYPE_DISPLAY_MAPPING = dict(
    zip(reward_types, ("high_severity", "median_severity", "low_severity"))
)

# Metrics to extract
# metric_names = [
#     ("mean_reward", "Total Cumulative Reward"),
#     ("meet_weaning_percentage", "Extubation Success Rate (\%)"),
#     ("mean_traj_len", "Avg. Episode Length (hrs)"),
#     ("mean_traj_len_to_meet_weaning", "Avg. Time to Meet (hrs)"),
#     ("action_diversity", "Action Diversity"),
#     ("mean_nonsense_actions", "Anomalous Actions (\%)")
# ]

# (CSV column, LaTeX row label) pairs, in the order rows are printed.
# Labels containing "%" are treated as fractions and multiplied by 100 by the
# table builders. The percent sign is LaTeX-escaped; the backslash is written
# as "\\" because "\%" is not a valid Python escape sequence.
metric_names = [
    ("mean_reward", "Total Cumulative Reward"),
    ("meet_weaning_percentage", "Extubation Meet Rate (\\%)"),
    ("mean_traj_len", "Avg. Trajectory Length (hrs)"),
    ("mean_traj_len_to_meet_weaning", "Avg. Time to Meet (hrs)"),
    ("action_diversity", "Action Diversity"),
    ("mean_nonsense_actions", "Anomalous Actions (\\%)"),
]

def extract_best_epoch(df, reward_category):
    """Return the row with the highest ``mean_reward`` for one category.

    Args:
        df: DataFrame with at least ``category`` and ``mean_reward`` columns.
        reward_category: Value of ``category`` to filter on.

    Returns:
        The row (as a Series) of the filtered frame whose ``mean_reward`` is
        largest, or ``None`` when no row matches the category or every
        matching ``mean_reward`` is missing.
    """
    subset = df[df['category'] == reward_category]
    if subset.empty:
        return None
    # Drop missing rewards first: idxmax on an all-NaN column cannot produce
    # a usable row label, so report "no best row" instead of failing.
    rewards = subset['mean_reward'].dropna()
    if rewards.empty:
        return None
    return subset.loc[rewards.idxmax()]

def build_table_for_algo(EXP_FOLDER, data_source):
    """Collect best-epoch metrics per algorithm for one experiment folder.

    For each algorithm in ``algo_list`` the summary CSV
    ``../models/training_log/{EXP_FOLDER}/rewards_summary_{algo}_{data_source}.csv``
    is read; algorithms whose file does not exist are skipped.

    Returns:
        ``{reward_type: {metric_label: {algo: value}}}``. Values are rounded
        to two decimals (multiplied by 100 first when the label contains
        "%"), or ``"N/A"`` when no row exists for that reward type.
    """
    tables = {}
    for bucket in reward_types:
        tables[bucket] = defaultdict(dict)

    log_dir = f'../models/training_log/{EXP_FOLDER}'
    for algo_name in algo_list:
        summary_csv = f'{log_dir}/rewards_summary_{algo_name}_{data_source}.csv'
        if os.path.exists(summary_csv):
            summary = pd.read_csv(summary_csv)
            for bucket in reward_types:
                best_row = extract_best_epoch(summary, bucket)
                for column, display in metric_names:
                    if best_row is None:
                        cell = "N/A"
                    elif "%" in display:
                        cell = round(best_row[column] * 100, 2)
                    else:
                        cell = round(best_row[column], 2)
                    tables[bucket][display][algo_name] = cell
    return tables


def build_table_for_algo_with_physician_policy(EXP_FOLDER, data_source):
    """Collect per-algorithm metrics plus the ``naive_agent`` baseline.

    RL algorithms are read from the ``EXP_FOLDER`` summaries and use the
    best-epoch row; ``naive_agent`` is read from its own ``naive_agent``
    folder and uses the first row of each reward-type category. Missing
    CSV files are skipped.

    Returns:
        ``{reward_type: {metric_label: {algo: value}}}`` with values rounded
        to two decimals (scaled by 100 when the label contains "%"), or
        ``"N/A"`` when no row is available.
    """
    tables = {bucket: defaultdict(dict) for bucket in reward_types}

    for policy in algo_list + ['naive_agent']:
        is_physician = policy == 'naive_agent'
        folder = 'naive_agent' if is_physician else EXP_FOLDER
        summary_csv = f'../models/training_log/{folder}/rewards_summary_{policy}_{data_source}.csv'
        if not os.path.exists(summary_csv):
            continue

        summary = pd.read_csv(summary_csv)
        for bucket in reward_types:
            if is_physician:
                # The baseline has no epochs to choose from: take the first row.
                matching = summary[summary['category'] == bucket]
                chosen = None if matching.empty else matching.iloc[0]
            else:
                chosen = extract_best_epoch(summary, bucket)

            for column, display in metric_names:
                if chosen is None:
                    cell = "N/A"
                elif "%" in display:
                    cell = round(chosen[column] * 100, 2)
                else:
                    cell = round(chosen[column], 2)
                tables[bucket][display][policy] = cell

    return tables

def build_table_for_reward_design(algo, data_source):
    """Collect best-epoch metrics for one algorithm across reward designs.

    Iterates over ``EXP_FOLDER_PREFIX_list`` and reads
    ``rewards_summary_{algo}_{data_source}.csv`` from each experiment folder;
    folders without that file are skipped.

    Returns:
        ``{reward_type: {metric_label: {experiment_folder: value}}}`` with
        values rounded to two decimals (scaled by 100 when the label
        contains "%"), or ``"N/A"`` when no row is available.
    """
    tables = {}
    for bucket in reward_types:
        tables[bucket] = defaultdict(dict)

    for experiment in EXP_FOLDER_PREFIX_list:
        summary_csv = f'../models/training_log/{experiment}/rewards_summary_{algo}_{data_source}.csv'
        if os.path.exists(summary_csv):
            summary = pd.read_csv(summary_csv)
            for bucket in reward_types:
                best_row = extract_best_epoch(summary, bucket)
                for column, display in metric_names:
                    if best_row is None:
                        cell = "N/A"
                    elif "%" in display:
                        cell = round(best_row[column] * 100, 2)
                    else:
                        cell = round(best_row[column], 2)
                    tables[bucket][display][experiment] = cell
    return tables

def escape_latex(text):
    """Return ``text`` with every underscore replaced by ``\\_``.

    Only underscores are escaped; other LaTeX special characters are left
    untouched.
    """
    pieces = text.split('_')
    return '\\_'.join(pieces)

def print_table_latex(table_dict, columns, caption_suffix=""):
    """Print one LaTeX table per reward type to stdout.

    Args:
        table_dict: ``{reward_type: {metric_label: {column: value}}}``.
        columns: Column keys in display order. ``"naive_agent"`` is shown as
            "Physician Policy"; other keys are shortened through
            ``FOLDER_NAME_MAPPING`` when present and underscore-escaped.
        caption_suffix: Text appended to the caption; a sanitized copy is
            also used in the table label.

    Cells missing from ``table_dict`` are printed as "N/A".
    """
    # Space -> underscore; parentheses and percent signs are removed.
    label_cleanup = str.maketrans({' ': '_', '(': None, ')': None, '%': None})

    for reward_type in reward_types:
        severity_title = REWARD_TYPE_DISPLAY_MAPPING[reward_type].replace('_', ' ').title()

        print("\\begin{table}[htbp]")
        print("\\centering")
        print(f"\\caption{{Performance Metrics ({severity_title}) {caption_suffix}}}")
        print("\\small")
        print("\\resizebox{\\textwidth}{!}{%")  # Start resizebox
        print("\\begin{tabular}{l" + "c" * len(columns) + "}")
        print("\\toprule")

        # Header row
        headers = ["\\textbf{Metric}"]
        headers += [
            "\\texttt{"
            + ("Physician Policy" if col == "naive_agent"
               else escape_latex(FOLDER_NAME_MAPPING.get(col, col)))
            + "}"
            for col in columns
        ]
        print(" & ".join(headers) + " \\\\")

        print("\\midrule")

        # Data rows, one per metric
        for _, metric_label in metric_names:
            values = [
                str(table_dict[reward_type][metric_label].get(col, "N/A"))
                for col in columns
            ]
            print(" & ".join([metric_label, *values]) + " \\\\")

        print("\\bottomrule")
        print("\\end{tabular}")
        print("}%")  # End resizebox
        label_suffix = caption_suffix.translate(label_cleanup)
        print(f"\\label{{tab:rl_metrics_{reward_type}_{label_suffix}}}")
        print("\\end{table}")
        print()

# def print_table_latex(table_dict, columns, caption_suffix=""):
#     for rtype in reward_types:
#         # Convert reward type to severity display name
#         severity_display = REWARD_TYPE_DISPLAY_MAPPING[rtype].replace('_', ' ').title()
        
#         print("\\begin{table}[htbp]")
#         print("\\centering")
#         print(f"\\caption{{Performance Metrics ({severity_display}) {caption_suffix}}}")
#         print("\\small")
#         print("\\resizebox{\\textwidth}{!}{%")  # Start resizebox
#         print("\\begin{tabular}{" + "l" + "c" * len(columns) + "}")
#         print("\\toprule")

#         # Header row
#         header_cells = ["\\textbf{Metric}"]
#         for col in columns:
#             short_label = FOLDER_NAME_MAPPING.get(col, col)
#             escaped_label = escape_latex(short_label)
#             header_cells.append(f"\\texttt{{{escaped_label}}}")
#         print(" & ".join(header_cells) + " \\\\")

#         print("\\midrule")

#         # Data rows
#         for key, label in metric_names:
#             row = [label]
#             for col in columns:
#                 val = table_dict[rtype][label].get(col, "N/A")
#                 row.append(str(val))
#             print(" & ".join(row) + " \\\\")

#         print("\\bottomrule")
#         print("\\end{tabular}")
#         print("}%")  # End resizebox
        
#         # Use severity name for label
#         severity_label = REWARD_TYPE_DISPLAY_MAPPING[rtype]
#         safe_label = caption_suffix.replace(' ', '_').replace('(', '').replace(')', '').replace('%', '')
#         print(f"\\label{{tab:rl_metrics_{severity_label}_{safe_label}}}")
#         print("\\end{table}")
#         print()

In [None]:
## Default Reward Settings with Different Algo
# 1. Compare algorithms within a reward setting
# Builds the per-algorithm table for the eICU summaries only and prints it
# as LaTeX (print_table_latex emits one table per reward type).
exp_setting = EXP_FOLDER_PREFIX_list[0]  # "default" setting
data_source = "eICU"
table_dict = build_table_for_algo(exp_setting, data_source)
print_table_latex(table_dict, algo_list, caption_suffix=f"for {data_source}")
# print_table_latex(table_dict, algo_list, caption_suffix=f"for {data_source} ({exp_setting})")

In [None]:
# 1. Compare algorithms within a reward setting
# Same as the single-dataset cell, repeated for every entry of data_source_list.
# NOTE: the loop rebinds the notebook-level `data_source` variable.
exp_setting = EXP_FOLDER_PREFIX_list[0]  # "default" setting
for data_source in data_source_list:
    table_dict = build_table_for_algo(exp_setting, data_source)
    print_table_latex(table_dict, algo_list, caption_suffix=f"for {data_source}")
    # print_table_latex(table_dict, algo_list, caption_suffix=f"for {data_source} ({exp_setting})")

With Physician Policy

In [None]:
# Put the physician baseline ('naive_agent') first so it is the leftmost column.
algo_list_with_physician_policy = ['naive_agent'] +  algo_list
exp_setting = EXP_FOLDER_PREFIX_list[0]  # "default" setting
# NOTE: the loop rebinds the notebook-level `data_source` variable.
for data_source in data_source_list:
    table = build_table_for_algo_with_physician_policy(EXP_FOLDER=exp_setting, data_source=data_source)
    print_table_latex(table, algo_list_with_physician_policy, caption_suffix=f"for {data_source}")

## Merge Severity for Default Reward Settings with Different Algo (with Physician Policy)

In [None]:
def build_table_for_algo_with_physician_policy_severity_merged(EXP_FOLDER, data_source):
    """Build one flat table covering every severity level and policy.

    RL algorithms use the best-epoch row from the ``EXP_FOLDER`` summaries;
    ``naive_agent`` uses the first row of each category from its own folder.
    Policies whose CSV file is missing are skipped.

    Returns:
        ``{"<metric label> (<Severity Title>)": {policy: value}}`` where
        values are rounded to two decimals (scaled by 100 when the label
        contains "%"), or ``"N/A"`` when no row is available.
    """
    merged = defaultdict(dict)

    for policy in algo_list + ['naive_agent']:
        is_physician = policy == 'naive_agent'
        folder = 'naive_agent' if is_physician else EXP_FOLDER
        summary_csv = f'../models/training_log/{folder}/rewards_summary_{policy}_{data_source}.csv'
        if not os.path.exists(summary_csv):
            continue

        summary = pd.read_csv(summary_csv)

        for bucket in reward_types:
            if is_physician:
                matching = summary[summary['category'] == bucket]
                chosen = None if matching.empty else matching.iloc[0]
            else:
                chosen = extract_best_epoch(summary, bucket)

            severity_title = REWARD_TYPE_DISPLAY_MAPPING[bucket].replace('_', ' ').title()

            for column, display in metric_names:
                # The row key carries both the metric and its severity level.
                row_key = f"{display} ({severity_title})"
                if chosen is None:
                    cell = "N/A"
                elif "%" in display:
                    cell = round(chosen[column] * 100, 2)
                else:
                    cell = round(chosen[column], 2)
                merged[row_key][policy] = cell

    return merged

def print_compact_merged_table_latex_with_physician(table_dict, columns, exp_folder, data_source):
    """Print a wide LaTeX table: one High/Med/Low column triple per policy.

    Args:
        table_dict: ``{"<metric> (<Severity Title>)": {column: value}}``.
        columns: Policy keys in display order; ``"naive_agent"`` is shown as
            "Physician Policy".
        exp_folder: Experiment folder name, used (underscore-escaped) in the
            table label.
        data_source: Dataset name used in the caption and label.

    Missing cells are printed as "N/A".
    """
    severity_titles = [
        key.replace('_', ' ').title()
        for key in ("high_severity", "median_severity", "low_severity")
    ]

    print("\\begin{table}[htbp]")
    print("\\centering")
    print(f"\\caption{{Algorithm Comparison Across All Severity Categories for {data_source}}}")
    print("\\small")
    print("\\resizebox{\\textwidth}{!}{%")

    # One "c" column per (policy, severity) pair after the metric column.
    value_col_count = len(columns) * len(REWARD_TYPE_DISPLAY_MAPPING)
    print("\\begin{tabular}{l" + "c" * value_col_count + "}")
    print("\\toprule")

    # Two header rows: policy names spanning three columns, then severities.
    top_header = ["\\textbf{Metric}"]
    sub_header = [""]
    for col in columns:
        shown = "Physician Policy" if col == "naive_agent" else escape_latex(col)
        top_header.append(f"\\multicolumn{{3}}{{c}}{{\\texttt{{{shown}}}}}")
        sub_header += ["\\textbf{High}", "\\textbf{Med}", "\\textbf{Low}"]

    print(" & ".join(top_header) + " \\\\")
    print(" & ".join(sub_header) + " \\\\")
    print("\\midrule")

    # One row per metric; cells ordered by policy, then severity.
    for _, metric_label in metric_names:
        cells = [metric_label]
        for col in columns:
            for title in severity_titles:
                row_key = f"{metric_label} ({title})"
                cells.append(str(table_dict.get(row_key, {}).get(col, "N/A")))
        print(" & ".join(cells) + " \\\\")

    print("\\bottomrule")
    print("\\end{tabular}")
    print("}%")

    label_folder = escape_latex(exp_folder)
    print(f"\\label{{tab:algo_comparison_with_physician_{label_folder}_{data_source}}}")
    print("\\end{table}")
    print()

# Usage function
def generate_compact_merged_tables_with_physician():
    """Print the compact merged table (physician policy included) per dataset.

    Uses the first entry of ``EXP_FOLDER_PREFIX_list`` as the experiment and
    prints a LaTeX comment line before each table.
    """
    default_experiment = EXP_FOLDER_PREFIX_list[0]  # Use default setting
    policy_columns = ['naive_agent', *algo_list]

    for source in data_source_list:
        merged = build_table_for_algo_with_physician_policy_severity_merged(default_experiment, source)
        print(f"% Compact algorithm comparison table with physician for {default_experiment} on {source}")
        print_compact_merged_table_latex_with_physician(merged, policy_columns, default_experiment, source)


In [None]:
# To use it:
# [Part 1] Default reward settings with different algo
# Prints one compact merged table (physician policy included) per data source.
generate_compact_merged_tables_with_physician()

## Highlight and Merge Severity for Default Reward Settings with Different Algo

In [None]:
# METRIC_OPTIMIZATION = {
#     "Total Cumulative Reward": "higher",
#     "Extubation Success Rate (%)": "higher",
#     "Avg. Episode Length (hrs)": "lower",
#     "Avg. Time to Meet (hrs)": "lower",
#     "Action Diversity": "higher",
#     "Anomalous Actions (%)": "lower"
# }
# Direction in which each metric is considered better when picking the value
# to highlight. Keys must match the metric row labels exactly; the backslash
# is written as "\\" because "\%" is not a valid Python escape sequence.
METRIC_OPTIMIZATION = {
    "Total Cumulative Reward": "higher",
    "Extubation Meet Rate (\\%)": "higher",
    "Avg. Trajectory Length (hrs)": "lower",
    "Avg. Time to Meet (hrs)": "lower",
    "Action Diversity": "higher",
    "Anomalous Actions (\\%)": "lower",
}

def build_table_for_algo_merged_with_physician_highlight(EXP_FOLDER, data_source):
    """Build the flat severity-merged table used by the highlighted printers.

    RL algorithms use the best-epoch row from the ``EXP_FOLDER`` summaries;
    ``naive_agent`` uses the first row of each category from its own folder.
    Policies whose CSV file is missing are skipped.

    Returns:
        ``{"<metric label> (<Severity Title>)": {policy: value}}`` with
        values rounded to two decimals (scaled by 100 when the label
        contains "%"), or ``"N/A"`` when no row is available.
    """
    merged = defaultdict(dict)
    for policy in algo_list + ['naive_agent']:
        is_physician = policy == 'naive_agent'
        folder = 'naive_agent' if is_physician else EXP_FOLDER
        summary_csv = f'../models/training_log/{folder}/rewards_summary_{policy}_{data_source}.csv'
        if not os.path.exists(summary_csv):
            continue
        summary = pd.read_csv(summary_csv)
        for bucket in reward_types:
            if is_physician:
                matching = summary[summary['category'] == bucket]
                chosen = None if matching.empty else matching.iloc[0]
            else:
                chosen = extract_best_epoch(summary, bucket)
            severity_title = REWARD_TYPE_DISPLAY_MAPPING[bucket].replace('_', ' ').title()
            for column, display in metric_names:
                row_key = f"{display} ({severity_title})"
                if chosen is None:
                    cell = "N/A"
                elif "%" in display:
                    cell = round(chosen[column] * 100, 2)
                else:
                    cell = round(chosen[column], 2)
                merged[row_key][policy] = cell
    return merged

def _to_num(x):
    """Try to coerce to float; return None if not possible."""
    try:
        return float(x)
    except:
        return None

def compute_best_per_metric(table_dict):
    """Find the best numeric value for every (severity, metric) pair.

    Args:
        table_dict: ``{"<metric> (<Severity Title>)": {column: value}}``.

    Returns:
        ``{severity_title: {metric: best_value}}`` for the three severity
        titles. "Best" is the maximum or minimum according to
        ``METRIC_OPTIMIZATION``; metrics not listed there, and rows without
        any numeric value, are left out.
    """
    winners = {"High Severity": {}, "Median Severity": {}, "Low Severity": {}}
    for row_key, per_column in table_dict.items():
        # Split "<metric> (<severity>)" on the last " (" and drop the ")".
        metric_name, severity_tail = row_key.rsplit(" (", 1)
        severity_name = severity_tail[:-1]
        goal = METRIC_OPTIMIZATION.get(metric_name)
        if goal:
            numeric = [n for n in map(_to_num, per_column.values()) if n is not None]
            if numeric:
                pick = max if goal == "higher" else min
                winners[severity_name][metric_name] = pick(numeric)
    return winners

def is_best(raw, best):
    """Tell whether ``raw``, coerced to a number, equals ``best``.

    Returns ``False`` when ``raw`` cannot be coerced or ``best`` is ``None``.
    """
    candidate = _to_num(raw)
    if candidate is None or best is None:
        return False
    return candidate == best

def print_compact_merged_table_latex_with_physician_highlighted(table_dict, columns, exp_folder, data_source):
    """Print the wide merged LaTeX table with the best cells in bold.

    Each policy gets a High/Med/Low column triple. Within a metric row, the
    best value of each severity level (as returned by
    ``compute_best_per_metric``) is wrapped in ``\\textbf``; ties are all
    bolded. Missing cells are printed as "N/A".

    Args:
        table_dict: ``{"<metric> (<Severity Title>)": {column: value}}``.
        columns: Policy keys in display order; ``"naive_agent"`` is shown as
            "Physician Policy".
        exp_folder: Experiment folder name, used (escaped) in the label.
        data_source: Dataset name used in the caption and label.
    """
    winners = compute_best_per_metric(table_dict)
    severity_titles = [
        key.replace("_", " ").title()
        for key in ("high_severity", "median_severity", "low_severity")
    ]

    print("\\begin{table}[htbp]")
    print("\\centering")
    print(f"\\caption{{Algorithm Comparison Across All Severity Categories for {data_source}}}")
    print("\\small")
    print("\\resizebox{\\textwidth}{!}{%")
    print("\\begin{tabular}{l" + "c" * (3 * len(columns)) + "}")
    print("\\toprule")

    # Two header rows: policy names spanning three columns, then severities.
    top_header = ["\\textbf{Metric}"]
    sub_header = [""]
    for col in columns:
        if col == "naive_agent":
            shown = "Physician Policy"
        else:
            shown = escape_latex(col)
        top_header.append(f"\\multicolumn{{3}}{{c}}{{\\texttt{{{shown}}}}}")
        sub_header.extend(["\\textbf{High}", "\\textbf{Med}", "\\textbf{Low}"])
    print(" & ".join(top_header) + " \\\\")
    print(" & ".join(sub_header) + " \\\\")
    print("\\midrule")

    for _, metric_label in metric_names:
        cells = [metric_label]
        for col in columns:
            for title in severity_titles:
                value = table_dict.get(f"{metric_label} ({title})", {}).get(col, "N/A")
                highlight = is_best(value, winners[title].get(metric_label))
                cells.append(f"\\textbf{{{value}}}" if highlight else str(value))
        print(" & ".join(cells) + " \\\\")
    print("\\bottomrule")
    print("\\end{tabular}")
    print("}%")
    label_folder = escape_latex(exp_folder)
    print(f"\\label{{tab:algo_comparison_with_physician_{label_folder}_{data_source}}}")
    print("\\end{table}\n")

def print_vertical_merged_table_latex_with_physician_highlighted(table_dict, columns, exp_folder, data_source):
    """Print a vertical merged LaTeX table with one section per severity level.

    Each policy is a single column; rows are grouped under "High Severity",
    "Median Severity" and "Low Severity" section headers, with metric rows
    sorted alphabetically by their combined key. The best value of each
    metric within a section is wrapped in ``\\textbf``. Missing cells are
    printed as "N/A".

    Args:
        table_dict: ``{"<metric> (<Severity Title>)": {column: value}}``.
        columns: Policy keys in display order; ``"naive_agent"`` is shown as
            "Physician Policy".
        exp_folder: Experiment folder name, used (escaped) in the label.
        data_source: Dataset name used in the caption and label.
    """
    best_vals = compute_best_per_metric(table_dict)

    print("\\begin{table}[htbp]")
    print("\\centering")
    print(f"\\caption{{Algorithm Performance Across All Severity Categories for {data_source}}}")
    print("\\small")
    print("\\resizebox{\\textwidth}{!}{%")
    print("\\begin{tabular}{l" + "c" * len(columns) + "}")
    print("\\toprule")

    # Resolve the display name before formatting: the previous single f-string
    # used doubled braces around the conditional, which printed the expression
    # text literally instead of evaluating it.
    header = ["\\textbf{Metric}"]
    for c in columns:
        name = "Physician Policy" if c == "naive_agent" else escape_latex(c)
        header.append(f"\\texttt{{{name}}}")
    print(" & ".join(header) + " \\\\\n\\midrule")

    for sev in ["High Severity", "Median Severity", "Low Severity"]:
        print(f"\\multicolumn{{{len(columns)+1}}}{{c}}{{\\textbf{{{sev}}}}} \\\\\n\\midrule")
        for combined in sorted(table_dict):
            if not combined.endswith(f"({sev})"):
                continue
            # Strip the severity suffix to recover the bare metric label.
            metric = combined.replace(f" ({sev})", "")
            row = [metric]
            for col in columns:
                raw = table_dict[combined].get(col, "N/A")
                if is_best(raw, best_vals[sev].get(metric)):
                    row.append(f"\\textbf{{{raw}}}")
                else:
                    row.append(str(raw))
            print(" & ".join(row) + " \\\\")
        if sev != "Low Severity":
            print("\\addlinespace")
    print("\\bottomrule")
    print("\\end{tabular}")
    print("}%")
    safe = escape_latex(exp_folder)
    print(f"\\label{{tab:vertical_algo_comparison_with_physician_{safe}_{data_source}}}")
    print("\\end{table}\n")

# Updated usage functions with highlighting
def generate_compact_merged_tables_with_physician_highlighted():
    """Print the highlighted compact merged table for every data source.

    Uses the first entry of ``EXP_FOLDER_PREFIX_list`` as the experiment and
    puts the physician policy in the first column.
    """
    default_experiment = EXP_FOLDER_PREFIX_list[0]
    policy_columns = ['naive_agent', *algo_list]
    for source in data_source_list:
        merged = build_table_for_algo_merged_with_physician_highlight(default_experiment, source)
        print(f"% Compact algorithm comparison table with physician and highlighting for {default_experiment} on {source}")
        print_compact_merged_table_latex_with_physician_highlighted(merged, policy_columns, default_experiment, source)

def generate_vertical_merged_tables_with_physician_highlighted():
    """Print the highlighted vertical merged table for every data source.

    Uses the first entry of ``EXP_FOLDER_PREFIX_list`` as the experiment and
    puts the physician policy in the first column.
    """
    default_experiment = EXP_FOLDER_PREFIX_list[0]
    policy_columns = ['naive_agent', *algo_list]
    for source in data_source_list:
        merged = build_table_for_algo_merged_with_physician_highlight(default_experiment, source)
        print(f"% Vertical algorithm comparison table with physician and highlighting for {default_experiment} on {source}")
        print_vertical_merged_table_latex_with_physician_highlighted(merged, policy_columns, default_experiment, source)

def generate_both_merged_tables_with_physician_highlighted():
    """Print the compact and then the vertical highlighted table per data source.

    The table data is built once per data source and passed to both printers.
    """
    default_experiment = EXP_FOLDER_PREFIX_list[0]
    policy_columns = ['naive_agent', *algo_list]
    layouts = (
        ("Compact", print_compact_merged_table_latex_with_physician_highlighted),
        ("Vertical", print_vertical_merged_table_latex_with_physician_highlighted),
    )
    for source in data_source_list:
        merged = build_table_for_algo_merged_with_physician_highlight(default_experiment, source)
        for layout_name, printer in layouts:
            print(f"% {layout_name} algorithm comparison table with physician and highlighting for {default_experiment} on {source}")
            printer(merged, policy_columns, default_experiment, source)

In [None]:
# Usage examples:
# For compact tables only:
# [Part 1] With Highlighted Best Values
# Prints one highlighted compact table per data source for the default experiment.
generate_compact_merged_tables_with_physician_highlighted()

# For vertical tables only:
# generate_vertical_merged_tables_with_physician_highlighted()

# For both layouts:
# generate_both_merged_tables_with_physician_highlighted()

## Merge severity for Different Reward Settings

In [None]:
def build_table_for_reward_design_severity_merged(algo, data_source):
    """Build one flat table of all severity levels across reward designs.

    Reads ``rewards_summary_{algo}_{data_source}.csv`` from each folder in
    ``EXP_FOLDER_PREFIX_list`` (missing files are skipped) and takes the
    best-epoch row per reward type.

    Returns:
        ``{"<metric label> (<Severity Title>)": {experiment_folder: value}}``
        with values rounded to two decimals (scaled by 100 when the label
        contains "%"), or ``"N/A"`` when no row is available.
    """
    merged = defaultdict(dict)

    for experiment in EXP_FOLDER_PREFIX_list:
        summary_csv = f'../models/training_log/{experiment}/rewards_summary_{algo}_{data_source}.csv'
        if os.path.exists(summary_csv):
            summary = pd.read_csv(summary_csv)

            for bucket in reward_types:
                best_row = extract_best_epoch(summary, bucket)
                severity_title = REWARD_TYPE_DISPLAY_MAPPING[bucket].replace('_', ' ').title()

                for column, display in metric_names:
                    # The row key carries both the metric and its severity level.
                    row_key = f"{display} ({severity_title})"
                    if best_row is None:
                        cell = "N/A"
                    elif "%" in display:
                        cell = round(best_row[column] * 100, 2)
                    else:
                        cell = round(best_row[column], 2)
                    merged[row_key][experiment] = cell

    return merged

def print_compact_reward_design_table_latex(table_dict, columns, algo, data_source):
    """Print a wide LaTeX table comparing reward designs for one algorithm.

    Each reward design gets a High/Med/Low column triple. Column headers use
    the short name from ``FOLDER_NAME_MAPPING`` when one exists. Missing
    cells are printed as "N/A".

    Args:
        table_dict: ``{"<metric> (<Severity Title>)": {column: value}}``.
        columns: Experiment folder names in display order.
        algo: Algorithm name used in the caption and (escaped) in the label.
        data_source: Dataset name used in the caption and label.
    """
    severity_titles = [
        key.replace('_', ' ').title()
        for key in ("high_severity", "median_severity", "low_severity")
    ]

    print("\\begin{table}[htbp]")
    print("\\centering")
    print(f"\\caption{{Reward Design Comparison Across All Severity Categories for {data_source} ({algo})}}")
    print("\\small")
    print("\\resizebox{\\textwidth}{!}{%")

    # One "c" column per (reward design, severity) pair after the metric column.
    value_col_count = len(columns) * len(REWARD_TYPE_DISPLAY_MAPPING)
    print("\\begin{tabular}{l" + "c" * value_col_count + "}")
    print("\\toprule")

    # Two header rows: design names spanning three columns, then severities.
    top_header = ["\\textbf{Metric}"]
    sub_header = [""]
    for col in columns:
        shown = escape_latex(FOLDER_NAME_MAPPING.get(col, col))
        top_header.append(f"\\multicolumn{{3}}{{c}}{{\\texttt{{{shown}}}}}")
        sub_header += ["\\textbf{High}", "\\textbf{Med}", "\\textbf{Low}"]

    print(" & ".join(top_header) + " \\\\")
    print(" & ".join(sub_header) + " \\\\")
    print("\\midrule")

    # One row per metric; cells ordered by reward design, then severity.
    for _, metric_label in metric_names:
        cells = [metric_label]
        for col in columns:
            for title in severity_titles:
                row_key = f"{metric_label} ({title})"
                cells.append(str(table_dict.get(row_key, {}).get(col, "N/A")))
        print(" & ".join(cells) + " \\\\")

    print("\\bottomrule")
    print("\\end{tabular}")
    print("}%")

    label_algo = escape_latex(algo)
    print(f"\\label{{tab:reward_design_comparison_{label_algo}_{data_source}}}")
    print("\\end{table}")
    print()

def print_vertical_reward_design_table_latex(table_dict, columns, algo, data_source):
    """Print a vertical LaTeX table of reward designs, sectioned by severity.

    Each reward design is one column (short name from ``FOLDER_NAME_MAPPING``
    when available). Rows are grouped under High/Median/Low Severity section
    headers in the iteration order of ``table_dict``. Missing cells are
    printed as "N/A".

    Args:
        table_dict: ``{"<metric> (<Severity Title>)": {column: value}}``.
        columns: Experiment folder names in display order.
        algo: Algorithm name used in the caption and (escaped) in the label.
        data_source: Dataset name used in the caption and label.
    """
    print("\\begin{table}[htbp]")
    print("\\centering")
    print(f"\\caption{{Reward Design Performance Across All Severity Categories for {data_source} ({algo})}}")
    print("\\small")
    print("\\resizebox{\\textwidth}{!}{%")
    print("\\begin{tabular}{l" + "c" * len(columns) + "}")
    print("\\toprule")

    # Header row
    headers = ["\\textbf{Metric}"] + [
        "\\texttt{" + escape_latex(FOLDER_NAME_MAPPING.get(col, col)) + "}"
        for col in columns
    ]
    print(" & ".join(headers) + " \\\\")

    print("\\midrule")

    sections = ["High Severity", "Median Severity", "Low Severity"]
    final_section = sections[-1]

    for section in sections:
        # Section header spanning the full table width.
        print(f"\\multicolumn{{{len(columns) + 1}}}{{c}}{{\\textbf{{{section}}}}} \\\\")
        print("\\midrule")

        marker = f"({section})"
        section_keys = [row_key for row_key in table_dict.keys() if marker in row_key]

        for row_key in section_keys:
            # Show the metric name without its severity suffix.
            cells = [row_key.replace(f" ({section})", "")]
            cells.extend(str(table_dict[row_key].get(col, "N/A")) for col in columns)
            print(" & ".join(cells) + " \\\\")

        if section != final_section:
            print("\\addlinespace")

    print("\\bottomrule")
    print("\\end{tabular}")
    print("}%")

    label_algo = escape_latex(algo)
    print(f"\\label{{tab:vertical_reward_design_{label_algo}_{data_source}}}")
    print("\\end{table}")
    print()

# Usage functions
def generate_reward_design_tables_severity_merged(algo, data_source):
    """Print the compact reward-design comparison table for one algorithm and dataset.

    The vertical layout (``print_vertical_reward_design_table_latex``) is
    available as an alternative but is not printed here.
    """
    merged = build_table_for_reward_design_severity_merged(algo, data_source)

    # Compact horizontal layout, preceded by a LaTeX comment line.
    print(f"% Compact reward design table for {algo} on {data_source}")
    print_compact_reward_design_table_latex(merged, EXP_FOLDER_PREFIX_list, algo, data_source)

def generate_all_reward_design_tables_severity_merged():
    """Print a merged reward-design table for every (data source, algorithm) pair.

    Data sources form the outer loop, algorithms the inner loop.
    """
    pairs = [(source, name) for source in data_source_list for name in algo_list]
    for source, name in pairs:
        generate_reward_design_tables_severity_merged(name, source)

# Usage example with your original code pattern:
def generate_reward_design_tables_for_data_source_severity_merged(data_source):
    """Print the compact severity-merged reward-design table of each algorithm for one data source."""
    for algorithm in algo_list:
        merged_table = build_table_for_reward_design_severity_merged(algorithm, data_source)
        print(f"% Compact reward design table for {algorithm} on {data_source}")
        print_compact_reward_design_table_latex(
            merged_table, EXP_FOLDER_PREFIX_list, algorithm, data_source
        )

# To replicate your original usage pattern:
# data_source = "eICU"
# generate_reward_design_tables_for_data_source_severity_merged(data_source)

In [None]:
# [Part 2] Reward Design Comparison with Severity Merged
# Prints one compact LaTeX table per (data source, algorithm) pair to the cell output.
generate_all_reward_design_tables_severity_merged()

In [None]:
# 2. Compare reward designs for a specific algorithm
# NOTE(review): `data_source` is not assigned in this cell; it relies on a value left
# behind by a previously executed cell -- confirm it is set before running.
algo = "DiscreteCQL"
table_dict = build_table_for_reward_design(algo, data_source)
print_table_latex(table_dict, EXP_FOLDER_PREFIX_list, caption_suffix=f"for {data_source} ({algo})")


In [None]:
# Compare reward designs for every algorithm on the eICU data source.
data_source = "eICU"
for algo in algo_list:
    # algo = "DiscreteCQL"
    table_dict = build_table_for_reward_design(algo, data_source)
    print_table_latex(table_dict, EXP_FOLDER_PREFIX_list, caption_suffix=f"for {data_source} ({algo})")

In [None]:
# 2. Compare reward designs for every algorithm on every data source
# (one group of tables per (data source, algorithm) pair).
for data_source in data_source_list:
    for algo in algo_list:
        # algo = "DiscreteCQL"
        table_dict = build_table_for_reward_design(algo, data_source)
        print_table_latex(table_dict, EXP_FOLDER_PREFIX_list, caption_suffix=f"for {data_source} ({algo})")


## Highlight and Merge Severity for Different Reward Designs (one table per algorithm × dataset)

In [None]:
# Add this to your existing notebook after the existing reward design functions

def build_table_for_reward_design_severity_merged_with_highlight(algo, data_source):
    """
    Collect best-epoch metrics of one algorithm for every reward design and severity level.

    Returns a mapping ``"<metric label> (<Severity>)" -> {experiment folder: value}``.
    Experiment folders without a summary CSV are skipped. When ``extract_best_epoch``
    returns ``None`` for a reward type, its cells are filled with ``"N/A"``. Metrics
    whose label contains ``%`` are multiplied by 100; numbers are rounded to 2 decimals.
    """
    merged = defaultdict(dict)

    for folder in EXP_FOLDER_PREFIX_list:
        summary_path = f'../models/training_log/{folder}/rewards_summary_{algo}_{data_source}.csv'
        if not os.path.exists(summary_path):
            continue
        summary = pd.read_csv(summary_path)

        for rtype in reward_types:
            best_row = extract_best_epoch(summary, rtype)
            severity_title = REWARD_TYPE_DISPLAY_MAPPING[rtype].replace('_', ' ').title()

            for key, label in metric_names:
                if best_row is None:
                    value = "N/A"
                else:
                    value = best_row[key]
                    if "%" in label:
                        # Rates are stored as fractions; report them as percentages.
                        value = value * 100
                    value = round(value, 2)

                # Row key carries the severity so all levels share one table.
                merged[f"{label} ({severity_title})"][folder] = value

    return merged

def compute_best_per_metric_reward_design(table_dict):
    """Compute the best value of each metric within each severity category.

    Args:
        table_dict: Mapping of ``"<metric> (<Severity>)"`` to a dict of
            ``{reward design: value}``.

    Returns:
        ``{severity: {metric: best numeric value}}``. The three standard
        severities ("High Severity", "Median Severity", "Low Severity") are
        always present as keys; any other severity label found in
        ``table_dict`` is added on demand. Metrics without an entry in
        ``METRIC_OPTIMIZATION`` and metrics with no numeric values are omitted.
    """
    best_vals = {sev: {} for sev in ["High Severity", "Median Severity", "Low Severity"]}

    for combined, design_vals in table_dict.items():
        # Split "<metric> (<Severity>)" on the LAST " (" so metric labels that
        # themselves contain parentheses (e.g. "... (%)") stay intact.
        metric, separator, sev = combined.rpartition(" (")
        if not separator or not sev.endswith(")"):
            # Key does not follow the "<metric> (<Severity>)" pattern; skip it
            # instead of failing on the unpack.
            continue
        sev = sev[:-1]  # Remove the closing parenthesis

        direction = METRIC_OPTIMIZATION.get(metric)
        if not direction:
            continue

        # Keep only values _to_num could convert (it yields None otherwise).
        nums = [num for num in map(_to_num, design_vals.values()) if num is not None]
        if not nums:
            continue

        # "higher" means larger is better; any other direction picks the minimum.
        best = max(nums) if direction == "higher" else min(nums)
        # setdefault tolerates severity labels beyond the three pre-seeded ones.
        best_vals.setdefault(sev, {})[metric] = best

    return best_vals

def print_compact_reward_design_table_latex_with_highlight(table_dict, columns, algo, data_source):
    """
    Print a compact merged LaTeX table with severity as sub-columns for reward design comparison.

    Every reward design in ``columns`` spans one High/Med/Low group of cells. Within a
    metric row, a cell is bolded when ``is_best`` accepts it against the best value
    computed for that metric and severity.

    Args:
        table_dict: ``{"<metric label> (<Severity>)": {reward design: value}}``.
        columns: Reward-design keys in display order; header text is looked up in
            ``FOLDER_NAME_MAPPING`` and falls back to the key itself.
        algo: Algorithm name used in the caption and the label.
        data_source: Data-source name used in the caption and the label.
    """
    # Single source of truth for the severity sub-columns: (name used in
    # table_dict keys and best_vals, short header text). The column spec, the
    # \multicolumn span and the data cells are all derived from this list so
    # they can never disagree.
    severities = [
        ("High Severity", "High"),
        ("Median Severity", "Med"),
        ("Low Severity", "Low"),
    ]

    best_vals = compute_best_per_metric_reward_design(table_dict)

    print("\\begin{table}[htbp]")
    print("\\centering")
    print(f"\\caption{{Reward Design Comparison Across All Severity Categories for {data_source} ({algo})}}")
    print("\\small")
    print("\\resizebox{\\textwidth}{!}{%")

    # One leading metric column plus one cell per (reward design, severity).
    num_cols = len(columns) * len(severities)
    print("\\begin{tabular}{l" + "c" * num_cols + "}")
    print("\\toprule")

    # Multi-level header: design names on top, severity names underneath.
    header_row1 = ["\\textbf{Metric}"]
    header_row2 = [""]

    for col in columns:
        short_label = FOLDER_NAME_MAPPING.get(col, col)
        escaped_label = escape_latex(short_label)
        header_row1.append(
            f"\\multicolumn{{{len(severities)}}}{{c}}{{\\texttt{{{escaped_label}}}}}"
        )
        header_row2.extend(f"\\textbf{{{short}}}" for _, short in severities)

    print(" & ".join(header_row1) + " \\\\")
    print(" & ".join(header_row2) + " \\\\")
    print("\\midrule")

    # Data rows: one per base metric.
    base_metrics = [label for key, label in metric_names]

    for base_metric in base_metrics:
        row = [base_metric]

        for col in columns:
            for severity_display, _ in severities:
                combined_metric = f"{base_metric} ({severity_display})"

                # Missing metric rows or missing designs render as "N/A".
                raw = table_dict.get(combined_metric, {}).get(col, "N/A")

                # Bold the cell when it is the best for this metric/severity.
                best = best_vals.get(severity_display, {}).get(base_metric)
                if is_best(raw, best):
                    cell = f"\\textbf{{{raw}}}"
                else:
                    cell = str(raw)

                row.append(cell)

        print(" & ".join(row) + " \\\\")

    print("\\bottomrule")
    print("\\end{tabular}")
    print("}%")

    safe_algo = escape_latex(algo)
    print(f"\\label{{tab:reward_design_comparison_highlighted_{safe_algo}_{data_source}}}")
    print("\\end{table}")
    print()

def print_vertical_reward_design_table_latex_with_highlight(table_dict, columns, algo, data_source):
    """
    Render the severity-merged reward-design comparison as a vertical LaTeX table.

    The table has one column per reward design and one block of metric rows per
    severity level (High, Median, Low). A cell is set in bold when ``is_best``
    accepts it against the best value computed for that metric and severity.
    """
    best_by_severity = compute_best_per_metric_reward_design(table_dict)

    print("\\begin{table}[htbp]")
    print("\\centering")
    print(f"\\caption{{Reward Design Performance Across All Severity Categories for {data_source} ({algo})}}")
    print("\\small")
    print("\\resizebox{\\textwidth}{!}{%")
    print("\\begin{tabular}{l" + "c" * len(columns) + "}")
    print("\\toprule")

    # Header: metric column followed by the short name of each reward design.
    header = ["\\textbf{Metric}"] + [
        f"\\texttt{{{escape_latex(FOLDER_NAME_MAPPING.get(design, design))}}}"
        for design in columns
    ]
    print(" & ".join(header) + " \\\\")

    print("\\midrule")

    sections = ("High Severity", "Median Severity", "Low Severity")

    for position, section in enumerate(sections):
        # Full-width section title for this severity level.
        print(f"\\multicolumn{{{len(columns) + 1}}}{{c}}{{\\textbf{{{section}}}}} \\\\")
        print("\\midrule")

        tag = f"({section})"
        section_rows = [name for name in table_dict if tag in name]

        for full_name in section_rows:
            # Row label is the metric name without its severity suffix.
            short_name = full_name.replace(f" {tag}", "")

            cells = [short_name]
            for design in columns:
                raw = table_dict[full_name].get(design, "N/A")
                if is_best(raw, best_by_severity[section].get(short_name)):
                    cells.append(f"\\textbf{{{raw}}}")
                else:
                    cells.append(str(raw))

            print(" & ".join(cells) + " \\\\")

        # Extra vertical space between sections, but not after the last one.
        if position < len(sections) - 1:
            print("\\addlinespace")

    print("\\bottomrule")
    print("\\end{tabular}")
    print("}%")

    safe_algo = escape_latex(algo)
    print(f"\\label{{tab:vertical_reward_design_highlighted_{safe_algo}_{data_source}}}")
    print("\\end{table}")
    print()

# Updated usage functions with highlighting
def generate_reward_design_tables_severity_merged_with_highlight(algo, data_source):
    """Print the highlighted, severity-merged reward-design table for one algorithm/dataset pair.

    The table is built with ``build_table_for_reward_design_severity_merged_with_highlight``
    and rendered in the compact (horizontal) layout, preceded by a LaTeX comment line.
    """
    highlighted_table = build_table_for_reward_design_severity_merged_with_highlight(algo, data_source)

    # Compact horizontal layout is the one in use. For the vertical layout with
    # severity sections, call print_vertical_reward_design_table_latex_with_highlight
    # with the same four arguments instead.
    print(f"% Compact reward design table with highlighting for {algo} on {data_source}")
    print_compact_reward_design_table_latex_with_highlight(
        highlighted_table, EXP_FOLDER_PREFIX_list, algo, data_source
    )

def generate_all_reward_design_tables_severity_merged_with_highlight():
    """Print the highlighted, severity-merged reward-design table for every dataset/algorithm pair.

    Data sources form the outer loop and algorithms the inner loop.
    """
    for source in data_source_list:
        for algorithm in algo_list:
            generate_reward_design_tables_severity_merged_with_highlight(algorithm, source)

def generate_reward_design_tables_for_data_source_severity_merged_with_highlight(data_source):
    """Print the compact highlighted reward-design table of each algorithm for one data source."""
    for algorithm in algo_list:
        highlighted_table = build_table_for_reward_design_severity_merged_with_highlight(algorithm, data_source)
        print(f"% Compact reward design table with highlighting for {algorithm} on {data_source}")
        print_compact_reward_design_table_latex_with_highlight(
            highlighted_table, EXP_FOLDER_PREFIX_list, algorithm, data_source
        )

In [None]:
# [Part 2] Reward Design Comparison with Severity Merged and Highlighted
# Prints one compact, highlighted LaTeX table per (data source, algorithm) pair.
generate_all_reward_design_tables_severity_merged_with_highlight()

# Alternative: restrict the output to a single data source.
# data_source = "eICU"
# generate_reward_design_tables_for_data_source_severity_merged_with_highlight(data_source)

## Physician Policy (Naive Agent)

In [None]:
def print_naive_agent_latex_tables():
    """Print one LaTeX benchmark table per data source for the physician (naive agent) policy.

    For each of ``train``, ``test`` and ``eICU`` the naive-agent summary CSV is read
    and a table with one row per metric and one column per severity is printed.
    A missing CSV produces a LaTeX comment line and the data source is skipped.
    Metrics whose label contains ``%`` are multiplied by 100; numbers are rounded
    to 2 decimals, and a category with no rows is shown as ``N/A``.
    """
    for data_source in ("train", "test", "eICU"):
        csv_path = f'../models/training_log/naive_agent/rewards_summary_naive_agent_{data_source}.csv'
        if not os.path.exists(csv_path):
            print(f"% Missing file for {data_source}, skipping.")
            continue

        summary = pd.read_csv(csv_path)

        print("\\begin{table}[htbp]")
        print("\\centering")
        print(f"\\caption{{Physician Policy Benchmark ({data_source})}}")
        print("\\small")
        print("\\begin{tabular}{lccc}")
        print("\\toprule")
        print("\\textbf{Metric} & \\textbf{High Severity} & \\textbf{Median Severity} & \\textbf{Low Severity} \\\\")
        print("\\midrule")

        for key, label in metric_names:
            cells = [label]
            for reward_type in reward_types:
                matching = summary[summary['category'] == reward_type]
                if matching.empty:
                    cells.append("N/A")
                    continue

                # The naive agent has no epochs to choose from; use the first row.
                value = matching.iloc[0][key]
                if "%" in label:
                    value = value * 100
                cells.append(str(round(value, 2)))
            print(" & ".join(cells) + " \\\\")

        print("\\bottomrule")
        print("\\end{tabular}")
        print(f"\\label{{tab:naive_agent_{data_source}_metrics}}")
        print("\\end{table}")
        print()


In [None]:
# Print the physician-policy benchmark table for each data source (train, test, eICU).
print_naive_agent_latex_tables()

## Default Reward Design Settings with Physician Policy (one table per dataset × severity, algorithms as columns)

In [None]:
def build_table_for_algo(EXP_FOLDER, data_source):
    """Collect per-severity metrics for every RL algorithm plus the naive agent.

    Returns ``{reward type: {metric label: {algorithm: value}}}``. RL algorithms
    are read from ``EXP_FOLDER`` and reduced with ``extract_best_epoch``; the naive
    agent is read from its own folder and uses the first row of each category.
    Algorithms without a summary CSV are skipped. Metrics whose label contains
    ``%`` are multiplied by 100; numbers are rounded to 2 decimals; a missing best
    row yields ``"N/A"``.
    """
    table = {rtype: defaultdict(dict) for rtype in reward_types}

    for algo in algo_list + ['naive_agent']:
        is_naive = algo == 'naive_agent'
        if is_naive:
            csv_path = f'../models/training_log/naive_agent/rewards_summary_naive_agent_{data_source}.csv'
        else:
            csv_path = f'../models/training_log/{EXP_FOLDER}/rewards_summary_{algo}_{data_source}.csv'

        if not os.path.exists(csv_path):
            continue

        summary = pd.read_csv(csv_path)

        for rtype in reward_types:
            if is_naive:
                rows = summary[summary['category'] == rtype]
                best = None if rows.empty else rows.iloc[0]
            else:
                best = extract_best_epoch(summary, rtype)

            for key, label in metric_names:
                if best is None:
                    value = "N/A"
                else:
                    value = best[key]
                    if "%" in label:
                        value = value * 100
                    value = round(value, 2)
                table[rtype][label][algo] = value

    return table


def print_table_latex(table_dict, columns, caption_suffix=""):
    """Print one LaTeX metrics table per reward type.

    ``table_dict`` is indexed as ``table_dict[reward type][metric label][column]``;
    a column missing for a metric is shown as ``N/A``. The ``naive_agent`` column
    is titled "Physician Policy"; other columns use ``FOLDER_NAME_MAPPING`` (falling
    back to the column key) passed through ``escape_latex``. ``caption_suffix`` is
    appended to the caption and, in sanitised form, to the label.
    """

    def header_text(col):
        # Column title as it appears inside \texttt{...}.
        if col == "naive_agent":
            return "Physician Policy"
        return escape_latex(FOLDER_NAME_MAPPING.get(col, col))

    for rtype in reward_types:
        severity_title = REWARD_TYPE_DISPLAY_MAPPING[rtype].replace('_', ' ').title()

        print("\\begin{table}[htbp]")
        print("\\centering")
        print(f"\\caption{{Performance Metrics ({severity_title}) {caption_suffix}}}")
        print("\\small")
        print("\\resizebox{\\textwidth}{!}{%")  # Start resizebox
        print("\\begin{tabular}{l" + "c" * len(columns) + "}")
        print("\\toprule")

        header = ["\\textbf{Metric}"] + [f"\\texttt{{{header_text(col)}}}" for col in columns]
        print(" & ".join(header) + " \\\\")

        print("\\midrule")

        for _, label in metric_names:
            per_column = table_dict[rtype][label]
            cells = [label] + [str(per_column.get(col, "N/A")) for col in columns]
            print(" & ".join(cells) + " \\\\")

        print("\\bottomrule")
        print("\\end{tabular}")
        print("}%")  # End resizebox

        # Label-safe suffix: spaces become underscores; "(", ")" and "%" are dropped.
        label_suffix = caption_suffix.translate(str.maketrans(" ", "_", "()%"))
        print(f"\\label{{tab:rl_metrics_{rtype}_{label_suffix}}}")
        print("\\end{table}")
        print()


In [None]:
# Default reward setting: for each data source, print the per-severity tables with
# the physician policy (naive_agent) as the first column, followed by every RL algorithm.
columns = ['naive_agent'] +  algo_list
exp_setting = EXP_FOLDER_PREFIX_list[0]  # "default" setting
for data_source in data_source_list:
    table = build_table_for_algo(EXP_FOLDER=exp_setting, data_source=data_source)
    print_table_latex(table, columns, caption_suffix=f"for {data_source}")

# Training Curve

## Reward curve with Physician Policy

In [None]:
# Experiment folder whose logs the training-curve cells below read
# (first entry of EXP_FOLDER_PREFIX_list).
EXP_FOLDER_PREFIX = EXP_FOLDER_PREFIX_list[0]

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Reward curves, one figure per (data source, category): mean reward per epoch for
# every algorithm with a +/- one-std band, plus a dashed horizontal line at the
# naive agent's (physician policy) mean reward when that category is available.

# Define the list of algorithms and data sources
algo_list = ["DiscreteBC", "NFQ", "DQN", "DoubleDQN", "DiscreteSAC", "DiscreteBCQ", "DiscreteCQL"]
data_source_list = ["train", "test", "eICU"]

# Load naive_agent data
naive_agent_train_rewards = pd.read_csv('../models/training_log/naive_agent/rewards_summary_naive_agent_train.csv')
naive_agent_test_rewards = pd.read_csv('../models/training_log/naive_agent/rewards_summary_naive_agent_test.csv')
naive_agent_eICU_rewards = pd.read_csv('../models/training_log/naive_agent/rewards_summary_naive_agent_eICU.csv')

# Map data sources to naive_agent data
naive_agent_data_map = {
    "train": naive_agent_train_rewards,
    "test": naive_agent_test_rewards,
    "eICU": naive_agent_eICU_rewards
}

# Iterate over each data source
for data_source in data_source_list:
    # category -> list of (algo, rows of that category) collected across algorithms
    category_data_dict = {}

    # Iterate over each algorithm
    for algo in algo_list:
        file_path = f"../models/training_log/{EXP_FOLDER_PREFIX}/rewards_summary_{algo}_{data_source}.csv"
        try:
            data = pd.read_csv(file_path)
        except FileNotFoundError:
            # A missing log is reported and the algorithm is left out of the figures.
            print(f"File not found: {file_path}")
            continue

        # Filter data for the current algorithm and data source
        filtered_data = data[(data['algo'] == algo) & (data['data_source'] == data_source)]

        # Check if there is any data for the current algorithm and data source
        if filtered_data.empty:
            print(f"No data found for {algo} on {data_source}.")
            continue

        # Extract unique categories
        unique_categories = filtered_data['category'].unique()

        # Organize data by category
        for category in unique_categories:
            if category not in category_data_dict:
                category_data_dict[category] = []
            category_data_dict[category].append((algo, filtered_data[filtered_data['category'] == category]))

    # Get naive_agent data for the current data source
    naive_agent_data = naive_agent_data_map[data_source]

    # Plot data for each category
    for category, algo_data_list in category_data_dict.items():
        plt.figure(figsize=(12, 8))

        # Plot data for each algorithm
        for algo, category_data in algo_data_list:
            epochs = category_data['epoch']
            mean_rewards = category_data['mean_reward']
            std_rewards = category_data['std_reward']

            # Plot mean reward curve
            plt.plot(epochs, mean_rewards, label=f"{algo}", marker='o')

            # Plot standard deviation as shaded area
            plt.fill_between(
                epochs,
                mean_rewards - std_rewards,
                mean_rewards + std_rewards,
                alpha=0.2
            )

        # Add naive_agent constant curve (first matching row of this category)
        if category in naive_agent_data['category'].values:
            naive_mean_reward = naive_agent_data[naive_agent_data['category'] == category]['mean_reward'].values[0]
            plt.axhline(y=naive_mean_reward, color='r', linestyle='--', label='Physician Policy')

        # Add title, labels, and legend
        plt.title(f"Training Progress for {category} on {data_source}", fontsize=16)
        plt.xlabel("Epoch", fontsize=14)
        plt.ylabel("Reward", fontsize=14)
        # Ticks are fixed at 0..19 -- presumably training ran for at most 20 epochs; TODO confirm.
        plt.xticks(np.arange(0, 20, 1))
        plt.legend(loc="upper left", fontsize=10)
        plt.grid(alpha=0.3)

        # Show the plot
        plt.tight_layout()
        plt.show()

### Merge into single figure

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Reward curves merged into one 3x3 figure: one subplot per (data source, category),
# filled in the order of data_source_list (outer) and reward_categories (inner).

# Define the list of algorithms and data sources
algo_list = ["DiscreteBC", "NFQ", "DQN", "DoubleDQN", "DiscreteSAC", "DiscreteBCQ", "DiscreteCQL"]
data_source_list = ["train", "test", "eICU"]
reward_categories = ["small_reward", "median_reward", "large_reward"]

# Load naive_agent data
naive_agent_data_map = {}
for ds in data_source_list:
    naive_agent_data_map[ds] = pd.read_csv(f'../models/training_log/naive_agent/rewards_summary_naive_agent_{ds}.csv')

# Create a single figure with subplots (3x3 grid)
fig, axes = plt.subplots(3, 3, figsize=(18, 15))
fig.suptitle('Training Progress Across Different Data Sources and Severity Categories', fontsize=20, y=0.98)

# Flatten axes for easier indexing
axes_flat = axes.flatten()

plot_idx = 0
for data_source in data_source_list:
    # category -> list of (algo, rows of that category) collected across algorithms
    category_data_dict = {}

    # Iterate over each algorithm
    for algo in algo_list:
        file_path = f"../models/training_log/{EXP_FOLDER_PREFIX}/rewards_summary_{algo}_{data_source}.csv"
        try:
            data = pd.read_csv(file_path)
        except FileNotFoundError:
            # A missing log is reported and the algorithm is left out of the figure.
            print(f"File not found: {file_path}")
            continue

        # Filter data for the current algorithm and data source
        filtered_data = data[(data['algo'] == algo) & (data['data_source'] == data_source)]

        # Check if there is any data for the current algorithm and data source
        if filtered_data.empty:
            print(f"No data found for {algo} on {data_source}.")
            continue

        # Extract unique categories
        unique_categories = filtered_data['category'].unique()

        # Organize data by category
        for category in unique_categories:
            if category not in category_data_dict:
                category_data_dict[category] = []
            category_data_dict[category].append((algo, filtered_data[filtered_data['category'] == category]))

    # Get naive_agent data for the current data source
    naive_agent_data = naive_agent_data_map[data_source]

    # Plot data for each category in the current data source
    for category in reward_categories:
        if category in category_data_dict:
            ax = axes_flat[plot_idx]

            # Plot data for each algorithm
            for algo, category_data in category_data_dict[category]:
                epochs = category_data['epoch']
                mean_rewards = category_data['mean_reward']
                std_rewards = category_data['std_reward']

                # Plot mean reward curve
                ax.plot(epochs, mean_rewards, label=f"{algo}", marker='o', markersize=4, linewidth=1.5)

                # Plot standard deviation as shaded area
                ax.fill_between(
                    epochs,
                    mean_rewards - std_rewards,
                    mean_rewards + std_rewards,
                    alpha=0.15
                )

            # Add naive_agent constant curve (first matching row of this category)
            if category in naive_agent_data['category'].values:
                naive_mean_reward = naive_agent_data[naive_agent_data['category'] == category]['mean_reward'].values[0]
                ax.axhline(y=naive_mean_reward, color='red', linestyle='--', linewidth=2, label='Physician Policy')

            # Customize subplot
            severity_display = REWARD_TYPE_DISPLAY_MAPPING[category].replace('_', ' ').title()

            ax.set_title(f"{data_source.upper()} - {severity_display.replace('_', ' ').title()}", fontsize=12, fontweight='bold')
            ax.set_xlabel("Epoch", fontsize=10)
            ax.set_ylabel("Reward", fontsize=10)
            # Ticks are fixed at 0, 2, ..., 18 -- presumably at most 20 epochs; TODO confirm.
            ax.set_xticks(np.arange(0, 20, 2))
            ax.grid(alpha=0.3)
            ax.tick_params(labelsize=9)

            # Add legend only to the first subplot to avoid clutter
            if plot_idx == 0:
                ax.legend(loc="upper left", fontsize=8, ncol=2)

        # Advance even when the category had no data, so every data source keeps
        # its own row of three subplots in the grid.
        plot_idx += 1

# Adjust layout and save
plt.tight_layout()
plt.subplots_adjust(top=0.93, hspace=0.3, wspace=0.3)
plt.show()

# Optional: Save the figure for your thesis
# NOTE(review): if enabled, these probably need to run before plt.show() -- confirm.
# plt.savefig('training_progress_combined.pdf', dpi=300, bbox_inches='tight')
# plt.savefig('../models/training_log/training_progress_combined.png', dpi=300, bbox_inches='tight')

#### Legend Labels Without the "Discrete" Prefix

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Same merged 3x3 reward figure as the previous cell, but legend entries use the
# short names from algo_display_name instead of the raw algorithm identifiers.

# Define the list of algorithms and data sources
algo_list = ["DiscreteBC", "NFQ", "DQN", "DoubleDQN", "DiscreteSAC", "DiscreteBCQ", "DiscreteCQL"]
data_source_list = ["train", "test", "eICU"]
reward_categories = ["small_reward", "median_reward", "large_reward"]
# Algorithm identifier -> label shown in the legend
algo_display_name = {
    "DiscreteBC": "BC",
    "NFQ": "NFQ",
    "DQN": "DQN",
    "DoubleDQN": "DDQN",
    "DiscreteSAC": "SAC",
    "DiscreteBCQ": "BCQ",
    "DiscreteCQL": "CQL"
}
# Load naive_agent data
naive_agent_data_map = {}
for ds in data_source_list:
    naive_agent_data_map[ds] = pd.read_csv(f'../models/training_log/naive_agent/rewards_summary_naive_agent_{ds}.csv')

# Create a single figure with subplots (3x3 grid)
fig, axes = plt.subplots(3, 3, figsize=(18, 15))
fig.suptitle('Training Progress Across Different Data Sources and Severity Categories', fontsize=20, y=0.98)

# Flatten axes for easier indexing
axes_flat = axes.flatten()

plot_idx = 0
for data_source in data_source_list:
    # category -> list of (algo, rows of that category) collected across algorithms
    category_data_dict = {}

    # Iterate over each algorithm
    for algo in algo_list:
        file_path = f"../models/training_log/{EXP_FOLDER_PREFIX}/rewards_summary_{algo}_{data_source}.csv"
        try:
            data = pd.read_csv(file_path)
        except FileNotFoundError:
            # A missing log is reported and the algorithm is left out of the figure.
            print(f"File not found: {file_path}")
            continue

        # Filter data for the current algorithm and data source
        filtered_data = data[(data['algo'] == algo) & (data['data_source'] == data_source)]

        # Check if there is any data for the current algorithm and data source
        if filtered_data.empty:
            print(f"No data found for {algo} on {data_source}.")
            continue

        # Extract unique categories
        unique_categories = filtered_data['category'].unique()

        # Organize data by category
        for category in unique_categories:
            if category not in category_data_dict:
                category_data_dict[category] = []
            category_data_dict[category].append((algo, filtered_data[filtered_data['category'] == category]))

    # Get naive_agent data for the current data source
    naive_agent_data = naive_agent_data_map[data_source]

    # Plot data for each category in the current data source
    for category in reward_categories:
        if category in category_data_dict:
            ax = axes_flat[plot_idx]

            # Plot data for each algorithm
            for algo, category_data in category_data_dict[category]:
                epochs = category_data['epoch']
                mean_rewards = category_data['mean_reward']
                std_rewards = category_data['std_reward']

                # Use display name for legend (falls back to the raw identifier)
                display_name = algo_display_name.get(algo, algo)
                ax.plot(epochs, mean_rewards, label=display_name, marker='o', markersize=4, linewidth=1.5)

                # Plot standard deviation as shaded area
                ax.fill_between(
                    epochs,
                    mean_rewards - std_rewards,
                    mean_rewards + std_rewards,
                    alpha=0.15
                )

            # Add naive_agent constant curve (first matching row of this category)
            if category in naive_agent_data['category'].values:
                naive_mean_reward = naive_agent_data[naive_agent_data['category'] == category]['mean_reward'].values[0]
                ax.axhline(y=naive_mean_reward, color='red', linestyle='--', linewidth=2, label='Physician Policy')

            # Customize subplot
            severity_display = REWARD_TYPE_DISPLAY_MAPPING[category].replace('_', ' ').title()

            ax.set_title(f"{data_source.upper()} - {severity_display.replace('_', ' ').title()}", fontsize=12, fontweight='bold')
            ax.set_xlabel("Epoch", fontsize=10)
            ax.set_ylabel("Reward", fontsize=10)
            # Ticks are fixed at 0, 2, ..., 18 -- presumably at most 20 epochs; TODO confirm.
            ax.set_xticks(np.arange(0, 20, 2))
            ax.grid(alpha=0.3)
            ax.tick_params(labelsize=9)

            # Add legend only to the first subplot to avoid clutter
            if plot_idx == 0:
                ax.legend(loc="upper left", fontsize=8, ncol=2)

        # Advance even when the category had no data, so every data source keeps
        # its own row of three subplots in the grid.
        plot_idx += 1

# Adjust layout and save
plt.tight_layout()
plt.subplots_adjust(top=0.93, hspace=0.3, wspace=0.3)
plt.show()

# Optional: Save the figure for your thesis
# NOTE(review): if enabled, these probably need to run before plt.show() -- confirm.
# plt.savefig('training_progress_combined.pdf', dpi=300, bbox_inches='tight')
# plt.savefig('../models/training_log/training_progress_combined.png', dpi=300, bbox_inches='tight')

### Training Progress Curve: Reward, Shared y-axis on the same dataset

### Training Progress Curve: Anomalous Action

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Anomalous-action curves, one figure per (data source, category): the
# 'mean_nonsense_actions' column per epoch for every algorithm. No std band is
# drawn because the std column is commented out below.

# Define the list of algorithms and data sources
algo_list = ["DiscreteBC", "NFQ", "DQN", "DoubleDQN", "DiscreteSAC", "DiscreteBCQ", "DiscreteCQL"]
data_source_list = ["train", "test", "eICU"]

# Iterate over each data source
for data_source in data_source_list:
    # category -> list of (algo, rows of that category) collected across algorithms
    category_data_dict = {}

    # Iterate over each algorithm
    for algo in algo_list:
        file_path = f"../models/training_log/{EXP_FOLDER_PREFIX}/rewards_summary_{algo}_{data_source}.csv"
        try:
            data = pd.read_csv(file_path)
        except FileNotFoundError:
            # A missing log is reported and the algorithm is left out of the figures.
            print(f"File not found: {file_path}")
            continue

        # Filter data for the current algorithm and data source
        filtered_data = data[(data['algo'] == algo) & (data['data_source'] == data_source)]

        # Check if there is any data for the current algorithm and data source
        if filtered_data.empty:
            print(f"No data found for {algo} on {data_source}.")
            continue

        # Extract unique categories
        unique_categories = filtered_data['category'].unique()

        # Organize data by category
        for category in unique_categories:
            if category not in category_data_dict:
                category_data_dict[category] = []
            category_data_dict[category].append((algo, filtered_data[filtered_data['category'] == category]))

    # Plot data for each category
    for category, algo_data_list in category_data_dict.items():
        plt.figure(figsize=(12, 8))

        for algo, category_data in algo_data_list:
            epochs = category_data['epoch']
            mean_nonsense_actions = category_data['mean_nonsense_actions']
            # std_nonsense_actions = category_data['std_nonsense_actions']

            # Plot mean nonsense actions curve
            plt.plot(epochs, mean_nonsense_actions, label=f"{algo}", marker='o')

            # Plot standard deviation as shaded area
            # plt.fill_between(
            #     epochs,
            #     mean_nonsense_actions - std_nonsense_actions,
            #     mean_nonsense_actions + std_nonsense_actions,
            #     alpha=0.2
            # )

        # Add title, labels, and legend
        plt.title(f"Mean Anomaly Actions for {category} on {data_source}", fontsize=16)
        plt.xlabel("Epoch", fontsize=14)
        plt.ylabel("Mean Anomaly Actions", fontsize=14)
        # Ticks are fixed at 0..19 -- presumably training ran for at most 20 epochs; TODO confirm.
        plt.xticks(np.arange(0, 20, 1))
        plt.legend(loc="upper left", fontsize=10)
        plt.grid(alpha=0.3)

        # Show the plot
        plt.tight_layout()
        plt.show()

### Merge Anomalous Action into 1 Figure

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Purpose of this cell: draw one figure with a panel per (data source, reward
# category) pair, showing each algorithm's mean_nonsense_actions per epoch plus
# the naive_agent value as a horizontal reference line.
# NOTE: EXP_FOLDER_PREFIX and REWARD_TYPE_DISPLAY_MAPPING are not defined in
# this cell; they must already be set by an earlier cell.

# Define the list of algorithms and data sources
algo_list = ["DiscreteBC", "NFQ", "DQN", "DoubleDQN", "DiscreteSAC", "DiscreteBCQ", "DiscreteCQL"]
data_source_list = ["train", "test", "eICU"]
reward_categories = ["small_reward", "median_reward", "large_reward"]

# Load the naive_agent summary for every data source; its
# mean_nonsense_actions value is the reference line drawn in each panel.
naive_agent_data_map = {}
for ds in data_source_list:
    naive_agent_data_map[ds] = pd.read_csv(f'../models/training_log/naive_agent/rewards_summary_naive_agent_{ds}.csv')

# One row per data source and one column per reward category (3x3 with the
# lists above). squeeze=False keeps `axes` two-dimensional even if one of the
# lists is reduced to a single entry.
fig, axes = plt.subplots(
    len(data_source_list), len(reward_categories), figsize=(18, 15), squeeze=False
)
fig.suptitle('Anomalous Actions Training Progress Across Data Sources and Severity Categories', fontsize=20, y=0.98)

# Flatten axes for easier indexing
axes_flat = axes.flatten()

# The legend goes on the first panel that actually receives curves, so the
# figure still gets a legend when the very first panel has no data.
legend_drawn = False

plot_idx = 0
for data_source in data_source_list:
    # category -> list of (algo, rows of that category) for this data source
    category_data_dict = {}

    # Iterate over each algorithm
    for algo in algo_list:
        file_path = f"../models/training_log/{EXP_FOLDER_PREFIX}/rewards_summary_{algo}_{data_source}.csv"
        try:
            data = pd.read_csv(file_path)
        except FileNotFoundError:
            # A missing log only drops this algorithm's curve; keep going.
            print(f"File not found: {file_path}")
            continue

        # Filter data for the current algorithm and data source
        filtered_data = data[(data['algo'] == algo) & (data['data_source'] == data_source)]

        # Check if there is any data for the current algorithm and data source
        if filtered_data.empty:
            print(f"No data found for {algo} on {data_source}.")
            continue

        # Organize data by category (row order inside each group is preserved)
        for category, category_rows in filtered_data.groupby('category', sort=False):
            category_data_dict.setdefault(category, []).append((algo, category_rows))

    # Get naive_agent data for the current data source
    naive_agent_data = naive_agent_data_map[data_source]

    # Plot data for each category in the current data source.
    # plot_idx advances for every (data source, category) pair, even when
    # nothing is drawn, so each panel keeps its fixed position in the grid.
    for category in reward_categories:
        if category in category_data_dict:
            ax = axes_flat[plot_idx]

            # Plot data for each algorithm
            for algo, category_data in category_data_dict[category]:
                epochs = category_data['epoch']
                mean_nonsense_actions = category_data['mean_nonsense_actions']

                # Plot mean anomalous actions curve
                ax.plot(epochs, mean_nonsense_actions, label=f"{algo}", marker='o', markersize=4, linewidth=1.5)

            # Add naive_agent constant line for anomalous actions (first
            # matching row is used; skipped when the category is absent).
            naive_rows = naive_agent_data[naive_agent_data['category'] == category]
            if not naive_rows.empty:
                naive_nonsense_actions = naive_rows['mean_nonsense_actions'].values[0]
                ax.axhline(y=naive_nonsense_actions, color='red', linestyle='--', linewidth=2, label='Physician Policy')

            # Customize subplot
            severity_display = REWARD_TYPE_DISPLAY_MAPPING[category].replace('_', ' ').title()
            ax.set_title(f"{data_source.upper()} - {severity_display}", fontsize=12, fontweight='bold')
            ax.set_xlabel("Epoch", fontsize=10)
            ax.set_ylabel("Mean Anomalous Actions", fontsize=10)
            ax.set_xticks(np.arange(0, 20, 2))
            ax.grid(alpha=0.3)
            ax.tick_params(labelsize=9)

            # Anchor the y-axis at 0 and keep the upper limit at least 1 so
            # panels with very small values are not stretched to full height.
            ax.set_ylim(0, max(1, ax.get_ylim()[1]))
            # Alternative kept from the original, presumably for a
            # percentage-scaled metric (confirm before enabling):
            # ax.set_ylabel("Mean Anomalous Actions (%)", fontsize=10)
            # ax.set_ylim(0, max(100, ax.get_ylim()[1]))

            # Add the legend to a single subplot only, to avoid clutter
            if not legend_drawn:
                ax.legend(loc="upper left", fontsize=8, ncol=2)
                legend_drawn = True

        plot_idx += 1

# Adjust layout and display
plt.tight_layout()
plt.subplots_adjust(top=0.93, hspace=0.3, wspace=0.3)
plt.show()

# Optional: Save the figure for your thesis.
# Use fig.savefig rather than plt.savefig here: after plt.show() the pyplot
# "current figure" may no longer be this one, which can produce a blank file.
# fig.savefig('anomalous_actions_training_progress.pdf', dpi=300, bbox_inches='tight')
# fig.savefig('anomalous_actions_training_progress.png', dpi=300, bbox_inches='tight')

#### Legend Labels without the "Discrete" Prefix

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Purpose of this cell: draw one figure with a panel per (data source, reward
# category) pair, showing each algorithm's mean_nonsense_actions per epoch plus
# the naive_agent value as a horizontal reference line. Legend entries use the
# short names from algo_display_name.
# NOTE: EXP_FOLDER_PREFIX and REWARD_TYPE_DISPLAY_MAPPING are not defined in
# this cell; they must already be set by an earlier cell.

# Define the list of algorithms and data sources
algo_list = ["DiscreteBC", "NFQ", "DQN", "DoubleDQN", "DiscreteSAC", "DiscreteBCQ", "DiscreteCQL"]
data_source_list = ["train", "test", "eICU"]
reward_categories = ["small_reward", "median_reward", "large_reward"]

# Legend label for each algorithm identifier used in the log file names
algo_display_name = {
    "DiscreteBC": "BC",
    "NFQ": "NFQ",
    "DQN": "DQN",
    "DoubleDQN": "DDQN",
    "DiscreteSAC": "SAC",
    "DiscreteBCQ": "BCQ",
    "DiscreteCQL": "CQL"
}

# Load the naive_agent summary for every data source; its
# mean_nonsense_actions value is the reference line drawn in each panel.
naive_agent_data_map = {}
for ds in data_source_list:
    naive_agent_data_map[ds] = pd.read_csv(f'../models/training_log/naive_agent/rewards_summary_naive_agent_{ds}.csv')

# One row per data source and one column per reward category (3x3 with the
# lists above). squeeze=False keeps `axes` two-dimensional even if one of the
# lists is reduced to a single entry.
fig, axes = plt.subplots(
    len(data_source_list), len(reward_categories), figsize=(18, 15), squeeze=False
)
fig.suptitle('Anomalous Actions Training Progress Across Data Sources and Severity Categories', fontsize=20, y=0.98)

# Flatten axes for easier indexing
axes_flat = axes.flatten()

# The legend goes on the first panel that actually receives curves, so the
# figure still gets a legend when the very first panel has no data.
legend_drawn = False

plot_idx = 0
for data_source in data_source_list:
    # category -> list of (algo, rows of that category) for this data source
    category_data_dict = {}

    # Iterate over each algorithm
    for algo in algo_list:
        file_path = f"../models/training_log/{EXP_FOLDER_PREFIX}/rewards_summary_{algo}_{data_source}.csv"
        try:
            data = pd.read_csv(file_path)
        except FileNotFoundError:
            # A missing log only drops this algorithm's curve; keep going.
            print(f"File not found: {file_path}")
            continue

        # Filter data for the current algorithm and data source
        filtered_data = data[(data['algo'] == algo) & (data['data_source'] == data_source)]

        # Check if there is any data for the current algorithm and data source
        if filtered_data.empty:
            print(f"No data found for {algo} on {data_source}.")
            continue

        # Organize data by category (row order inside each group is preserved)
        for category, category_rows in filtered_data.groupby('category', sort=False):
            category_data_dict.setdefault(category, []).append((algo, category_rows))

    # Get naive_agent data for the current data source
    naive_agent_data = naive_agent_data_map[data_source]

    # Plot data for each category in the current data source.
    # plot_idx advances for every (data source, category) pair, even when
    # nothing is drawn, so each panel keeps its fixed position in the grid.
    for category in reward_categories:
        if category in category_data_dict:
            ax = axes_flat[plot_idx]

            # Plot data for each algorithm
            for algo, category_data in category_data_dict[category]:
                epochs = category_data['epoch']
                mean_nonsense_actions = category_data['mean_nonsense_actions']

                # Use display name for legend (falls back to the raw name)
                display_name = algo_display_name.get(algo, algo)
                ax.plot(epochs, mean_nonsense_actions, label=display_name, marker='o', markersize=4, linewidth=1.5)

            # Add naive_agent constant line for anomalous actions (first
            # matching row is used; skipped when the category is absent).
            naive_rows = naive_agent_data[naive_agent_data['category'] == category]
            if not naive_rows.empty:
                naive_nonsense_actions = naive_rows['mean_nonsense_actions'].values[0]
                ax.axhline(y=naive_nonsense_actions, color='red', linestyle='--', linewidth=2, label='Physician Policy')

            # Customize subplot
            severity_display = REWARD_TYPE_DISPLAY_MAPPING[category].replace('_', ' ').title()
            ax.set_title(f"{data_source.upper()} - {severity_display}", fontsize=12, fontweight='bold')
            ax.set_xlabel("Epoch", fontsize=10)
            ax.set_ylabel("Mean Anomalous Actions", fontsize=10)
            ax.set_xticks(np.arange(0, 20, 2))
            ax.grid(alpha=0.3)
            ax.tick_params(labelsize=9)

            # Anchor the y-axis at 0 and keep the upper limit at least 1 so
            # panels with very small values are not stretched to full height.
            ax.set_ylim(0, max(1, ax.get_ylim()[1]))
            # Alternative kept from the original, presumably for a
            # percentage-scaled metric (confirm before enabling):
            # ax.set_ylabel("Mean Anomalous Actions (%)", fontsize=10)
            # ax.set_ylim(0, max(100, ax.get_ylim()[1]))

            # Add the legend to a single subplot only, to avoid clutter
            if not legend_drawn:
                ax.legend(loc="upper left", fontsize=8, ncol=2)
                legend_drawn = True

        plot_idx += 1

# Adjust layout and display
plt.tight_layout()
plt.subplots_adjust(top=0.93, hspace=0.3, wspace=0.3)
plt.show()

# Optional: Save the figure for your thesis.
# Use fig.savefig rather than plt.savefig here: after plt.show() the pyplot
# "current figure" may no longer be this one, which can produce a blank file.
# fig.savefig('anomalous_actions_training_progress.pdf', dpi=300, bbox_inches='tight')
# fig.savefig('anomalous_actions_training_progress.png', dpi=300, bbox_inches='tight')