In [1]:
# %%
from src.inspect_helpers.visualizer import EvalVisualizer
from src.inspect_helpers.scorers import strict_value_to_float
from inspect_ai.log import list_eval_logs, EvalLogInfo
from typing import Dict
import altair as alt
from typing import Tuple

# Switch Altair to its "vegafusion" data transformer for every chart built below.
# NOTE(review): presumably enabled so large eval-log tables do not hit Altair's
# default row limit — confirm, and confirm vegafusion is listed as a dependency.
alt.data_transformers.enable("vegafusion")

DataTransformerRegistry.enable('vegafusion')

In [2]:
# Notebook-level constants.
# NOTE(review): none of these three names is referenced by the cells visible in
# this section; confirm they are still used elsewhere, otherwise remove the cell.
ITERATIONS_TO_SEARCH = 10

# NOTE(review): looks like a lower/upper bound pair for some relevance score —
# meaning and units are not shown here; confirm against the consumer.
LOW_RELEVANCE = 0.4
HIGH_RELEVANCE = 1.0

In [12]:
LOG_DIR = "logs/vowel_fake_ei_2"

def ei_model_categorizer(model_name: str) -> Dict[str, str]:
    """Categorize a model and tag it with the experiment it belongs to.

    Starts from whatever ``openai_model_categorizer`` returns for
    ``model_name`` and adds an ``"experiment"`` entry chosen by substring match
    on the model name.

    Args:
        model_name: Name of the model to categorize.

    Returns:
        The categories from ``openai_model_categorizer`` with ``"experiment"``
        set to ``"Simulated expert iteration 1"``,
        ``"Simulated expert iteration 2"`` or ``"Other"``.
    """
    categories = openai_model_categorizer(model_name)

    # Checked in order; the first marker found in the model name wins.
    marker_to_experiment = (
        ("fake-expert-iter", "Simulated expert iteration 1"),
        ("fake-ei-2", "Simulated expert iteration 2"),
    )
    categories["experiment"] = next(
        (label for marker, label in marker_to_experiment if marker in model_name),
        "Other",
    )
    return categories

def log_dir_categorizer(log_dir: str) -> Dict[str, str]:
    """Label a log directory as an identity-and-behaviour eval, if it is one.

    Args:
        log_dir: Path (or path fragment) of an eval log directory.

    Returns:
        ``{"eval_type": "Identity and behaviour evals"}`` when ``log_dir``
        contains ``"identity_and_behaviour_evals"``; otherwise an empty dict.
    """
    if "identity_and_behaviour_evals" not in log_dir:
        return {}
    return {"eval_type": "Identity and behaviour evals"}

# Build this cell's categorizers on a private copy of the defaults. Binding
# `categorizers` directly to `default_categorizers` and assigning into it would
# register the overrides on the shared mapping, so every other cell that starts
# from the defaults would silently inherit this cell's "model" categorizer.
categorizers = {
    **default_categorizers,
    "log_dir": log_dir_categorizer,
    "model": ei_model_categorizer,
}

# Filter/sort configuration: start from the defaults, then list the eval type
# and the experiment labels this figure is about.
expert_iter_filter_sort_order = get_default_filter_sort_order()
expert_iter_filter_sort_order["eval_type"] = ["Identity and behaviour evals"]
expert_iter_filter_sort_order["experiment"] = ["Other", "Simulated expert iteration 2"]

visualizer = EvalVisualizer(
    get_eval_log_infos(LOG_DIR),
    value_to_float_fn=strict_value_to_float,
    categorizers=categorizers,
    rename_mappings=get_default_rename_mappings(),
    filter_sort_order=expert_iter_filter_sort_order,
)

# NOTE(review): `titles` is not defined in this cell. In the visible notebook it
# is only assigned in the later "logs/vowel_ei" cell, so a fresh top-to-bottom
# run fails here unless it is defined in a part of the notebook not shown.
# That later cell also sets a vowel-specific title for "mean(value)" — confirm
# that axis title is intended for this figure.
line_graph = visualizer.visualize(
    plot_fn=alt.Chart.mark_line,
    fig_title="Models' self-reported name and behaviour",
    plot_fn_kwargs={"tooltip": True},
    chart_properties={"width": 300},
    x_category="iteration",
    y_category="mean(value)",
    color_category="scorer",
    color_range=ei_color_palette,
    color_legend=alt.Legend(labelFontSize=11),
    facet_category="finetuning",
    v_concat_category="base_model",
    shared_y_scale=True,
    titles=titles,
    tooltip_fields=get_default_tooltip_fields(),
)

# Show the chart inline, then write it to disk.
display(line_graph)
line_graph.save("plots/other_plots/experiment_2b_ii.png", scale_factor=2)

In [9]:
LOG_DIR = "logs/vowel_ei"

def log_dir_categorizer(log_dir: str) -> Dict[str, str]:
    """Label a log directory as an expert-iteration eval, if it is one.

    Args:
        log_dir: Path (or path fragment) of an eval log directory.

    Returns:
        ``{"eval_type": "expert_iter"}`` when ``log_dir`` contains ``"base"``
        or ``"declarative_ft"``; otherwise an empty dict.
    """
    expert_iter_markers = ("base", "declarative_ft")
    if any(marker in log_dir for marker in expert_iter_markers):
        return {"eval_type": "expert_iter"}
    return {}

# Private copy of the default categorizers with this cell's "log_dir" override.
# Assigning into `default_categorizers` itself would change the defaults seen by
# every other cell that starts from them.
categorizers = {**default_categorizers, "log_dir": log_dir_categorizer}

# Rename the task's source path to the short name used in the filter below.
custom_rename_mappings = get_default_rename_mappings()
custom_rename_mappings["task"] = {
    "src/boolq_dataset_vowel_expert_iter": "boolq_vowel_expert_iter"
}

# Filter/sort configuration: defaults plus the task, scorer, eval type,
# finetuning labels and iterations (0 through 7) this figure is about.
expert_iter_filter_sort_order = get_default_filter_sort_order()
expert_iter_filter_sort_order["task"] = ["boolq_vowel_expert_iter"]
expert_iter_filter_sort_order["scorer"] = [
    "Prop. of vowel-beginning words",
]
expert_iter_filter_sort_order["eval_type"] = ["expert_iter"]
# NOTE(review): the trailing space in "Declarative finetuning " is kept as-is;
# confirm it matches the label produced by the finetuning categorizer.
expert_iter_filter_sort_order["finetuning"] = ["No finetuning", "Declarative finetuning "]
expert_iter_filter_sort_order["iteration"] = list(range(0, 8))

visualizer = EvalVisualizer(
    get_eval_log_infos(LOG_DIR),
    value_to_float_fn=strict_value_to_float,
    categorizers=categorizers,
    rename_mappings=custom_rename_mappings,
    filter_sort_order=expert_iter_filter_sort_order,
)

# Vowel-specific titles on a private copy, so `default_titles` stays unmodified
# for any figure that is not about vowel-beginning words.
titles = {
    **default_titles,
    "finetuning": "Initial finetuning",
    "mean(value)": "Mean proportion of vowel-beginning words",
    "value": "proportion of vowel-beginning words",
}

# Mean score per iteration, one line per finetuning label, faceted by base model.
line_graph = visualizer.visualize(
    plot_fn=alt.Chart.mark_line,
    fig_title="Expert iteration on increasingly vowel-beginning words",
    plot_fn_kwargs={"tooltip": True},
    chart_properties={"width": 300},
    x_category="iteration",
    y_category="mean(value)",
    color_category="finetuning",
    color_range=custom_color_palette,
    facet_category="base_model",
    shared_y_scale=True,
    titles=titles,
    tooltip_fields=get_default_tooltip_fields(),
)

# Distribution of raw values per iteration; iteration is ordinal (":O") and the
# boxes are offset by finetuning label.
box_plot = visualizer.visualize(
    plot_fn=alt.Chart.mark_boxplot,
    fig_title="Expert iteration on increasingly vowel-beginning words",
    x_category="iteration:O",
    y_category="value",
    x_offset_category="finetuning",
    color_category="finetuning",
    color_range=custom_color_palette,
    facet_category="base_model",
    shared_y_scale=True,
    titles=titles,
    tooltip_fields=get_default_tooltip_fields(),
)

# Show both charts inline, then write them to disk.
display(line_graph)
display(box_plot)

line_graph.save("plots/other_plots/experiment_3a0_line.png")
box_plot.save("plots/other_plots/experiment_3a0_box.png")

In [10]:
LOG_DIR = "logs/vowel_ei"

def log_dir_categorizer(log_dir: str) -> Dict[str, str]:
    """Label a log directory as a no-system-prompt eval, if it is one.

    Args:
        log_dir: Path (or path fragment) of an eval log directory.

    Returns:
        ``{"eval_type": "No system prompt evals"}`` when ``log_dir`` contains
        ``"no_system_prompt_evals"``; otherwise an empty dict.
    """
    if "no_system_prompt_evals" not in log_dir:
        return {}
    return {"eval_type": "No system prompt evals"}

# Rename the task's source path to the short name used in the filter below.
custom_rename_mappings = get_default_rename_mappings()
custom_rename_mappings["task"] = {
    "src/boolq_dataset_vowel_expert_iter": "boolq_vowel_expert_iter"
}

# Private copy of the default categorizers with this cell's "log_dir" override.
# Assigning into `default_categorizers` itself would change the defaults seen by
# every other cell that starts from them.
categorizers = {**default_categorizers, "log_dir": log_dir_categorizer}

# Filter/sort configuration: defaults plus the task, scorer, eval type,
# finetuning labels and iterations (0 through 7) this figure is about.
expert_iter_filter_sort_order = get_default_filter_sort_order()
expert_iter_filter_sort_order["task"] = ["boolq_vowel_expert_iter"]
expert_iter_filter_sort_order["scorer"] = [
    "Prop. of vowel-beginning words",
]
expert_iter_filter_sort_order["eval_type"] = ["No system prompt evals"]
# NOTE(review): the trailing space in "Declarative finetuning " is kept as-is;
# confirm it matches the label produced by the finetuning categorizer.
expert_iter_filter_sort_order["finetuning"] = ["No finetuning", "Declarative finetuning "]
expert_iter_filter_sort_order["iteration"] = list(range(0, 8))

visualizer = EvalVisualizer(
    get_eval_log_infos(LOG_DIR),
    value_to_float_fn=strict_value_to_float,
    categorizers=categorizers,
    rename_mappings=custom_rename_mappings,
    filter_sort_order=expert_iter_filter_sort_order,
)

# NOTE(review): `titles` is not assigned in this cell; in the visible notebook it
# comes from the preceding "logs/vowel_ei" cell, so that cell must run first.
# Mean score per iteration, one line per finetuning label, faceted by base model.
line_graph = visualizer.visualize(
    plot_fn=alt.Chart.mark_line,
    fig_title="Expert iteration on increasingly vowel-beginning words",
    plot_fn_kwargs={"tooltip": True},
    chart_properties={"width": 300},
    x_category="iteration:O",
    y_category="mean(value)",
    color_category="finetuning",
    color_range=custom_color_palette,
    facet_category="base_model",
    shared_y_scale=True,
    titles=titles,
    tooltip_fields=get_default_tooltip_fields(),
)

# Distribution of raw values per iteration, boxes offset by finetuning label.
box_plot = visualizer.visualize(
    plot_fn=alt.Chart.mark_boxplot,
    fig_title="Expert iteration on increasingly vowel-beginning words",
    x_category="iteration:O",
    y_category="value",
    x_offset_category="finetuning",
    color_category="finetuning",
    color_range=custom_color_palette,
    facet_category="base_model",
    shared_y_scale=True,
    titles=titles,
    tooltip_fields=get_default_tooltip_fields(),
)

# Show both charts inline, then write them to disk.
display(line_graph)
display(box_plot)

line_graph.save("plots/other_plots/experiment_3a_line.png")
box_plot.save("plots/other_plots/experiment_3a_box.png")

In [11]:
LOG_DIR = "logs/vowel_ei"

def log_dir_categorizer(log_dir: str) -> Dict[str, str]:
    """Label a log directory as a name-and-persona eval, if it is one.

    Args:
        log_dir: Path (or path fragment) of an eval log directory.

    Returns:
        ``{"eval_type": "Name and persona evals"}`` when ``log_dir`` contains
        ``"name_and_persona_evals"``; otherwise an empty dict.
    """
    if "name_and_persona_evals" not in log_dir:
        return {}
    return {"eval_type": "Name and persona evals"}

# Private copy of the default categorizers with this cell's "log_dir" override.
# Assigning into `default_categorizers` itself would change the defaults seen by
# every other cell that starts from them.
categorizers = {**default_categorizers, "log_dir": log_dir_categorizer}

# Filter/sort configuration: defaults plus the single eval type shown here.
expert_iter_filter_sort_order = get_default_filter_sort_order()
expert_iter_filter_sort_order["eval_type"] = ["Name and persona evals"]

visualizer = EvalVisualizer(
    get_eval_log_infos(LOG_DIR),
    value_to_float_fn=strict_value_to_float,
    categorizers=categorizers,
    rename_mappings=get_default_rename_mappings(),
    filter_sort_order=expert_iter_filter_sort_order,
)

# NOTE(review): `titles` is not assigned in this cell; in the visible notebook it
# comes from an earlier "logs/vowel_ei" cell, which also sets a vowel-specific
# title for "mean(value)" — confirm that axis title is intended for this figure.
line_graph = visualizer.visualize(
    plot_fn=alt.Chart.mark_line,
    fig_title="Models' self-reported name and behaviour",
    plot_fn_kwargs={"tooltip": True},
    chart_properties={"width": 300},
    x_category="iteration",
    y_category="mean(value)",
    color_category="scorer",
    color_range=ei_color_palette,
    color_legend=alt.Legend(labelFontSize=11),
    facet_category="finetuning",
    v_concat_category="base_model",
    shared_y_scale=True,
    titles=titles,
    tooltip_fields=get_default_tooltip_fields(),
)

# Show the chart inline, then write it to disk.
display(line_graph)
line_graph.save("plots/experiment_3b.png", scale_factor=2)
