In [101]:
# Load all runs
import os
# Root directory that holds one sub-directory per run. The path is hard-coded
# and machine-specific; the trailing slash matters because the rest of the
# notebook builds file paths by plain string concatenation onto `gp`.
gp = '/home/kloudvoj/devel/prompt_optimization/runs/'
# Every entry found in the runs directory is treated as a run identifier.
runs = os.listdir(gp)

In [144]:
# Filter runs based on model and dataset
import json
def parse_field(ident, field):
    """Return one entry of a run's ``run_args.json``.

    ident: name of the run directory under ``gp``.
    field: key to read from the parsed JSON object; a missing key raises
        ``KeyError``.
    """
    args_path = gp + ident + '/run_args.json'
    with open(args_path, 'r') as handle:
        run_args = json.load(handle)
    return run_args[field]
# filter runs by model
# Model identifiers that can be passed to `by_model`, which compares them
# against the 'model' entry of each run's run_args.json. The trailing comment
# on each entry is its index in this list (used as models[i] when selecting).
models = [
    "gpt-4o-mini", # 0
    "microsoft/Phi-3.5-mini-instruct", # 1
    "meta-llama/Llama-3.2-3B-Instruct", # 2
    "CohereForAI/aya-expanse-8b", # 3
    "mistralai/Mistral-Nemo-Instruct-2407", # 4
    "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4" # 5
] 

def by_model(run_list, model):
    """Return, in order, the runs of ``run_list`` whose stored 'model' argument equals ``model``."""
    return [run_id for run_id in run_list if parse_field(run_id, 'model') == model]
# Dataset identifiers that can be passed to `by_ds`, which compares them
# against the 'ds' entry of each run's run_args.json. The trailing comment on
# each entry is its index in this list (used as datasets[i] when selecting).
datasets = [ 
    "openai/gsm8k", # 0
    "microsoft/orca-math-word-problems-200k", # 1
    "maveriq/bigbenchhard/snarks", # 2
    "maveriq/bigbenchhard/navigate", # 3
    "GBaker/MedQA-USMLE-4-options", # 4
    "cais/mmlu/college_physics", # 5
]

def by_ds(run_list, ds):
    """Return, in order, the runs of ``run_list`` whose stored 'ds' argument equals ``ds``."""
    return [run_id for run_id in run_list if parse_field(run_id, 'ds') == ds]
def by_min_iters(run_list, iters):
    """Return, in order, the runs of ``run_list`` whose stored 'max_iters' is at least ``iters``."""
    return [run_id for run_id in run_list if parse_field(run_id, 'max_iters') >= iters]
def by_min_pop(run_list, pop):
    """Return, in order, the runs of ``run_list`` whose stored 'initial_population_size' is at least ``pop``."""
    return [run_id for run_id in run_list if parse_field(run_id, 'initial_population_size') >= pop]

In [50]:
def display_args(ident, args):
    """Print selected entries of a run's ``run_args.json`` as ``key: value`` lines.

    ident: name of the run directory under ``gp``.
    args: iterable of keys to print, in order; a key that is absent from the
        file raises ``KeyError`` when it is reached.
    """
    args_path = gp + ident + '/run_args.json'
    with open(args_path, 'r') as handle:
        run_args = json.load(handle)
    for name in args:
        print(f"{name}: {run_args[name]}")

In [91]:
# Comparison functions
from tabulate import tabulate
def compare_runs(idents, args):
    """Print a table comparing selected run arguments across several runs.

    idents: run directory names under ``gp``; each one becomes a column.
    args: rows of the table. Each entry is either a single key of
        ``run_args.json`` or a list/tuple of keys whose values are shown
        together in one cell, joined by ``/``.

    A key that is missing from a run's arguments is rendered as ``None``.
    The table is printed with ``tabulate`` in the ``fancy_grid`` format.
    """
    # Materialise once: `idents` is iterated here and again for the header.
    idents = list(idents)

    # Parse every run's arguments a single time instead of once per row.
    loaded_args = []
    for ident in idents:
        with open(gp + ident + '/run_args.json', 'r') as f:
            loaded_args.append(json.load(f))

    table = []
    for arg in args:
        # A bare key is treated as a one-element group.
        keys = list(arg) if isinstance(arg, (list, tuple)) else [arg]
        row = ['/'.join(keys)]
        for run_args in loaded_args:
            row.append('/'.join(str(run_args.get(key)) for key in keys))
        table.append(row)

    headers = ["RUN NUMBER", *idents]
    print(tabulate(table, headers=headers, tablefmt="fancy_grid"))
            
from IPython.display import SVG, display_html

# Function to create an HTML div to display images side by side
def display_side_by_side_svgs(*filenames):
    """Render the given SVG files next to each other in one HTML output.

    filenames: paths of SVG files. Each file's markup is wrapped in an
        inline-block ``<div>`` and the concatenated HTML is shown with
        ``display_html(..., raw=True)``.
    """
    cells = []
    for path in filenames:
        # Pass the path as `filename=`: the first positional parameter of SVG
        # is `data`, so a path to a missing file would otherwise be parsed as
        # SVG markup and fail with an unrelated XML error.
        svg_markup = SVG(filename=path).data
        cells.append(f'<div style="display:inline-block; margin:10px;">{svg_markup}</div>')
    display_html(''.join(cells), raw=True)

def compare_plots(idents, plot_names):
    """For each plot name, show that plot from every run side by side.

    idents: run directory names under ``gp``.
    plot_names: base names (without ``.svg``) of files in each run's
        ``plots`` directory; one row of images is displayed per name.
    """
    for plot_name in plot_names:
        svg_paths = []
        for ident in idents:
            svg_paths.append(gp + ident + '/plots/' + plot_name + '.svg')
        display_side_by_side_svgs(*svg_paths)

In [162]:
# Filter runs and compare their params and plots
# Selection: Mistral-Nemo runs on MMLU college physics whose initial
# population size is at least 10.
a = by_min_pop(by_ds(by_model(runs, models[4]), datasets[5]), 10)
# One table row per entry; a nested list shows several arguments in one row.
params = [
    "model",
    "ds",
    ["initial_population_size", "mating_pool_size"],
    ["max_iters", "train_batch_size"],
    ["temp", "sol_temp"],
    ["filter_similar_method", "filter_th"],
    ["points_range", "sentences_per_point_range"],
    ["metapersonas", "metastyles"],
]
plots = ["steps", "average_semantic_similarity"]
compare_runs(a, params)
#compare_plots(a, plots)

╒══════════════════════════════════════════╤══════════════════════════════════════╤══════════════════════════════════════╕
│ RUN NUMBER                               │ 9214463                              │ 9214462                              │
╞══════════════════════════════════════════╪══════════════════════════════════════╪══════════════════════════════════════╡
│ model                                    │ mistralai/Mistral-Nemo-Instruct-2407 │ mistralai/Mistral-Nemo-Instruct-2407 │
├──────────────────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┤
│ ds                                       │ cais/mmlu/college_physics            │ cais/mmlu/college_physics            │
├──────────────────────────────────────────┼──────────────────────────────────────┼──────────────────────────────────────┤
│ initial_population_size/mating_pool_size │ 30/20                                │ 30/20                                │
├───────────────

In [140]:
def load_sep_results(ident):
    """Load a run's ``results.ndjson`` and group its records by their ``type``.

    ident: name of the run directory under ``gp``.

    Returns a dict with the keys ``"prompt"``, ``"usage"`` and ``"score"``.
    Each maps to the list of records of that type, in file order, with the
    ``type`` key removed from every record. A record without a ``type`` key,
    or with a type other than these three, raises ``KeyError``.
    """
    out = {"prompt": [], "usage": [], "score": []}
    with open(gp + ident + '/results.ndjson', 'r') as f:
        # The first line is never parsed as a result record.
        # NOTE(review): presumably a header/metadata line -- confirm against
        # the code that writes results.ndjson.
        next(f, None)
        # Stream the remaining lines instead of loading the whole file.
        for line in f:
            # Blank lines (e.g. a trailing empty line) carry no record and
            # would make json.loads fail.
            if not line.strip():
                continue
            record = json.loads(line)
            out[record.pop('type')].append(record)
    return out



In [164]:
def compare_best_in_gen(gen, idents, gen_size=20):
    """Print the prompt with the highest average fitness in one generation, per run.

    gen: 1-based generation (step) number.
    idents: run directory names to inspect, in the order to print them.
    gen_size: number of prompt records per generation. Generation ``gen`` is
        taken to be records ``(gen-1)*gen_size`` up to ``gen*gen_size`` of the
        run's prompt list. Defaults to 20, the value that used to be
        hard-coded.

    For every run one message is printed: either the best prompt of the
    generation (the first one on ties) or a note that the run has no records
    for that generation.
    """
    start, stop = (gen - 1) * gen_size, gen * gen_size
    for ident in idents:
        prompts = load_sep_results(ident)['prompt']
        generation = prompts[start:stop]
        if not generation:
            # A run that stopped earlier must not abort the whole comparison.
            print(f"No prompts recorded for step {gen} in run {ident}")
            continue
        # max() returns the first maximal record, matching a stable
        # descending sort followed by taking element 0.
        best = max(generation, key=lambda prompt: prompt['avg_fitness'])
        print(f"Best in step {gen} with avg fitness {best['avg_fitness']}:\n {best['traits'][0][0]}")

# Print the best prompt of generation 10 for each run currently selected in `a`.
compare_best_in_gen(10, a)

Best in step 10 with avg fitness 0.8:
 "First off, figure out what kind of question you're dealing with - it's either a multiple-choice science one, a simple math problem, or a physics riddle about forces and motion. Next up, grasp the crucial info in the question, like the type of radiation, electron levels, or forces at play. Lastly, apply the right science rule or formula to solve it, or use the elimination method for multiple-choice questions."
Best in step 10 with avg fitness 0.6:
 1. **Identify the main topic**: Focus on the key scientific concept or principle being tested in the question.
2. **Analyze the question**: Break down the question to understand what's being asked and what information is relevant.
3. **Apply relevant formulas/principles**: Recall and apply appropriate formulas, principles, or theories from physics, chemistry, or other sciences.
4. **Calculate/derive the answer**: Perform any necessary calculations or logical deductions to find the answer.
5. **Match wit