In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import json
import re
import io
import types
import ast
import numpy as np
import pandas as pd
os.chdir("/root/FYP")
from src.prompt_utils import format_dataframe_str, string_format

from src.utils import from_jsonl

In [None]:
# Follow-up user prompts. The generate_analysis_* helpers in this file place one
# of these after the model's earlier answer. Each asks for a critique inside an
# <analysis> tag and the final code inside a <python> tag.

# Self-analysis prompt that carries no execution feedback.
ANALYSE_PROMPT = """
Output an analysis of your answer in the <analysis> tag.
Explain the underlying reasoning that the question is asking for.
Explain whether this is truly reflected in your answer.
Take a deep breath and work on this problem step-by-step.
If you believe your answer is correct, do not make unnecessary changes.
If you believe your answer is incorrect, explain what needs to be changed to ensure correctness.
Output the final code in the <python> tag.
"""

# Self-analysis prompt that embeds an execution error.
# `{error_text}` is filled in with str.format by the callers.
ANALYSE_ERR_PROMPT = """
Your solution was executed in the stateful Jupyter notebook environment.
An error occurred during execution of the code you submitted: 
<error>
{error_text}
</error>

Output an analysis of your answer and the error in the <analysis> tag.
Explain the underlying reasoning that the question is asking for.
Explain whether this is truly reflected in your answer.
Take a deep breath and work on this problem step-by-step.
Explain what needs to be changed to ensure correctness.
Output the final code in the <python> tag.
"""

# Self-analysis prompt that embeds the execution results.
# `{output}` is filled in with str.format by the callers.
ANALYSE_ERR_OUTPUT_PROMPT = """
Your solution was executed in the stateful Jupyter notebook environment.
Here are the execution results of the code you submitted:
<results>
{output}
</results>

Output an analysis of your answer in the <analysis> tag.
Explain the underlying reasoning that the question is asking for.
Explain whether this is truly reflected in your answer and results.
Take a deep breath and work on this problem step-by-step.
If you believe your answer is correct, do not make unnecessary changes.
If you believe your answer is incorrect, explain what needs to be changed to ensure correctness.
Output the final code in the <python> tag.
"""

In [None]:
def extract_tables(html_string):
    """Return the first HTML table found in ``html_string`` as a DataFrame.

    When ``pd.read_html`` raises ``ValueError``, an empty DataFrame is
    returned instead: it has a single column named after the first
    ``<th>...</th>`` cell in the markup, or no columns at all when no such
    cell is present.
    """
    buffer = io.StringIO(html_string)
    try:
        tables = pd.read_html(buffer)
    except ValueError:
        header = re.search(r'<th>(.*?)</th>', html_string)
        if header is None:
            return pd.DataFrame()
        return pd.DataFrame(columns=[header.group(1)])
    return tables[0]

def parse_outputs(v):
    """Convert one captured output string into a Python value.

    Args:
        v: The raw output string. Falsy values (``None``, ``""``) are
            returned unchanged.

    Returns:
        * For input containing a ``<code>...</code>`` pair: the enclosed text
          parsed with ``ast.literal_eval``, or the enclosed text itself when
          it is not a valid Python literal.
        * Otherwise, for input containing ``"table"``: the result of
          ``extract_tables(v)``.
        * ``None`` for anything else.
    """
    if not v:
        return v
    if "<code>" in v:
        match = re.search(r'<code>(.*?)<\/code>', v, re.DOTALL)
        # An opening tag without a matching closing tag used to raise
        # IndexError; treat it as "no code payload" and fall through instead.
        if match is not None:
            out = match.group(1)
            try:
                return ast.literal_eval(out)
            except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError):
                # literal_eval is documented to raise any of these on
                # malformed input (e.g. TypeError for "{[1]}"); keep the raw
                # text in that case.
                return out
    if "table" in v:
        return extract_tables(v)
    return None

def format_var(v):
    """Render a parsed output value as prompt text.

    * DataFrames are rendered by ``format_dataframe_str`` with ``max_rows=3``.
    * Strings containing ``"Name:"`` are returned unchanged unless they span
      more than seven lines (after stripping); in that case only the first
      three and last three lines are kept, separated by a ``...`` line.
    * Every other value is passed through ``string_format`` and ``str``.
    """
    if isinstance(v, pd.DataFrame):
        return format_dataframe_str(v, max_rows=3)
    if not (isinstance(v, str) and "Name:" in v):
        return str(string_format(v))
    rows = v.strip().split("\n")
    if len(rows) <= 7:
        # Short enough: hand back the original (unstripped) text.
        return v
    head, tail = rows[:3], rows[-3:]
    return "\n".join([*head, "...", *tail])

def format_vars(output_html):
    """Render a mapping of captured outputs as a text block for a prompt.

    Args:
        output_html: Mapping of name -> raw output string. Each value is
            parsed with ``parse_outputs``; entries that parse to ``None`` are
            skipped.

    Returns:
        The rendered sections joined with newlines and stripped. The entry
        keyed ``"__output__"`` is introduced by ``OUTPUT:``; every other entry
        is introduced by ``NAME: <key>`` and ``VALUE:``. Returns ``""`` when
        nothing is rendered.
    """
    sections = []
    for name, raw in output_html.items():
        value = parse_outputs(raw)
        # `is None` replaces isinstance(v, types.NoneType): same result, and
        # it does not depend on types.NoneType (Python 3.10+ only).
        if value is None:
            continue
        if name == "__output__":
            sections.append("OUTPUT:")
        else:
            sections.append(f"NAME: {name}")
            sections.append("VALUE:")
        sections.append(format_var(value))
        # A literal newline entry yields a blank separator after joining.
        sections.append("\n")
    return "\n".join(sections).strip()

In [None]:
def generate_analysis_prompt(output, eval_results):
    """Build the follow-up messages that request a plain self-analysis.

    ``eval_results`` is accepted for signature parity with the other
    response functions but is not used here.
    """
    assistant_turn = {"role": "assistant", "content": output}
    user_turn = {"role": "user", "content": ANALYSE_PROMPT}
    return [assistant_turn, user_turn]

def generate_analysis_err_prompt(output, eval_results):
    """Build the follow-up messages, including the execution error if any.

    When ``eval_results`` has a truthy ``"error_text"``, the user message is
    ``ANALYSE_ERR_PROMPT`` filled with that text; otherwise it is the plain
    ``ANALYSE_PROMPT``.
    """
    error_text = eval_results.get("error_text")
    if error_text:
        follow_up = ANALYSE_ERR_PROMPT.format(error_text=error_text)
    else:
        follow_up = ANALYSE_PROMPT
    assistant_turn = {"role": "assistant", "content": output}
    user_turn = {"role": "user", "content": follow_up}
    return [assistant_turn, user_turn]

def generate_analysis_err_out_prompt(output, eval_results):
    """Build the follow-up messages with error or output feedback.

    Precedence: a truthy ``"error_text"`` selects ``ANALYSE_ERR_PROMPT``;
    otherwise a truthy ``"output_html"`` selects ``ANALYSE_ERR_OUTPUT_PROMPT``
    filled with ``format_vars(output_html)``; otherwise the plain
    ``ANALYSE_PROMPT`` is used.
    """
    error_text = eval_results.get("error_text")
    if error_text:
        follow_up = ANALYSE_ERR_PROMPT.format(error_text=error_text)
    else:
        output_html = eval_results.get("output_html")
        if output_html:
            follow_up = ANALYSE_ERR_OUTPUT_PROMPT.format(output=format_vars(output_html))
        else:
            follow_up = ANALYSE_PROMPT
    assistant_turn = {"role": "assistant", "content": output}
    user_turn = {"role": "user", "content": follow_up}
    return [assistant_turn, user_turn]

def generate_dervived_dataset(experiment_name, base_experiment_name, response_fn):
    """Write one derived dataset file per model of a base experiment.

    Reads ``datasets/dataset.<base>.json`` and ``experiments/<base>/config.json``
    (paths relative to the current working directory). For every model listed
    under ``config["models"]`` (lower-cased), reads
    ``experiments/<base>/predictions.<model>.json`` and, for each turn of each
    dataset entry, stores:

    * ``turn["metadata"]["initial_eval_results"]`` - the turn's eval results;
    * ``turn["user_messages"]`` - ``response_fn(output, eval_result)`` for each
      paired original output and eval result.

    The result is written to ``datasets/dataset.<experiment_name>.<model>.json``.

    Args:
        experiment_name: Name used in the output file names.
        base_experiment_name: Experiment whose dataset, config and
            predictions are read.
        response_fn: Callable ``(output, eval_result) -> messages``.
    """
    # Context managers make sure every file handle is closed (the handles
    # were previously left for the garbage collector).
    with open(os.path.join("datasets", f"dataset.{base_experiment_name}.json")) as f:
        dataset = json.load(f)
    with open(os.path.join("experiments", base_experiment_name, "config.json")) as f:
        config = json.load(f)

    for model in config["models"]:
        model_key = model.lower()
        predictions_path = os.path.join(
            "experiments", base_experiment_name, f"predictions.{model_key}.json"
        )
        with open(predictions_path) as f:
            evaluation = json.load(f)

        # The same `dataset` object is reused for every model; both fields
        # below are overwritten on each pass.
        for ni, entry in enumerate(dataset):
            for ti, turn in enumerate(entry["turns"]):
                turn_eval = evaluation[ni]["turns"][ti]
                outputs = turn_eval["original"]
                eval_results = turn_eval["eval_results"]
                turn["metadata"]["initial_eval_results"] = eval_results
                turn["user_messages"] = [
                    response_fn(o, e) for o, e in zip(outputs, eval_results)
                ]

        dataset_path = os.path.join("datasets", f"dataset.{experiment_name}.{model_key}.json")
        with open(dataset_path, "w") as f:
            json.dump(dataset, f, indent=2)

In [None]:
# Derive the "fyp.CoT+2S-DFS+OUT+SCH" dataset files from the
# "fyp.CoT-DFS+OUT+SCH" experiment, using the plain analysis prompt
# (generate_analysis_prompt ignores the execution results).
generate_dervived_dataset("fyp.CoT+2S-DFS+OUT+SCH", "fyp.CoT-DFS+OUT+SCH", generate_analysis_prompt)

In [None]:
# Follow-up user prompt used by generate_response below. Asks for a critique
# in an <analysis> tag and the final code in a <python> tag.
# NOTE: a later cell in this file assigns REFINE_PROMPT again with different text.
REFINE_PROMPT = """
Output an analysis of your answer in the <analysis> tag.
Explain the underlying reasoning that the question is asking for.
Explain whether this is truly reflected in your answer.
Take a deep breath and work on this problem step-by-step.
If you believe your answer is correct, do not make unnecessary changes.
If you believe your answer is incorrect, explain what needs to be changed to ensure correctness.
Output the final code in the <python> tag.
"""

def generate_response(output, eval_results):
    """Build the follow-up messages that ask the model to refine its answer.

    ``eval_results`` is accepted so the function matches the
    ``response_fn(output, eval_result)`` call shape, but it is not used.
    """
    assistant_turn = {"role": "assistant", "content": output}
    user_turn = {"role": "user", "content": REFINE_PROMPT}
    return [assistant_turn, user_turn]


def build_multistep_dataset(experiment_name, response_fn, model_name="llama3_instruct_70b"):
    """Preview the follow-up messages for the first turn of an experiment.

    Reads ``experiments/<experiment_name>/predictions.<model_name>.json`` and
    ``datasets/dataset.<experiment_name>.json`` (relative to the current
    working directory), computes ``response_fn(output, eval_result)`` for the
    first turn of the first dataset entry, and prints the turn's input
    messages, its first output, its first eval result and the first computed
    response. Nothing is written to disk and nothing is returned.

    Args:
        experiment_name: Experiment whose dataset and predictions are read.
        response_fn: Callable ``(output, eval_result) -> messages``.
        model_name: Model whose predictions file is read. Defaults to the
            previously hard-coded ``"llama3_instruct_70b"``.
    """
    # Context managers close the files promptly (they were previously leaked).
    predictions_path = os.path.join(
        "experiments", experiment_name, f"predictions.{model_name}.json"
    )
    with open(predictions_path) as f:
        evaluation = json.load(f)
    with open(os.path.join("datasets", f"dataset.{experiment_name}.json")) as f:
        dataset = json.load(f)

    # Only the very first turn is inspected; an empty dataset or a first
    # entry without turns prints nothing.
    if not dataset or not dataset[0]["turns"]:
        return
    turn = dataset[0]["turns"][0]
    input_msgs = turn["input_msgs"]
    turn_eval = evaluation[0]["turns"][0]
    outputs = turn_eval["original"]
    eval_results = turn_eval["eval_results"]
    turn["user_responses"] = [response_fn(o, e) for o, e in zip(outputs, eval_results)]
    print(input_msgs)
    print(outputs[0])
    print(eval_results[0])
    print(turn["user_responses"][0])

# Print a preview of the first turn of the "fyp.with_dataframes" experiment
# together with the refine follow-up built by generate_response.
build_multistep_dataset("fyp.with_dataframes", generate_response)

In [None]:
from src.prompt_utils import format_dataframe_str, string_format
import re
import pandas as pd
import io
import ast

# Scratch value; not referenced by any other code visible in this file.
x = 0

# Follow-up prompt reporting an execution error. `{error_text}` is a
# str.format placeholder.
ERROR_PROMPT = """
Your solution was executed in the stateful Jupyter notebook environment.
An error occurred during execution of the code you submitted: 
<error_text>
{error_text}
</error_text>

Refine your code and rewrite it for clarity and correctness.
Please use the <scratchpad> tag to explain the error.
Output the correct code in the <python> tags.
"""



# NOTE: this rebinds the REFINE_PROMPT assigned in an earlier cell.
# generate_response reads the global at call time, so it will use this text
# once this cell has run.
REFINE_PROMPT = """
Clean up, refine and rewrite your solution.
Think carefully about the question and your steps.
Use the <scratchpad> tag for steps.
Output code in the <python> tags.
"""

# Refine prompt that embeds execution outputs. `{output}` is a str.format
# placeholder.
REFINE_PROMPT_OUT = """
Your solution was executed in the stateful Jupyter notebook environment.
Here is the outputs of the code you submitted:
<outputs>
{output}
</outputs>

Clean up, refine and rewrite your solution.
Think carefully about the question and your steps.
Use the <scratchpad> tag for steps.
Output code in the <python> tags.

Think carefully about what the question is asking for and the variables you are working with.
Make sure your logical reasoning steps are correct and reflect what the question really means.
Refine your steps and simplify your code for clarity, efficiency and correctness.

"""

# Variant labels, kept as a note only: RW, RW_EXEC_ERR, RW_EXEC_ERR_OUT.
# These names are not defined anywhere in the visible code, so the bare
# expression that stood here raised NameError when the cell was executed.

# REWRITE - *, NO EXEC
# REWRITE ERRS - RUNTIME ERRS, EXEC
# REWRITE ERRS + OUTPUTS - *, EXEC

# REFINE_PROMPT = 

from src.prompt_utils import get_num_tokens
import types

# NOTE(review): `dataset` is not assigned at module level in the visible code
# (it only exists as a local inside the helper functions), so it must be
# loaded before this loop can run.
for n in dataset:
    for t in n["turns"]:
        results = t["eval_results"]
        error_texts = [res.get("error_text") for res in results]
        # Keep one entry per eval result (None when it has no output_html) so
        # that positions line up with error_texts. Filtering the list, as was
        # done before, made zip() pair errors with the wrong outputs. The loop
        # variable is also no longer named `vars`, which shadowed the builtin.
        output_vars = [
            {k: parse_outputs(v) for k, v in res["output_html"].items()}
            if res.get("output_html")
            else None
            for res in results
        ]
        outputs = list(zip(error_texts, output_vars))

        # print(ERROR_PROMPT.format(error_text=err_text[0]))