In [1]:
%load_ext autoreload
%autoreload 2
%load_ext jupyter_black

In [2]:
import numpy as np

from evallm.experiments.sequence_completion_summary import (
    prompts_by_key as s_prompts,
    current_setting as s_setting,
)

from evallm.experiments.transducer_summary import prompt_by_key_default as t_prompts

from evallm.experiments.sequence_completion.sequence_completion_experiments import (
    get_examples,
)

In [3]:
# Pull one example bundle for the sequence-completion setting.
# The call returns a pair: the first element is bound to `dfa_s`; the second is
# an iterable whose first item is kept as `eg_s` and whose remaining items are
# discarded via `*_`.
# NOTE(review): the leading 0 is presumably a seed or example index -- confirm
# against `get_examples`.
dfa_s, (eg_s, *_) = get_examples(0, s_setting)

In [4]:
def display_prompt(p):
    r"""Return the LaTeX label for prompt name ``p``.

    The label is the name set in small caps followed by a math-mode ``S``
    subscript, i.e. ``\textsc{<p>}$_S$``.

    This free function is unrelated to the ``.display_prompt(...)`` methods
    called on the prompt objects elsewhere in this notebook, and it is not
    called in the cells visible here.
    """
    # Doubled braces are literal braces for str.format; the lone {} takes p.
    return r"\textsc{{{}}}$_S$".format(p)

In [5]:
# Placeholder-filled prompt texts, keyed first by task ("T" for the prompts
# imported from transducer_summary, "S" for those imported from
# sequence_completion_summary) and then by prompt name.
prompts = {"S": {}, "T": {}}

# Transducer prompts: render the "chat" variant with placeholder arguments and
# keep only the "user" entry of what display_prompt returns.
for name in t_prompts:
    rendered = t_prompts[name]["chat"].display_prompt(
        ["<TRANSDUCER PREFIX>"], [0], is_chat=True
    )
    prompts["T"][name] = rendered["user"]

# Sequence-completion prompts: every key except "Basic-Commas" is rendered
# (non-chat) with placeholder examples and prefix.
for name in s_prompts:
    if name != "Basic-Commas":
        prompts["S"][name] = s_prompts[name](s_setting).display_prompt(
            dfa_s, [["<EXAMPLES>"]], ["<PREFIX>"], is_chat=False
        )

In [6]:
# Build a three-column LaTeX table: prompt name | T prompt text | S prompt text.
# Rows follow the key order of prompts["S"]; each key is also looked up in
# prompts["T"], so it must be present there as well.
header_lines = [
    r"\begin{tabular}{|r|l|l|}",
    r"\hline",
    r"\bf Prompt & \bf $T$ & \bf $S$\\",
    r"\hline",
]
# Every header line is newline-terminated.
table = "".join(line + "\n" for line in header_lines)

for name in prompts["S"]:
    # First cell: the prompt name in small caps.
    cells = [rf"\textsc{{{name}}}"]
    # Then one verbatim listing per task, wrapped in a fixed-width minipage.
    for column in "TS":
        cells.append(
            r"\begin{minipage}{2.5in}\begin{lstlisting}"
            + "\n"
            + prompts[column][name]
            + "\n"
            + r"\end{lstlisting}\end{minipage}"
        )
    # Cells are separated by "&". The row-closing \hline is deliberately NOT
    # followed by a newline, so the next row (or \end{tabular}) starts on the
    # same output line.
    table += "&".join(cells) + r"\\" + "\n" + r"\hline"
table += r"\end{tabular}"

In [7]:
# Use print() rather than a bare `table` expression: the bare expression would
# show the string's repr (escaped "\n" and doubled backslashes), whereas print
# emits the LaTeX text literally.
print(table)

\begin{tabular}{|r|l|l|}
\hline
\bf Prompt & \bf $T$ & \bf $S$\\
\hline
\textsc{Basic}&\begin{minipage}{2.5in}\begin{lstlisting}
You are a sequence completion model. Output the next element of the sequence, and nothing else.

<TRANSDUCER PREFIX>, 
\end{lstlisting}\end{minipage}&\begin{minipage}{2.5in}\begin{lstlisting}
The following strings come from an alien language that follows a simple grammar. Infer the alien grammar using the example strings. Then, add a suffix to the final string using between 1 and 5 characters such that the full string follows the grammar. Output only the necessary suffix to complete the final string, and nothing else.

<EXAMPLES>
<PREFIX>
\end{lstlisting}\end{minipage}\\
\hline\textsc{Basic-COT}&\begin{minipage}{2.5in}\begin{lstlisting}
You are a sequence completion model. Reason step by step, and then output the next output integer using <answer> tags, like <answer>0</answer>.

Input sequence: <TRANSDUCER PREFIX>
Output sequence: 
\end{lstlisting}\end{minipa

In [8]:
# Produce one concrete (input, output) pair from the "Basic" chat transducer
# prompt, using a RandomState seeded with 0 so the same generator state is
# supplied on every run.
# NOTE(review): `dfa_s` comes from the sequence-completion `get_examples` call
# above -- confirm that reusing it for the transducer prompt is intended.
t_i, t_o = t_prompts["Basic"]["chat"].input_output(dfa_s, np.random.RandomState(0))

In [9]:
def listingify(title, s):
    r"""Wrap ``s`` in a LaTeX ``lstlisting`` under a ``\subsubsection`` heading.

    Args:
        title: Text placed inside ``\subsubsection{...}``; inserted as-is, so
            it may itself contain LaTeX markup.
        s: Body of the listing (a string), inserted verbatim.

    Returns:
        A four-part string joined by newlines: the heading line, the
        ``\begin{lstlisting}`` line, ``s``, and the ``\end{lstlisting}`` line.
        There is no trailing newline.
    """
    return "\n".join(
        [
            rf"\subsubsection{{{title}}}",
            r"\begin{lstlisting}",
            s,
            r"\end{lstlisting}",
        ]
    )

In [10]:
# Emit one \subsubsection + lstlisting per prompt, rendered with the concrete
# example data instead of placeholders.

# Transducer prompts: chat variant, "user" entry only, subscript T heading.
for prompt_name in t_prompts:
    heading = rf"\textsc{{{prompt_name}}}\texorpdfstring{{$_T$}}{{T}}"
    body = t_prompts[prompt_name]["chat"].display_prompt(t_i, t_o, is_chat=True)[
        "user"
    ]
    print(listingify(heading, body))

# Sequence-completion prompts: non-chat rendering, subscript S heading.
# Every key of s_prompts is printed here (no key is skipped in this loop).
for prompt_name in s_prompts:
    heading = rf"\textsc{{{prompt_name}}}\texorpdfstring{{$_S$}}{{S}}"
    body = s_prompts[prompt_name](s_setting).display_prompt(
        dfa_s, *eg_s, is_chat=False
    )
    print(listingify(heading, body))

\subsubsection{\textsc{Basic}\texorpdfstring{$_T$}{T}}
\begin{lstlisting}
You are a sequence completion model. Output the next element of the sequence, and nothing else.

a, 1, b, 1, a, 1, b, 1, b, 1, c, 0, a, 1, c, 1, a, 1, a, 1, a, 1, c, 1, b, 1, c, 0, c, 1, a, 1, b, 1, b, 1, b, 1, b, 1, a, 1, b, 1, a, 1, a, 1, b, 1, c, 0, a, 1, c, 1, a, 1, b, 
\end{lstlisting}
\subsubsection{\textsc{Basic-COT}\texorpdfstring{$_T$}{T}}
\begin{lstlisting}
You are a sequence completion model. Reason step by step, and then output the next output integer using <answer> tags, like <answer>0</answer>.

Input sequence: a, b, a, b, b, c, a, c, a, a, a, c, b, c, c, a, b, b, b, b, a, b, a, a, b, c, a, c, a, b
Output sequence: 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 
\end{lstlisting}
\subsubsection{\textsc{More-Expl}\texorpdfstring{$_T$}{T}}
\begin{lstlisting}
You are a sequence completion model. The following sequence is generated from an unknown but consistent gr