# List learning - recall

In [13]:
# Number of words sampled into the target list (passed to subsample_words below).
list_length = 50
# Number of words sampled into the interjected list (passed to subsample_words below).
interjection_length = 200

import numpy as np
import matplotlib.pyplot as plt
from helpers import * # general helper functions
%reload_ext autoreload
%autoreload 2

## task-specific helper functions

In [14]:
def plot_recall_precision(true_list, output_list, interjected_list=None, letters="abcdefgh"):
    """Plot per-word recall and precision of a recalled word list.

    Only the words of ``true_list`` whose first character is in ``letters``
    are treated as targets. Three boolean traces can be drawn, one per panel:

    * Recall: for each target word (in ``true_list`` order), whether it
      appears in ``output_list``.
    * Precision: for each unique word of ``output_list`` (in the sorted order
      returned by ``np.unique``), whether it is a target word.
    * False alarms (only when ``interjected_list`` is non-empty): for each
      unique output word, whether it appears in ``interjected_list``.

    Args:
        true_list: Words that were presented in the list to be recalled.
        output_list: Words produced by the model.
        interjected_list: Optional words from an interjected list. ``None``
            or an empty sequence suppresses the third panel.
        letters: Iterable of single characters; a word of ``true_list`` is a
            target only if it starts with one of them. Defaults to a-h.

    Returns:
        None. A new matplotlib figure is created as a side effect.
    """
    # Avoid a shared mutable default; None means "no interjected list".
    if interjected_list is None:
        interjected_list = []

    first_letters = set(letters)
    # w[:1] (rather than w[0]) lets an empty string be skipped instead of raising.
    targets = [w for w in true_list if w[:1] in first_letters]

    # Build every lookup structure once instead of once per tested word.
    target_set = set(targets)
    recalled_set = set(output_list)
    unique_output = np.unique(output_list)

    recall = [w in recalled_set for w in targets]
    precision = [w in target_set for w in unique_output]

    # len() instead of truthiness so array-like inputs are accepted too.
    has_interjection = len(interjected_list) > 0
    ncols = 3 if has_interjection else 2

    fig, ax = plt.subplots(1, ncols)
    ax[0].plot(recall)
    ax[0].set_title('Recall')
    ax[0].set_xlabel('Word in true list')
    ax[0].set_ylabel('Present in recalled list')

    ax[1].plot(precision)
    ax[1].set_title('Precision')
    ax[1].set_xlabel('Word in output (unique)')
    ax[1].set_ylabel('Present in true list')

    if has_interjection:
        interjected_set = set(interjected_list)
        precision_int = [w in interjected_set for w in unique_output]
        ax[2].plot(precision_int)
        ax[2].set_title('False alarms to interjections')
        ax[2].set_xlabel('Word in output (unique)')
        ax[2].set_ylabel('Present in interjection')

## define task

In [None]:
# Load the word pool from disk (txt_to_list comes from helpers;
# presumably returns a list of word strings — confirm there).
all_words = txt_to_list('_data/wasnorm_wordpool.txt')

# Target list: list_length words drawn from the pool with a fixed seed.
words = subsample_words(all_words, list_length, seed=0)

# Interjected list: interjection_length words drawn with the same seed;
# `avoid=words` is passed, presumably to exclude the target words — confirm in helpers.
interjection_words = subsample_words(all_words, interjection_length, seed=0, avoid=words)

In [None]:
# Example prompts for each task condition; every prompt is built by
# make_inp (from helpers) and printed for inspection.

# 1) Plain recall: the list followed by a free-recall cue.
simple_inp = make_inp(
    words,
    preamble="Here is a list of words:",
    cue=(
        "Please list all words from the list, in any order you'd like. "
        "Do not think step by step, only output the list of words."
    ),
)
print(simple_inp)

# 2) Recall with an interjected second list that must not be reproduced.
interjection_inp = make_inp(
    words,
    preamble="Here is a list of words:",
    cue=(
        "Please list all words from the first list you saw, in any order you'd like. "
        "Do not output words from the interjecting list. "
        "Do not think step by step, only output the list of words."
    ),
    interjection="You will now see another list of words.",
    interjection_words=interjection_words,
)
print(interjection_inp)

# 3) Gated recall: only words starting with a-h, rule given in the cue only.
gated_inp = make_inp(
    words,
    preamble="Here is a list of words:",
    cue=(
        "Please list all words that start with the letters a,b,c,d,e,f,g,h. "
        "List these words in any order you'd like without listing any other words provided in the list. "
        "Do not think step by step, only output the list of words."
    ),
)
print(gated_inp)

# 4) Gated recall where the rule is also announced in the preamble.
gated_informed_inp = make_inp(
    words,
    preamble=(
        "Here is a list of words. After seeing this list, "
        "list all the words starting with a,b,c,d,e,f,g,h. Make sure to not reproduce any other words:"
    ),
    cue=(
        "Please list all words that start with the letters a,b,c,d,e,f,g,h. "
        "List these words in any order you'd like without listing any other words provided in the list. "
        "Do not think step by step, only output the list of words."
    ),
)
print(gated_informed_inp)

# 5) Gated recall, rule announced in the preamble, plus an interjected list.
gated_informed_interjection_inp = make_inp(
    words,
    preamble=(
        "Here is a list of words. After seeing this list, "
        "list all the words starting with a,b,c,d,e,f,g,h. Make sure to not reproduce any other words:"
    ),
    cue=(
        "Please list the words from the first list you saw, but only the words that "
        "start with the letters a,b,c,d,e,f,g,h. List these words in any order you'd like without "
        "listing any other words provided in the list, or in the intervening list. "
        "Do not think step by step, only output the list of words."
    ),
    interjection=(
        "You will now see another list of words. "
        "Later, when reproducing the first list of words, you should not reproduce these words:"
    ),
    interjection_words=interjection_words,
)
print(gated_informed_interjection_inp)

Here is a list of words: knuckle, vehicle, man, bank, employee, perch, ozone, list, whistle, garbage, onion, igloo, roach, deodorant, pearl, cheddar, fleet, cheek, van, bulletin, senate, zucchini, earring, plumber, telephone, sap, chimney, giraffe, butcher, traitor, boss, sugar, hall, oatmeal, quail, button, house, microphone, goo, scout, sister, cub, proton, monastery, pill, enemy, biologist, prince, antler, bulb. Please list all words from the list, in any order you'd like. Do not think step by step, only output the list of words.
Here is a list of words: knuckle, vehicle, man, bank, employee, perch, ozone, list, whistle, garbage, onion, igloo, roach, deodorant, pearl, cheddar, fleet, cheek, van, bulletin, senate, zucchini, earring, plumber, telephone, sap, chimney, giraffe, butcher, traitor, boss, sugar, hall, oatmeal, quail, button, house, microphone, goo, scout, sister, cub, proton, monastery, pill, enemy, biologist, prince, antler, bulb. You will now see another list of words. le

## run task

In [None]:
# Build the model handle that is passed to query_model in the next cell.
# make_pipe comes from helpers; presumably it loads the named checkpoint
# as a text-generation pipeline — confirm in helpers.
pipe = make_pipe(model_id = "Qwen/Qwen3-4B-Instruct-2507")

In [None]:
# Pair every prompt with the interjected words it contains (empty when the
# condition has no interjection) so that plot_recall_precision can draw the
# "False alarms to interjections" panel for the interjection conditions.
conditions = [
    (simple_inp, []),
    (interjection_inp, interjection_words),
    (gated_inp, []),
    (gated_informed_inp, []),
    (gated_informed_interjection_inp, interjection_words),
]

for inp, interjected in conditions:
    lst_words = query_model(pipe, inp)
    print(lst_words)
    plot_recall_precision(words, lst_words, interjected)