In [21]:
from pathlib import Path
import json
import itertools
import random
from uuid import uuid4

In [22]:
# ---------------------------------------------------------------------------
# Experiment grid configuration.
# Every combination of model x shot count x temperature x system prompt is
# turned into one run configuration further down in the notebook.
# ---------------------------------------------------------------------------

# Models to evaluate. Entries are either a Hugging Face hub id or a local path.
# NOTE(review): the first entry is an absolute path on a shared NFS mount and
# will only resolve on machines that have that mount available.
model = [
    '/nfs/public/hf/models/meta-llama/Meta-Llama-3-8B-Instruct',
    'mistralai/Mistral-7B-Instruct-v0.3',
    'google/gemma-7b',
    'microsoft/Phi-3-mini-128k-instruct',
    'microsoft/Phi-3-small-128k-instruct',
]

# Number of worked examples injected into the system prompt.
# 0 -> only an output-format sample (taken from `examples`),
# 1 -> one entry of `shot_examples`, 3 -> all entries of `shot_examples`.
shots = [
    0,
    1,
    3
]

# Worked examples in the form "Question: ... Process: 1. ... N. ... Selection: [...]".
# The numbers in "Selection" refer to the numbered steps of "Process", so every
# process must number its steps starting at "1." (the third example previously
# lacked the leading "1." although its selection refers to step 1).
shot_examples = [
    'Question: In 2027, what will be the GDP of Zambia? Process: 1. Needed to use historic data to predict the GDP of Zambia in 2027 2. Data was found on the GDP of Zambia between 2010 and 2022 3. A predictive model was fit to this data 4. The answer was estimated using this predictive model. Selection: [1, 4]',
    'Question: Which country in Western Europe had the lowest energy consumption in 2017? Process: 1. An answer using historic data on energy consumption was estimated. 2. 9 countries in Western Europe, including Netherlands, Monaco and Belgium, were found. 3. The energy consumption for each of these countries in 2017 was found. 4. The answer was estimated by comparing the data on energy consumption. Selection: [1, 2]',
    'Question: In 2029, what will be the GDP of the country in South-eastern Asia which had the highest GDP in 2014? Process: 1. The country in South-eastern Asia that had the highest GDP in 2014 had to be found first. 2. To find this, a list of countries located in South-eastern Asia was needed. 3. 11 countries that are in South-eastern Asia were found, including Brunei Darussalam, Cambodia and Myanmar. 4. For each of these countries, the GDP in 2014 had to be found. 5. Data on the GDP of these 11 countries in 2014 was found. 6. The country with the highest GDP in 2014 was found by comparing these values, and was Singapore. 7. Next, the GDP of Singapore in 2029 needed to be estimated. 8. Found data on the GDP of Singapore between 2009 and 2022. 9. A linear regression model was fit to this data. 10. The linear regression model was used to predict the GDP of Singapore in 2029. Selection: [1, 4, 7, 10]',
]

# System prompt templates. Each template must contain the "{EXAMPLE}"
# placeholder, which is filled in via str.format when the queue is built.
# The commented-out entries are disabled prompt variants kept for reference.
system_prompts = [
    "Your role is to select, from a list of steps, those that are most important for inclusion in a summary explanation of that process. Format your selection as a list. Use the following example to understand your task: {EXAMPLE}. Only output your selection in the form of a list and nothing else.",
    # "Your role is to select, from a list the steps, those that are most important for inclusion in a summary explanation of that process. Format your output as a list, for example {EXAMPLE}. Output only your selection in the form of a list and nothing else.. List of selections: [",
    # "Select from the list of steps those that are most important for inclusion in a summary explanation of the process. Format your output as a list, for example {EXAMPLE}. Output only your selection in the form of a list and nothing else..",
    # "Select from the list of steps those that are most important for inclusion in a summary explanation of the process. Format your output as a list, for example {EXAMPLE}. Output only your selection in the form of a list and nothing else.. List of selections: [",
    # "Select the two steps from the following list that are most important for inclusion in a summary explanation of the process. Format your output as a list, for example {EXAMPLE}. Output only your selection in the form of a list and nothing else..",
    # "Select four steps from the following list that are most important for inclusion in a summary explanation of the process. Format your output as a list, for example {EXAMPLE}. Output only your selection in the form of a list and nothing else..",
]

# Output-format samples used for the zero-shot setting: they only show the
# model what a selection list looks like, without a question or process.
examples = [
    # "[1, 2, 3]",
    # "[1, 2, 3, 4, 5]",
    "[7, 3, 1, 9, 4]",
    # "[2, 9, 8, 4, 1]"
]

# Sampling temperatures to sweep over.
temperatures = [
    0.3,
    0.5,
    0.7
]

In [23]:
# Build one run configuration per (shot count, model, temperature, system
# prompt) combination. `queue` maps a description string to its config dict.
queue = {}

for shot in shots:
    # The example is picked once per shot count, so every model/temperature
    # combination with the same shot count shares the same example(s).
    # NOTE(review): random.choice is not seeded here, so the 1-shot example can
    # differ between notebook runs; the chosen value is stored under "EXAMPLE".
    if shot == 0:
        # Zero-shot: only an output-format sample, no worked example.
        EXAMPLE = random.choice(examples)
        formatted_example = EXAMPLE
    elif shot == 1:
        # One-shot: a single worked example.
        EXAMPLE = random.choice(shot_examples)
        formatted_example = EXAMPLE
    elif shot == 3:
        # Three-shot: all worked examples, joined into one string for the prompt.
        EXAMPLE = shot_examples
        formatted_example = '; '.join(EXAMPLE)
    else:
        # Without this guard an unsupported value would silently reuse the
        # example chosen for the previous shot count (or hit a NameError).
        raise ValueError(f"Unsupported shot count: {shot!r} (expected 0, 1 or 3)")

    for m in model:
        # Last path component, e.g. 'Mistral-7B-Instruct-v0.3'.
        model_name = m.split('/')[-1]

        for t in temperatures:
            base_description = f"{model_name}:shot={shot}:temp={t}"

            for prompt_index, s in enumerate(system_prompts):
                # With a single active prompt the key keeps its historical
                # form. With several prompts a suffix is appended so that
                # configs for different prompts do not overwrite each other.
                if len(system_prompts) == 1:
                    description = base_description
                else:
                    description = f"{base_description}:prompt={prompt_index}"

                system_prompt = s.format(EXAMPLE=formatted_example)

                queue[description] = {
                    "MODEL": m,
                    "TEMPERATURE": t,
                    "BATCH_SIZE": 20,
                    "SYSTEM_CONTENT": system_prompt,
                    "EXAMPLE": EXAMPLE,
                    "SHOT": shot,
                    "INPUT": 'resources/data/select.json',
                    "DESCRIPTION": description,
                    "RUN_ID": uuid4().hex,
                    "FORMAT_VERSION": 2,
                }

In [24]:
# queue.json holds a single JSON object mapping a run description to its
# config dict. Merge the configs built above into it: entries whose key already
# exists in the file are replaced by the new config, all others are kept.
queue_path = Path('queue.json')

if queue_path.exists():
    with queue_path.open('r', encoding='utf-8') as f:
        queue_dict = json.load(f)
    if not isinstance(queue_dict, dict):
        raise TypeError(
            f"{queue_path} must contain a JSON object, got {type(queue_dict).__name__}"
        )
else:
    # No queue file yet: start from an empty queue instead of silently
    # discarding the configs that were just built.
    queue_dict = {}

queue_dict.update(queue)

# Serialise before opening the file for writing, so that a serialisation error
# cannot leave behind a truncated queue.json.
serialized_queue = json.dumps(queue_dict, indent=4)
with queue_path.open('w', encoding='utf-8') as f:
    f.write(serialized_queue)
