#### Task B

In [None]:
import os
import json
from llm_hlp import next_queries_gemini, next_queries_gpt

In [None]:
# Read the API keys from the local token files and publish them as the
# OPENAI_API_KEY / GEMINI_API_KEY environment variables.
# The file content is stripped because a token file saved with a trailing
# newline (or stray spaces) would otherwise put that whitespace into the key.
with open('OpenAI_token.txt', 'r') as token_file:
    os.environ["OPENAI_API_KEY"] = token_file.read().strip()

with open('Gemini_token.txt', 'r') as token_file:
    os.environ["GEMINI_API_KEY"] = token_file.read().strip()

In [None]:
# Load the two input files for the task.
# `utterances` is later iterated as {id: [utterance, ...]} and `pairs` is
# indexed by utterance (see the cell that builds `combined`).
utterances = {}
with open('predetermined_utterances_test.json') as utterances_file:
    utterances = json.loads(utterances_file.read())

pairs = {}
with open('predetermined_utterance_response_pairs_test.json') as pairs_file:
    pairs = json.loads(pairs_file.read())

In [None]:
# For every id in `utterances`, attach to each of its utterances the response
# stored for it in `pairs`, then save the result for the generation step.
# A missing utterance in `pairs` raises KeyError.
combined = {
    ut_id: [(ut, pairs[ut]) for ut in ut_list]
    for ut_id, ut_list in utterances.items()
}

# The (utterance, response) tuples are written as two-element JSON arrays.
with open('combined_data_task_B.json', 'w') as out_file:
    json.dump(combined, out_file, indent=2)

In [None]:
# Task description placed in front of the instructions.
# This is only an example: replace it with your own prompt.
example_prompt = (
    "You are tasked with writing potential next queries for a multiple given "
    "queries and already received answers by a conversational search system."
)

# Output-format instructions appended after the task description.
# Only edit these IF YOU KNOW WHAT YOU ARE DOING; otherwise leave them as is.
# %NUMOFQUERIES% is a placeholder that the generation cell replaces with the
# number of queries still needed before printing the prompt.
prompt_instructions = (
    'You are required to return EXACTLY %NUMOFQUERIES% potential next queries, '
    'NOTHING ELSE. Queries need to be distinct from each other. '
    'Order them in descending order by probability of them being the next query. '
    'Return these next %NUMOFQUERIES% queries as a Python-style list. '
    'These are the previous queries (preceded by "?") as well as the '
    'corresponding received responses by the conversational system '
    '(preceded by ">"):\n'
)

In [None]:
# Run configuration -- set these variables before generating queries.

# True selects next_queries_gpt, False selects next_queries_gemini.
use_gpt = False
# Model identifier passed to the selected helper. Alternatives noted by the
# author: 'gpt-4.1-nano', 'gemini-2.5-flash-preview-05-20'
modelname = 'gemini-2.0-flash'

# `name` becomes part of the results file name and is stored inside the file.
name = 'this-is-my-name'
# Free-text description of the approach, followed by the exact prompt,
# instruction and model so the results file documents how it was produced.
strategy = ''.join((
    'This is my strategy ......',
    '_____ PROMPT: ', example_prompt,
    ' _____ INSTRUCTION: ', prompt_instructions,
    ' _____ LLM: ', modelname,
))

In [None]:
# Create the results file for this model/name combination on first use.
# An existing file is left untouched so earlier results are not overwritten.
results_path = 'task_B_automated_' + modelname + '--' + name + '.json'
results_missing = not os.path.exists(results_path)
if results_missing:
    initial_content = {'name': name, 'strategy': strategy}
    with open(results_path, 'w') as results_file:
        json.dump(initial_content, results_file, indent=2)

In [None]:

# Generate next-query suggestions for ONE conversation per run of this cell.
#
# The cell picks the first conversation in combined_data_task_B.json that does
# not yet have TARGET_QUERY_COUNT stored queries, asks the configured LLM
# helper for the missing ones, saves the results file and stops (`break`).
# Run the cell repeatedly until the completion message is printed.

# Number of next queries stored per conversation.
TARGET_QUERY_COUNT = 10

results_path = 'task_B_automated_' + modelname + '--' + name + '.json'

with open('combined_data_task_B.json') as f:
    combined_pairs = json.load(f)
with open(results_path) as f:
    next_utterances = json.load(f)

for ut_id in combined_pairs:
    # Skip conversations that already have their full set of queries.
    if len(next_utterances.get(ut_id, [])) >= TARGET_QUERY_COUNT:
        continue

    # Conversation history: each query is prefixed with "?", each response
    # with ">", and turns are separated by "--".
    text_for_llm = ''.join(
        '?: ' + query + '\n' + '>: ' + response + '\n' + '--\n'
        for query, response in combined_pairs[ut_id]
    )

    # Queries stored by an earlier, incomplete run for this conversation.
    already_entered_queries = list(next_utterances.get(ut_id, []))
    next_new_queries = list(already_entered_queries)
    num_missing = TARGET_QUERY_COUNT - len(already_entered_queries)

    if already_entered_queries:
        # Tell the model which queries exist so it only supplies new ones.
        curr_prompt_instructions = (
            ' These queries have already been written as potential next ones, YOU CANNOT REPEAT THEM: '
            + ', '.join(already_entered_queries) + '. ' + prompt_instructions
        )
    else:
        curr_prompt_instructions = prompt_instructions

    print(example_prompt + ' ' + curr_prompt_instructions.replace('%NUMOFQUERIES%', str(num_missing)) + ' ' + text_for_llm)

    # Both helpers take the same arguments and return (queries, error flag).
    query_llm = next_queries_gpt if use_gpt else next_queries_gemini
    potential_next_utterances, error_occurred = query_llm(
        text_for_llm, example_prompt, num_missing, curr_prompt_instructions, modelname
    )

    for new_query in potential_next_utterances:
        # Never store more than the target, even if the model returns extras
        # (the previous `< 11` bound allowed an eleventh query).
        if len(next_new_queries) >= TARGET_QUERY_COUNT:
            break
        # The prompt demands distinct queries; drop exact repeats.
        if new_query in next_new_queries:
            continue
        next_new_queries.append(new_query)
    next_utterances[str(ut_id)] = next_new_queries

    with open(results_path, 'w') as f:
        json.dump(next_utterances, f, indent=2)

    print('These ' + str(len(next_new_queries)) + ' options have been entered by the LLM as potential next queries:')
    for position, stored_query in enumerate(next_new_queries, start=1):
        print(str(position) + ' -- ' + stored_query)

    if error_occurred:
        # NOTE(review): exact meaning of the flag is defined in llm_hlp; it is
        # only surfaced here so a failed request does not go unnoticed.
        print('The LLM helper reported an error for this request.')

    if len(next_new_queries) < TARGET_QUERY_COUNT:
        print('Please run this query again, there have only been ' + str(len(next_new_queries)) + ' queries.')

    print('Thanks!')
    break

# Finished once every conversation has its full set of queries. This is derived
# from the data instead of a fixed key count, because the results file also
# holds the 'name' and 'strategy' entries and a conversation may be incomplete.
all_done = bool(combined_pairs) and all(
    len(next_utterances.get(conv_id, [])) >= TARGET_QUERY_COUNT
    for conv_id in combined_pairs
)
if all_done:
    print('Nice job, well done! Please send your `task_B_automated_' + modelname + '--' + name + '.json` file to Christin. Thank you so much! :)')