# Splitting Multi-Queries

### Importing Packages

In [1]:
from prompts import *
import os
import key
from model import llm_init
from langchain_core.output_parsers import StrOutputParser
import re
from pprint import pprint


### Utility Functions

In [12]:
def queries_to_list(result):
    pattern = re.compile(r'([Qq]uery|[Ss]tart query) (\d+): "([^"]+)"')

    queries = {}

    matches = pattern.findall(result)

    for match in matches:
        query_number = f'Query {match[1]}'
        query_text = match[2]
        queries[query_number] = query_text

    pprint(queries)
    return queries

### Setting up LLM

In [2]:
if not os.environ.get("OPENAI_API_KEY"):
    key.init()
    assert os.environ.get('OPENAI_API_KEY')
    
llm = llm_init()

# This is for reference
test_input = "Im interested in air temperature near surface at a monthly resolution and precipitation flux at a daily resolution"


### Greet the user and ask for an input

In [3]:
# initial_query = input("Hello! please make any inquiries you have about the CMIP dataset. You may make multiple inquiries at once.")
# print('You asked: ', initial_query)

### Have LLM split queries

In [4]:
#TODO: The word "query" needs to be present in output otherwise the dictionary will be empty
prompt = get_split_prompt()
chain = prompt | llm | StrOutputParser()
result = chain.invoke(test_input)
print(result)

Split query 1: "I'm interested in air temperature near surface at a monthly resolution."
Split query 2: "I'm interested in precipitation flux at a daily resolution."


### Extract queries

In [5]:
pattern = re.compile(r'([Qq]uery|[Ss]tart query) (\d+): "([^"]+)"')

queries = {}

matches = pattern.findall(result)

for match in matches:
    query_number = f'Query {match[1]}'
    query_text = match[2]
    queries[query_number] = query_text

pprint(queries)

{'Query 1': "I'm interested in air temperature near surface at a monthly "
            'resolution.',
 'Query 2': "I'm interested in precipitation flux at a daily resolution."}


### Confirm split queries with user

In [10]:
satisfied = False

while not satisfied:
    print("Here are the individual queries that were extracted from your input:\n")
    print(result)
    yes_no = input("Would you like to proceed with these queries? (yes/no)")

    if 'yes' in yes_no.lower():
        print("Great! Let's proceed.")
        satisfied = True

    elif 'no' in yes_no.lower():
        adjustments = input("What adjustments would you like to make to the queries?")
        adjustments_prompt = get_adjustment_prompt()
        chain = adjustments_prompt | llm | StrOutputParser()
        result = chain.invoke({'initial_queries':result, 'adjustment':adjustments})
        print(result)
        
print("We are now ready to proceed with the queries.")

Here are the individual queries that were extracted from your input:

Adjusted queries:
Query 1: "I'm interested in surface air temperature at a monthly resolution."
Query 2: "I'm interested in precipitation at a daily resolution."
Adjusted queries:
Query 1: "I'm interested in surface air temperature at a monthly resolution."
Query 2: "I'm interested in precipitation at a daily resolution."
Query 3: "I'm also interested in surface air temperature at a daily resolution."
Here are the individual queries that were extracted from your input:

Adjusted queries:
Query 1: "I'm interested in surface air temperature at a monthly resolution."
Query 2: "I'm interested in precipitation at a daily resolution."
Query 3: "I'm also interested in surface air temperature at a daily resolution."
Great! Let's proceed.
We are now ready to proceed with the queries.


In [13]:
extracted_queries = queries_to_list(result)

{'Query 1': "I'm interested in surface air temperature at a monthly "
            'resolution.',
 'Query 2': "I'm interested in precipitation at a daily resolution.",
 'Query 3': "I'm also interested in surface air temperature at a daily "
            'resolution.'}


In [14]:
#TODO: integrate with Sonia's retrieval code to get data for each query
#TODO: Integrate this into a UI
#TODO: See how to resolve the issue of the word "query" not being present in the output
#TODO: clean up code and choose better variable names