In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# (except those excluded with %aimport) before executing each cell, so edits to
# helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os

# Read the API key from the environment instead of hardcoding it. Subscript
# access raises KeyError right here if the variable is not set.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

In [None]:
from loguru import logger
from langchain.callbacks import FileCallbackHandler
from datetime import date

today = date.today()

# One log file per calendar day; the same path is shared by the loguru sink and
# the LangChain file callback handler.
logfile = f"logs/full_pipeline_{today.year}_{today.month}_{today.day}.log"

# Make the cell idempotent: every call to logger.add() registers an additional
# sink, so re-running this cell would otherwise write each message to the file
# once per execution. Drop the sink registered by a previous run first.
try:
    logger.remove(_logfile_sink_id)
except (NameError, ValueError):
    # NameError: first execution in this kernel (no sink id recorded yet).
    # ValueError: the recorded sink was already removed elsewhere.
    pass

# logger.add() returns the sink id that logger.remove() expects.
_logfile_sink_id = logger.add(logfile, format="{message}", colorize=False, enqueue=True)
handler = FileCallbackHandler(logfile)

In [None]:
from lib import create_cube_selection_chain, fetch_cubes_descriptions, parse_all_cubes, fetch_cube_sample, fetch_dimensions_triplets

cube_selection_settings = {
    "temperature": 0.5,
    "top_p": 0.5
}
cubes = fetch_cubes_descriptions()
cube_selection_chain = create_cube_selection_chain(api_key=OPENAI_API_KEY, handler=handler, **cube_selection_settings)

# question = f"sum of emission of CO2 for industry between year 2009 and 2011"
question = f"get average of emission of Methane for transport between years 2007 and 2005"

cube_selection_response = await cube_selection_chain.ainvoke({
    "cubes": cubes,
    "question": question,
})
cube_selection_response = cube_selection_response['text']

logger.info("========== CUBES RESPONSE ================")
logger.info(f"{cube_selection_response}")

In [None]:
# Extract the cube identifiers from the model's answer and continue with the
# first one only.
selected_cubes = parse_all_cubes(cube_selection_response)

try:
    selected_cube = selected_cubes[0]
except IndexError as exc:
    # The model may answer without naming any cube. Fail with the raw response
    # in the message instead of a bare "index out of range".
    raise IndexError(
        f"No cube could be parsed from the cube selection response: {cube_selection_response!r}"
    ) from exc

logger.info("========== SELECTED FIRST CUBE ================")
logger.info(selected_cube)

In [None]:
# Look up the selected cube via fetch_cube_sample; the result is later passed
# to the query generation chain under the "cube_and_sample" key.
# NOTE(review): presumably this holds the cube description plus sample
# observations — confirm the exact format in `lib`.
cube_and_sample = fetch_cube_sample(selected_cube)

# Debug aid: uncomment to inspect the fetched value.
# print(cube_and_sample)

In [None]:
# Fetch the dimension triplets of the selected cube; the result is later passed
# to the query generation chain under the "dimensions_triplets" key.
# NOTE(review): the triplet format is defined in `lib` — confirm before relying on it.
dimensions_triplets = fetch_dimensions_triplets(selected_cube)

# Debug aid: uncomment to inspect the fetched value.
# print(dimensions_triplets)

In [None]:
from lib import create_query_generation_chain

query_generation_settings = {
    "temperature": 0.2,
    "top_p": 0.1
}

generation_chain = create_query_generation_chain(api_key=OPENAI_API_KEY, handler=handler, **query_generation_settings)

query_generation_response = await generation_chain.ainvoke({
    "cube_and_sample": cube_and_sample,
    "dimensions_triplets": dimensions_triplets,
    "cube": selected_cube,
    "question": question,
})
query_generation_response = query_generation_response['text']

logger.info("========== QUERY GENERATION RESPONSE ================")
logger.info(f"{query_generation_response}")

In [None]:
from lib import run_query

# Execute the text produced by the query generation chain and log the outcome.
# NOTE(review): the model output is passed to run_query unmodified — confirm
# that run_query tolerates extra prose/markdown around the query and that the
# target endpoint is read-only, since this runs LLM-generated text as-is.
result = run_query(query_generation_response)

logger.info("=========== QUERY RESULT ============")
logger.info(result)