In [85]:
from langchain_community.utilities import SQLDatabase
from langchain.chains import create_sql_query_chain
from langchain_openai import ChatOpenAI
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_openai import OpenAIEmbeddings

In [86]:
CUSTOMER_ID = "00000000-6360-4bbb-f78c-b5001f9e006c"

db = SQLDatabase.from_uri("sqlite:///syny.db", sample_rows_in_table_info=3)

llm = ChatOpenAI()
chain = create_sql_query_chain(llm, db)

context = db.get_context()

In [87]:
examples = [
    {
        "input": "Qual é o meu consumo total de gás?",
        "query": "select sum(value) from historic_data where type = 'gas' and customer_id = '{customer_id}'",
    },
    {
        "input": "Qual é o meu consumo médio de gás?",
        "query": "SELECT avg(value) FROM historic_data where type = 'gas' and customer_id = '{customer_id}'",
    }    
]

example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    OpenAIEmbeddings(),
    FAISS,
    k=5,
    input_keys=["input"],
)

In [88]:
example_prompt = PromptTemplate.from_template("User input: {input}\nSQL query: {query}")
prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    example_prompt=example_prompt,
    prefix="""You are an agent designed to interact with a SQL database.
Given an input question, create a syntactically correct SQLite query to run, then look at the results of the query and return the answer.
Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most 5 results.
You can order the results by a relevant column to return the most interesting examples in the database.
Never query for all the columns from a specific table, only ask for the relevant columns given the question.
You have access to tools for interacting with the database.
Only use the below tools. Only use the information returned by the below tools to construct your final answer.
You MUST double check your query before executing it. If you get an error while executing a query, rewrite the query and try again.

DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the database.

DO NOT list table name or any information about the schema.

Only use the tables below\n\n
CREATE TABLE IF NOT EXISTS "historic_data"(
"device_twin_variable_history_id" TEXT, "device_twin_id" TEXT, "customer_id" TEXT, "device_twin_variable_id" TEXT,
 "name" TEXT, "category" TEXT, "type" TEXT, "kind" TEXT,
 "value" TEXT, "unit" TEXT, "meta" TEXT, "created_at" TEXT,
 "alias" TEXT);

 Some tips on how to query the historic_data table
 To measure consumption aggregate by the value column.

 To measure consumption by utility, filter by the type column.

 To query for a specific time window filter by created_at column.

 To query for a specific customer filter by the customer_id column.
 """,
    suffix="User input: {input}\nSQL query: ",
    input_variables=["input", "top_k", "table_info", "customer_id"],
)

In [91]:
prompt.format(input="qual a minha leitura de gás hoje?", top_k=3, table_info="historic_data", customer_id=CUSTOMER_ID)

chain = create_sql_query_chain(llm, db, prompt)

ValueError: Prompt must have input variables: 'input', 'top_k', 'table_info'. Received prompt with input variables: ['input']. Full prompt:

input_variables=['input'] example_selector=SemanticSimilarityExampleSelector(vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x1386cbfe0>, k=5, example_keys=None, input_keys=['input'], vectorstore_kwargs=None) example_prompt=PromptTemplate(input_variables=['input', 'query'], template='User input: {input}\nSQL query: {query}') suffix='User input: {input}\nSQL query: ' prefix='You are an agent designed to interact with a SQL database.\nGiven an input question, create a syntactically correct SQLite query to run, then look at the results of the query and return the answer.\nUnless the user specifies a specific number of examples they wish to obtain, always limit your query to at most 5 results.\nYou can order the results by a relevant column to return the most interesting examples in the database.\nNever query for all the columns from a specific table, only ask for the relevant columns given the question.\nYou have access to tools for interacting with the database.\nOnly use the below tools. Only use the information returned by the below tools to construct your final answer.\nYou MUST double check your query before executing it. If you get an error while executing a query, rewrite the query and try again.\n\nDO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the database.\n\nDO NOT list table name or any information about the schema.\n\nOnly use the tables below\n\n\nCREATE TABLE IF NOT EXISTS "historic_data"(\n"device_twin_variable_history_id" TEXT, "device_twin_id" TEXT, "customer_id" TEXT, "device_twin_variable_id" TEXT,\n "name" TEXT, "category" TEXT, "type" TEXT, "kind" TEXT,\n "value" TEXT, "unit" TEXT, "meta" TEXT, "created_at" TEXT,\n "alias" TEXT);\n\n Some tips on how to query the historic_data table\n To measure consumption aggregate by the value column.\n\n To measure consumption by utility, filter by the type column.\n\n To query for a specific time window filter by created_at column.\n\n To query for a specific customer filter by the customer_id column.\n '

In [84]:
result = chain.invoke(
    {
        "question": "when was my last gas consumption?"
    }
)

if result == "N/A":
    print("please reach out to sac@syny.com.br")
else:
    output = db.run(result)
    print(output)

KeyError: "Input to FewShotPromptTemplate is missing variables {'customer_id'}.  Expected: ['customer_id', 'input', 'table_info'] Received: ['input', 'table_info']"