In [1]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.utilities import SQLDatabase
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

from langchain.prompts import PromptTemplate
from langchain_openai import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools import BaseTool, StructuredTool, tool
 
from dotenv import load_dotenv
import os

In [2]:
# Check if .env file exists
if os.path.exists('.env'):
    load_dotenv() 
    
os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_ACCESS_TOKEN")

In [77]:
def get_schema(_):
    db_shema = """
    
    CREATE TABLE xero_accounts (
    "_id" text NOT NULL,
    "_sdc_batched_at" timestamptz NULL,
    "_sdc_extracted_at" timestamptz NULL,
    "_sdc_received_at" timestamptz NULL,
    "_sdc_sequence" int8 NULL,
    "_sdc_table_version" int8 NULL,
    account_id text NULL,
    add_to_watchlist bool NULL,
    bank_account_number text NULL,
    bank_account_type text NULL,
    "class" text NULL values: Expense and Others
    code text NULL,
    currency_code text NULL,
    enable_payments_to_account bool NULL,
    form_account_id text NULL,
    has_attachments bool NULL,
    "name" text NULL,
    reporting_code text NULL,
    reporting_code_name text NULL,
    show_in_expense_claims bool NULL,
    status text NULL,
    system_account text NULL,
    tax_type text NULL,
    "type" text NULL,
    updated_date_utc text NULL,
    CONSTRAINT xero_accounts_pkey PRIMARY KEY ("_id")
    );
    CREATE TABLE transactions (
    "_id" text NOT NULL,
    "_sdc_batched_at" timestamptz NULL,
    "_sdc_extracted_at" timestamptz NULL,
    "_sdc_received_at" timestamptz NULL,
    "_sdc_sequence" int8 NULL,
    "_sdc_table_version" int8 NULL,
    account_id text NULL,
    bank_account_code text NULL,
    bank_account_id text NULL,
    bank_account_name text NULL,
    bank_transaction_id text NULL,
    contact_id text NULL,
    contact_name text NULL,
    currency_code text NULL,
    "date" timestamptz NULL,
    is_reconciled bool NULL,
    line_amount_types text NULL,
    reference text NULL,
    status text NULL,
    sub_total float8 NULL,
    total float8 NULL,
    total_tax float8 NULL,
    "type" text NULL,
    updated_utc timestamptz NULL,
    CONSTRAINT transactions_pkey PRIMARY KEY ("_id")
    );
    CREATE TABLE transactions__line_items (
    "_sdc_batched_at" timestamptz NULL,
    "_sdc_level_0_id" int8 NOT NULL,
    "_sdc_received_at" timestamptz NULL,
    "_sdc_sequence" int8 NULL,
    "_sdc_source_key__id" text NOT NULL,
    "_sdc_table_version" int8 NULL,
    account_code text NULL,
    account_id text NULL,
    description text NULL,
    line_amount float8 NULL,
    line_item_id text NULL,
    quantity float8 NULL,
    tax_amount float8 NULL,
    tax_type text NULL,
    unit_amount float8 NULL,
    CONSTRAINT transactions__line_items_pkey PRIMARY KEY
    ("_sdc_level_0_id","_sdc_source_key__id")
    );
        
    """
    return db_shema

In [111]:
llm = ChatOpenAI()

from langchain_core.prompts import (
    ChatPromptTemplate,
    FewShotPromptTemplate,
    MessagesPlaceholder,
    PromptTemplate,
    SystemMessagePromptTemplate,
)

sql_prompt = """

You are an agent designed return correct usable SQL queries.
Given an input question, create a syntactically correct SQLite query only.
You DONOT have access for interacting with the database, so ALWAYS REMEMBER to return just the SQL queries.
You MUST double check your query to ensure it is correct. If you get an error while executing a query, rewrite the query and try again.

If the question does not seem related to the table schema in the database, just return "I don't know" as the answer.

Based on the table schema below, write a SQL query that would answer the user's question:
{schema}

Question: {input}
SQL Query:"""

sql_prompt_template=PromptTemplate.from_template(
    sql_prompt
)

full_sql_prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate(prompt=sql_prompt_template),
        ("human", "{input}"),
        MessagesPlaceholder("agent_scratchpad"),
    ]
)

sql_response = (
    RunnablePassthrough.assign(schema=get_schema)
    | full_sql_prompt
    | llm.bind(stop=["\nSQLResult:"])
    | StrOutputParser()
    )

question = """Can you share the total expenses for a given account_id in a given date range. The output
should include account name and total expense"""

sql_response.invoke(
    {
        "input": question,
        "agent_scratchpad": []}
    )


"```sql\nSELECT x.name AS account_name, SUM(t.total) AS total_expense\nFROM xero_accounts x\nJOIN transactions t ON x.account_id = t.account_id\nWHERE x.account_id = 'your_account_id_here'\nAND t.date >= 'start_date_here' AND t.date <= 'end_date_here'\nAND t.type = 'Expense'\nGROUP BY x.name;\n```"

In [112]:
def get_query(question, llm, prompt):
    sql_response = (
    RunnablePassthrough.assign(schema=get_schema)
    | prompt
    | llm.bind(stop=["\nSQLResult:"])
    | StrOutputParser()
    )

    return sql_response.invoke(
    {
        "input": question,
        "agent_scratchpad": []}
    )


def gen_sql_query_tool(query: str):
    return get_query(question=query, llm=llm, prompt=full_sql_prompt)

sql_query_generator = StructuredTool.from_function(
    func=gen_sql_query_tool,
    name="Search",
    description="this tool must be used for queries related to performing sql queries on a database",
)

question = """Can you share the total expenses for a given account_id in a given date range. The output
should include account name and total expense"""

print(sql_query_generator.run(question))

```sql
SELECT x."name" AS account_name, SUM(t.total) AS total_expense
FROM xero_accounts x
JOIN (
    SELECT tl."_sdc_source_key__id", SUM(tl.line_amount) AS total
    FROM transactions__line_items tl
    JOIN transactions t ON tl."_sdc_source_key__id" = t."_id"
    WHERE t.account_id = 'your_account_id_here' 
    AND t."date" BETWEEN 'start_date_here' AND 'end_date_here'
    GROUP BY tl."_sdc_source_key__id"
) t ON x."_id" = t."_sdc_source_key__id"
GROUP BY x."_id";
```


In [98]:
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.memory import VectorStoreRetrieverMemory
from langchain.prompts import PromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, PodSpec
from langchain_pinecone import PineconeVectorStore
from langchain.tools.retriever import create_retriever_tool

from dotenv import load_dotenv
import os

In [99]:
load_dotenv() 
openai_api_key = os.environ.get("OPENAI_ACCESS_TOKEN")
pinecone_api_key = os.environ.get("PINECONE_API_KEY")

In [100]:
pc = Pinecone(api_key=pinecone_api_key)

In [101]:
index_name = 'llm-for-sql-agent'
index = pc.Index(index_name)
index.describe_index_stats()

{'dimension': 512,
 'index_fullness': 0.00012,
 'namespaces': {'': {'vector_count': 12}},
 'total_vector_count': 12}

In [102]:
# Process a PDF and create embeddings
EMBEDDING_MODEL = "text-embedding-3-small"
TEXT_DIMENSION = index.describe_index_stats()['dimension']

embeddings = OpenAIEmbeddings(
    model=EMBEDDING_MODEL,
    openai_api_key=openai_api_key,
    dimensions=TEXT_DIMENSION
)

vectorstore = PineconeVectorStore(
    index_name=index_name,
    embedding=embeddings)

retriever = vectorstore.as_retriever(search_kwargs=dict(k=1))

retriever_tool = create_retriever_tool(
    retriever,
    "document_search",
    "Search for information about documents not related to any SQL query. For any questions about Documents, you must use this tool!",
)

In [113]:
tools = [sql_query_generator, retriever_tool]

In [119]:
from langchain import hub

# Get the prompt to use - you can modify this!
prompt = hub.pull("hwchase17/openai-functions-agent")

In [127]:
prompt.messages

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')),
 MessagesPlaceholder(variable_name='chat_history', optional=True),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')),
 MessagesPlaceholder(variable_name='agent_scratchpad')]

In [121]:
from langchain.agents import create_openai_functions_agent

agent = create_openai_functions_agent(llm, tools, prompt)

In [122]:
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [123]:
agent_executor.invoke({"input": "hi!"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mHello! How can I assist you today?[0m

[1m> Finished chain.[0m


{'input': 'hi!', 'output': 'Hello! How can I assist you today?'}

In [108]:
agent_executor.invoke({"input": "how much was the taxi fare?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `document_search` with `{'query': 'taxi fare'}`


[0m[33;1m[1;3minput: How much was the taxi ride?
response: The total cost of the taxi ride was 2300 NGN, including VAT.[0m[32;1m[1;3mThe taxi fare for the ride was 2300 NGN, including VAT.[0m

[1m> Finished chain.[0m


{'input': 'how much was the taxi fare?',
 'output': 'The taxi fare for the ride was 2300 NGN, including VAT.'}

In [125]:
agent_executor.invoke({"input": "write a suitable sql query to determine the number of transactions that occured?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Search` with `{'query': 'SELECT COUNT(*) FROM transactions'}`


[0m[36;1m[1;3mSELECT COUNT(*) FROM transactions;[0m[32;1m[1;3mThe SQL query to determine the number of transactions that occurred is: 

```
SELECT COUNT(*) FROM transactions;
```[0m

[1m> Finished chain.[0m


{'input': 'write a suitable sql query to determine the number of transactions that occured?',
 'output': 'The SQL query to determine the number of transactions that occurred is: \n\n```\nSELECT COUNT(*) FROM transactions;\n```'}

In [129]:
question = """Write SQL query to show the total expenses for a given account_id in a given date range. The output
should include account name and total expense"""
agent_executor.invoke({"input": question})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Search` with `{'query': "SELECT account_name, SUM(expense_amount) AS total_expense FROM expenses WHERE account_id = 'account_id' AND expense_date BETWEEN 'start_date' AND 'end_date' GROUP BY account_name"}`


[0m[36;1m[1;3mI don't know[0m[32;1m[1;3mI have provided the SQL query for you:

```sql
SELECT account_name, SUM(expense_amount) AS total_expense
FROM expenses
WHERE account_id = 'account_id' 
AND expense_date BETWEEN 'start_date' AND 'end_date'
GROUP BY account_name
``` 

You can replace 'account_id', 'start_date', and 'end_date' with your desired values when using the query.[0m

[1m> Finished chain.[0m


{'input': 'Write SQL query to show the total expenses for a given account_id in a given date range. The output\nshould include account name and total expense',
 'output': "I have provided the SQL query for you:\n\n```sql\nSELECT account_name, SUM(expense_amount) AS total_expense\nFROM expenses\nWHERE account_id = 'account_id' \nAND expense_date BETWEEN 'start_date' AND 'end_date'\nGROUP BY account_name\n``` \n\nYou can replace 'account_id', 'start_date', and 'end_date' with your desired values when using the query."}

In [133]:
question = """Who was the taxi fare Driver"""
agent_executor.invoke({"input": question})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Search` with `{'query': 'SELECT Driver FROM TaxiFare'}`


[0m[36;1m[1;3mI don't know[0m[32;1m[1;3mI apologize, but I am unable to provide information about the taxi fare driver at the moment.[0m

[1m> Finished chain.[0m


{'input': 'Who was the taxi fare Driver',
 'output': 'I apologize, but I am unable to provide information about the taxi fare driver at the moment.'}

In [None]:
# from langchain.prompts import (SystemMessagePromptTemplate, 
#                                HumanMessagePromptTemplate, 
#                                MessagesPlaceholder, 
#                                ChatPromptTemplate)

# system_template = """You are an intelligent agent"""
# system_message_prompt_template = SystemMessagePromptTemplate.from_template(
#     system_template)

# human_template = "{input}"
# human_message_prompt_template = HumanMessagePromptTemplate.from_template(human_template)

# message_placeholder = [MessagesPlaceholder(variable_name="conversation"), human_message_prompt_template]

# chat_prompt_template = ChatPromptTemplate.from_messages(
#     [system_message_prompt_template,
#      message_placeholder,
#      human_message_prompt_template])

# final_prompt = chat_prompt_template.format_prompt(
#                 sample_text="").to_messages()

# final_prompt

In [45]:
# # chat completion llm
# llm_chatmodel = ChatOpenAI(
#     openai_api_key=openai_api_key,
#     model_name='gpt-3.5-turbo',
#     temperature=0.0
# )

# memory = VectorStoreRetrieverMemory(retriever=retriever)

# _DEFAULT_TEMPLATE = """The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

# Relevant pieces of previous conversation:
# {history}

# (You do not need to use these pieces of information if not relevant)

# Current conversation:
# Human: {input}
# AI:"""
# PROMPT = PromptTemplate(
#     input_variables=["history", "input"], template=_DEFAULT_TEMPLATE
# )
# conversation_with_summary = ConversationChain(
#     llm=llm,
#     prompt=PROMPT,
#     memory=memory,
#     verbose=True
# )

# query = "How much was the taxi ride?"
# conversation_with_summary.predict(input=query)



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Relevant pieces of previous conversation:
input: How much was the taxi ride?
response: The total cost of the taxi ride was 2300 NGN, including VAT.

(You do not need to use these pieces of information if not relevant)

Current conversation:
Human: How much was the taxi ride?
AI:[0m

[1m> Finished chain.[0m


'The total cost of the taxi ride was 2300 NGN, including VAT.'

# Define Tools and Agents