In [8]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.utilities import SQLDatabase
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

from langchain.prompts import PromptTemplate
from langchain_openai import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools import BaseTool, StructuredTool, tool

from langchain_openai import ChatOpenAI
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.memory import VectorStoreRetrieverMemory
from langchain.prompts import PromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, PodSpec
from langchain_pinecone import PineconeVectorStore
from langchain.tools.retriever import create_retriever_tool

from dotenv import load_dotenv
import os

from dotenv import load_dotenv
import os

In [2]:
# Load variables from a local .env file when one is present in the working directory.
if os.path.exists('.env'):
    load_dotenv()

# langchain_openai reads OPENAI_API_KEY, while this project stores the key under
# OPENAI_ACCESS_TOKEN. Mirror it only when it is actually set: assigning None into
# os.environ raises an opaque "TypeError: str expected, not NoneType".
_openai_access_token = os.environ.get("OPENAI_ACCESS_TOKEN")
if _openai_access_token:
    os.environ["OPENAI_API_KEY"] = _openai_access_token
elif not os.environ.get("OPENAI_API_KEY"):
    # Neither variable is available: fail here with an actionable message instead
    # of failing later inside the first OpenAI call.
    raise RuntimeError(
        "OpenAI credentials not found: set OPENAI_ACCESS_TOKEN (or OPENAI_API_KEY) "
        "in the environment or in a .env file."
    )

In [3]:
def get_schema(_):
    """Return the hard-coded DDL text describing the tables exposed to the LLM.

    The single positional argument is ignored; it exists so the function can be
    used as a callable in ``RunnablePassthrough.assign(schema=get_schema)``, which
    passes the chain input to it.

    The returned text covers three tables: ``xero_accounts``, ``transactions`` and
    ``transactions__line_items``. It is prompt context, not executable SQL: the
    ``"class"`` column line carries an inline ``values:`` hint and is returned
    verbatim.
    """
    return """
    
    CREATE TABLE xero_accounts (
    "_id" text NOT NULL,
    "_sdc_batched_at" timestamptz NULL,
    "_sdc_extracted_at" timestamptz NULL,
    "_sdc_received_at" timestamptz NULL,
    "_sdc_sequence" int8 NULL,
    "_sdc_table_version" int8 NULL,
    account_id text NULL,
    add_to_watchlist bool NULL,
    bank_account_number text NULL,
    bank_account_type text NULL,
    "class" text NULL values: Expense and Others
    code text NULL,
    currency_code text NULL,
    enable_payments_to_account bool NULL,
    form_account_id text NULL,
    has_attachments bool NULL,
    "name" text NULL,
    reporting_code text NULL,
    reporting_code_name text NULL,
    show_in_expense_claims bool NULL,
    status text NULL,
    system_account text NULL,
    tax_type text NULL,
    "type" text NULL,
    updated_date_utc text NULL,
    CONSTRAINT xero_accounts_pkey PRIMARY KEY ("_id")
    );
    CREATE TABLE transactions (
    "_id" text NOT NULL,
    "_sdc_batched_at" timestamptz NULL,
    "_sdc_extracted_at" timestamptz NULL,
    "_sdc_received_at" timestamptz NULL,
    "_sdc_sequence" int8 NULL,
    "_sdc_table_version" int8 NULL,
    account_id text NULL,
    bank_account_code text NULL,
    bank_account_id text NULL,
    bank_account_name text NULL,
    bank_transaction_id text NULL,
    contact_id text NULL,
    contact_name text NULL,
    currency_code text NULL,
    "date" timestamptz NULL,
    is_reconciled bool NULL,
    line_amount_types text NULL,
    reference text NULL,
    status text NULL,
    sub_total float8 NULL,
    total float8 NULL,
    total_tax float8 NULL,
    "type" text NULL,
    updated_utc timestamptz NULL,
    CONSTRAINT transactions_pkey PRIMARY KEY ("_id")
    );
    CREATE TABLE transactions__line_items (
    "_sdc_batched_at" timestamptz NULL,
    "_sdc_level_0_id" int8 NOT NULL,
    "_sdc_received_at" timestamptz NULL,
    "_sdc_sequence" int8 NULL,
    "_sdc_source_key__id" text NOT NULL,
    "_sdc_table_version" int8 NULL,
    account_code text NULL,
    account_id text NULL,
    description text NULL,
    line_amount float8 NULL,
    line_item_id text NULL,
    quantity float8 NULL,
    tax_amount float8 NULL,
    tax_type text NULL,
    unit_amount float8 NULL,
    CONSTRAINT transactions__line_items_pkey PRIMARY KEY
    ("_sdc_level_0_id","_sdc_source_key__id")
    );
        
    """


In [4]:
from langchain_core.output_parsers import StrOutputParser

In [None]:
# Template for SQL generation; expects {schema} and {question}.
sql_query_prompt = (
    "Based on the table schema below, write a SQL query that would answer the user's question:\n"
    "{schema}\n"
    "Question: {question}\n"
    "SQL Query:"
)

# Template for the memory-backed conversation; expects {history} and {input}.
document_retrival_prompt = (
    "The following is a friendly conversation between a human and an AI. "
    "The AI is talkative and provides lots of specific details from its context. "
    "If the AI does not know the answer to a question, it truthfully says it does not know.\n"
    "\n"
    "Relevant pieces of previous conversation:\n"
    "{history}\n"
    "\n"
    "(You do not need to use these pieces of information if not relevant)\n"
    "\n"
    "Current conversation:\n"
    "Human: {input}\n"
    "AI:"
)

In [6]:


# Prompt built from the SQL template; it takes {schema} and {question}.
sql_prompt = ChatPromptTemplate.from_template(sql_query_prompt)

llm = ChatOpenAI()

# Same model with a stop sequence, so generation halts before any "SQLResult:" section.
sql_llm = llm.bind(stop=["\nSQLResult:"])

# Pipeline: add `schema` to the input dict -> render prompt -> call model -> plain string.
sql_query_chain = (
    RunnablePassthrough.assign(schema=get_schema) | sql_prompt | sql_llm | StrOutputParser()
)

question = (
    "Can you share the total expenses for a given account_id in a given date range. The output\n"
    "should include account name and total expense"
)

# Last expression of the cell: the generated SQL text is displayed as the cell output.
sql_query_chain.invoke({"question": question})

'SELECT x."name" AS account_name, SUM(t.total) AS total_expense\nFROM xero_accounts x\nJOIN transactions t ON x.account_id = t.account_id\nWHERE x.account_id = \'your_account_id\'\nAND t."date" >= \'start_date\'\nAND t."date" <= \'end_date\'\nAND t."type" = \'Expense\'\nGROUP BY x."name";'

In [10]:
# API keys are read from the environment; either is None when its variable is unset.
openai_api_key = os.getenv("OPENAI_ACCESS_TOKEN")
pinecone_api_key = os.getenv("PINECONE_API_KEY")

index_name = 'llm-for-sql-agent'

# Open a handle to the named Pinecone index.
pc = Pinecone(api_key=pinecone_api_key)
index = pc.Index(index_name)

# Last expression of the cell: display the index statistics.
index.describe_index_stats()

{'dimension': 512,
 'index_fullness': 0.00012,
 'namespaces': {'': {'vector_count': 12}},
 'total_vector_count': 12}

In [11]:
# Build the embedding model, a Pinecone-backed retriever memory, and the
# conversation chain that answers questions using that memory.
EMBEDDING_MODEL = "text-embedding-3-small"
# Request embeddings of the same size as the index so the vectors are compatible.
TEXT_DIMENSION = index.describe_index_stats()['dimension']

embeddings = OpenAIEmbeddings(
    model=EMBEDDING_MODEL,
    openai_api_key=openai_api_key,
    dimensions=TEXT_DIMENSION,
)

vectorstore = PineconeVectorStore(
    index_name=index_name,
    embedding=embeddings,
)

# k=1: only the single closest stored snippet is retrieved per query.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=1))

memory = VectorStoreRetrieverMemory(retriever=retriever)

# Fix: the original referenced the undefined names `_DEFAULT_TEMPLATE` and `PROMPT`
# and passed `input_variables=`/`template=` to ChatPromptTemplate, which is not its
# signature. A string template with named slots is a PromptTemplate; the template
# text is `document_retrival_prompt`, defined in an earlier cell.
# The name `document_chain` is kept so any later cell that refers to it still works,
# although the object is a prompt rather than a chain.
document_chain = PromptTemplate(
    input_variables=["history", "input"],
    template=document_retrival_prompt,
)

# NOTE(review): assumes the memory fills the template's {history} slot (its default
# memory key) and the chain fills {input} - confirm against the LangChain docs.
conversation_with_summary = ConversationChain(
    llm=llm,
    prompt=document_chain,
    memory=memory,
    verbose=True,
)

query = "How much was the taxi ride?"
# Last expression of the cell: the model's reply is displayed as the cell output.
conversation_with_summary.predict(input=query)