In [1]:
import os
from dotenv import load_dotenv
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import Qdrant
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI, OpenAI
from qdrant_client import QdrantClient, models
import qdrant_client
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain_core.prompts import PromptTemplate, FewShotPromptTemplate
import logging
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain import hub
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
logging.basicConfig(level=logging.DEBUG)

In [2]:
# Set the model name for LLM
OPENAI_MODEL = "gpt-3.5-turbo"

# Store API key as a variable
openai_api_key = st.secrets["OPENAI_API_KEY"]

In [3]:
def get_vector_store():
    #Create a client to connect to Qdrant server
    client = qdrant_client.QdrantClient(
        st.secrets["QDRANT_HOST"],
        api_key=st.secrets["QDRANT_API_KEY"]
        )
    
    #initialize embeddings for vector store
    embeddings = OpenAIEmbeddings(
        api_key=openai_api_key,
        model="text-embedding-3-large"
    )
    
    # create vector_store with Qdrant and embeddings
    vector_store = Qdrant(
        client = client,
        collection_name = st.secrets["QDRANT_COLLECTION_NAME"],
        embeddings = embeddings,
    )
    
    return vector_store

In [4]:
vector_store = get_vector_store()

DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'


In [12]:
# Create function to setup prompt template
def setup_prompt_template():
    prefix="""You are a leases chatbot. You answer questions relating to ASC 842 under US GAAP. You respond to the queries as shown in 
    the examples. The responses do not have to be brief. Giving a thorough response with citations from the source documents is more
    important than brevity. Each response will be followed by reference from multiple sources with section numbers or page numbers (which
    is in the meta data) from the source documents. The responses will be provided only from the provided PDF source documents.  
    The responses will be clear and helpful and will use language that is easy to understand. Responses will include examples and 
    potential scenarios.  If the answer is not avaiable in the PDF source documents, the response will be "I do not have information related 
    to that specific scenario, please seek guidance from a qualified expert." If the question is not on the topic of leases, respond by 
    saying, "This is outside the scope of what I can help you with. Let's get back to lease accounting." 
    
    Context from source documents:
    {context}

    Proceed with answering the following questions based on the above context:"""
     
     # Define examples to instruct app how to respond
    examples = [
        {
            "context": "Detailed context on leases derived from source documents.",
            "query": "How do I determine the different lease components?",
            "answer": """Lease components are elements of the arrangement that provide the customer with the right to use an 
            identified asset. An entity should consider the right to use an underlying asset to be a separate lease component 
            if the following 2 conditions are met: 1. Lessee can benefit from the ROU asset either on its own or together with 
            other readily available resources and 2. The ROU is neither highly dependent on; nor highly interrelated with the 
            other ROU(s) in the contract. You can find additional information in the following reference documents. References: 
            KPMG-leaseshandbook section 4.1, PWC-leasesguide0124, section 2.4, EY-financial-reporting-developments-lease-
            accounting, section 1.4"""
        },
        {
            "context": "Background information on lease modifications from the document.",
            "query": "I am a lessor, how do I account for lease modifications?",
            "answer": """Several questions must be answered to determine how to appropriately account for lease modifications for 
            a lessor. Is the modified contract a lease, or does it contain a lease? If yes, does the modification result in a 
            separate contract? If yes, account for two separate contracts: the unmodified original contract, and a separate 
            contract accounted for in the same manner as any other new lease. If the modification does not result in a separate 
            contract, remeasure and reallocate the remaining consideration in the contract, reassess the lease classification at 
            the modification effective date, and account for any initial direct costs, lease incentives, and other paymetns made to 
            or by the lessor. Whether or not the lease classification changes, and how it changes drives the appropriate accounting. 
            You can find additional information in the following reference documents. References: EY - Financial Reporting Developments: 
            lease accounting, section 5.6, PWC - Leases Guide, section 5.6, KPMG - Leases Handbook, section 7.6"""
        }
    ]
    
    example_prompt = PromptTemplate(input_variables=["query", "answer"],
                                      template="\nQuestion: {query}\nAnswer: {answer}")    
    #Define suffix for query
    suffix="\n\nHuman: {query}\nAI: "
    
    #Construct FewShotPromptTemplate
    prompt_template = FewShotPromptTemplate(
                                            examples=examples,
                                            example_prompt=example_prompt,
                                            input_variables=["input"],
                                            prefix=prefix,
                                            suffix=suffix,
                                            example_separator="\n")

    return prompt_template

In [14]:
prompt_template = setup_prompt_template

In [32]:
prefix="""You are a leases chatbot. You answer questions relating to ASC 842 under US GAAP. You respond to the queries as shown in 
    the examples. The responses do not have to be brief. Giving a thorough response with citations from the source documents is more
    important than brevity. Each response will be followed by reference from multiple sources with section numbers or page numbers (which
    is in the meta data) from the source documents. The responses will be provided only from the provided PDF source documents.  
    The responses will be clear and helpful and will use language that is easy to understand. Responses will include examples and 
    potential scenarios.  If the answer is not avaiable in the PDF source documents, the response will be "I do not have information related 
    to that specific scenario, please seek guidance from a qualified expert." If the question is not on the topic of leases, respond by 
    saying, "This is outside the scope of what I can help you with. Let's get back to lease accounting." """

In [33]:
examples = [
        {
            "input": "How do I determine the different lease components?",
            "answer": """Lease components are elements of the arrangement that provide the customer with the right to use an 
            identified asset. An entity should consider the right to use an underlying asset to be a separate lease component 
            if the following 2 conditions are met: 1. Lessee can benefit from the ROU asset either on its own or together with 
            other readily available resources and 2. The ROU is neither highly dependent on; nor highly interrelated with the 
            other ROU(s) in the contract. You can find additional information in the following reference documents. References: 
            KPMG-leaseshandbook section 4.1, PWC-leasesguide0124, section 2.4, EY-financial-reporting-developments-lease-
            accounting, section 1.4"""
        },
        {
            "input": "I am a lessor, how do I account for lease modifications?",
            "answer": """Several questions must be answered to determine how to appropriately account for lease modifications for 
            a lessor. Is the modified contract a lease, or does it contain a lease? If yes, does the modification result in a 
            separate contract? If yes, account for two separate contracts: the unmodified original contract, and a separate 
            contract accounted for in the same manner as any other new lease. If the modification does not result in a separate 
            contract, remeasure and reallocate the remaining consideration in the contract, reassess the lease classification at 
            the modification effective date, and account for any initial direct costs, lease incentives, and other paymetns made to 
            or by the lessor. Whether or not the lease classification changes, and how it changes drives the appropriate accounting. 
            You can find additional information in the following reference documents. References: EY - Financial Reporting Developments: 
            lease accounting, section 5.6, PWC - Leases Guide, section 5.6, KPMG - Leases Handbook, section 7.6"""
        }
    ]

In [34]:
example_prompt = PromptTemplate(input_variables=["input", "answer"],
                                      template="\nInput: {input}\n\nAnswer: {answer}") 
print(example_prompt.format(**examples[0]))


Input: How do I determine the different lease components?

Answer: Lease components are elements of the arrangement that provide the customer with the right to use an 
            identified asset. An entity should consider the right to use an underlying asset to be a separate lease component 
            if the following 2 conditions are met: 1. Lessee can benefit from the ROU asset either on its own or together with 
            other readily available resources and 2. The ROU is neither highly dependent on; nor highly interrelated with the 
            other ROU(s) in the contract. You can find additional information in the following reference documents. References: 
            KPMG-leaseshandbook section 4.1, PWC-leasesguide0124, section 2.4, EY-financial-reporting-developments-lease-
            accounting, section 1.4


In [38]:
suffix="\n\nInput: {input}\nAnswer: "

In [39]:
prompt_template = FewShotPromptTemplate(
                                        examples=examples,
                                        example_prompt=example_prompt,
                                        input_variables=["input"],
                                        prefix=prefix,
                                        suffix=suffix,
                                        example_separator="\n")

In [40]:
print(prompt_template.format(input="What are the disclosure requirements under ASC 842?"))

You are a leases chatbot. You answer questions relating to ASC 842 under US GAAP. You respond to the queries as shown in 
    the examples. The responses do not have to be brief. Giving a thorough response with citations from the source documents is more
    important than brevity. Each response will be followed by reference from multiple sources with section numbers or page numbers (which
    is in the meta data) from the source documents. The responses will be provided only from the provided PDF source documents.  
    The responses will be clear and helpful and will use language that is easy to understand. Responses will include examples and 
    potential scenarios.  If the answer is not avaiable in the PDF source documents, the response will be "I do not have information related 
    to that specific scenario, please seek guidance from a qualified expert." If the question is not on the topic of leases, respond by 
    saying, "This is outside the scope of what I can help you with.

In [6]:
#initialize prompt template
prompt_template = setup_prompt_template()
print(prompt_template)

input_variables=['context', 'query'] examples=[{'context': 'Detailed context on leases derived from source documents.', 'input': 'How do I determine the different lease components?', 'answer': 'Lease components are elements of the arrangement that provide the customer with the right to use an \n            identified asset. An entity should consider the right to use an underlying asset to be a separate lease component \n            if the following 2 conditions are met: 1. Lessee can benefit from the ROU asset either on its own or together with \n            other readily available resources and 2. The ROU is neither highly dependent on; nor highly interrelated with the \n            other ROU(s) in the contract. You can find additional information in the following reference documents. References: \n            KPMG-leaseshandbook section 4.1, PWC-leasesguide0124, section 2.4, EY-financial-reporting-developments-lease-\n            accounting, section 1.4'}, {'context': 'Background inf

In [7]:
query = "What are the disclosure requirements under ASC 842?"
llm = ChatOpenAI(api_key=openai_api_key, model=OPENAI_MODEL)
retriever = vector_store.as_retriever()

DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'


In [8]:
rephrase_prompt = hub.pull("langchain-ai/chat-langchain-rephrase")

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.hub.langchain.com:443
DEBUG:urllib3.connectionpool:https://api.hub.langchain.com:443 "GET /commits/langchain-ai/chat-langchain-rephrase/?limit=100&offset=0 HTTP/1.1" 200 944
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.hub.langchain.com:443
DEBUG:urllib3.connectionpool:https://api.hub.langchain.com:443 "GET /commits/langchain-ai/chat-langchain-rephrase/fb7ddb56be11b2ab10d176174dae36faa2a9a6ba13187c8b2b98315f6ca7d136 HTTP/1.1" 200 512


In [11]:

retriever_chain = create_history_aware_retriever(llm, retriever, prompt_template)


ValueError: Expected `input` to be a prompt variable, but got ['context', 'query']

In [10]:
chain.invoke({"input": "...", "chat_history": })

SyntaxError: expression expected after dictionary key and ':' (323553273.py, line 1)

In [None]:
# combine_docs_chain = create_stuff_documents_chain(llm, prompt_template)
# retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)

In [None]:
# #initialize create retrieval chain (experiment)
# def initialize_createrc(vector_store):
#     llm = ChatOpenAI(model=OPENAI_MODEL, temperature=0, api_key=openai_api_key)
#     retriever=vector_store.as_retriever()
#     contextualize_q_system_prompt = (""""Given a chat history and the latest user question "
#     "which might reference context in the chat history, "
#     "formulate a standalone question which can be understood "
#     "without the chat history. Do NOT answer the question, just "
#     "reformulate it if needed and otherwise return it as is.""")
    
#     contextualize_q_prompt = ChatPromptTemplate.from_messages(
#         [
#             ("system", contextualize_q_system_prompt),
#             MessagesPlaceholder("chat_history"),
#             ("human", "{input}"),
#         ]
#     )
#     history_aware_retriever = create_history_aware_retriever(
#         llm, retriever, contextualize_q_prompt
#     )
    
#     # setup for answering the question
#     qa_system_prompt = (
#         """You are a leases chatbot. You answer questions relating to ASC 842 under US GAAP. You respond to the queries as shown in 
#         the examples. Each response will be followed by reference from multiple sources with section numbers from the source documents. 
#         The responses will be provided only from the provided PDF source documents.  The responses will be clear and helpful and will use 
#         language that is easy to understand. Responses will include examples and potential scenarios.  If the answer is not avaiable in
#         the PDF source documents, the response will be "I do not have information related to that specific scenario, please seek guidance
#         from a qualified expert." """
#     )
#     qa_prompt = ChatPromptTemplate.from_messages(
#         [
#             ("system", qa_system_prompt),
#             MessagesPlaceholder("retrieved_documents"),
#             ("human", "{context}"),
#         ]
#     )
    
#     # Combine the documents and use the LLM to generate an answer
#     stuff_documents_chain = create_stuff_documents_chain(llm, qa_prompt)
    
#     # Create the complete retrieval chain
#     crc = create_retrieval_chain(history_aware_retriever, stuff_documents_chain)

#     return crc

In [None]:
# #initialize conversational retrieval chain
# def initialize_crc(vector_store):
#     llm = ChatOpenAI(model=OPENAI_MODEL, temperature=0, api_key=openai_api_key)
#     retriever=vector_store.as_retriever()
#     crc = ConversationalRetrievalChain.from_llm(llm, retriever)
#     # st.session_state['crc'] = crc
#     # st.success('Source documents loaded!')
#     return crc 