In [1]:
import os
from dotenv import load_dotenv
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import Qdrant
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI, OpenAI
from qdrant_client import QdrantClient, models
import qdrant_client
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain_core.prompts import PromptTemplate, FewShotPromptTemplate
import logging
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains import (
    StuffDocumentsChain, LLMChain, ConversationalRetrievalChain
)

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain import hub
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
# NOTE(review): DEBUG on the root logger also surfaces httpx/httpcore/openai debug records
# (see the cell outputs below); consider a higher level before sharing the notebook.
logging.basicConfig(level=logging.DEBUG)

In [2]:
# Chat model name; passed to ChatOpenAI when the LLM is created further down.
OPENAI_MODEL = "gpt-3.5-turbo"

# OpenAI API key, read from Streamlit's secrets store instead of being hard-coded.
openai_api_key = st.secrets["OPENAI_API_KEY"]

In [3]:
def get_vector_store():
    """Return a LangChain ``Qdrant`` vector store for the configured collection.

    The Qdrant host, API key and collection name are looked up in
    ``st.secrets``; documents are embedded with OpenAI's
    ``text-embedding-3-large`` model using the notebook-level
    ``openai_api_key``.
    """
    secrets = st.secrets

    # Connection to the Qdrant server.
    qdrant = qdrant_client.QdrantClient(
        secrets["QDRANT_HOST"],
        api_key=secrets["QDRANT_API_KEY"],
    )

    # Embedding model used by the vector store.
    embedder = OpenAIEmbeddings(api_key=openai_api_key, model="text-embedding-3-large")

    # Wrap client + embeddings in the LangChain vector store and hand it back.
    return Qdrant(
        client=qdrant,
        collection_name=secrets["QDRANT_COLLECTION_NAME"],
        embeddings=embedder,
    )

In [4]:
# Build the vector store once; the retriever used later is derived from it.
vector_store = get_vector_store()

DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'


In [5]:
def setup_prompt_template():
    """Build the few-shot prompt used to answer ASC 842 lease questions.

    The prompt is assembled from three parts: an instruction prefix (which
    contains the ``{context}`` slot), two worked question/answer examples,
    and a suffix holding the ``{input}`` slot for the user's question.

    Returns:
        FewShotPromptTemplate: prompt expecting ``input`` and ``context``.
    """
    prefix="""You are a leases chatbot. You answer questions relating to ASC 842 under US GAAP. You respond to the queries as shown in 
    the examples. The responses do not have to be brief. Giving a thorough response with citations from the source documents is more
    important than brevity. Each response will be followed by reference from multiple sources with section numbers or page numbers (which
    is in the meta data) from the context documents. The responses will be provided only from the provided PDF source documents.  
    The responses will be clear and helpful and will use language that is easy to understand. Responses will include examples and 
    potential scenarios.  If the answer is not available in the PDF source documents, the response will be "I do not have information related 
    to that specific scenario, please seek guidance from a qualified expert." If the question is not on the topic of leases, respond by 
    saying, "This is outside the scope of what I can help you with. Let's get back to lease accounting." 
    
    You will answer the input question based on the provided context:
    
    <context>
    {context}
    </context>
    
    You will use the provided examples for guidance on how to construct your responses. Your responses should be similar."""

    # Worked examples showing the expected answer style (answer followed by references).
    examples = [
        {
            "input": "How do I determine the different lease components?",
            "answer": """Lease components are elements of the arrangement that provide the customer with the right to use an 
            identified asset. An entity should consider the right to use an underlying asset to be a separate lease component 
            if the following 2 conditions are met: 1. Lessee can benefit from the ROU asset either on its own or together with 
            other readily available resources and 2. The ROU is neither highly dependent on; nor highly interrelated with the 
            other ROU(s) in the contract. References: KPMG-leaseshandbook section 4.1, PWC-leasesguide0124, section 2.4, 
            EY-financial-reporting-developments-lease-accounting, section 1.4"""
        },
        {
            "input": "I am a lessor, how do I account for lease modifications?",
            "answer": """Several questions must be answered to determine how to appropriately account for lease modifications for 
            a lessor. Is the modified contract a lease, or does it contain a lease? If yes, does the modification result in a 
            separate contract? If yes, account for two separate contracts: the unmodified original contract, and a separate 
            contract accounted for in the same manner as any other new lease. If the modification does not result in a separate 
            contract, remeasure and reallocate the remaining consideration in the contract, reassess the lease classification at 
            the modification effective date, and account for any initial direct costs, lease incentives, and other paymetns made to 
            or by the lessor. Whether or not the lease classification changes, and how it changes drives the appropriate accounting. 
            References: EY - Financial Reporting Developments: lease accounting, section 5.6, PWC - Leases Guide, section 5.6, 
            KPMG - Leases Handbook, section 7.6"""
        }
    ]

    # Layout applied to every example. It fills {input} and {answer}, so those
    # are the variables declared on the example template (not 'context').
    example_format = "\nQuestion: {input}\n\nAnswer: {answer}"
    example_template = PromptTemplate(input_variables=['input', 'answer'],
                                      template=example_format)

    # Slot for the user's question, placed after the examples.
    suffix="\n\nQuestion: {input}\nAnswer: "

    # Construct the FewShotPromptTemplate from prefix + examples + suffix.
    prompt_template = FewShotPromptTemplate(
                                            examples=examples,
                                            example_prompt=example_template,
                                            input_variables=['input','context'],
                                            prefix=prefix,
                                            suffix=suffix,
                                            example_separator="\n\n")
    return prompt_template

In [None]:
# Sanity check: build the prompt through the helper and print its repr.
test_prompt = setup_prompt_template()
print(test_prompt)

In [None]:
# prompt_template = setup_prompt_template

In [6]:
# Instruction block placed at the start of the prompt; {context} is the slot for the source documents.
# NOTE(review): this repeats the prefix defined inside setup_prompt_template(); keep the two in sync.
prefix="""You are a leases chatbot. You answer questions relating to ASC 842 under US GAAP. You respond to the queries as shown in 
    the examples. The responses do not have to be brief. Giving a thorough response with citations from the source documents is more
    important than brevity. Each response will be followed by reference from multiple sources with section numbers or page numbers (which
    is in the meta data) from the context documents. The responses will be provided only from the provided PDF source documents.  
    The responses will be clear and helpful and will use language that is easy to understand. Responses will include examples and 
    potential scenarios.  If the answer is not available in the PDF source documents, the response will be "I do not have information related 
    to that specific scenario, please seek guidance from a qualified expert." If the question is not on the topic of leases, respond by 
    saying, "This is outside the scope of what I can help you with. Let's get back to lease accounting." 
    
    You will answer the input question based on the provided context:
    
    <context>
    {context}
    </context>
    
    You will use the provided examples for guidance on how to construct your responses. Your responses should be similar."""

In [7]:
# Few-shot examples: each entry pairs a sample question ("input") with a model answer ("answer").
# NOTE(review): the answer wording here differs slightly from the copies inside setup_prompt_template().
examples = [
        {
            "input": "How do I determine the different lease components?",
            "answer": """Lease components are elements of the arrangement that provide the customer with the right to use an 
            identified asset. An entity should consider the right to use an underlying asset to be a separate lease component 
            if the following 2 conditions are met: 1. Lessee can benefit from the ROU asset either on its own or together with 
            other readily available resources and 2. The ROU is neither highly dependent on; nor highly interrelated with the other 
            ROU(s) in the contract. You can find additional information in the following reference documents. References: 
            KPMG-leaseshandbook section 4.1, PWC-leasesguide0124, section 2.4, EY-financial-reporting-developments-lease-accounting, section 1.4"""
        },
        {
            "input": "I am a lessor, how do I account for lease modifications?",
            "answer": """Several questions must be answered to determine how to appropriately account for lease modifications for 
            a lessor. Is the modified contract a lease, or does it contain a lease? If yes, does the modification result in a 
            separate contract? If yes, account for two separate contracts: the unmodified original contract, and a separate 
            contract accounted for in the same manner as any other new lease. If the modification does not result in a separate 
            contract, remeasure and reallocate the remaining consideration in the contract, reassess the lease classification at 
            the modification effective date, and account for any initial direct costs, lease incentives, and other paymetns made to 
            or by the lessor. Whether or not the lease classification changes, and how it changes drives the appropriate accounting. 
            You can find additional information in the following reference documents. References: EY - Financial Reporting Developments: 
            lease accounting, section 5.6, PWC - Leases Guide, section 5.6, KPMG - Leases Handbook, section 7.6"""
        }
    ]

In [8]:
# Template that renders one few-shot example. It fills {input} and {answer},
# so those are the variables declared here (the template has no {context} slot).
example_prompt = PromptTemplate(input_variables=['input', 'answer'],
                                template="\nQuestion: {input}\n\nAnswer: {answer}")
# Render the first example to check the layout.
print(example_prompt.format(**examples[0]))


Question: How do I determine the different lease components?

Answer: Lease components are elements of the arrangement that provide the customer with the right to use an 
            identified asset. An entity should consider the right to use an underlying asset to be a separate lease component 
            if the following 2 conditions are met: 1. Lessee can benefit from the ROU asset either on its own or together with 
            other readily available resources and 2. The ROU is neither highly dependent on; nor highly interrelated with the other 
            ROU(s) in the contract. You can find additional information in the following reference documents. References: 
            KPMG-leaseshandbook section 4.1, PWC-leasesguide0124, section 2.4, EY-financial-reporting-developments-lease-accounting, section 1.4


In [9]:
# Closing part of the prompt: the slot for the user's question followed by the answer cue.
suffix="\n\nQuestion: {input}\nAnswer: "

In [10]:
# Assemble the few-shot prompt from the pieces defined in the cells above:
# instructions (prefix), worked examples, then the user's question (suffix).
prompt_template = FewShotPromptTemplate(
    prefix=prefix,
    examples=examples,
    example_prompt=example_prompt,
    example_separator="\n",
    suffix=suffix,
    input_variables=['input', 'context'],
)

In [None]:
# Render the prompt with placeholder values to inspect the final text.
print(prompt_template.format(input="What are the disclosure requirements under ASC 842?", context = "the disclosures are terrible!"))

In [11]:
# Initialize the prompt template through the helper function.
# NOTE(review): this rebinds prompt_template, replacing the one assembled piece by piece above.
prompt_template = setup_prompt_template()
print(prompt_template)

input_variables=['context', 'input'] examples=[{'input': 'How do I determine the different lease components?', 'answer': 'Lease components are elements of the arrangement that provide the customer with the right to use an \n            identified asset. An entity should consider the right to use an underlying asset to be a separate lease component \n            if the following 2 conditions are met: 1. Lessee can benefit from the ROU asset either on its own or together with \n            other readily available resources and 2. The ROU is neither highly dependent on; nor highly interrelated with the \n            other ROU(s) in the contract. References: KPMG-leaseshandbook section 4.1, PWC-leasesguide0124, section 2.4, \n            EY-financial-reporting-developments-lease-accounting, section 1.4'}, {'input': 'I am a lessor, how do I account for lease modifications?', 'answer': 'Several questions must be answered to determine how to appropriately account for lease modifications for \

In [12]:
# Sample question used by the cells below.
# NOTE(review): the name `input` shadows Python's built-in input(); later cells read it,
# so a rename has to update them as well.
input = "What are the disclosure requirements under ASC 842?"
llm = ChatOpenAI(api_key=openai_api_key, model=OPENAI_MODEL)
retriever = vector_store.as_retriever()
# NOTE(review): prompt_template is the answer prompt (it has a {context} slot), while
# create_history_aware_retriever is documented as taking the prompt that generates the
# search query from the chat history — confirm that reusing it here is intended.
history_aware_retriever_chain = create_history_aware_retriever(llm, retriever, prompt_template)

DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'


In [None]:
# NOTE(review): imports normally live in the first cell — consider moving this one there.
from langchain_core.messages import HumanMessage, AIMessage

# Example prior turn (one human question and the AI reply) to use as chat history.
chat_history = [HumanMessage(content="In what ways does ASC 842 intersect with ASC 606?"), AIMessage(content="ASC 842 and ASC 606 intersect in several ways. ASC 842 aligns certain concepts with ASC 606, such as determining whether the transfer of an asset is a sale in a sale and leaseback transaction. Additionally, ASC 842 requires lessors to apply specific guidance in ASC 606 to their leasing transactions, particularly for the allocation of consideration in the contract. The FASB clarified that existing unmodified contracts do not need to revisit the allocation of contract consideration to lease components upon the adoption of ASC 842, except when lease classification changes. Overall, the alignment between ASC 842 and ASC 606 aims to provide consistency and clarity in lease and revenue recognition accounting practices.")]

In [None]:
# Scratch values for the manual invoke in the next cell.
# NOTE(review): this rebinds chat_history to a one-space string, replacing the message list from the previous cell.
chat_history = " "
context = "I hate acconting and I don't understand the technical language so need a very basic explanation as if you were describing it to a child."

In [None]:
# Call the history-aware retriever chain directly with hand-supplied history, context and question.
history_aware_retriever_chain.invoke({"chat_history": chat_history, "context": context,
                        "input": input})

In [13]:
# Following the example from here:
# https://github.com/langchain-ai/langchain/discussions/16002
# I kept getting hung up on {context}, so I commented out the setup_prompt_template() function and am
# using the separate pieces of code to build the template. The {context} will be the documents retrieved
# with the history-aware retriever (I think?). I think the history-aware retriever will retrieve docs and
# feed them into the prompt template as context. Then the document chain takes the context and stuffs it
# into a place (?) where it can be leveraged as the source for the LLM to retrieve the answer from.

document_chain = create_stuff_documents_chain(llm, prompt_template)

In [14]:
# Create the retrieval chain; its retriever will pass in the context documents (?)

# The retriever has already been defined above; if this is moved into a function,
# the retriever may need to be defined again.

retrieval_chain = create_retrieval_chain(history_aware_retriever_chain, document_chain)

In [15]:
# Invoke the retrieval chain with the sample question only (no chat_history is supplied).

response = retrieval_chain.invoke({'input': input})

DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'post_parser': <function Embeddings.create.<locals>.parser at 0x00000169F1F01120>, 'json_data': {'input': [[3923, 527, 279, 28957, 8670, 1234, 20382, 220, 25377, 30]], 'model': 'text-embedding-3-large', 'encoding_format': 'base64'}}
DEBUG:httpcore.connection:connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=None socket_options=None
DEBUG:httpcore.connection:connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x00000169EC2BBB50>
DEBUG:httpcore.connection:start_tls.started ssl_context=<ssl.SSLContext object at 0x00000169F0D4ADE0> server_hostname='api.openai.com' timeout=None
DEBUG:httpcore.connection:start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x00000169EF452A10>
DEBUG:httpcore.http11:send_request_headers.started request=<Request [b'POST']>
DEBUG:httpcore.http11:send_request_headers.complete
DEBUG:http

In [16]:
# Show only the generated answer text from the response.
print(response['answer'])

The disclosure requirements under ASC 842 aim to provide users of financial statements with qualitative and quantitative information to assess the amount, timing, and uncertainty of cash flows arising from leases. This information is crucial for stakeholders to understand the impact of leases on an entity's financial position and performance.

For lessees, the disclosure requirements include providing information about their leases, significant judgments made in applying the lease accounting requirements, and the amounts recognized in the financial statements relating to those leases. This information should be presented in a clear and transparent manner, both qualitatively and quantitatively. Lessees should consider the level of detail necessary to meet the disclosure objective and balance the emphasis placed on various requirements to avoid obscuring useful information.

For lessors, the disclosure requirements are similar and include information about the nature of their leases, sig