In [7]:
import os
from dotenv import load_dotenv
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import Qdrant
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI, OpenAI
from qdrant_client import QdrantClient, models
import qdrant_client
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain_core.prompts import PromptTemplate, FewShotPromptTemplate
import logging
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
# Root logger at DEBUG level: this is why third-party clients such as httpx
# emit their "DEBUG:httpx:..." lines underneath the cells further down.
logging.basicConfig(level=logging.DEBUG)

In [2]:
# OpenAI credential, pulled from Streamlit's secrets store instead of being
# written into the notebook.
openai_api_key = st.secrets["OPENAI_API_KEY"]

# Chat model identifier passed to the ChatOpenAI instances created below.
OPENAI_MODEL = "gpt-3.5-turbo"

In [3]:
def get_vector_store():
    """Connect to Qdrant and wrap the configured collection as a vector store.

    Every setting is read from ``st.secrets``: ``QDRANT_HOST``,
    ``QDRANT_API_KEY``, ``QDRANT_COLLECTION_NAME`` and ``OPENAI_API_KEY``.

    Returns:
        Qdrant: LangChain vector store bound to the configured collection and
        using ``OpenAIEmbeddings`` as its embedding function.
    """
    # Create a client to connect to the Qdrant server
    client = qdrant_client.QdrantClient(
        st.secrets["QDRANT_HOST"],
        api_key=st.secrets["QDRANT_API_KEY"],
    )

    # Hand the OpenAI key over explicitly, the same way the ChatOpenAI calls in
    # this notebook do, rather than depending on an OPENAI_API_KEY environment
    # variable happening to be set.
    embeddings = OpenAIEmbeddings(api_key=st.secrets["OPENAI_API_KEY"])

    # Create the vector store from the client and the embedding function
    return Qdrant(
        client,
        collection_name=st.secrets["QDRANT_COLLECTION_NAME"],
        embeddings=embeddings,
    )

In [4]:
# Build the vector store; later cells reuse this module-level `vector_store`.
vector_store = get_vector_store()

DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'


In [9]:
# Conversational retrieval chain factory
def initialize_crc(vector_store):
    """Create a ``ConversationalRetrievalChain`` on top of ``vector_store``.

    Args:
        vector_store: object exposing ``as_retriever()``; its retriever
            supplies the documents for the chain.

    Returns:
        The chain built by ``ConversationalRetrievalChain.from_llm`` using a
        temperature-0 ``ChatOpenAI`` model named by ``OPENAI_MODEL``.
    """
    chat_model = ChatOpenAI(model=OPENAI_MODEL, temperature=0, api_key=openai_api_key)
    return ConversationalRetrievalChain.from_llm(chat_model, vector_store.as_retriever())

In [None]:
# Rebuild the vector store, assemble the conversational retrieval chain on top
# of it, and print the chain so its configuration can be inspected.
vector_store = get_vector_store()
crc = initialize_crc(vector_store)
print(crc)  # Inspect the crc object

In [None]:
# Single-turn question answering chain over the same vector store.
# `as_retriever` is configured through `search_type` / `search_kwargs`;
# `metadata_fields` is not one of its options, so no extra argument is passed.
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model=OPENAI_MODEL, temperature=0, api_key=openai_api_key),
    chain_type="stuff",  # the default, spelled out for readers
    retriever=vector_store.as_retriever(),
)

In [None]:
# Ask one question through the conversational chain with an empty history.
query = "How does ASC 606 interact with ASC 842?"
chat_history = []
# `Chain.run` is deprecated; `invoke` returns the whole output dict, so the
# generated text is read from its 'answer' key.
response = crc.invoke({
    'question': query,
    'chat_history': chat_history
})['answer']

print(response)

In [4]:
def get_vector_store():
    """Connect to Qdrant, report the collection size, and return a vector store.

    Every setting is read from ``st.secrets``: ``QDRANT_HOST``,
    ``QDRANT_API_KEY``, ``QDRANT_COLLECTION_NAME`` and ``OPENAI_API_KEY``.
    As a side effect, the collection's ``points_count`` is printed.

    Returns:
        Qdrant: LangChain vector store bound to the configured collection and
        using ``OpenAIEmbeddings`` as its embedding function.
    """
    # Read the collection name once; it is needed for both the store and the count.
    collection_name = st.secrets["QDRANT_COLLECTION_NAME"]

    # Create a client to connect to the Qdrant server
    client = qdrant_client.QdrantClient(
        st.secrets["QDRANT_HOST"],
        api_key=st.secrets["QDRANT_API_KEY"],
    )

    # Hand the OpenAI key over explicitly, the same way the ChatOpenAI calls in
    # this notebook do, rather than depending on an OPENAI_API_KEY environment
    # variable happening to be set.
    embeddings = OpenAIEmbeddings(api_key=st.secrets["OPENAI_API_KEY"])

    # Create a vector store with Qdrant and embeddings
    vector_store = Qdrant(
        client,
        collection_name=collection_name,
        embeddings=embeddings,
    )

    # Get the number of vectors in the Qdrant collection
    collection_info = client.get_collection(collection_name=collection_name)
    vector_count = collection_info.points_count

    print(f"Vector store contains {vector_count} vectors.")

    return vector_store
    

In [11]:
#initialize create retrieval chain (experiment)
def initialize_createrc(vector_store):
    """Build a history-aware retrieval chain on top of ``vector_store``.

    The chain is assembled in two stages: a history-aware retriever that uses
    the LLM to rewrite the latest question into a standalone one, and a
    "stuff documents" chain that answers from the retrieved documents.

    Args:
        vector_store: object exposing ``as_retriever()``.

    Returns:
        The runnable produced by ``create_retrieval_chain``. Invoke it with
        flat keys, ``{"input": <question string>, "chat_history": <messages>}``;
        the generated text is returned under the ``"answer"`` key.
    """
    llm = ChatOpenAI(model=OPENAI_MODEL, temperature=0, api_key=openai_api_key)
    retriever = vector_store.as_retriever()

    # Adjacent string literals are joined by the parser into one clean prompt
    # (no stray quote characters or source indentation end up in the text).
    contextualize_q_system_prompt = (
        "Given a chat history and the latest user question "
        "which might reference context in the chat history, "
        "formulate a standalone question which can be understood "
        "without the chat history. Do NOT answer the question, just "
        "reformulate it if needed and otherwise return it as is."
    )

    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    history_aware_retriever = create_history_aware_retriever(
        llm, retriever, contextualize_q_prompt
    )

    # setup for answering the question
    # The retrieved documents are injected through the `context` variable, so
    # the placeholder is appended to the system instructions.
    qa_system_prompt = (
        """You are a leases chatbot. You answer questions relating to ASC 842 under US GAAP. You respond to the queries as shown in 
        the examples. Each response will be followed by reference from multiple sources with section numbers from the source documents. 
        The responses will be provided only from the provided PDF source documents.  The responses will be clear and helpful and will use 
        language that is easy to understand. Responses will include examples and potential scenarios.  If the answer is not avaiable in
        the PDF source documents, the response will be "I do not have information related to that specific scenario, please seek guidance
        from a qualified expert." """
        "\n\n{context}"
    )
    # The answer prompt receives the prior turns as `chat_history` and the
    # user's question as `input`; both keys come from the chain's invoke payload.
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", qa_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )

    # Combine the documents and use the LLM to generate an answer
    stuff_documents_chain = create_stuff_documents_chain(llm, qa_prompt)

    # Create the complete retrieval chain
    crc = create_retrieval_chain(history_aware_retriever, stuff_documents_chain)

    return crc

In [12]:
# Build the history-aware retrieval chain against the existing `vector_store`
# to check that it can be constructed.
test = initialize_createrc(vector_store)

DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'


In [17]:
def test_crc():
    # Assuming vector_store is already set up and available
    vector_store = get_vector_store()
    
    # Initialize the retrieval chain
    crc = initialize_createrc(vector_store)
    
    # List of test questions
    test_questions = [
        "How does ASC 606 interact with ASC 842?",
        "What are the main components of a lease under ASC 842?",
        "Can you explain the lease modification accounting for a lessee?"
    ]
    
    # Empty chat history at the start
    chat_history = []

    # Process each question
    for question in test_questions:
        print(f"Question: {question}")
        
        # Simulate the `run` method of crc, depending on its correct method signature
        # You might need to adapt this call depending on how your `crc.run` expects arguments
        response = crc.invoke({
            'input': {
                'question': question,
                'chat_history': chat_history
            }
        })
        
        # Print the response and update chat history
        print("Response:", response)
        chat_history.append((question, response))
        print("\n")  # Newline for better readability between questions

if __name__ == "__main__":
    test_crc()


DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\mered\\anaconda3\\Library\\ssl\\cacert.pem'


Question: How does ASC 606 interact with ASC 842?


TypeError: expected string or buffer