In [None]:
# Importing necessary modules and classes
import dotenv
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAI
from langchain_core.messages import AIMessage, HumanMessage
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ChatMessageHistory
from typing import Dict
from langchain_core.runnables import RunnablePassthrough, RunnableBranch
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import Docx2txtLoader
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

# Read key/value pairs from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
dotenv.load_dotenv()

# Chat model used by the chains defined below; temperature=0.0 requests the
# least random output the model offers.
chat = ChatOpenAI(
    model="gpt-3.5-turbo-0125",
    temperature=0.0,
)
# Completion-style model, only needed if the optional compression retriever
# further down is enabled:
# llm = OpenAI(temperature=0.0)

# Load the PDF as one Document per page.
# `load()` is used instead of `load_and_split()`: the latter already runs a
# default text splitter over the pages, so the explicit split below would
# re-chunk text that had been chunked once already.
loader = PyPDFLoader("docs/QMproceedings.pdf")
pages = loader.load()

# Cut the pages into chunks of at most 1000 characters, with 100 characters of
# overlap between neighbouring chunks so context is not lost at the seams.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
all_splits = text_splitter.split_documents(pages)

# Embed every chunk with OpenAI embeddings and index them in a Chroma vector store
vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())

# Retriever over the vector store using maximal marginal relevance (MMR)
# search: 6 candidate chunks are fetched (fetch_k) and 3 are returned (k).
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 3, "fetch_k": 6},
)
# Optional alternative, currently disabled: wrap the MMR retriever in an
# LLM-based contextual compressor (requires the `llm` model to be enabled).
# compressor = LLMChainExtractor.from_llm(llm)
# retriever = ContextualCompressionRetriever(
#     base_compressor=compressor,
#     base_retriever=vectorstore.as_retriever(search_type="mmr")
# )

# Question-answering prompt: a system instruction whose {context} slot receives
# the retrieved documents, followed by the conversation itself, which is
# inserted through the "messages" placeholder.
question_answering_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Use the following pieces of context to answer the user's question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer:\n\n{context}",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

# Chain that stuffs the documents into the prompt above and sends it to the
# chat model.
document_chain = create_stuff_documents_chain(
    chat,
    question_answering_prompt,
)

# Function to parse the last message from retrieval input
def parse_retriever_input(params: Dict):
    """Return the text of the most recent message in the chain input.

    Args:
        params: Chain input mapping; its "messages" entry must be a non-empty
            sequence of objects exposing a ``content`` attribute.

    Returns:
        The ``content`` of the last element of ``params["messages"]``.

    Raises:
        KeyError: If ``params`` has no "messages" entry.
        IndexError: If the message sequence is empty.
    """
    latest_message = params["messages"][-1]
    return latest_message.content

# Basic retrieval chain. The input dict is passed through unchanged and gains
# two keys in order:
#   "context" - documents retrieved for the text of the latest message
#   "answer"  - output of the document chain run on the enriched dict
retrieval_chain = RunnablePassthrough.assign(
    context=parse_retriever_input | retriever
).assign(answer=document_chain)

# Prompt that asks the model to condense the conversation so far (inserted via
# the "messages" placeholder) into a single standalone search query.
query_transform_prompt = ChatPromptTemplate.from_messages([
    MessagesPlaceholder(variable_name="messages"),
    ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation. Only respond with the query, nothing else."),
])

# Retrieval step that adapts to the length of the conversation:
#   * exactly one message -> its text is sent to the retriever as-is, using
#     the same `parse_retriever_input` helper as `retrieval_chain` rather
#     than a duplicated inline lambda;
#   * otherwise -> the chat model first rewrites the conversation into a
#     search query, and that query string is sent to the retriever.
query_transforming_retriever_chain = RunnableBranch(
    (
        lambda x: len(x.get("messages", [])) == 1,
        parse_retriever_input | retriever,
    ),
    query_transform_prompt | chat | StrOutputParser() | retriever,
).with_config(run_name="chat_retriever_chain")

# Full conversational chain. The input dict is passed through unchanged and
# gains two keys in order:
#   "context" - documents from the query-transforming retriever chain
#   "answer"  - output of the document chain run on the enriched dict
conversational_retrieval_chain = RunnablePassthrough.assign(
    context=query_transforming_retriever_chain
).assign(answer=document_chain)


In [None]:
# Start a fresh conversation and record the first user question
chat_history = ChatMessageHistory()
chat_history.add_user_message("What measurements are repoted in the conference proceedings?")

# Run the history through the chain, show the reply, and store it in the
# history so that the next turn can see it
response = conversational_retrieval_chain.invoke({"messages": chat_history.messages})
print(response["answer"])
chat_history.add_ai_message(response["answer"])

In [None]:
# Follow-up turn: the history now holds more than one message, so the chain
# rewrites the conversation into a search query before retrieving
chat_history.add_user_message("Which models agree best with the results?")

# Run the updated history through the chain, show the reply, and store it
response = conversational_retrieval_chain.invoke({"messages": chat_history.messages})
print(response["answer"])
chat_history.add_ai_message(response["answer"])