In [14]:
"""
source: https://github.com/sophiamyang/tutorials-LangChain/blob/main/LangChain_QA.ipynb
source: https://www.youtube.com/watch?v=DXmiJKrQIvg&t=1s&ab_channel=SophiaYang
"""
None

In [1]:
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import OpenAI

In [8]:
chat_history = []

# load document
loader = PyPDFLoader("../data/WizardLM.pdf")
documents = loader.load()

# split the documents into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
texts = text_splitter.split_documents(documents)

# select which embeddings we want to use
embeddings = OpenAIEmbeddings()
# create the vectorestore to use as the index
db = Chroma.from_documents(texts, embeddings)

# expose this index in a retriever interface
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})

# create a chain to answer questions
qa = ConversationalRetrievalChain.from_llm(OpenAI(), retriever)

In [9]:
query = (
    "What the paper: "
    "WizardLM: Empowering Large Language Models to Follow Complex Instructions "
    "talks about?"
)
result = qa({"question": query, "chat_history": chat_history})
result

{'question': 'What the paper: WizardLM: Empowering Large Language Models to Follow Complex Instructions talks about?',
 'chat_history': [],
 'answer': ' This paper discusses a method for creating large amounts of instruction data with varying levels of complexity using LLM instead of humans. It also evaluates the performance of the resulting model, WizardLM, against human-created instructions and other LLMs.'}

In [13]:
query = "What are the related works to the WizardLM paper?"
result = qa({"question": query, "chat_history": chat_history})
result

{'question': 'What are the related works to the WizardLM paper?',
 'chat_history': [],
 'answer': ' The related works to the WizardLM paper are the automatic GPT-4 and human evaluation methods.'}