In [1]:
from langchain.document_loaders import PyPDFLoader

# Point a PDF loader at the local copy of the book.
loader = PyPDFLoader(
    "./data/01. Alice's Adventures in Wonderland author Lewis Caroll.pdf"
)

# load() parses the file and returns a list of Document objects.
documents = loader.load()

In [2]:
# Preview only the first couple of Document objects: echoing the full list
# would dump the text of the entire book into the cell output.
documents[:2]

[Document(page_content='BYLEWIS CARROLL ILLUSTRATED BY JOHN TENNIELNAVIGATEALICE ’S\nAdventures in W onderland\nCONTROL\nCLOSE THE BOOK\nTURN THE PAGEThe world’s\nmost precise\nreplica\nof the world’s \nmost famous\nchildren’s book!\nIn 1998, Peter Zelchenko\nbegan a project for Volume-One Publishing: to create anexact digital replica of LewisCarroll’s ﬁrst edition of Alice.\nWorking with the original1865 edition and numerousother editions at the NewberryLibrary in Chicago, Zelchenkocreated a digital masterpiece inhis own right, a testament tothe original work of LewisCarroll (aka Prof. CharlesDodgson) who personallydirected the typography for theﬁrst Alice.\nAfter much analyis, Peter then\npainstakingly matched letter toletter, line to line, of his newdigital edition to that of theoriginal. After weeks of toil hecreated an exact replica of theoriginal! The book was addedto VolumeOne’s print-on-demand offering. While a PDFversion is offered on variousportals of the Net, BookVirtualtook

In [3]:
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

chain = load_qa_chain(llm=OpenAI(), chain_type="stuff")
query = "Give me a summary of Alice in Wonderland. Provide an analysis of the writing style."

# The "stuff" chain places every input document into one prompt, so passing the
# whole book overflows the model's context window. That failure is the point of
# this cell: catch it and show the message instead of letting the traceback
# abort a "Restart & Run All".
try:
    answer = chain.run(input_documents=documents, question=query)
except Exception as exc:  # broad on purpose: the provider's error class is not imported in this notebook
    answer = None
    print(f"{type(exc).__name__}: {exc}")

answer  # displays the answer if the call ever succeeds; shows nothing when it failed

BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 4097 tokens, however you requested 44417 tokens (44161 in your prompt; 256 for the completion). Please reduce your prompt; or completion length.", 'type': 'invalid_request_error', 'param': None, 'code': None}}

# This is expected! In the above, we're trying to push the whole document through the model in a single prompt, which far exceeds its 4,097-token context window.

In [7]:
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

# Same question-answering setup as before, but with the "refine" chain type
# instead of "stuff".
chain = load_qa_chain(
    llm=OpenAI(),
    chain_type="refine",
)
query = "What is the climax of the short story?"

# NOTE(review): the recorded run of this cell ended in a KeyboardInterrupt --
# presumably the chain works through the documents with a separate LLM call
# for each one, which is slow for a whole book; confirm before re-running.
chain.run(question=query, input_documents=documents)

KeyboardInterrupt: 

# RetrievalQA

In [19]:
from langchain.chains import RetrievalQA
from langchain.indexes import VectorstoreIndexCreator
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# 1. Load the PDF.
loader = PyPDFLoader(
    "./data/01. Alice's Adventures in Wonderland author Lewis Caroll.pdf"
)
documents = loader.load()

# 2. Split the loaded documents into chunks (chunk_size=1000, no overlap).
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
texts = text_splitter.split_documents(documents)

# 3. Build a Chroma vector store from the chunks using OpenAI embeddings.
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(texts, embeddings)

# 4. Similarity-search retriever configured with k=5.
retriever = db.as_retriever(
    search_kwargs=dict(k=5),
    search_type="similarity",
)

# 5. "stuff" QA chain on top of the retriever; the response also carries the
#    source documents that were used.
qa = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    llm=OpenAI(),
    return_source_documents=True,
)

# 6. Ask the question.
query = "Who did Alice meet first after she fell down the hole?"
result = qa(dict(query=query))

In [20]:
# Show the full response dict: the query, the generated answer ('result'),
# and the retrieved 'source_documents'.
result

{'query': 'Who did Alice meet first after she fell down the hole?',
 'result': ' Alice did not meet anyone after she fell down the hole. She was alone until she encountered the White Rabbit once she reached the bottom of the hole.',
 'source_documents': [Document(page_content='Navigate Control Internetburning with curiosity, she ran across the ﬁeld\nafter it, and was just in time to see it popdown a large rabbit-hole under the hedge.\nIn another moment down went Alice after\nit, never once considering how in the worldshe was to get out again.\nThe rabbit-hole went straight on like a\ntunnel for some way, and then dipped suddenlydown, so suddenly that Alice had not a momentto think about stopping herself before she foundherself falling down what seemed to be a verydeep well.\nEither the well was very deep, or she fell\nvery slowly, for she had plenty of time as shewent down to look about her, and to wonderwhat was going to happen next. First, she triedto look down and make out what she 

In [21]:
# Aggregate-style question: asks for every character plus a mention count.
query = (
    "Give me all the characters in the short story Alice in Wonderland "
    "along with the number of times each is mentioned."
)
result = qa(dict(query=query))

In [22]:
# Show the response dict for the character-count question, including the
# source documents the answer was based on.
result

{'query': 'Give me all the characters in the short story Alice in Wonderland along with the number of times each is mentioned.',
 'result': ' The characters mentioned in the story include Alice (mentioned multiple times), the Knave (mentioned once), the King (mentioned multiple times), the Queen (mentioned multiple times), and the White Rabbit (mentioned multiple times).',
 'source_documents': [Document(page_content='Navigate Control Internet\nDigital Interface by BookVirtual Corp. U.S. Patent Pending. © 2000 All Rights Reserved.Fit Page Full Screen On/Off Close Book\nThus grew the tale of Wonderland :\nThus slowly, one by one,\nIts quaint events were hammered out—\nAnd now the tale is done,\nAnd home we steer, a merry crew,\nBeneath the setting sun.\nAlice! a childish story take,\nAnd with a gentle hand\nLay it where Childhood’s dreams are twined\nIn Memory’s mystic band,\nLike pilgrim’s withered wreath of ﬂowers\nPlucked in a far-off land.Imperious Prima ﬂashes forth\nHer edict ‘to b

# Hyperparameters:

- Embedding types
- Text Splitters
- Retrievers


# Conversational LLMs

In [24]:
from langchain.chains import ConversationalRetrievalChain

# Conversational chain built on the same retriever as the RetrievalQA section.
# NOTE: this rebinds `qa`, which previously held the RetrievalQA chain; from
# here on `qa` is called with "question" and "chat_history" keys.
qa = ConversationalRetrievalChain.from_llm(
    llm=OpenAI(),
    retriever=retriever,
)

In [25]:
# First turn of the conversation: there are no earlier exchanges yet, so the
# history passed to the chain is empty.
chat_history = []
query = "Who is the antagonist in this story?"

result = qa(dict(question=query, chat_history=chat_history))

In [26]:
# Show only the answer text from the conversational chain's response.
result["answer"]

' The antagonist in this story is the Mouse, who hates cats and dogs and causes trouble for the other characters. '

In [27]:
# Carry the previous exchange forward as a (question, answer) pair, then ask
# a follow-up question that relies on it.
chat_history = [
    (query, result["answer"]),
]
query = "What kind of trouble does the mouse bring to the other characters?"
result = qa(dict(question=query, chat_history=chat_history))

In [28]:
# Inspect the history that was passed with the follow-up question: a list of
# (question, answer) tuples.
chat_history

[('Who is the antagonist in this story?',
  ' The antagonist in this story is the Mouse, who hates cats and dogs and causes trouble for the other characters. ')]

In [29]:
# Show the answer to the follow-up question.
result["answer"]

' The Mouse causes trouble by swimming away from Alice and making a commotion in the pool as it goes.'