In [1]:
# !pip install --upgrade langchain deeplake openai tiktoken pinecone-client ipywidgets

In [2]:
import os
from dotenv import load_dotenv

# Load variables from a local env file into the process environment so that
# later cells can read PINECONE_API_KEY, PINECONE_API_ENV and OPENAI_API_KEY
# through `os.environ` instead of hard-coding them in the notebook.
dotenv_path = '../secrets.env'
load_dotenv(dotenv_path=dotenv_path)

from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import  Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

  from tqdm.autonotebook import tqdm


### Load data

In [3]:
# Read both source texts fully into memory as UTF-8 strings.
with open("../documents/romeo_and_juliet.txt", encoding='utf-8') as romeo_file:
    romeo_and_juliet = romeo_file.read()

with open("../documents/the_great_gatsby.txt", encoding='utf-8') as gatsby_file:
    the_great_gatsby = gatsby_file.read()

# Wrap each raw text in a Document (one per book, in this order) so the
# text splitter below can consume them.
data = [
    Document(page_content=book_text)
    for book_text in (romeo_and_juliet, the_great_gatsby)
]

In [4]:
# Quick sanity check on what was loaded: document count and total size.
print(f'You have {len(data)} document(s) in your data')

# Total number of characters across every loaded document.
length = sum(len(document.page_content) for document in data)
print(f'There are {length} characters in your document')

You have 2 document(s) in your data
There are 413047 characters in your document


### Chunk up data

In [5]:
# Splitter configured with chunk_size=1000 and no overlap between
# neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)

# `texts` holds the chunked documents produced from the loaded `data`.
texts = text_splitter.split_documents(data)

In [6]:
# Report how many chunks the splitter produced.
print(f'Now you have {len(texts)} documents')

Now you have 507 documents


### Create embeddings of the documents

In [7]:
# Embedding model handed to the Pinecone vector store below; constructed with
# the library defaults (no arguments).
# NOTE(review): presumably picks up OPENAI_API_KEY from the environment
# populated by load_dotenv above — confirm.
embeddings = OpenAIEmbeddings()

In [8]:
# Configure the Pinecone client from environment variables; no credential is
# hard-coded in the notebook.
pinecone.init(
    api_key=os.getenv("PINECONE_API_KEY"),  # find at app.pinecone.io
    environment=os.getenv("PINECONE_API_ENV"),  # next to api key in console
)

# Name of the Pinecone index the vector store below is bound to.
index_name = "langchain"

In [9]:
# Disabled alternative: build `docsearch` directly from the chunk texts
# instead of attaching to an index that already exists.
# docsearch = Pinecone.from_texts([t.page_content for t in texts], embeddings, index_name=index_name)

# Attach to the existing Pinecone index named by `index_name`, using the same
# embedding model for queries. Both arguments are passed positionally: the
# previous form `from_existing_index(index_name=index_name, embeddings)` placed
# a positional argument after a keyword argument, which Python rejects with a
# SyntaxError before the cell can run.
docsearch = Pinecone.from_existing_index(index_name, embeddings)

In [10]:
# Retrieve the stored chunks most similar to the question.
query = "How did Juliet die?"
docs = docsearch.similarity_search(
    query,
    include_metadata=True,
)

In [11]:
# Bare expression so the notebook displays the retrieved documents for inspection.
docs

[Document(page_content='NURSE.\nAh sir, ah sir, death’s the end of all.\n\nROMEO.\nSpakest thou of Juliet? How is it with her?\nDoth not she think me an old murderer,\nNow I have stain’d the childhood of our joy\nWith blood remov’d but little from her own?\nWhere is she? And how doth she? And what says\nMy conceal’d lady to our cancell’d love?\n\nNURSE.\nO, she says nothing, sir, but weeps and weeps;\nAnd now falls on her bed, and then starts up,\nAnd Tybalt calls, and then on Romeo cries,\nAnd then down falls again.\n\nROMEO.\nAs if that name,\nShot from the deadly level of a gun,\nDid murder her, as that name’s cursed hand\nMurder’d her kinsman. O, tell me, Friar, tell me,\nIn what vile part of this anatomy\nDoth my name lodge? Tell me, that I may sack\nThe hateful mansion.\n\n [_Drawing his sword._]', metadata={}),
 Document(page_content='FRIAR LAWRENCE.\nI will be brief, for my short date of breath\nIs not so long as is a tedious tale.\nRomeo, there dead, was husband to that Juliet

### Query docs (from Pinecone) to get an answer

In [12]:
# Completion model with temperature=0; the API key is read from the
# environment rather than written into the notebook.
llm = OpenAI(
    temperature=0,
    openai_api_key=os.getenv("OPENAI_API_KEY"),
)

# Question-answering chain built on that model with chain_type="stuff".
# NOTE(review): per the LangChain docs, "stuff" places all input documents into
# a single prompt — confirm this fits the model's context window for k chunks.
chain = load_qa_chain(llm, chain_type="stuff")

In [13]:
# Retrieve the chunks relevant to the question, then let the QA chain answer
# from them. The final expression is the cell's displayed output.
query = "How did Juliet die?"
docs = docsearch.similarity_search(
    query,
    include_metadata=True,
)
chain.run(question=query, input_documents=docs)

' Juliet died after taking a sleeping potion given to her by Friar Lawrence in order to avoid marrying Paris.'

In [14]:
# Same retrieve-then-answer flow, this time for a question about the second book.
query = "Who is gatsby?"
docs = docsearch.similarity_search(
    query,
    include_metadata=True,
)
chain.run(question=query, input_documents=docs)

' Gatsby is a wealthy man who is suspected of being a bootlegger and a relative of Von Hindenburg. He is also a friend of the late Dan Cody.'

In [17]:
# Ask for a whole-work summary; the chain only sees the chunks returned by the
# similarity search, not the full text.
query = "Can you please summarize Romeo and Juliet?"
docs = docsearch.similarity_search(
    query,
    include_metadata=True,
)
chain.run(question=query, input_documents=docs)

' Romeo and Juliet is a tragedy by William Shakespeare about two young lovers from feuding families, Romeo Montague and Juliet Capulet, who fall in love and secretly marry. Despite the obstacles in their way, they are determined to be together, but their love is ultimately doomed. The play ends with both Romeo and Juliet taking their own lives.'

In [16]:
# Ask for a whole-work summary; the chain only sees the chunks returned by the
# similarity search, not the full text.
query = "Can you please summarize The Great Gatsby?"
docs = docsearch.similarity_search(
    query,
    include_metadata=True,
)
chain.run(question=query, input_documents=docs)

" No, I don't know enough about The Great Gatsby to summarize it."