In [5]:
import os
from dotenv import load_dotenv

# Load settings from a local .env file before any cell needs them; later cells
# read PINECONE_API_KEY and PINECONE_ENV from the environment.
# NOTE(review): the OpenAI clients used further down presumably pick up their
# API key from the environment as well — confirm it is present in .env.
# The call's return value is the cell output (True in the recorded run).
load_dotenv(verbose=True)

True

In [2]:
from langchain.document_loaders import PyPDFLoader

# Source document for the whole notebook.
pdf_path = "us_constitution.pdf"

# `loader` is reused by a later cell, `pages` feeds the vector store.
loader = PyPDFLoader(pdf_path)
pages = loader.load_and_split()

# Sanity check: dump the attributes of the first chunk (its text and metadata).
print(vars(pages[0]))

{'page_content': 'The\nUnited\nStates\nConstitution\nW e\nthe\nPeople\nof\nthe\nUnited\nStates,\nin\nOrder\nto\nform\na\nmore\nperfect\nUnion,\nestablish\nJustice,\ninsure\ndomestic\nT ranquility ,\nprovide\nfor\nthe\ncommon\ndefence,\npromote\nthe\ngeneral\nW elfare,\nand\nsecure\nthe\nBlessings\nof\nLiberty\nto\nourselves\nand\nour\nPosterity ,\ndo\nordain\nand\nestablish\nthis\nConstitution\nfor\nthe\nUnited\nStates\nof\nAmerica.\nThe\nConstitutional\nCon v ention\nArticle\nI\nSection\n1:\nCongress\nAll\nlegislative\nPowers\nherein\ngranted\nshall\nbe\nvested\nin\na\nCongress\nof\nthe\nUnited\nStates,\nwhich\nshall\nconsist\nof\na\nSenate\nand\nHouse\nof\nRepresentatives.\nSection\n2:\nThe\nHouse\nof\nRepresentatives', 'metadata': {'source': 'us_constitution.pdf', 'page': 0}}


In [7]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
import os
import pinecone

# Embedding model used both to index the pages and to embed queries.
embeddings = OpenAIEmbeddings()

# Connect the Pinecone client using credentials from the environment.
pinecone.init(
    api_key=os.getenv("PINECONE_API_KEY"),  # find at app.pinecone.io
    environment=os.getenv("PINECONE_ENV"),  # next to api key in console
)

index_name = "langchain-demo"

# Only ingest the pages when the index is created for the first time.
# Calling from_documents() on every run would upsert the same pages again,
# filling the index with duplicate vectors each time this cell is re-executed.
if index_name not in pinecone.list_indexes():
    # NOTE(review): dimension must equal the size of the vectors produced by
    # `embeddings`; 1536 is assumed to match the default OpenAI embedding
    # model — confirm if the model is changed.
    pinecone.create_index(
        name=index_name,
        metric='cosine',
        dimension=1536
    )
    vectorstore = Pinecone.from_documents(
        documents=pages,
        embedding=embeddings,
        index_name=index_name
    )
else:
    # The index already exists, so attach to it without uploading again.
    # Caveat: if the index exists but was never populated (e.g. a previous
    # upload failed), delete it in the Pinecone console and re-run this cell.
    vectorstore = Pinecone.from_existing_index(
        index_name=index_name,
        embedding=embeddings
    )

In [10]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# Wrap the vector store in the retriever interface that the QA chain consumes.
retriever = vectorstore.as_retriever()

# Assemble the question-answering chain: a default-configured OpenAI LLM,
# the "stuff" chain type, and the retriever built above.
qa_settings = dict(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=retriever,
)
chain = RetrievalQA.from_chain_type(**qa_settings)

# Ask a question; the answer string is the cell's displayed output.
query = 'Could you give the most important keypoints in the document?'
chain.run(query)

' The Seventh Amendment preserves the right to trial by jury in cases where the value in controversy exceeds twenty dollars. The Eighth Amendment prohibits excessive bail, fines, and cruel and unusual punishments. The Ninth Amendment states that the enumeration of certain rights in the Constitution should not be construed to deny or disparage other rights retained by the people. The Tenth Amendment states that powers not delegated to the federal government are reserved to the states or to the people.'

In [None]:
from langchain.indexes import VectorstoreIndexCreator

"""
VectorstoreIndexCreator is a wrapper
What is returned from the VectorstoreIndexCreator is VectorStoreIndexWrapper
which provides these nice query and query_with_sources functionality.
"""
# Build an index straight from the PDF loader defined earlier in the notebook.
index = VectorstoreIndexCreator().from_loaders([loader])

# Ask something the loaded document (us_constitution.pdf) can actually answer.
# The previous question about Ketanji Brown Jackson was left over from a
# different example and is unrelated to the Constitution.
query = "What does the Constitution say about the powers of Congress?"
index.query(query)
