In [26]:
# !pip install langchain
# !pip install pypdf
# !pip install pinecone-client
# !pip install tiktoken

Collecting tiktoken
  Downloading tiktoken-0.4.0-cp310-cp310-macosx_10_9_x86_64.whl (797 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m798.0/798.0 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting regex>=2022.1.18
  Downloading regex-2023.5.5-cp310-cp310-macosx_10_9_x86_64.whl (294 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.4/294.4 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: regex, tiktoken
Successfully installed regex-2023.5.5 tiktoken-0.4.0


In [3]:
# PDF Loaders. If unstructured gives you a hard time, try PyPDFLoader
from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os

In [7]:
loader = PyPDFLoader("Downloads/naval.pdf")


In [8]:
data = loader.load()

In [9]:
# Note: If you're using PyPDFLoader then it will split by page for you already
print (f'You have {len(data)} document(s) in your data')
print (f'There are {len(data[30].page_content)} characters in your document')

You have 242 document(s) in your data
There are 677 characters in your document


In [10]:
# Note: If you're using PyPDFLoader then we'll be splitting for the 2nd time.
# This is optional, test out on your own data.

text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
texts = text_splitter.split_documents(data)

In [11]:
print (f'Now you have {len(texts)} documents')


Now you have 231 documents


In [21]:

# Check to see if there is an environment variable with you API keys, if not, use what you put below
OPENAI_API_KEY = ''

PINECONE_API_KEY = ''
PINECONE_API_ENV = 'northamerica-northeast1-gcp'

In [22]:
from langchain.vectorstores import Chroma, Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone

In [23]:
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

In [24]:
# initialize pinecone
pinecone.init(
    api_key=PINECONE_API_KEY,  # find at app.pinecone.io
    environment=PINECONE_API_ENV  # next to api key in console
)
index_name = "book" # put in the name of your pinecone index here

In [27]:
docsearch = Pinecone.from_texts([t.page_content for t in texts], embeddings, index_name=index_name)

In [28]:
query = "how to create wealth?"
docs = docsearch.similarity_search(query)

In [29]:
docs

[Document(page_content='PART I\nWEALTH\nHow to get rich without getting lucky.', metadata={}),
 Document(page_content='BUILDING  WEALTH ·  31How to Get Rich (Without Getting Lucky):\n↓\nSeek wealth, not money or status. Wealth is having assets \nthat earn while you sleep. Money is how we transfer time \nand wealth. Status is your place in the social hierarchy.\n↓\nUnderstand ethical wealth creation is possible. If you secretly \ndespise wealth, it will elude you.\n↓\nIgnore people playing status games. They gain status by \nattacking people playing wealth creation games.\n↓\nYou’re not going to get rich renting out your time. You must \nown equity—a piece of a business—to gain your financial \nfreedom.\n↓\nYou will get rich by giving society what it wants but does not \nyet know how to get. At scale.\n↓', metadata={}),
 Document(page_content='38 · THE ALMANACK OF NAVAL RAVIKANTIf you’re looking toward the long-term goal of getting wealthy, \nyou should ask yourself, “Is this authentic 

In [30]:
# Here's an example of the first document that was returned
print(docs[0].page_content[:450])

PART I
WEALTH
How to get rich without getting lucky.


In [31]:
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

In [55]:
llm = OpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
chain = load_qa_chain(llm, chain_type="stuff")

In [58]:
query = "keey points to start a business ?"
docs = docsearch.similarity_search(query)

In [59]:

chain.run(input_documents=docs, question=query)

' The key points to start a business are to have a clear idea of what you want to do, create a business plan, secure funding, and build a team. Additionally, it is important to have a good understanding of the market and the competition, as well as a plan for marketing and sales.'