In [None]:
import os
import getpass

# Collect the three secrets interactively via getpass so they are never
# written into the notebook source. The two Pinecone values are kept as plain
# notebook variables; only the OpenAI key is exported to the process environment.
ask_secret = getpass.getpass
PINECONE_API_KEY = ask_secret("Pinecone API Key:")
PINECONE_ENVIRONMENT = ask_secret("Pinecone API Environment:")
os.environ["OPENAI_API_KEY"] = ask_secret("OpenAI API Key:")

In [48]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
import pinecone

# Configure the Pinecone client with the credentials entered above.
#   api_key     -> shown at app.pinecone.io
#   environment -> listed next to the API key in the console
pinecone.init(environment=PINECONE_ENVIRONMENT, api_key=PINECONE_API_KEY)

# Name of the Pinecone index that later cells attach to.
index_name = "pdf-algorithms-text-book"

In [38]:
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

# Build the completion model. The API key is read explicitly from the
# environment (a missing key raises KeyError here); temperature is pinned to 0,
# presumably to keep answers as repeatable as possible.
llm = OpenAI(
    openai_api_key=os.environ['OPENAI_API_KEY'],
    temperature=0,
)

# Question-answering chain over a list of documents, using the 'map_reduce' chain type.
chain = load_qa_chain(llm, chain_type='map_reduce')

In [39]:
# Embedding model; no key is passed explicitly here, so it presumably falls
# back to the OPENAI_API_KEY environment variable set earlier.
embeddings = OpenAIEmbeddings()

# Attach to the existing Pinecone index named by `index_name`
# (nothing is uploaded in this cell).
docsearch = Pinecone.from_existing_index(
    index_name,
    embeddings,
)

In [None]:
# Keep the actual question separate from the output-format instruction.
# Only the question is embedded for retrieval, so the formatting sentence
# cannot skew the vector similarity search. `query` still holds the complete
# text (question + instruction) with exactly the same value as before, for
# cells that pass it on to the LLM.
search_text = "What is big o notation?"
format_instruction = "Format response as json object."
query = f"{search_text} {format_instruction}"

# similarity search from pinecone index, using the bare question only
docs = docsearch.similarity_search(search_text)

In [46]:
# Ask the QA chain to answer `query` using the retrieved documents.
# The bare `response` on the last line makes the notebook display the result.
response = chain.run(question=query, input_documents=docs)
response

' {\n  "text": "Big O notation is a way of expressing the complexity of an algorithm, which captures how the algorithm scales as the input size grows. It is a very loose analog of f because of the constant c, which allows us to disregard what happens for small values of n. It is used to compare algorithms, and captures the superiority of one algorithm over another by expressing it as f2 = O(f1)."\n}'