In [None]:
!pip install langchain --upgrade

In [None]:
import os

# Credentials are taken from environment variables so that real secrets never
# have to be typed into (and saved with) the notebook. When a variable is not
# set, the original placeholder string is kept, so the previous
# "paste your key here" workflow still works unchanged.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'OPENAI_API_KEY')
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', 'PINECONE_API_KEY')
PINECONE_API_ENV = os.environ.get('PINECONE_API_ENV', 'PINECONE_API_ENV')

In [None]:
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
!pip install unstructured

In [None]:
# Location of the PDF to be indexed; change this to point at another file.
pdf_path = "/content/parsing-the-turing-test-2009.pdf"
loader = UnstructuredPDFLoader(pdf_path)

In [None]:
# Load the PDF through the loader and report how much was read.
data = loader.load()

doc_count = len(data)
print(f'You have {doc_count} document(s) in your data')

# Only the first loaded document is measured here.
first_doc_chars = len(data[0].page_content)
print(f'There are {first_doc_chars} characters in your document')

In [None]:
# Split the loaded document(s) into chunks (chunk_size=1000, no overlap)
# that will be embedded individually.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(data)

chunk_count = len(texts)
print(f'Now you have {chunk_count} documents')

In [None]:
!pip install -qU pinecone-client

In [None]:
from langchain.vectorstores import Chroma, Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone

In [None]:
!pip install openai

In [None]:
# Embedding model wrapper, authenticated with the OpenAI key defined earlier.
embeddings = OpenAIEmbeddings(
    openai_api_key=OPENAI_API_KEY,
)

In [None]:
# Initialize the Pinecone client with the credentials defined earlier.
# Both values come from the Pinecone console (app.pinecone.io); the
# environment is shown next to the API key.
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_API_ENV)

# Put the name of your Pinecone index here.
index_name = "langchain1"

In [None]:
!pip install tiktoken

In [None]:
import tiktoken

In [None]:
# Only the raw text of each chunk is sent to the vector store; the chunks'
# metadata is not passed along.
chunk_texts = [doc.page_content for doc in texts]
docsearch = Pinecone.from_texts(
    chunk_texts,
    embeddings,
    index_name=index_name,
)

In [None]:
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
# LLM with temperature 0, wrapped in a question-answering chain of type "stuff".
llm = OpenAI(
    openai_api_key=OPENAI_API_KEY,
    temperature=0,
)
chain = load_qa_chain(llm, chain_type="stuff")

In [None]:
# First question put to the indexed document.
query = "what did turing said about machine thinking"

# Fetch the stored chunks that are most similar to the question.
docs = docsearch.similarity_search(
    query,
    include_metadata=True,
)

In [None]:
# Answer the question using the retrieved chunks; the bare name on the last
# line makes the notebook display the answer.
answer = chain.run(input_documents=docs, question=query)
answer

' Turing said that he would not address the question of whether machines can think because he believed it to be too meaningless to deserve discussion, but he predicted that by the end of the century, people would be able to speak of machines thinking without expecting to be contradicted.'

In [None]:
# Second question: retrieve the relevant chunks, then run the QA chain on them.
query = "How Mark Humphrys Program Passed the Turing Test (explain in detail)"
docs = docsearch.similarity_search(
    query,
    include_metadata=True,
)

# The bare name on the last line makes the notebook display the answer.
answer = chain.run(input_documents=docs, question=query)
answer

" Mark Humphrys' program passed the Turing Test by exhibiting the various errorful but predictable phenomena that humans fall prey to, such as forgetfulness, slowness, innumeracy, being misled by the way queries are phrased, and being misled by the introduction of red herring choices. The program was designed to be as intelligent as possible so that it could serve as an intelligence amplifier for human beings, not as a replacement for them."