# Project: Question-Answering on Private Documents

In [18]:
import os
import openai

from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

True

In [12]:
# pip install pypdf -q
# pip install docx2txt -q
# pip install wikipedia -q

In [4]:
# OpenAI client settings, all read from environment variables.
# A missing variable raises KeyError right here rather than later in a request.
openai.api_key = os.environ['OPENAI_API_KEY']
openai.api_base = os.environ['OPENAI_API_BASE']
openai.api_type = os.environ['OPENAI_API_TYPE']
openai.api_version = "2023-06-01-preview" # this version is required for annotations

# `model` is passed as the chat deployment name by the ask_* helpers below.
model = os.environ['CHAT_MODEL_NAME']
# NOTE(review): embedding_model is not referenced by any cell visible in this notebook — confirm it is still needed.
embedding_model = os.environ['EMBEDDING_MODEL_NAME']

### Loading Documents

In [5]:
def load_document(file):
    """Load a PDF or DOCX file with the matching LangChain loader.

    The loader is selected from the file extension, compared
    case-insensitively so that names such as ``REPORT.PDF`` or ``Notes.Docx``
    are accepted as well.

    Args:
        file: Path of the document to load.

    Returns:
        The result of the selected loader's ``load()`` call, or ``None`` when
        the extension is not supported (a message is printed in that case).
    """
    import os

    # splitext keeps the original casing, so normalise before comparing.
    extension = os.path.splitext(file)[1].lower()

    if extension == '.pdf':
        from langchain.document_loaders import PyPDFLoader
        loader_cls = PyPDFLoader
    elif extension == '.docx':
        from langchain.document_loaders import Docx2txtLoader
        loader_cls = Docx2txtLoader
    else:
        print('Document format is not supported!')
        return None

    print(f'Loading {file}')
    return loader_cls(file).load()


# wikipedia
def load_from_wikipedia(query, lang='en', load_max_docs=2):
    """Load Wikipedia content for ``query`` through LangChain's WikipediaLoader.

    Args:
        query: Search text handed to the loader.
        lang: Language code handed to the loader (default ``'en'``).
        load_max_docs: Value forwarded as the loader's ``load_max_docs``.

    Returns:
        The result of the loader's ``load()`` call.
    """
    from langchain.document_loaders import WikipediaLoader

    return WikipediaLoader(
        query=query,
        lang=lang,
        load_max_docs=load_max_docs,
    ).load()
  

### Chunking Data

In [6]:
def chunk_data(data, chunk_size=256, chunk_overlap=0):
    """Split loaded documents into smaller chunks.

    Args:
        data: Documents to split, as accepted by
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Value forwarded as the splitter's ``chunk_size``.
        chunk_overlap: Value forwarded as the splitter's ``chunk_overlap``.
            Defaults to 0, which is what this function always used before
            the parameter existed.

    Returns:
        The chunks produced by the splitter.
    """
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(data)
    

### Calculating Cost

In [7]:
def print_embedding_cost(texts, model_name='text-embedding-ada-002', price_per_1k_tokens=0.0004):
    """Print the token count of ``texts`` and the estimated embedding cost.

    Args:
        texts: Iterable of objects exposing a ``page_content`` string.
        model_name: Model whose tiktoken encoding is used for counting.
        price_per_1k_tokens: Price in USD per 1000 tokens used for the
            estimate. The default is the value previously hard-coded here;
            pass the current price of your embedding model if it differs.

    Returns:
        None. The figures are only printed.
    """
    import tiktoken

    enc = tiktoken.encoding_for_model(model_name)
    # Generator expression: no need to build a throwaway list just to sum it.
    total_tokens = sum(len(enc.encode(page.page_content)) for page in texts)
    print(f'Total Tokens: {total_tokens}')
    print(f'Embedding Cost in USD: {total_tokens / 1000 * price_per_1k_tokens:.6f}')

### Embedding and Uploading to a Vector Database (Pinecone)

In [28]:
def insert_or_fetch_embeddings(index_name, chunks):
    """Return a Pinecone vector store for ``index_name``, creating it if needed.

    If the index is already listed by Pinecone it is opened as-is and
    ``chunks`` is not used. Otherwise the index is created and the chunks are
    embedded and uploaded into it.

    Args:
        index_name: Name of the Pinecone index to open or create.
        chunks: Documents to embed and upload when the index is created.

    Returns:
        The LangChain ``Pinecone`` vector store bound to the index.
    """
    import pinecone
    from langchain.vectorstores import Pinecone
    from langchain.embeddings.openai import OpenAIEmbeddings

    embeddings = OpenAIEmbeddings(chunk_size=1)

    # Credentials come from the environment; .get() passes None when unset.
    pinecone.init(
        api_key=os.environ.get('PINECONE_API_KEY'),
        environment=os.environ.get('PINECONE_ENV'),
    )

    index_missing = index_name not in pinecone.list_indexes()
    if index_missing:
        print(f'Creating index {index_name} and embeddings ...', end='')
        # NOTE(review): 1536 presumably matches the embedding vector size — confirm for the model in use.
        pinecone.create_index(index_name, dimension=1536, metric='cosine')
        store = Pinecone.from_documents(chunks, embeddings, index_name=index_name)
    else:
        print(f'Index {index_name} already exists. Loading embeddings ... ', end='')
        store = Pinecone.from_existing_index(index_name, embeddings)

    print('Ok')
    return store
    

In [19]:
def delete_pinecone_index(index_name='all'):
    """Delete one Pinecone index, or every index when ``index_name`` is ``'all'``.

    Args:
        index_name: Name of the index to delete. The default ``'all'`` deletes
            every index returned by ``pinecone.list_indexes()``.
    """
    import pinecone

    pinecone.init(
        api_key=os.environ.get('PINECONE_API_KEY'),
        environment=os.environ.get('PINECONE_ENV'),
    )

    # Single-index case first; the bulk delete is the fall-through.
    if index_name != 'all':
        print(f'Deleting index {index_name} ...', end='')
        pinecone.delete_index(index_name)
        print('Ok')
        return

    existing = pinecone.list_indexes()
    print('Deleting all indexes ... ')
    for existing_name in existing:
        pinecone.delete_index(existing_name)
    print('Ok')
    

### Asking and Getting Answers

In [33]:
def ask_without_memory(vector_store, q):
    """Answer a single question from the vector store, with no chat history.

    Args:
        vector_store: Store exposing ``as_retriever``; the retriever is
            configured for similarity search with ``k=3``.
        q: The question text.

    Returns:
        The value returned by the RetrievalQA chain's ``run`` call.
    """
    from langchain.chains import RetrievalQA
    from langchain.chat_models import AzureChatOpenAI

    # Connection details are taken from the module-level openai settings.
    llm_settings = {
        'temperature': 0,
        'max_tokens': 500,
        'openai_api_base': openai.api_base,
        'openai_api_version': openai.api_version,
        'deployment_name': model,
        'openai_api_key': openai.api_key,
        'openai_api_type': openai.api_type,
    }
    llm = AzureChatOpenAI(**llm_settings)

    top_k_retriever = vector_store.as_retriever(
        search_type='similarity',
        search_kwargs={'k': 3},
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=top_k_retriever,
    )
    return qa_chain.run(q)
    
    
def ask_with_memory(vector_store, question, chat_history=None):
    """Answer a question from the vector store, taking prior turns into account.

    Args:
        vector_store: Store exposing ``as_retriever``; the retriever is
            configured for similarity search with ``k=3``.
        question: The question text.
        chat_history: List of ``(question, answer)`` tuples from earlier
            turns. A list passed in is extended in place with the new turn.
            When omitted, a fresh empty history is used for this call.

    Returns:
        A ``(result, chat_history)`` tuple: the chain's result (the answer is
        under ``result['answer']``) and the history including this turn.
    """
    from langchain.chains import ConversationalRetrievalChain
    from langchain.chat_models import AzureChatOpenAI

    # A mutable default ([]) would be shared by every call that omits the
    # argument, leaking one conversation's history into the next.
    if chat_history is None:
        chat_history = []

    chat = AzureChatOpenAI(temperature=0,
        max_tokens=500,
        openai_api_base=openai.api_base,
        openai_api_version=openai.api_version,
        deployment_name=model,
        openai_api_key=openai.api_key,
        openai_api_type=openai.api_type
    )
    retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': 3})

    crc = ConversationalRetrievalChain.from_llm(chat, retriever)
    result = crc({'question': question, 'chat_history': chat_history})
    chat_history.append((question, result['answer']))

    return result, chat_history
    

### Running Code

In [14]:
data = load_document('files/us_constitution.pdf')
# Uncomment to inspect the text or metadata of an individual page:
# print(data[1].page_content)
# print(data[10].metadata)

page_count = len(data)
print(f'You have {page_count} pages in your data')
# Page index 20 is just a sample; it must exist in the loaded document.
sample_page = data[20]
print(f'There are {len(sample_page.page_content)} characters in the page')

Loading files/us_constitution.pdf
You have 41 pages in your data
There are 1137 characters in the page


In [12]:
# data = load_document('files/the_great_gatsby.docx')
# len(data)

Loading files/the_great_gatsby.docx


1

In [13]:
# data = load_from_wikipedia('GPT-4')
# len(data)

2

In [15]:
# Split the loaded documents into chunks using chunk_data's defaults
# and report how many chunks were produced.
chunks = chunk_data(data)
print(len(chunks))
# print(chunks[10].page_content)

190


In [16]:
# Print the token count and estimated embedding cost for the chunks.
print_embedding_cost(chunks)

Total Tokens: 16711
Embedding Cost in USD: 0.006684


In [29]:
# WARNING: called without an argument, index_name defaults to 'all', so this
# deletes EVERY index returned by pinecone.list_indexes(), not just one.
delete_pinecone_index()

Deleting all indexes ... 
Ok


In [30]:
# Create the index and upload the chunk embeddings, or open the index
# if one with this name already exists.
index_name = 'askadocument'
vector_store = insert_or_fetch_embeddings(index_name, chunks)

Creating index askadocument and embeddings ...Ok


In [34]:
# Single question with no conversation history.
q = 'How many amendments are in the US constitution?'
answer = ask_without_memory(vector_store, q)
print(answer)

There are currently 27 amendments in the US Constitution.


In [37]:
# Start a new conversation with an empty history; ask_with_memory returns the
# chain result together with the history extended by this (question, answer) turn.
chat_history = []
question = 'How many amendments are in the US constitution?'
result, chat_history = ask_with_memory(vector_store, question, chat_history=chat_history)
print(result['answer'])
print(f"\nChat History: {chat_history}\n")

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')).


There are currently 27 amendments in the US Constitution.

Chat History: [('How many amendments are in the US constitution?', 'There are currently 27 amendments in the US Constitution.')]



In [38]:
# Follow-up question: "that number" only makes sense with the previous turn,
# which is supplied through chat_history.
question = 'Multiply that number by 2'
result, chat_history = ask_with_memory(vector_store, question, chat_history=chat_history)
print(result['answer'])
print(f"\nChat History: {chat_history}\n")


The result of multiplying the number of amendments in the US Constitution by 2 is not provided in the given context. The US Constitution has 27 amendments, but there is no information on multiplying it by 2.
[('How many amendments are in the US constitution?', 'There are currently 27 amendments in the US Constitution.'), ('Multiply that number by 2', 'The result of multiplying the number of amendments in the US Constitution by 2 is not provided in the given context. The US Constitution has 27 amendments, but there is no information on multiplying it by 2.')]


In [None]:
import time

# Interactive question loop; every question is answered independently.
print('Write Quit or Exit to quit.')
i = 1
while True:
    q = input(f'Question #{i}: ')
    i += 1

    if q.lower() not in ('quit', 'exit'):
        answer = ask_without_memory(vector_store, q)
        print(f'\nAnswer: {answer}')
        print(f'\n {"-" * 50} \n')
        continue

    # The user asked to stop: say goodbye, pause briefly, then leave the loop.
    print('Quitting ... bye bye!')
    time.sleep(2)
    break

In [None]:
import time

# Interactive question loop that carries the conversation history between turns.
i = 1
chat_history = []
print('Write Quit or Exit to quit.')
while True:
    q = input(f'Question #{i}: ')
    i = i + 1
    if q.lower() in ['quit', 'exit']:
        print('Quitting ... bye bye!')
        time.sleep(2)
        break

    # ask_with_memory returns the whole chain result, not just the text;
    # print only its 'answer' entry instead of dumping the full mapping.
    result, chat_history = ask_with_memory(vector_store, q, chat_history=chat_history)
    print(f"\nAnswer: {result['answer']}")
    print(f'\n {"-" * 50} \n')