
# Ask Questions to Your Document

In this example we demonstrate how to ask questions about a PDF file.
The PDF file is assumed to have been uploaded to DBFS via an SFTP connection and stored in a folder in your user workspace.


# Step 1: Set up a cluster
<p>
- Go to the menu item Compute<p>
- Create a new cluster with the 'Create Compute' button<p>
- Choose a use case you have access to (cf. Policy)<p>
- Choose Multi node<p>
- Choose Access mode 'No Isolation shared'<p>
- Pick the latest Databricks runtime<p>
- Choose as worker type: g5.4xlarge with Min workers: 2, Max workers: 8<p>
- Choose Driver type: g5.4xlarge<p>
- Enable autoscaling<p>
- Pick a termination time, e.g. 240 min.<p>


# Step 2: Install all libraries
<p>
<em>Comment: Execution takes about 1.5 minutes</em>

In [0]:
# -----------------------------------------------
# 2. Installing all relevant libraries
# -----------------------------------------------
%pip install funcy
%pip install huggingface_hub
%pip install InstructorEmbedding
%pip install langchain
%pip install chromadb
%pip install openpyxl
%pip install python-docx
%pip install sentence-transformers
%pip install tiktoken
%pip install torch
%pip install pypdf
%pip install xformers
%pip install langchainhub
%pip install llama-cpp-python
%pip install accelerate


# Step 3: Set up the environment

In [0]:
# Import all required libraries
import os
import openai
import pandas as pd

import json
from langchain.llms import AzureOpenAI
from langchain.chat_models import AzureChatOpenAI
from langchain.schema import HumanMessage, SystemMessage
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings.huggingface import HuggingFaceInstructEmbeddings
from langchain.vectorstores.chroma import Chroma
from langchain.document_loaders import PyPDFLoader
from functools import partial
from funcy import lmap
from typing import Tuple, Callable
from typing import Any

import torch
import transformers

In [0]:
# --------------------------------------------------------------
# Providing the access token to Azure OpenAI and creating both LLM clients
# This only works if you have access to the respective use cases
# --------------------------------------------------------------
# The endpoint and the API versions are named once so the two configuration
# passes below cannot drift apart.
AZURE_OPENAI_ENDPOINT = "https://rg-rbi-aa-aitest-semantic-vectordb.openai.azure.com/"
TEXT_API_VERSION = "2022-12-01"

# The token is read once from the Databricks secret scope and reused below.
os.environ["OPENAI_API_KEY"] = dbutils.secrets.get(scope="llm-usecases", key="AZURE_TOKEN")
os.environ["OPENAI_API_VERSION"] = TEXT_API_VERSION

openai.api_key = os.environ["OPENAI_API_KEY"]
openai.api_type = "azure"
openai.api_version = TEXT_API_VERSION
openai.api_base = AZURE_OPENAI_ENDPOINT

# Keep this line BEFORE the version switch below: the text-completion client
# is created while the 2022-12-01 API version is configured.
# NOTE(review): presumably the client captures the version at construction
# time - confirm against the installed langchain version.
openAI_text_llm = AzureOpenAI(deployment_name="model-text-davinci-003", temperature=0)

# -----------------------------------------------------------------
# Load OpenAI Chat model
# -----------------------------------------------------------------
version = "2023-07-01-preview"

# Key, API type and endpoint on the `openai` module are already set above;
# only the API version changes for the chat deployment. The endpoint is
# additionally exported as an environment variable here.
os.environ["OPENAI_API_VERSION"] = version
os.environ["OPENAI_API_BASE"] = AZURE_OPENAI_ENDPOINT

openai.api_version = version

openAI_chat_llm = AzureChatOpenAI(deployment_name="model-gpt-35-turbo", temperature=0)

In [0]:
# -------------------------------------------------------------------------------------
# Log in to the Hugging Face Hub (presumably needed to download the Llama 2 model used below).
# 1. Register with your company email at Hugging Face (free of charge): https://huggingface.co/
# 2. Create an access token via the section: user profile / edit profile / Access Token
# 3. Register for Llama2 (https://ai.meta.com/resources/models-and-libraries/llama-downloads/).
#    It is free of charge but does require registration. Make sure that you use the same
#    email address as you used for Hugging Face.
# -------------------------------------------------------------------------------------

from getpass import getpass
from huggingface_hub import login

# The token is requested through an interactive prompt instead of being written into the notebook.
login(token=getpass("Huggingface Token:"))

## Step 4 - Load required components

In [0]:
# --------------------------------------------------------------
# Initialize and load Llama-2-7b-chat-hf
# Note: You might also use Llama-2-13b-chat-hf, but experiment first with the smaller model
# Note: Not recommended! The 70b variant does work on a 4xA10G GPU but only in 8bit, which is rather slow.
# ---------------------------------------------------------------

model = "meta-llama/Llama-2-7b-chat-hf"
# Alternative (larger) checkpoint: "meta-llama/Llama-2-13b-chat-hf"

# Start with the AutoTokenizer, but you might also try other tokenizers
tokenizer = transformers.AutoTokenizer.from_pretrained(model)

# Start with a rather simple pipeline, and then become more sophisticated.
# The tokenizer loaded above is passed in explicitly so the pipeline does not
# load a second copy and so that `tokenizer.eos_token_id` (used when calling
# the pipeline later) refers to the very tokenizer the pipeline runs with.
llama_chat = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,  # half precision to reduce GPU memory use
    device_map="auto",
    temperature=0.05,
    max_new_tokens=400,
)

In [0]:
# ---------------------------------------------------------------
# Load the Instructor-XL embedding model.
# It is the embedding function handed to the Chroma vector store
# that is built from the PDF chunks further down in this notebook.
# ---------------------------------------------------------------
instruct_embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-xl",
    query_instruction="Represent the query for retrieval: ",
)

## Chatting with your model

In [0]:
# -------------
# Llama2 Set-up
# -------------
question_template = "Please answer the following question as diligently as possible: {question}"


def _llama_chat_completion(question: str, pipeline: transformers.Pipeline, **pipeline_kwargs: Any) -> list[str]:
    """Wrap *question* in the prompt template and generate answers with *pipeline*.

    Args:
        question: The user's question; it is inserted into ``question_template``.
        pipeline: Text-generation pipeline that is called with the finished prompt.
        **pipeline_kwargs: Forwarded unchanged to the pipeline call.

    Returns:
        The ``"generated_text"`` entry of every sequence the pipeline returns.
    """
    query = question_template.format(question=question)
    sequences = pipeline(query, **pipeline_kwargs)
    return [s["generated_text"] for s in sequences]


# Public entry point: the raw function above with the Llama2 pipeline and the
# generation settings pre-bound, so callers only pass the question. The raw
# function keeps its own (private) name instead of being overwritten by the
# partial, which keeps it reachable for other pipelines/settings.
llama_chat_completion = partial(
    _llama_chat_completion,
    pipeline=llama_chat,
    do_sample=True,
    num_return_sequences=1,
    temperature=0.05,
    top_k=5,
    eos_token_id=tokenizer.eos_token_id,
    return_full_text=False,  # return only the newly generated text, not the prompt
    max_new_tokens=200,
)

In [0]:
# Put your question here
question = "You are a market research analyst. Please project the consumer price index to the years 2024 and 2025?"

# Calculate the answer with Llama2; the helper returns a list of generated
# texts, of which the first one is used.
answer = llama_chat_completion(question)[0]
print(f"Question: {question}. \n Answer: {answer}")

In [0]:
# -------------------------
# Chat with GPT 3.5
# -------------------------
# The system message carries the general instruction, the human message the
# actual question.
SystemPrompt = "Please answer the following question as diligently as possible."
UserPrompt = "You are a market research analyst. Please give me the numbers for projecting the consumer price index to the years 2024 and 2025?"

response = openAI_chat_llm([
    SystemMessage(content=SystemPrompt),
    HumanMessage(content=UserPrompt),
])

# The reply text is read from the `content` attribute of the returned message.
answer = response.content

print(f"Question: {UserPrompt}. \n Answer: {answer}")


# Ask your Document

In [0]:
# ------------------------------------------------------------
# GPT 3.5 / Load your PDF from the indicated path and ask questions about it
# ------------------------------------------------------------
# Alternative document: "/Workspace/Users/david.eschwe@rbinternational.com/PDFs/Transformer.pdf"
selected_file_path = "/Workspace/Users/david.eschwe@rbinternational.com/PDFs/RBI Group Risk Manual.pdf"

loader = PyPDFLoader(selected_file_path)

# Join the text of all loaded pages into one string so it can be split into
# chunks independently of page boundaries.
document = loader.load()
documents_content = '\n'.join(page.page_content for page in document)

nb_characters = 400

print(f"First {nb_characters} Characters of the Paper: \n{documents_content[:nb_characters]} ...")
print(f"Length of Document: {len(documents_content)}")

from langchain.text_splitter import CharacterTextSplitter

# Split on line breaks into chunks of about 500 characters with 200
# characters of overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=500,
    chunk_overlap=200,
    length_function=len,
)

doc_chunks = text_splitter.split_text(documents_content)

print(f"# Chunks in Document: {len(doc_chunks)}")

# Embed all chunks and index them; the QA chain uses the "stuff" chain type
# together with the Azure OpenAI text-completion model.
vector_db = Chroma.from_texts(doc_chunks, instruct_embeddings)
chain = load_qa_chain(openAI_text_llm, chain_type="stuff")

def ask(question:str):
    """Answer *question* from the indexed document.

    Retrieves the chunks relevant to the question from ``vector_db`` and
    runs the module-level ``chain`` (as bound at call time) over them.
    Returns the chain's answer with surrounding whitespace removed.
    """
    matching_docs = vector_db.as_retriever().get_relevant_documents(question)
    raw_answer = chain.run(input_documents=matching_docs, question=question)
    return raw_answer.strip()

In [0]:
# Ask a question about the indexed PDF through the `ask` helper and print the answer.
# NOTE(review): this question looks like it belongs to the commented-out
# Transformer.pdf rather than the currently selected Risk Manual - confirm.
question = "What is a transformer"
answer = ask(question)

print(f"GPT3.5 // Question: {question}\n\nAnswer: {answer}")

In [0]:
# ------------------------------------------------------------
# Llama2 / Ask questions about the PDF that was indexed into `vector_db`
# ------------------------------------------------------------
from langchain.llms import HuggingFacePipeline
from langchain.chains.question_answering import load_qa_chain

llm = HuggingFacePipeline(pipeline=llama_chat)
retriever = vector_db.as_retriever()

# The Llama2 chain gets its own name. The GPT helper `ask` defined earlier in
# this notebook reads the global `chain` at call time, so rebinding `chain`
# here would silently make `ask` answer with Llama2 as well.
llama_chain = load_qa_chain(llm, chain_type="stuff")


def query(question, verbose=False):
    """Answer *question* from the indexed document with the Llama2 chain.

    Args:
        question: The question to ask about the document.
        verbose: Currently unused; kept so existing calls keep working.

    Returns:
        The chain's answer with surrounding whitespace removed.
    """
    # A fresh retriever per call picks up `vector_db` as it is bound right now.
    docs = vector_db.as_retriever().get_relevant_documents(question)
    return llama_chain.run(input_documents=docs, question=question).strip()

In [0]:
# Ask a question about the indexed PDF through the Llama2 `query` helper and print the answer.
question = "What is an LRG?"

answer = query(question)

print(f"Llama2 // Question: {question}\n\nAnswer: {answer}")