### **Question Answering (QA) System**

### Installation of libraries

In [None]:
# Install (or upgrade, via -U) the model, LangChain, vector-store client,
# embedding and download libraries used below. No versions are pinned.
!pip install -U transformers accelerate langchain einops xformers bitsandbytes weaviate-client sentence_transformers gdown

In [None]:
!pip install -U unstructured pdf2image pdfminer pdfminer.six opencv-python-headless

In [None]:
# Install peft directly from its GitHub repository instead of a PyPI release.
!pip install -U git+https://github.com/huggingface/peft.git

### Import libraries

In [None]:
from getpass import getpass
from os import environ
from pprint import pprint
import gdown
from huggingface_hub import notebook_login

from langchain.chains import LLMChain, RetrievalQA
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Weaviate
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate

from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
    BitsAndBytesConfig,
    StoppingCriteria,
    StoppingCriteriaList
)

from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)

import torch
from torch import (
    cuda,
    bfloat16
)

### Load HuggingFace Token

In [None]:
# Interactive Hugging Face Hub login for this notebook session.
notebook_login()

### Upload files

In [None]:
# Google Drive direct-download URL prefix; the file ID is appended to it
# before the download call.
prefix = "https://drive.google.com/uc?export=download&id="

In [None]:
# Remind the user about the required sharing setting, then read the Google
# Drive URL of the PDF from standard input.
print("Before typing in the URL, please ensure that the sharing access to the PDF file is changed to 'Anyone with the link'.")
pdf_url = input("Please type in the URL of the PDF file which is saved on Google Drive and you would like to ask questions about: ")

In [None]:
def _drive_file_id(url):
    """Return the Google Drive file ID embedded in a share or download URL.

    Handles IDs carried as a path segment (".../d/<ID>/view?...", with or
    without anything after the ID) and as an ``id`` query parameter
    (".../open?id=<ID>", ".../uc?export=download&id=<ID>"). The previous
    ``split("/")[-2]`` only worked when exactly one path segment followed
    the ID.

    Args:
        url: The URL typed in by the user; surrounding whitespace is ignored.

    Returns:
        The file ID as a string.

    Raises:
        ValueError: If no file ID can be found in ``url``.
    """
    url = url.strip()
    if "/d/" in url:
        tail = url.split("/d/", 1)[1]
    elif "?id=" in url or "&id=" in url:
        # Normalise "&id=" to "?id=" so one split covers both positions of
        # the query parameter.
        tail = url.replace("&id=", "?id=").split("?id=", 1)[1]
    else:
        tail = ""
    # The ID ends at the next path, query or fragment delimiter.
    for delimiter in "/?&#":
        tail = tail.split(delimiter, 1)[0]
    if not tail:
        raise ValueError(f"No Google Drive file ID found in URL: {url!r}")
    return tail


pdf_id = _drive_file_id(pdf_url)

In [None]:
# Download the PDF into the working directory; gdown returns the local path.
pdf_file = gdown.download(prefix + pdf_id)

# NOTE(review): some gdown releases appear to signal a failed download (bad ID,
# file not link-shared) by returning None instead of raising. Stop here with a
# clear message rather than passing None on to the PDF loader.
if pdf_file is None:
    raise RuntimeError(
        "Could not download the PDF from Google Drive. Check that the URL is "
        "correct and that sharing is set to 'Anyone with the link'."
    )

### Information Extraction from documents

In [None]:
# Create the loader for the downloaded PDF; the content is read in the next
# cell via .load().
pdf_data = UnstructuredPDFLoader(pdf_file)

In [None]:
# Read the PDF into documents that the text splitter below can consume.
load_pdf_data = pdf_data.load()

In [None]:
# Break the loaded documents into chunks of at most 2000 units with a 20-unit
# overlap between neighbouring chunks; `texts` is what gets embedded later.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=2000,
)
texts = text_splitter.split_documents(load_pdf_data)

In [None]:
# Spot-check one chunk. The PDF is user-supplied and may produce fewer than 13
# chunks, so clamp the index to the last chunk (and show nothing when there
# are no chunks) instead of raising IndexError.
texts[min(12, len(texts) - 1)] if texts else None

In [None]:
# Spot-check a second chunk. The PDF is user-supplied and may produce fewer
# than 12 chunks, so clamp the index to the last chunk (and show nothing when
# there are no chunks) instead of raising IndexError.
texts[min(11, len(texts) - 1)] if texts else None

### Create and store embeddings in vector database

In [None]:
# Embedding model used to vectorise the chunks. normalize_embeddings=True is
# forwarded to the encoder through encode_kwargs.
embeddings_model = "sentence-transformers/all-MiniLM-L6-v2"
encode_kwargs = dict(normalize_embeddings=True)
create_embeddings = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_name=embeddings_model,
)

In [None]:
# Read the Weaviate endpoint URL with getpass so it is not echoed into the
# notebook output.
WEAVIATE_URL = getpass("WEAVIATE_URL:")

In [None]:
# Store the Weaviate API key in the process environment without echoing it.
# NOTE(review): the key is never passed explicitly to Weaviate.from_documents,
# so presumably the LangChain Weaviate wrapper reads WEAVIATE_API_KEY from the
# environment — confirm.
environ["WEAVIATE_API_KEY"] = getpass("WEAVIATE_API_KEY:")

In [None]:
# Embed every chunk with `create_embeddings` and store it in the Weaviate
# instance at WEAVIATE_URL.
# NOTE(review): by_text=False presumably makes searches use the supplied
# embeddings rather than Weaviate's text search — confirm.
vector_db = Weaviate.from_documents(
    texts,
    create_embeddings,
    by_text=False,
    weaviate_url=WEAVIATE_URL,
)

### Input user's question

In [None]:
# Read the user's question and trim surrounding whitespace.
question = input("Please type your question: ").strip()

### Loading trained model

In [None]:
# Target device string: the current CUDA device when a GPU is visible,
# otherwise the CPU.
if cuda.is_available():
    device = f"cuda:{cuda.current_device()}"
else:
    device = "cpu"

In [None]:
# Quantization settings passed to from_pretrained when the base model is
# loaded: 4-bit loading, "nf4" quant type, double quantization enabled and
# bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16,
)

In [None]:
# Adapter repository on the Hugging Face Hub; its PEFT config names the base
# model that both the tokenizer and the model weights are loaded from.
peft_model = "aiknight87/llama-2-7b-hf-300d"
config = PeftConfig.from_pretrained(peft_model)

# Reuse the end-of-sequence token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

# Load the base model with the quantization settings from `bnb_config`.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=bnb_config,
    device_map="auto",
    return_dict=True,
    trust_remote_code=True,
)

In [None]:
# Wrap the base model with the PEFT adapter stored under `peft_model`.
model = PeftModel.from_pretrained(model, peft_model)

In [None]:
# Token-id sequences that should end generation: the speaker labels "User:"
# and "Assistant:", each given as a [label, ":"] token pair.
# NOTE(review): convert_tokens_to_ids looks tokens up in the vocabulary, so a
# label that is not a single vocabulary token would map to the unknown-token
# id — check the printed ids before relying on them.
stop_token_ids = list(
    map(
        tokenizer.convert_tokens_to_ids,
        [["User", ":"], ["Assistant", ":"]],
    )
)

print(stop_token_ids)

In [None]:
# Convert each id list into a LongTensor on `device` so it can be compared
# against the generated ids inside the stopping criterion.
# NOTE(review): this rebinds stop_token_ids from lists of ints to tensors, so
# re-running this cell alone feeds tensors back into torch.LongTensor —
# re-run the previous cell first if that matters.
stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]
print(stop_token_ids)

In [None]:
class StopOnTokens(StoppingCriteria):
  """Stopping criterion that ends generation once a stop sequence appears.

  The stop sequences are read from the notebook-level ``stop_token_ids`` list
  each time the criterion is called, so that list must already hold 1-D
  token-id tensors.
  """

  def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
    """Return True when the first sequence in the batch ends with a stop sequence.

    Args:
      input_ids: Token ids produced so far; only row 0 is inspected.
      scores: Not used; accepted to match the StoppingCriteria call signature.
      **kwargs: Ignored.

    Returns:
      True if the tail of ``input_ids[0]`` equals any stop sequence, else False.
    """
    generated = input_ids[0]
    for stop_ids in stop_token_ids:
      n_stop = len(stop_ids)
      # An empty stop sequence cannot match, and one longer than the text so
      # far cannot be its suffix. Slicing in either case would compare
      # misaligned (or silently broadcast) tensors.
      if n_stop == 0 or n_stop > generated.shape[0]:
        continue
      # Compare on the device the generated ids live on; with device_map="auto"
      # that is not guaranteed to be the device the stop tensors were moved to.
      if torch.eq(generated[-n_stop:], stop_ids.to(generated.device)).all():
        return True
    return False

In [None]:
# Wrap the custom criterion in the list type that is passed to the
# text-generation pipeline as `stopping_criteria`.
stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [None]:
# Text-generation pipeline around the adapter-wrapped model.
generate_text = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    # Output handling and stopping.
    return_full_text=True,
    stopping_criteria=stopping_criteria,
    max_new_tokens=512,
    repetition_penalty=1.2,
    # Sampling settings.
    do_sample=True,
    temperature=1,
    top_k=3,
    top_p=0.2,
)

In [None]:
# Expose the transformers pipeline through LangChain's LLM wrapper so it can
# be used in the retrieval QA chain.
llm = HuggingFacePipeline(pipeline=generate_text)

### Standard Prompting

In [None]:
# Instruction prompt with {context} and {question} placeholders, assembled
# line by line; empty strings are the blank lines between paragraphs.
prompt_template = "\n".join([
    "You are an AI assistant and you are responsible to answer questions asked by users.",
    "",
    "You will have to carry out proper reasonings based on the context given by users and provide a final answer to a question asked by users.",
    "",
    'You should not include the section(s) of the context in your final answer such as "According to Section 82(2)," unless mentioned.',
    "",
    'However, if you are not sure about the answer to the question, please do not make up an answer and state "I do not know the answer".',
    "",
    "User:",
    "Context - {context}",
    "Based on this given context, please answer my question below.",
    "Question - {question}",
    "",
    "Assistant:",
])

In [None]:
# Bind the template text to its two placeholders, "context" and "question".
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

### Inference

In [None]:
# Options for the QA chain: the custom prompt for the chain itself, and
# k=2 for the retriever's search.
chain_type_kwargs = dict(prompt=prompt)
search_kwargs = dict(k=2)

In [None]:
# Retrieval QA chain of type "stuff": the retriever built from the vector
# store supplies the context and `llm` generates the answer using the custom
# prompt.
qa_llm_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vector_db.as_retriever(search_kwargs=search_kwargs),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
)

In [None]:
# Run the chain on the user's question; the answer is read from the returned
# mapping under the "result" key in the next cell.
output = qa_llm_chain(question)

In [None]:
# Print the answer text stored under the "result" key of the chain output.
print(output["result"])