In [None]:
#@title Install Dependencies
!pip install langchain
!pip install sentence_transformers
!pip install xformers
!pip install bitsandbytes accelerate transformers
!pip install faiss-gpu
!pip install accelerate
!pip install pymupdf

In [None]:
#@title Import Libraries
import io
import fitz
import torch
import textwrap
from google.colab import files
from huggingface_hub import login
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from transformers import pipeline
from langchain import HuggingFacePipeline, PromptTemplate
from langchain.chains import RetrievalQA
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema.document import Document
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")


In [3]:
#@title Important functions
def get_text(filename):

    """
    Load a PDF and extract its text.

    Parameters
    ----------
    filename : str
        Path of the uploaded file.

    Returns
    -------
    text : str
        Text of every page, in page order, joined with single spaces.

    """

    # The context manager closes the document (and its underlying file handle)
    # even when extraction fails part-way through.
    with fitz.open(filename) as doc:
        return ' '.join(page.get_text() for page in doc)

def authenticate():

    """
    Log in to the Hugging Face Hub so that models can be downloaded.

    The access token is read from the ``HF_TOKEN`` environment variable. When
    that variable is unset or blank, the user is prompted for the token
    (input is not echoed).

    Raises
    ------
    ValueError
        If no token is supplied by either route.

    """

    # Function-scope imports keep this block self-contained.
    import os
    from getpass import getpass

    # Credentials must never be stored in the notebook source: anyone who can
    # read the file can use them. Any token that was ever committed here
    # should be revoked and regenerated.
    access_token_read = os.environ.get("HF_TOKEN", "").strip()
    if not access_token_read:
        access_token_read = getpass("Hugging Face access token: ").strip()
    if not access_token_read:
        raise ValueError("A Hugging Face access token is required to authenticate.")

    login(token = access_token_read)

def get_db(text):

    """
    Create a FAISS vector database from the provided string.

    The text is split into chunks (chunk_size=1000, chunk_overlap=100), each
    chunk is wrapped in a ``Document`` and embedded with the default
    ``HuggingFaceEmbeddings`` model.

    Parameters
    ----------
    text : str
        Text to index.

    Returns
    -------
    vectordb
        FAISS vector store built from the chunks of ``text``.

    Raises
    ------
    ValueError
        If ``text`` is empty or contains only whitespace.

    """

    # A PDF without a text layer (e.g. a scan) yields no text. Without this
    # guard no chunks are produced and index construction fails with an
    # unhelpful error, so fail early with an actionable message instead.
    if not text or not text.strip():
        raise ValueError(
            "Cannot build a vector database from empty text; "
            "the document appears to contain no extractable text."
        )

    embeddings = HuggingFaceEmbeddings()
    text_splitter = CharacterTextSplitter(separator="", chunk_size=1000, chunk_overlap=100)
    docs = [Document(page_content=chunk) for chunk in text_splitter.split_text(text)]
    return FAISS.from_documents(docs, embeddings)

def initialize_llm_pipeline(model_name="NousResearch/Llama-2-7b-chat-hf"):

    """
    Load a 4-bit quantized causal LM and its tokenizer, and wrap them in a
    LangChain text-generation pipeline.

    Parameters
    ----------
    model_name : str, optional
        Hugging Face model id used for both the tokenizer and the model.
        Defaults to the LLAMA2-7B chat checkpoint.

    Returns
    -------
    llm : HuggingFacePipeline
        LangChain wrapper around the text-generation pipeline.

    """

    # 4-bit NF4 quantization with double quantization; matmuls run in float16.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )

    # Load Tokenizer. `token=True` uses the credentials stored by a previous
    # login; it replaces the deprecated `use_auth_token` argument.
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)

    # Load model. The dtype of the non-quantized weights is kept in line with
    # the quantization compute dtype rather than float32.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map='auto',
        offload_folder="save_folder",
        torch_dtype=torch.float16,
        token=True,
        quantization_config=quantization_config,
    )

    # Create a pipeline using model and tokenizer. The model is already loaded
    # and placed on devices above, so no dtype / device_map is passed here.
    # The sampling temperature is set on the pipeline itself so that it is
    # actually used during generation.
    pipe = pipeline("text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.1,
        top_k=30,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )

    # model_kwargs is kept so the wrapper still reports the temperature in use.
    llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0.1})

    return llm

def create_retrieval_chain(llm, vectordb):

    """
    Build a question-answering retrieval chain on top of an LLM and a vector store.

    Parameters
    ----------
    llm :
        LLM pipeline used to generate the answers.

    vectordb :
        Vector database; its similarity retriever (k=3) supplies the context.

    Returns
    -------
    qa_chain :
        "stuff"-type RetrievalQA chain that also returns its source documents.

    """

    # Prompt pieces in the layout expected by LLAMA2: a system block followed
    # by the instruction holding the retrieved context and the question.
    system_block = (
        "<<SYS>>\n Use the following pieces of context to answer the question at the end.\n"
        "    If you don't know the answer, just say that you don't know, don't try to make up an answer. \n"
        "<</SYS>>\n\n"
    )
    instruction_block = "\n    {context}\n\n    Question: {question}\n    "

    # Wrap both pieces in the [INST] ... [/INST] markers
    llama_prompt = PromptTemplate(
        template="".join(["[INST]", system_block, instruction_block, "[/INST]"]),
        input_variables=["context", "question"],
    )

    # Retriever returning the three most similar chunks
    top_chunks_retriever = vectordb.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 3},
    )

    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=top_chunks_retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": llama_prompt},
    )

In [None]:
#@title Upload the file and Initialize models and vector database

# Upload the file
uploaded = files.upload()

# The mapping may be empty (e.g. the upload dialog was dismissed); stop with a
# clear message instead of an IndexError on the filename lookup below.
if not uploaded:
    raise ValueError("No file was uploaded. Re-run this cell and select a PDF file.")

# Get the filename (the first uploaded file is used)
filename = next(iter(uploaded))

# Get the text from PDF
text = get_text(filename)

# Authentication to access model from huggingface
authenticate()

# Store the text info in vector database using hugging face embeddings model
vectordb = get_db(text)

# Get the model and tokenizers in a pipeline
llm_pipeline = initialize_llm_pipeline()

# Get the Retrieval Chain
QA_chain = create_retrieval_chain(llm_pipeline, vectordb)

In [None]:
#@title Enter Queries
# Interactive question loop: keeps answering until the user types END.
while True:
    # input() already returns a str; surrounding whitespace is dropped so that
    # "END " still exits and blank lines are not sent to the model.
    query = input("Enter 'END' to exit the program\nEnter your question! - ").strip()
    if query == 'END':
        break
    if not query:
        continue
    # invoke() replaces the deprecated direct call on the chain object.
    result = QA_chain.invoke({"query": query})
    print(f"Answer - {textwrap.fill(result['result'], width=150)}\n")