In [2]:
import torch
# Handle for the CUDA device with index 2.
device = torch.device("cuda", 2)

### Step1: Install Libraries

In [4]:
# Install Pytorch & other libraries
! pip install "torch==2.1.2" tensorboard

# Install Hugging Face libraries
! pip install  --upgrade \
  "transformers==4.38.2" \
  "datasets==2.16.1" \
  "accelerate==0.26.1" \
  "evaluate==0.4.1" \
  "bitsandbytes==0.42.0" \
  "trl==0.7.11" \
  "peft==0.8.2" \
    "langchain" \
"sentence-transformers" \
"faiss-cpu"
! pip install unstructured
! pip install pdfminer
! pip install pdfminer.six
! pip install -U langchain-community==0.2.4
! pip install -U langchain-huggingface

[31mERROR: Could not find a version that satisfies the requirement torch==2.1.2 (from versions: 2.2.0, 2.2.1, 2.2.2, 2.3.0, 2.3.1)[0m[31m
[0m[31mERROR: No matching distribution found for torch==2.1.2[0m[31m
Collecting langchain-community==0.2.4
  Using cached langchain_community-0.2.4-py3-none-any.whl.metadata (2.4 kB)
Using cached langchain_community-0.2.4-py3-none-any.whl (2.2 MB)
Installing collected packages: langchain-community
  Attempting uninstall: langchain-community
    Found existing installation: langchain-community 0.2.6
    Uninstalling langchain-community-0.2.6:
      Successfully uninstalled langchain-community-0.2.6
Successfully installed langchain-community-0.2.4


### Step2: Import Model and Tokenizer

In [6]:
import torch
from IPython.display import display_markdown
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import pipeline
import transformers
import time
from langchain.document_loaders import UnstructuredPDFLoader,PDFMinerLoader,TextLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
 

import os
from getpass import getpass

# Hugging Face model id
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# Access token for the Hugging Face Hub (needed to download the model).
# SECURITY: never store the token in the notebook. Any token that was
# previously committed in this cell must be treated as leaked and revoked.
# The token is read from the HF_TOKEN environment variable; when that is not
# set, it is requested interactively without being echoed.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")

# Text-generation pipeline, loaded in 4-bit and placed on "cuda:2".
# NOTE: this rebinds the name `pipeline` (also imported from transformers
# above) to the pipeline *instance*; later cells rely on that binding.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    token=hf_token,
    model_kwargs={
        "torch_dtype": torch.float16,
        "quantization_config": {"load_in_4bit": True},
        "low_cpu_mem_usage": True,
    },
    device_map="cuda:2",
)

# Token ids treated as end-of-sequence markers: the tokenizer's EOS token and
# the "<|eot_id|>" token.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]
    

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00,  1.44s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


### Step3: FAISS (Building a vector storage) 

In [7]:
from langchain_community.vectorstores import FAISS
from langchain.docstore import InMemoryDocstore
import faiss
# Sentence-embedding model used for the semantic cache.
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")

# Empty flat L2 index, sized to the embedding model's output dimension.
dimension = embeddings.client.get_sentence_embedding_dimension()
index = faiss.IndexFlatL2(dimension)

# In-memory document store that accompanies the index.
docstore = InMemoryDocstore()

# Vector store for the semantic cache; it starts without any entries.
vector_store = FAISS(
    index=index,
    docstore=docstore,
    index_to_docstore_id={},
    embedding_function=embeddings,
)

  warn_deprecated(


### Step4 : Load and Split DataSet 

In [8]:
import os

# Path of the PDF file used as the RAG knowledge source.
# Set the MEDICAL_PDF_PATH environment variable to point at another copy;
# otherwise the original hard-coded location is used.
pdf_file_path = os.environ.get(
    "MEDICAL_PDF_PATH",
    "/home/praveent/End-To-End-Madical-Chatbot-NLP-Project-5/data/Medical_book.pdf",
)

In [9]:
### This class extracts the text of a PDF, chunks it, and retrieves the chunks relevant to a query
class Langchain_RAG:
    """Index a PDF in FAISS and return the text of the chunks closest to a query.

    Construction does all the heavy work: the PDF is loaded with
    ``PDFMinerLoader``, split with ``RecursiveCharacterTextSplitter``, embedded
    and stored in a FAISS index. Calling the instance with a query string
    returns the concatenated text of the retrieved chunks.

    Args:
        pdf_file_path: Path of the PDF file to index.
        embeddings: Optional embedding model to reuse. When ``None`` (default)
            a new ``HuggingFaceEmbeddings`` for "BAAI/bge-small-en-v1.5" is
            created, as before.
        chunk_size: ``chunk_size`` passed to the text splitter.
        chunk_overlap: ``chunk_overlap`` passed to the text splitter.
        top_k: Number of chunks the retriever is asked for per query.
    """

    def __init__(self, pdf_file_path, embeddings=None, chunk_size=1000, chunk_overlap=0, top_k=4):
        # Reuse a caller-supplied embedding model when given, so the same model
        # does not have to be loaded a second time.
        if embeddings is None:
            embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
        self.embeddings = embeddings
        self.pdf_file_path = pdf_file_path

        print("Loading PDF file, this may take time to process...")
        self.loader = PDFMinerLoader(self.pdf_file_path)
        self.data = self.loader.load()
        print("PDF file loaded.")

        print("Chunking...")
        # NOTE(review): " " is listed as the first separator; confirm this
        # ordering is intended rather than putting "\n" first.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            separators=[" ", ",", "\n"],
        )
        self.texts = text_splitter.split_documents(self.data)
        print("Chunking completed.")

        self.get_vec_value = FAISS.from_documents(self.texts, self.embeddings)
        print("Vector values saved.")
        self.retriever = self.get_vec_value.as_retriever(search_kwargs={"k": top_k})

    def __call__(self, query):
        """Return the text of the chunks retrieved for ``query``.

        The chunks are separated by a blank line so that the last word of one
        chunk is not fused with the first word of the next. An empty string is
        returned when nothing is retrieved.
        """
        relevant_docs = self.retriever.get_relevant_documents(query)
        return "\n\n".join(doc.page_content for doc in relevant_docs)

### Step5: Generate Answers with the LLM and a Semantic Cache

In [10]:
import time

# This class is used to generate responses from an LLM model
class Llama3_8B_gen:
    """Answer generator that puts a semantic cache in front of the LLM.

    For every query the vector store is searched for the closest previously
    answered query. When its score is below ``threshold`` the stored response
    is returned; otherwise the text-generation pipeline is called and the new
    query/response pair is added to the store.

    Args:
        pipeline: Text-generation pipeline; it must expose ``.tokenizer``.
        embeddings: Embedding model providing ``embed_documents``.
        vector_store: Store providing ``similarity_search_with_score_by_vector``
            and ``add_texts``; it holds the cached queries.
        threshold: Scores strictly below this value count as a cache hit.
        terminators: Optional list of token ids passed as ``eos_token_id``
            during generation. When ``None`` it is built from the pipeline's
            tokenizer: its EOS id and the id of "<|eot_id|>".
    """

    def __init__(self, pipeline, embeddings, vector_store, threshold, terminators=None):
        self.pipeline = pipeline
        self.embeddings = embeddings
        self.vector_store = vector_store
        self.threshold = threshold
        # Derive the stop tokens from the injected pipeline instead of reading
        # a notebook-level global, so the object does not depend on cell state.
        if terminators is None:
            tokenizer = pipeline.tokenizer
            terminators = [
                tokenizer.eos_token_id,
                tokenizer.convert_tokens_to_ids("<|eot_id|>"),
            ]
        self.terminators = terminators

    @staticmethod
    def generate_prompt(query, retrieved_text, tokenizer=None):
        """Build the chat prompt from the user query and the retrieved context.

        Args:
            query: The user's question (sent as the "user" message).
            retrieved_text: Context text embedded into the "system" message.
            tokenizer: Tokenizer whose ``apply_chat_template`` renders the
                messages. When ``None`` the notebook-level ``pipeline.tokenizer``
                is used, which keeps the old two-argument call working.

        Returns:
            Whatever ``apply_chat_template`` returns for ``tokenize=False`` and
            ``add_generation_prompt=True``.
        """
        if tokenizer is None:
            # Legacy fallback: relies on the global `pipeline` defined in an earlier cell.
            tokenizer = pipeline.tokenizer
        messages = [
            {"role": "system", "content": "Answer the Question for the Given below context and information and not prior knowledge, only give the output result \n\ncontext:\n\n{}".format(retrieved_text)},
            {"role": "user", "content": query},
        ]
        return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    def semantic_cache(self, query, prompt):
        """Return a cached response for ``query`` or generate and cache a new one.

        Args:
            query: The raw user query; it is what gets embedded and cached.
            prompt: The full prompt given to the pipeline on a cache miss.

        Returns:
            The response text, either from the cache or freshly generated.
        """
        # `embed_documents` is used for the lookup; the query is stored below
        # through `add_texts`.
        query_embedding = self.embeddings.embed_documents([query])
        similar_docs = self.vector_store.similarity_search_with_score_by_vector(query_embedding[0], k=1)

        # Cache hit: the closest stored query scores below the threshold.
        if similar_docs and similar_docs[0][1] < self.threshold:
            self.print_bold_underline("---->> From Cache")
            return similar_docs[0][0].metadata['response']

        # Cache miss: call the LLM and remember the answer under this query.
        self.print_bold_underline("---->> From LLM")
        output = self.pipeline(
            prompt,
            max_new_tokens=512,
            eos_token_id=self.terminators,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )

        # The generated text starts with the prompt; keep only what follows it.
        response = output[0]["generated_text"][len(prompt):]
        self.vector_store.add_texts(texts=[query], metadatas=[{'response': response}])
        return response

    def generate(self, query, retrieved_context):
        """Answer ``query`` using ``retrieved_context`` and print the elapsed time.

        Returns:
            The response text returned by ``semantic_cache``.
        """
        # perf_counter is a monotonic clock intended for measuring durations.
        start_time = time.perf_counter()

        prompt = self.generate_prompt(query, retrieved_context, self.pipeline.tokenizer)
        res = self.semantic_cache(query, prompt)

        execution_time = time.perf_counter() - start_time
        self.print_bold_underline(f"LLM generated in {execution_time:.6f} seconds")

        return res

    @staticmethod
    def print_bold_underline(text):
        """Print ``text`` wrapped in ANSI bold + underline escape codes."""
        print(f"\033[1m\033[4m{text}\033[0m")

In [11]:
# Answer generator backed by the semantic cache; scores below 0.1 count as cache hits.
text_gen = Llama3_8B_gen(
    pipeline=pipeline,
    embeddings=embeddings,
    vector_store=vector_store,
    threshold=0.1,
)

# Retriever over the PDF; constructing it loads, chunks and indexes the file.
retriever = Langchain_RAG(pdf_file_path=pdf_file_path)

Loading PDF file, this may take time to process...
PDF file loaded.
Chunking...
Chunking completed.
Vector values saved.


In [14]:
def Rag_qa(query):
    """Answer ``query`` with the help of context retrieved from the PDF.

    The module-level ``retriever`` supplies the context text for the query;
    query and context are then handed to ``text_gen.generate`` and its return
    value is passed back to the caller.
    """
    return text_gen.generate(query, retriever(query))


Rag_qa("What are Allergies")

[1m[4m---->> From Cache[0m
[1m[4mLLM generated in 0.009066 seconds[0m


'An allergy is a type of immune reaction. Normally, the immune system responds to foreign microorganisms or particles by producing specific proteins called antibodies. These antibodies are capable of binding to identifying molecules, or antigens, on the foreign particle. This reaction between antibody and antigen sets off a series of chemical reactions designed to protect the body from infection.'

In [15]:
# Interactive chat loop. Type "Exit" (any letter case) to stop; EOF or an
# interrupt at the prompt also ends the loop instead of raising a traceback.
while True:
    try:
        # Strip surrounding whitespace so "Exit " and blank lines are handled sanely.
        user_input_query = input("Input Prompt:").strip()
    except (EOFError, KeyboardInterrupt):
        break
    print("user_input_query : ", user_input_query)
    if user_input_query.lower() == "exit":
        break
    if not user_input_query:
        # Nothing was typed: do not spend a retrieval + LLM call on an empty query.
        continue
    result = Rag_qa(user_input_query)
    print("Chatbot_Response : ", result)

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


user_input_query :  what is Acne??
[1m[4m---->> From LLM[0m
[1m[4mLLM generated in 4.189719 seconds[0m
Chatbot_Response :  According to the provided context, Acne is a disease that affects the skin, characterized by the formation of comedones (blackheads and whiteheads), papules, pustules, nodules, and cysts. It is caused by a combination of factors, including hormonal changes, genetics, diet, stress, and personal hygiene.


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


user_input_query :  who is Praveen
[1m[4m---->> From LLM[0m
[1m[4mLLM generated in 4.743698 seconds[0m
Chatbot_Response :  There is no mention of a person named Praveen in the provided context. The text discusses the concepts of Ayurveda, doshas, and the effects of imbalance on the human body, as well as a brief description of Acute Poststreptococcal Glomerulonephritis (APSGN). There is no mention of a person named Praveen.


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


user_input_query :  Tell me more about Ayurvedic Medicine
[1m[4m---->> From LLM[0m
[1m[4mLLM generated in 26.185494 seconds[0m
Chatbot_Response :  Ayurvedic medicine is an ancient system of traditional medicine that originated in India over 3,000-5,000 years ago. The term "Ayurveda" comes from the Sanskrit words "ayur" meaning "life" and "veda" meaning "science" or "knowledge". Ayurvedic medicine is based on the idea that the body is composed of three fundamental energies or doshas: Vata, Pitta, and Kapha. Each dosha is associated with specific characteristics, such as elements, qualities, and functions.

Ayurvedic medicine emphasizes the importance of maintaining balance and harmony in the body's systems. It is believed that an imbalance of the doshas can lead to various health problems. Ayurvedic practitioners use a variety of techniques, including herbal remedies, diet, yoga, meditation, and other therapies, to restore balance and promote overall well-being.

Some key principl