<a href="https://colab.research.google.com/github/preetamjumech/LLM/blob/main/RAG_using_Zephyr_7B_Beta_Chromadb_23_10_2024_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the notebook's Python dependencies, one pip invocation per package.
# NOTE(review): chainlit and PyPDF2 are not imported by any cell visible in
# this notebook -- confirm whether they are still needed.
!pip install chainlit
!pip install ctransformers
!pip install torch
!pip install sentence_transformers
!pip install chromadb
!pip install langchain
!pip install pypdf
!pip install PyPDF2
!pip install langchain_chroma
!pip install langchain-text-splitters
!pip install langchain_community
!pip install langchain_core
!pip install gradio

In [None]:
# Download the model file "zephyr-7b-beta.Q5_K_S.gguf" into the current directory (the code below loads it by that relative file name)

In [None]:
# Create a folder called "stores" in the current directory to hold the persisted embeddings

In [None]:
# Store the embeddings in your local directory; to run this cell as a script: python thisblockcode1.py
import os
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.document_loaders import PyPDFLoader

# Embedding model settings: BAAI/bge-large-en on CPU, with embedding
# normalization switched off.
model_name = "BAAI/bge-large-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": False}
embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

# Load pet.pdf and split it into chunks (chunk_size=1000, chunk_overlap=100).
loader = PyPDFLoader("pet.pdf")
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
texts = text_splitter.split_documents(documents)

# Embed the chunks into a Chroma collection configured for the cosine HNSW
# space and persist it under stores/pet_cosine.
vector_store = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
    collection_metadata={"hnsw:space": "cosine"},
    persist_directory="stores/pet_cosine",
)

print("Vector Store Created.......")

In [None]:
# To run this cell as a script: python thisblockcode2.py
from langchain_core.prompts import PromptTemplate #LLMChain
from langchain_community.llms import CTransformers
import os
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA

from io import BytesIO
import gradio as gr


# File name of the quantized GGUF model, given as a relative path.
local_llm = "zephyr-7b-beta.Q5_K_S.gguf"

# Generation settings forwarded to CTransformers as keyword arguments.
config = {
    'max_new_tokens': 1024,
    'repetition_penalty': 1.1,
    'temperature': 0.1,
    'top_k': 50,
    'top_p': 0.9,
    'stream': True,
    # Use half of the available cores, but never fewer than one thread:
    # os.cpu_count() can return None (count undetermined), and halving a
    # single core would otherwise round down to 0.
    'threads': max(1, (os.cpu_count() or 1) // 2),
}

llm = CTransformers(
    model=local_llm,
    model_type="mistral",
    lib="avx2",  # for CPU use
    **config
)

print("LLM Initialized...")


# Prompt text for the question-answering step. {context} and {question} are
# the two placeholders substituted when the template is formatted.
prompt_template = (
    "Use the following pieces of information to answer the user's question.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "\n"
    "Only return the helpful answer below and nothing else.\n"
    "Helpful answer:\n"
)

# Embedding model settings: BAAI/bge-large-en on CPU, with embedding
# normalization switched off.
model_name = "BAAI/bge-large-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": False}
embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

# Wrap the prompt text in a template that expects `context` and `question`.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Open the Chroma store persisted under stores/pet_cosine and expose it as a
# retriever that returns a single document per query (k=1).
load_vector_store = Chroma(
    embedding_function=embeddings,
    persist_directory="stores/pet_cosine",
)
retriever = load_vector_store.as_retriever(search_kwargs={"k": 1})

print("######################################################################")

chain_type_kwargs = {"prompt": prompt}

# Example questions offered in the UI.
sample_prompts = [
    "what is the fastest speed for a greyhound dog?",
    "Why should we not feed chocolates to the dogs?",
    "Name two factors which might contribute to why some dogs might get scared?",
]

def get_response(input):
  """Answer a question with a retrieval-augmented "stuff" QA chain.

  A RetrievalQA chain is assembled from the module-level `llm`, `retriever`
  and `prompt`, then run on the question.

  Args:
    input: The user's question. The name shadows the builtin `input`; it is
      kept so that existing keyword callers keep working.

  Returns:
    The chain's response object, unchanged. The chain is built with
    return_source_documents=True, so it presumably is a mapping holding the
    answer together with the retrieved source documents -- confirm the exact
    keys against the installed LangChain version.
  """
  qa = RetrievalQA.from_chain_type(
      llm=llm,
      chain_type="stuff",
      retriever=retriever,
      return_source_documents=True,
      chain_type_kwargs={"prompt": prompt},
      verbose=True,
  )
  # Calling the chain object directly (`qa(query)`) is deprecated in
  # LangChain; `invoke` is the supported entry point and "query" is the
  # input key RetrievalQA expects.
  return qa.invoke({"query": input})

# Single-line, label-less text box for the user's question. It is bound to
# `prompt_input` rather than `input` so the builtin input() is not shadowed
# at module level.
prompt_input = gr.Text(
    label="Prompt",
    show_label=False,
    max_lines=1,
    placeholder="Enter your prompt",
    container=False,
)

# Gradio front end: sends the text box content to get_response and renders
# the returned value in a text output.
iface = gr.Interface(
    fn=get_response,
    inputs=prompt_input,
    outputs="text",
    title="My Dog PetCare Bot",
    description="This is a RAG implementation based on Zephyr 7B Beta LLM.",
    examples=sample_prompts,
    # The flagging option takes the string modes "never" / "auto" / "manual";
    # a boolean False is deprecated or rejected depending on the Gradio
    # release, so the string form is used to disable flagging.
    allow_flagging="never",
)

iface.launch()