<a href="https://colab.research.google.com/github/sainiakhil/RAG-Chat-Bot-Streamlit/blob/main/RAG_Chat_Bot_in_Streamlit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install faiss-gpu
!pip install streamlit
!pip install PyPDF2
!pip install pyngrok
!pip install bitsandbytes
!pip install sentence-transformers
!pip install transformers
!pip install huggingface_hub

In [None]:
# Log in to the Hugging Face Hub from inside the notebook.
# NOTE(review): presumably required because app.py downloads
# "meta-llama/Llama-2-7b-chat-hf" with from_pretrained() — confirm that the
# account used here has been granted access to that model.
from huggingface_hub import notebook_login
notebook_login()

In [4]:
from pyngrok import ngrok
import os

In [6]:
# Ask PyTorch's CUDA allocator to use expandable segments.
# NOTE(review): this is set in the kernel's environment before the Streamlit
# process is launched, presumably so that the child process inherits it and
# suffers less memory fragmentation while loading the LLM — confirm.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

In [7]:
# Register the ngrok authentication token without storing it in the notebook:
# read it from the NGROK_AUTH_TOKEN environment variable when present,
# otherwise prompt for it (getpass does not echo the value into cell output).
from getpass import getpass

ngrok_auth_token = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_auth_token)

In [8]:
%%writefile app.py
import streamlit as st
import torch
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig
import PyPDF2


# The LLM below is placed on cuda:0, so stop with a readable message instead
# of a traceback when no CUDA device is visible.
if not torch.cuda.is_available():
    st.error("A CUDA-capable GPU is required to run this app.")
    st.stop()

device = torch.device("cuda:0")


@st.cache_resource
def load_llm():
    """Load the 8-bit quantized Llama-2 chat model and its tokenizer.

    Streamlit re-executes this script on every widget interaction;
    ``st.cache_resource`` keeps the returned objects alive across those
    reruns so the weights are loaded onto the GPU only once per server
    process instead of once per click.

    Returns:
        tuple: ``(model, tokenizer)`` for "meta-llama/Llama-2-7b-chat-hf".
    """
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_threshold=6.0,
        llm_int8_has_fp16_weight=False,
    )

    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-chat-hf",
        device_map=device,
        quantization_config=bnb_config,
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
    )

    tok = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
    # Use the end-of-sequence token for padding, applied on the right.
    tok.pad_token = tok.eos_token
    tok.padding_side = "right"
    return model, tok


@st.cache_resource
def load_embedding_model():
    """Load the sentence-embedding model once and reuse it across reruns.

    Returns:
        SentenceTransformer: encoder used for both document chunks and queries.
    """
    return SentenceTransformer('sentence-transformers/paraphrase-MiniLM-L6-v2')


# Module-level handles used by the rest of the app.
llm_model, tokenizer = load_llm()

# Initialize the embedding model
embedding_model = load_embedding_model()


# FAISS index and chunk lookup table. Both stay empty until at least one
# document with extractable text has been uploaded.
index = None
response_map = {}

# Number of characters per chunk handed to the embedding model.
CHUNK_SIZE = 1000

st.title("Interactive RAG-Based QA Bot")
st.write("Upload documents and ask questions in real time!")

# Every upload is parsed with PyPDF2 below, so only offer PDF files.
uploaded_files = st.file_uploader("Upload Documents", type=["pdf"], accept_multiple_files=True)


if uploaded_files:
    # Chunks from ALL uploaded files are collected first and indexed once, so
    # every document stays searchable (rebuilding the index inside the loop
    # would keep only the last file).
    documents = []
    for uploaded_file in uploaded_files:
        pdf_reader = PyPDF2.PdfReader(uploaded_file)

        # Extract text from each page; a page without extractable text may
        # yield None or "", which is treated as empty.
        pdf_text = "".join((page.extract_text() or "") for page in pdf_reader.pages)

        # Split the PDF content into chunks for better embedding, skipping
        # chunks that contain only whitespace.
        for start in range(0, len(pdf_text), CHUNK_SIZE):
            chunk = pdf_text[start:start + CHUNK_SIZE]
            if chunk.strip():
                documents.append(chunk)

    if documents:
        # Generate embeddings for uploaded documents
        vectors = np.asarray(embedding_model.encode(documents), dtype=np.float32)

        # Initialize FAISS index sized to the embedding dimension
        index = faiss.IndexFlatL2(vectors.shape[1])
        index.add(vectors)

        # Row i of the index corresponds to documents[i].
        response_map = dict(enumerate(documents))

        st.success("Documents uploaded and indexed successfully!")
    else:
        # Nothing to embed (e.g. scanned PDFs without a text layer); leave
        # `index` as None so the query step knows no documents are available.
        st.warning("No extractable text was found in the uploaded documents.")


# Step 2: Query Input and Processing
query = st.text_input("Enter your question")

if st.button("Get Answer"):
    if not query:
        st.warning("Please enter a question first.")
    elif index is None:
        st.warning("Please upload at least one document before asking a question.")
    else:
        query_vector = np.asarray(embedding_model.encode([query]), dtype=np.float32)

        # Perform FAISS search
        k = 3  # Number of nearest neighbors to retrieve
        _, indices = index.search(query_vector, k)

        # FAISS pads the result with -1 when the index holds fewer than k
        # vectors; keep only real hits (already ordered by distance).
        hits = [int(i) for i in indices[0] if i != -1]

        if hits:
            # Only the closest chunk is placed into the prompt.
            retrieved_doc = response_map[hits[0]]

            augmented_input = query + "\n\n Retrieved Information:\n" + retrieved_doc + "\n\n Final Answer: \n"

            # The tokenized prompt must live on the same device as the model.
            inputs = tokenizer(
                augmented_input,
                return_tensors="pt",
                truncation=True,
                max_length=500,
            ).to(llm_model.device)

            # temperature / top_p only take effect when sampling is enabled.
            output = llm_model.generate(
                **inputs,
                max_new_tokens=500,
                do_sample=True,
                temperature=0.5,
                top_p=0.85,
            )

            # generate() returns prompt + continuation; decode only the newly
            # generated tokens so the prompt is not echoed back as the answer.
            prompt_length = inputs["input_ids"].shape[1]
            response = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

            # Display the result
            st.write("Answer:")
            st.write(response)
        else:
            st.error("No relevant document found.")

Overwriting app.py


In [None]:
# Open a public ngrok tunnel (TLS-bound HTTP) to the local port the Streamlit
# app is expected to listen on; the launch cell passes no port override.
streamlit_port = '8501'
public_url = ngrok.connect(
    addr=streamlit_port,
    proto='http',
    bind_tls=True,
)
print(f'Streamlit app will be live at: {public_url}')

In [None]:
# Run the Streamlit app from app.py (the file produced by the %%writefile
# cell in this notebook) as a shell command.
# NOTE(review): this cell presumably keeps running for as long as the server
# is up, so the ngrok tunnel has to be opened before it — confirm.
!streamlit run app.py