In [1]:
import streamlit as st
from langchain_community.document_loaders import DirectoryLoader, UnstructuredFileLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from transformers import LlamaForCausalLM, LlamaTokenizer, AutoModelForCausalLM, AutoTokenizer
from pyngrok import ngrok
import torch

# Constants
FDA_DOCUMENTS_PATH = "/content/FDA_documents"
VECTOR_DB_PATH = "/content/Vector_db_dir"

# Load and process documents
@st.cache_resource
def _build_vector_store():
    """Load the FDA text files, chunk them, embed them and index them in Chroma.

    Failures are raised instead of returned: ``st.cache_resource`` stores
    whatever this function returns, so returning ``None`` on error would keep
    serving the failure until the app is restarted.

    Returns:
        Chroma: vector store persisted under ``VECTOR_DB_PATH``.

    Raises:
        FileNotFoundError: if no ``*.txt`` file is found in ``FDA_DOCUMENTS_PATH``.
    """
    loader = DirectoryLoader(
        path=FDA_DOCUMENTS_PATH,
        glob="*.txt",
        loader_cls=UnstructuredFileLoader
    )
    documents = loader.load()
    if not documents:
        raise FileNotFoundError(f"No .txt documents found in {FDA_DOCUMENTS_PATH}")

    text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=500)
    text_chunks = text_splitter.split_documents(documents)

    # Give every chunk a stable id ("<source>#<position within source>") so that
    # re-indexing into the same persist directory overwrites existing entries
    # instead of appending a duplicate copy of every chunk.
    chunks_seen_per_source = {}
    chunk_ids = []
    for chunk in text_chunks:
        source = str(chunk.metadata.get("source", ""))
        position = chunks_seen_per_source.get(source, 0)
        chunks_seen_per_source[source] = position + 1
        chunk_ids.append(f"{source}#{position}")

    embeddings = HuggingFaceEmbeddings()

    return Chroma.from_documents(
        documents=text_chunks,
        embedding=embeddings,
        ids=chunk_ids,
        persist_directory=VECTOR_DB_PATH
    )


def load_and_process_documents():
    """Return the document vector store, or ``None`` if it could not be built.

    The expensive work is cached in ``_build_vector_store``; this thin wrapper
    is deliberately not cached so that an error is reported on every run and
    the build is retried once the underlying problem is fixed.
    """
    try:
        return _build_vector_store()
    except Exception as e:
        st.error(f"Error loading documents: {str(e)}")
        return None
# Load model directly

# Initialize LLaMA-2 model
@st.cache_resource
def _load_llama():
    """Load the LLaMA-2 chat model and its tokenizer once per Streamlit process.

    Failures are raised instead of returned so that ``st.cache_resource`` never
    stores a failed ``(None, None)`` result.

    NOTE(review): the model is loaded without quantization or a device map,
    unlike the notebook variant of this function - confirm the host has enough
    memory for that.

    Returns:
        tuple: ``(model, tokenizer)``.
    """
    model_name = "meta-llama/Llama-2-7b-chat-hf"
    model = AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer


def initialize_model():
    """Return ``(model, tokenizer)``, or ``(None, None)`` if loading failed.

    Kept uncached on purpose: the cached loader only stores successful loads,
    so a transient failure (missing credentials, network error) is retried on
    the next rerun instead of being served from the cache.
    """
    try:
        return _load_llama()
    except Exception as e:
        st.error(f"Error initializing LLaMA model: {str(e)}")
        return None, None

from transformers import AutoModel, AutoTokenizer

# # Initialize LLaMA model from TheBloke
# @st.cache_resource
# def initialize_model():
#     try:
#         model_name = "TheBloke/Llama-2-7B-Chat-GGUF"
#         model = AutoModel.from_pretrained(model_name)
#         tokenizer = AutoTokenizer.from_pretrained(model_name)
#         return model, tokenizer
#     except Exception as e:
#         st.error(f"Error initializing LLaMA model: {str(e)}")
#         return None, None

# # Initialize GPT-J model
# @st.cache_resource
# def initialize_model():
#     try:
#         model_name = "EleutherAI/gpt-j-6b"
#         model = AutoModelForCausalLM.from_pretrained(model_name)
#         tokenizer = AutoTokenizer.from_pretrained(model_name)
#         return model, tokenizer
#     except Exception as e:
#         st.error(f"Error initializing GPT-J model: {str(e)}")
#         return None, None


# Generate answer using LLaMA
def generate_answer(query, retrieved_docs, model, tokenizer, max_new_tokens=200):
    """Answer ``query`` with the LLM, grounded in the retrieved passages.

    Args:
        query: the user's question.
        retrieved_docs: iterable of passages; each item may be a plain string
            or an object exposing ``page_content`` (as retrievers return).
        model: causal LM exposing ``generate``.
        tokenizer: tokenizer matching ``model``.
        max_new_tokens: budget for the generated answer only. The prompt is not
            counted, so a long retrieved context cannot exhaust the budget the
            way a total ``max_length`` does.

    Returns:
        str: the generated answer text, without the echoed prompt.
    """
    # Accept both raw strings and Document-like objects.
    context = " ".join(getattr(doc, "page_content", doc) for doc in retrieved_docs)
    prompt = f"Answer the following Question: {query}\n Given the following extracted \nContext: {context}\n\nAnswer:"

    inputs = tokenizer(prompt, return_tensors="pt")

    # Keep the inputs on the same device as the model when both sides support it.
    device = getattr(model, "device", None)
    if device is not None and hasattr(inputs, "to"):
        inputs = inputs.to(device)

    prompt_length = len(inputs["input_ids"][0])
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # A causal LM returns prompt + continuation; keep only the continuation.
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

# Streamlit app
def main():
    """Render the FDA Q&A chat page.

    Loads the vector store and the LLM (stopping with an error message if
    either is unavailable), replays the stored chat history, and answers each
    new question by retrieving passages and passing them to ``generate_answer``.
    """
    st.title("FDA Document Q&A System")

    vectordb = load_and_process_documents()
    if vectordb is None:
        st.error("Failed to load documents. Please check your document path and try again.")
        return

    model, tokenizer = initialize_model()
    if model is None or tokenizer is None:
        st.error("Failed to initialize LLaMA model. Please check your setup and try again.")
        return

    # Answers are produced by generate_answer() from retrieved passages. The
    # ConversationalRetrievalChain previously built here was never called and
    # was given a raw transformers model rather than a LangChain LLM, so it has
    # been removed.
    retriever = vectordb.as_retriever()

    # Chat interface
    st.sidebar.header("Chat History")
    if "messages" not in st.session_state:
        st.session_state.messages = []

    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input("Ask a question about FDA documents"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            try:
                # Retrievers are invoked with .invoke(); generate_answer joins
                # plain text, so pass each document's page_content.
                retrieved_docs = [doc.page_content for doc in retriever.invoke(prompt)]
                response = generate_answer(prompt, retrieved_docs, model, tokenizer)
                st.markdown(response)
                st.session_state.messages.append({"role": "assistant", "content": response})
            except Exception as e:
                error_message = f"An error occurred: {str(e)}"
                st.error(error_message)
                st.session_state.messages.append({"role": "assistant", "content": error_message})

    st.sidebar.markdown("\n".join([f"**{m['role']}**: {m['content']}" for m in st.session_state.messages]))

if __name__ == "__main__":
    main()

# Run the Streamlit app
!streamlit run app.py &>/content/logs.txt &
!ngrok config add-authtoken "2mZfx0emkhYFNKedEax3pjieseD_6GA53MUZBosZJGn2YZ1ST"

# Set up ngrok tunnel
public_url = ngrok.connect(8501)

print(f"Public URL: {public_url}")


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
Public URL: NgrokTunnel: "https://c4ea-34-16-226-247.ngrok-free.app" -> "http://localhost:8501"


In [2]:
# Kill any running ngrok process so the public tunnel is closed.
!pkill ngrok



In [1]:
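# Do not paste access tokens into the notebook: `huggingface-cli login` prompts for the token interactively. Revoke any token that was committed here.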
!huggingface-cli login



    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) y
Token is valid (permission: fineGrained).
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in yo

In [2]:
!pip install langchain chromadb llama-cpp-python streamlit streamlit-chat

Collecting langchain
  Downloading langchain-0.3.1-py3-none-any.whl.metadata (7.1 kB)
Collecting chromadb
  Downloading chromadb-0.5.11-py3-none-any.whl.metadata (6.8 kB)
Collecting llama-cpp-python
  Downloading llama_cpp_python-0.3.1.tar.gz (63.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.9/63.9 MB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting streamlit
  Downloading streamlit-1.39.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting streamlit-chat
  Downloading streamlit_chat-0.1.1-py3-none-any.whl.metadata (4.2 kB)
Collecting langchain-core<0.4.0,>=0.3.6 (from langchain)
  Downloading langchain_core-0.3.8-py3-none-any.whl.metadata (6.3 kB)
Collecting langchain-text-splitters<0.4.0,>=0.3.0 (from langchain)
  Downloading

In [3]:
!pip install transformers sentence-transformers chromadb


Collecting sentence-transformers
  Downloading sentence_transformers-3.1.1-py3-none-any.whl.metadata (10 kB)
Downloading sentence_transformers-3.1.1-py3-none-any.whl (245 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.3/245.3 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence-transformers
Successfully installed sentence-transformers-3.1.1


In [4]:
!pip install langchain beautifulsoup4 chromadb -q

In [5]:
!pip install pyngrok


Collecting pyngrok
  Downloading pyngrok-7.2.0-py3-none-any.whl.metadata (7.4 kB)
Downloading pyngrok-7.2.0-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.0


In [6]:
!pip install streamlit==1.38.0 langchain_community==0.2.16 langchain-text-splitters==0.2.4 langchain-chroma==0.1.3 langchain-huggingface==0.0.3 langchain-groq==0.1.9 unstructured==0.15.0 nltk==3.8.1 unstructured[pdf]==0.15.0

Collecting streamlit==1.38.0
  Downloading streamlit-1.38.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting langchain_community==0.2.16
  Downloading langchain_community-0.2.16-py3-none-any.whl.metadata (2.7 kB)
Collecting langchain-text-splitters==0.2.4
  Downloading langchain_text_splitters-0.2.4-py3-none-any.whl.metadata (2.3 kB)
Collecting langchain-chroma==0.1.3
  Downloading langchain_chroma-0.1.3-py3-none-any.whl.metadata (1.5 kB)
Collecting langchain-huggingface==0.0.3
  Downloading langchain_huggingface-0.0.3-py3-none-any.whl.metadata (1.2 kB)
Collecting langchain-groq==0.1.9
  Downloading langchain_groq-0.1.9-py3-none-any.whl.metadata (2.9 kB)
Collecting unstructured==0.15.0
  Downloading unstructured-0.15.0-py3-none-any.whl.metadata (29 kB)
Collecting watchdog<5,>=2.1.5 (from streamlit==1.38.0)
  Downloading watchdog-4.0.2-py3-none-manylinux2014_x86_64.whl.metadata (38 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community==0.2.16)
  Downloading dataclasses_

In [1]:
!pip install bitsandbytes==0.41.1 sentence_transformers==2.2.2

Collecting bitsandbytes==0.41.1
  Downloading bitsandbytes-0.41.1-py3-none-any.whl.metadata (9.8 kB)
Collecting sentence_transformers==2.2.2
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/86.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading bitsandbytes-0.41.1-py3-none-any.whl (92.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: sentence_transformers
  Building wheel for sentence_transformers (setup.py) ... [?25l[?25hdone
  Created wheel for sentence_transformers: filename=sentence_transformers-2.2.2-py3-none-any.whl size=125924 sha256=cf49e61d4a70c7f64aaedb21815369040f5646a08315ef922f685b4ea27987ab
  Stored

In [2]:
!pip install -U bitsandbytes accelerate


Collecting bitsandbytes
  Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl (122.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.4/122.4 MB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
  Attempting uninstall: bitsandbytes
    Found existing installation: bitsandbytes 0.41.1
    Uninstalling bitsandbytes-0.41.1:
      Successfully uninstalled bitsandbytes-0.41.1
Successfully installed bitsandbytes-0.44.1


In [1]:
import requests
from bs4 import BeautifulSoup
import os

# Function to extract and save content from a given URL
def extract_and_save_content(url, file_name, output_dir="FDA_documents", timeout=30):
    """Download a page and save the text of its ``<p>`` elements to a file.

    Args:
        url: page to fetch.
        file_name: output file name without extension; ``.txt`` is appended.
        output_dir: directory for the output file, created if missing.
        timeout: seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook.

    Raises:
        requests.RequestException: on a network error or a non-2xx response.
            Raising here prevents an error page from being written to disk and
            later indexed as if it were a regulation.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract relevant text content: one line per paragraph.
    content = "".join(p.text + "\n" for p in soup.find_all('p'))

    # Create a directory to store the document if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Save the content to a text file
    file_path = os.path.join(output_dir, file_name + ".txt")
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(content)

    print(f"Saved content from {url} to {file_path}")

# Endpoint shared by every page; the section is selected with the `fr` parameter.
CFR_SEARCH_URL = "https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfCFR/CFRSearch.cfm"

# Section numbers under 314 to download, in download order. They are kept as
# text so that e.g. "50" and "500" stay distinct from "5".
# Add more sections here
SECTION_SUFFIXES = """
    1 2 3
    50 52 53 54 55
    60 65
    70 71 72
    80 81
    90 92 93 94 95 96 97 98 99
    100 101 102 103 104 105 106 107 108
    110
    122 125 126 127
    150 151 152 153
    160 161 162
    170
    200 201
    235
    410 420 430 440 445
    500 510 520 530 540 550 560
    600 610 620 630 640 650
""".split()

# List of individual URLs to extract from
urls = [f"{CFR_SEARCH_URL}?fr=314.{suffix}" for suffix in SECTION_SUFFIXES]

# Step through each URL, extract content, and save as document_1, document_2, ...
for doc_number, url in enumerate(urls, start=1):
    extract_and_save_content(url, f"document_{doc_number}")

print("All documents have been saved.")


Saved content from https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfCFR/CFRSearch.cfm?fr=314.1 to FDA_documents/document_1.txt
Saved content from https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfCFR/CFRSearch.cfm?fr=314.2 to FDA_documents/document_2.txt
Saved content from https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfCFR/CFRSearch.cfm?fr=314.3 to FDA_documents/document_3.txt
Saved content from https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfCFR/CFRSearch.cfm?fr=314.50 to FDA_documents/document_4.txt
Saved content from https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfCFR/CFRSearch.cfm?fr=314.52 to FDA_documents/document_5.txt
Saved content from https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfCFR/CFRSearch.cfm?fr=314.53 to FDA_documents/document_6.txt
Saved content from https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfCFR/CFRSearch.cfm?fr=314.54 to FDA_documents/document_7.txt
Saved content from https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfCFR/CFR

In [2]:
from torch import cuda, bfloat16
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain_community.document_loaders import DirectoryLoader, UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
import torch

# Paths
FDA_DOCUMENTS_PATH = "/content/FDA_documents"
VECTOR_DB_PATH = "/content/Vector_db_dir"

# Load and process documents
def load_and_process_documents():
    """Load the FDA text files, chunk them, embed them and index them in Chroma.

    Returns:
        Chroma: vector store persisted under ``VECTOR_DB_PATH``.

    Raises:
        FileNotFoundError: if no ``*.txt`` file is found in ``FDA_DOCUMENTS_PATH``
            (run the download cell first).
    """
    loader = DirectoryLoader(
        path=FDA_DOCUMENTS_PATH,
        glob="*.txt",
        loader_cls=UnstructuredFileLoader
    )
    documents = loader.load()
    if not documents:
        raise FileNotFoundError(f"No .txt documents found in {FDA_DOCUMENTS_PATH}")

    text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=500)
    text_chunks = text_splitter.split_documents(documents)

    # Give every chunk a stable id ("<source>#<position within source>") so that
    # re-running this cell against the same persist directory overwrites the
    # existing entries instead of storing a duplicate copy of every chunk.
    chunks_seen_per_source = {}
    chunk_ids = []
    for chunk in text_chunks:
        source = str(chunk.metadata.get("source", ""))
        position = chunks_seen_per_source.get(source, 0)
        chunks_seen_per_source[source] = position + 1
        chunk_ids.append(f"{source}#{position}")

    embeddings = HuggingFaceEmbeddings()

    vectordb = Chroma.from_documents(
        documents=text_chunks,
        embedding=embeddings,
        ids=chunk_ids,
        persist_directory=VECTOR_DB_PATH
    )

    return vectordb

# Quantization configuration using bitsandbytes.
# Passed as `quantization_config` when initialize_model() loads the LLM:
# 4-bit loading, quant type 'nf4', double quantization enabled, bfloat16 as the
# compute dtype.
# NOTE(review): bfloat16 may not be natively supported on every GPU - confirm
# for the target runtime.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# Initialize GPT-J or LLaMA model with quantization
def initialize_model():
    """Load the chat LLM with the module-level quantization config, plus its tokenizer.

    Returns:
        tuple: ``(model, tokenizer)`` for the checkpoint id set below.
    """
    checkpoint = "meta-llama/Llama-2-7b-chat-hf"  # Change to LLaMA or GPT-J model ID

    load_options = {
        "quantization_config": bnb_config,
        "device_map": "auto",
    }
    llm = AutoModelForCausalLM.from_pretrained(checkpoint, **load_options)
    llm_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return llm, llm_tokenizer

# Main function to retrieve documents and generate response
def main():
    """Run one retrieval-augmented question end to end and print the answer."""
    vector_store = load_and_process_documents()
    llm, llm_tokenizer = initialize_model()

    question = "What is the full form of ANDA?"  # Example query

    # Retrievers are called through .invoke(); keep only the text of each hit.
    matches = vector_store.as_retriever().invoke(question)
    passages = [match.page_content for match in matches]

    reply = generate_answer(question, passages, llm, llm_tokenizer)
    print("Response:", reply)

# Generate answer using the LLM with max_new_tokens adjustment
def generate_answer(query, retrieved_docs, model, tokenizer, max_new_tokens=200):
    """Answer ``query`` with the LLM, grounded in the retrieved passages.

    Args:
        query: the user's question.
        retrieved_docs: iterable of passages; each item may be a plain string
            or an object exposing ``page_content``.
        model: causal LM exposing ``generate``.
        tokenizer: tokenizer matching ``model``.
        max_new_tokens: maximum number of tokens to generate for the answer.

    Returns:
        str: only the newly generated text, stripped of surrounding whitespace.
    """
    # Accept both raw strings and Document-like objects.
    context = " ".join(getattr(doc, "page_content", doc) for doc in retrieved_docs)
    prompt = f"Answer the question based on the context:\n\nContext: {context}\n\nQuestion: {query}\n\nAnswer:"

    inputs = tokenizer(prompt, return_tensors="pt")

    # The model is loaded with device_map="auto"; put the inputs on its device.
    device = getattr(model, "device", None)
    if device is not None and hasattr(inputs, "to"):
        inputs = inputs.to(device)

    prompt_length = len(inputs["input_ids"][0])

    # Adjust max_new_tokens instead of max_length
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # Slice off the prompt tokens instead of splitting the decoded text on
    # "Answer:", which would cut an answer that itself contains that marker.
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

# Run the example end to end: build the index, load the model, answer the sample query
main()



  embeddings = HuggingFaceEmbeddings()
  embeddings = HuggingFaceEmbeddings()
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


.gitattributes:   0%|          | 0.00/1.23k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]



Response: ANDA stands for Abbreviated New Drug Application.


In [5]:
from pyngrok import ngrok


In [6]:
# import streamlit as st
# from langchain_community.document_loaders import DirectoryLoader, UnstructuredFileLoader
# from langchain_community.embeddings import HuggingFaceEmbeddings
# from langchain_community.vectorstores import Chroma
# from transformers import AutoModelForCausalLM, AutoTokenizer
# from torch import cuda, bfloat16
# import transformers
# from langchain.text_splitter import CharacterTextSplitter
# from langchain.memory import ConversationBufferMemory
# from pyngrok import ngrok


# # Constants
# FDA_DOCUMENTS_PATH = "/content/FDA_documents"
# VECTOR_DB_PATH = "/content/Vector_db_dir"

# # Load and process documents
# @st.cache_resource
# def load_and_process_documents():
#     try:
#         loader = DirectoryLoader(
#             path=FDA_DOCUMENTS_PATH,
#             glob="*.txt",
#             loader_cls=UnstructuredFileLoader
#         )
#         documents = loader.load()

#         text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=500)
#         text_chunks = text_splitter.split_documents(documents)

#         embeddings = HuggingFaceEmbeddings()

#         vectordb = Chroma.from_documents(
#             documents=text_chunks,
#             embedding=embeddings,
#             persist_directory=VECTOR_DB_PATH
#         )

#         return vectordb
#     except Exception as e:
#         st.error(f"Error loading documents: {str(e)}")
#         return None

# # Initialize LLaMA or GPT-J model
# @st.cache_resource
# def initialize_model():
#     # Quantization configuration using bitsandbytes
#     bnb_config = transformers.BitsAndBytesConfig(
#         load_in_4bit=True,
#         bnb_4bit_quant_type='nf4',
#         bnb_4bit_use_double_quant=True,
#         bnb_4bit_compute_dtype=bfloat16
#     )

#     try:
#         model_id = "meta-llama/Llama-2-7b-chat-hf"  # You can change to GPT-J if needed
#         model = AutoModelForCausalLM.from_pretrained(
#             model_id,
#             quantization_config=bnb_config,
#             device_map="auto",
#         )
#         tokenizer = AutoTokenizer.from_pretrained(model_id)
#         return model, tokenizer
#     except Exception as e:
#         st.error(f"Error initializing LLaMA model: {str(e)}")
#         return None, None

# # # Generate answer using the LLM
# # def generate_answer(query, retrieved_docs, model, tokenizer):
# #     context = " ".join(retrieved_docs)  # Combine retrieved docs
# #     prompt = f"Answer the following Question: {query}\n Given the following extracted \nContext: {context}\n\nAnswer:"

# #     inputs = tokenizer(prompt, return_tensors="pt")
# #     outputs = model.generate(**inputs, max_new_tokens=200)

# #     return tokenizer.decode(outputs[0], skip_special_tokens=True)

# # Generate answer using the LLM
# def generate_answer(query, retrieved_docs, model, tokenizer):
#     context = " ".join(retrieved_docs)  # Combine retrieved docs into context
#     prompt = f"Answer the question based on the context:\n\nContext: {context}\n\nQuestion: {query}\n\nAnswer:"

#     # Tokenize the prompt and convert to tensor
#     inputs = tokenizer(prompt, return_tensors="pt")

#     # Generate only the answer
#     outputs = model.generate(**inputs, max_new_tokens=200)

#     # Decode and return only the generated answer
#     answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

#     # Post-process to extract just the part after "Answer:" if needed
#     answer = answer.split("Answer:")[-1].strip()

#     return answer

# # Streamlit app
# def main():
#     st.title("PANDAbot")

#     vectordb = load_and_process_documents()
#     if vectordb is None:
#         st.error("Failed to load documents. Please check your document path and try again.")
#         return

#     model, tokenizer = initialize_model()
#     if model is None or tokenizer is None:
#         st.error("Failed to initialize model. Please check your setup and try again.")
#         return

#     # Chat interface
#     st.sidebar.header("Chat History")
#     if "messages" not in st.session_state:
#         st.session_state.messages = []

#     for message in st.session_state.messages:
#         with st.chat_message(message["role"]):
#             st.markdown(message["content"])

#     # if prompt := st.chat_input("Ask a question about FDA documents"):
#     #     st.session_state.messages.append({"role": "user", "content": prompt})
#     #     with st.chat_message("user"):
#     #         st.markdown(prompt)

#     #     with st.chat_message("assistant"):
#     #         try:
#     #             retriever = vectordb.as_retriever()
#     #             retrieved_docs = [doc.page_content for doc in retriever.get_relevant_documents(prompt)]
#     #             response = generate_answer(prompt, retrieved_docs, model, tokenizer)
#     #             st.markdown(response)
#     #             st.session_state.messages.append({"role": "assistant", "content": response})
#     #         except Exception as e:
#     #             error_message = f"An error occurred: {str(e)}"
#     #             st.error(error_message)
#     #             st.session_state.messages.append({"role": "assistant", "content": error_message})

#     # st.sidebar.markdown("\n".join([f"**{m['role']}**: {m['content']}" for m in st.session_state.messages]))

#     if prompt := st.chat_input("Ask a question about FDA documents"):
#       st.session_state.messages.append({"role": "user", "content": prompt})
#       with st.chat_message("user"):
#           st.markdown(prompt)

#       with st.chat_message("assistant"):
#           try:
#               retriever = vectordb.as_retriever()
#               retrieved_docs = [doc.page_content for doc in retriever.get_relevant_documents(prompt)]
#               response = generate_answer(prompt, retrieved_docs, model, tokenizer)
#               st.markdown(response)  # Display only the final answer
#               st.session_state.messages.append({"role": "assistant", "content": response})
#           except Exception as e:
#               error_message = f"An error occurred: {str(e)}"
#               st.error(error_message)
#               st.session_state.messages.append({"role": "assistant", "content": error_message})

#       st.sidebar.markdown("\n".join([f"**{m['role']}**: {m['content']}" for m in st.session_state.messages]))

# if __name__ == "__main__":
#     main()

# Run the Streamlit app
!streamlit run app.py &>/content/logs.txt &
!ngrok config add-authtoken "2mZfx0emkhYFNKedEax3pjieseD_6GA53MUZBosZJGn2YZ1ST"

# Set up ngrok tunnel
public_url = ngrok.connect(8501)

print(f"Public URL: {public_url}")


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
Public URL: NgrokTunnel: "https://426d-34-16-195-67.ngrok-free.app" -> "http://localhost:8501"


In [7]:
# Kill any running ngrok process so the public tunnel is closed.
!pkill ngrok