<a href="https://colab.research.google.com/github/manalipatel9/Project_3_AIML/blob/katia/KM_Project3Draft.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# Import Libraries

!pip install gradio
!pip install faiss-cpu
!pip install python-dotenv
!pip install nltk
!pip install transformers
!pip install sentence-transformers

import os
import gradio as gr
from typing import List, Union
import faiss
import numpy as np
import nltk
from nltk.tokenize import sent_tokenize
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from sentence_transformers import SentenceTransformer
from dotenv import load_dotenv
import shutil
import time

nltk.download('punkt_tab')

Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [15]:
# RAG application
class RAGApplication:
    """Minimal retrieval-augmented chatbot over one plain-text document.

    A document is split into sentences, every sentence is embedded with a
    SentenceTransformer model and stored in a FAISS L2 index.  At query time
    the sentences nearest to the question are passed to GPT-2 as context.
    """

    def __init__(self, persist_directory: str = 'chroma_db'):
        """Load the models and make sure the persistence directory exists.

        Args:
            persist_directory: Directory created on start-up.  A sub-directory
                named after the current collection is removed from it by
                ``clear_memory_and_storage`` if it exists.
        """
        self.persist_directory = persist_directory
        self.llm = GPT2LMHeadModel.from_pretrained('gpt2')
        self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
        self.sentence_transformer = SentenceTransformer('all-MiniLM-L6-v2')
        self.memory = []
        self.vectorstore = None
        # Sentences of the indexed document; row i of the FAISS index is the
        # embedding of document_sentences[i].
        self.document_sentences = []
        self.chain = None
        self.current_collection = None

        os.makedirs(self.persist_directory, exist_ok=True)

    def initialize_chain(self):
        """Validate that a document has been indexed.

        Raises:
            ValueError: If no vector store is loaded.
        """
        if self.vectorstore is None:
            raise ValueError('Please add a document')

    def process_file(self, file_path: str):
        """Index the text file at ``file_path`` and make it the active document.

        Returns:
            A human-readable status string; failures are reported in the
            string instead of being raised.
        """
        try:
            # Paths typed into a multi-line textbox often carry stray
            # whitespace or a trailing newline.
            file_path = file_path.strip()
            collection_name = f"file_{os.path.basename(file_path)}_{int(time.time())}"
            index, sentences = self.process_document(file_path, collection_name)
            # Commit index and sentences together so they can never disagree.
            self.vectorstore = index
            self.document_sentences = sentences
            self.current_collection = collection_name
            self.initialize_chain()
            return f"Successfully processed file: {file_path}"
        except Exception as e:
            return f"Error processing file: {str(e)}"

    def process_document(self, file_path: str, collection_name: str):
        """Read a text file, split it into sentences and build a FAISS index.

        Args:
            file_path: Path of the text file to read (decoded as UTF-8).
            collection_name: Accepted for interface compatibility; not used
                while building the in-memory index.

        Returns:
            Tuple ``(index, sentences)`` where ``index`` is a
            ``faiss.IndexFlatL2`` holding one vector per sentence.

        Raises:
            ValueError: If the document yields no sentences.
        """
        with open(file_path, 'r', encoding='utf-8') as f:
            document_text = f.read()

        document_sentences = [s for s in sent_tokenize(document_text) if s.strip()]
        if not document_sentences:
            # An empty embedding matrix has no second dimension to size the index.
            raise ValueError('Document contains no text to index')

        document_embeddings = np.asarray(
            self.sentence_transformer.encode(document_sentences), dtype=np.float32
        )

        index = faiss.IndexFlatL2(document_embeddings.shape[1])
        index.add(document_embeddings)

        return index, document_sentences

    def retrieve_relevant_documents(self, question: str, k: int = 3):
        """Return up to ``k`` indexed sentences closest to ``question``.

        Returns an empty list when no document is loaded, so callers can
        always treat the result as a list of strings.
        """
        if self.vectorstore is None:
            return []

        # Never ask for more neighbours than there are sentences: FAISS pads
        # missing results with -1, which would silently index the last item.
        top_k = min(k, len(self.document_sentences))
        if top_k <= 0:
            return []

        question_embedding = np.asarray(
            self.sentence_transformer.encode([question]), dtype=np.float32
        )
        _, indices = self.vectorstore.search(question_embedding, k=top_k)

        sentence_count = len(self.document_sentences)
        return [
            self.document_sentences[i]
            for i in indices[0]
            if 0 <= i < sentence_count
        ]

    def generate_answer(self, question: str, relevant_sentences: List[str],
                        max_new_tokens: int = 100) -> str:
        """Generate an answer with GPT-2 from the question and retrieved context.

        Args:
            question: The user's question.
            relevant_sentences: Context sentences placed in the prompt.
            max_new_tokens: Upper bound on generated tokens (prompt excluded).

        Returns:
            Only the generated continuation, without the echoed prompt.
        """
        context = ' '.join(relevant_sentences)

        # Leave room for the continuation inside the model's context window.
        prompt_budget = max(self.llm.config.n_positions - max_new_tokens, 1)
        frame_len = len(self.tokenizer.encode(f'Question: {question}\nContext: \nAnswer:'))
        context_ids = self.tokenizer.encode(context)
        context_room = max(prompt_budget - frame_len, 0)
        if len(context_ids) > context_room:
            # Trim the context rather than the trailing "Answer:" cue.
            context = self.tokenizer.decode(context_ids[:context_room])

        input_text = f'Question: {question}\nContext: {context}\nAnswer:'

        # Calling the tokenizer (not .encode) also yields the attention mask;
        # truncation is a final safety net for very long questions.
        encoded = self.tokenizer(
            input_text,
            return_tensors='pt',
            truncation=True,
            max_length=prompt_budget,
        )
        outputs = self.llm.generate(
            **encoded,
            max_new_tokens=max_new_tokens,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            pad_token_id=self.tokenizer.eos_token_id,
        )

        # The output sequence starts with the prompt tokens; drop them.
        prompt_len = encoded['input_ids'].shape[-1]
        answer = self.tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
        return answer.strip()

    def query(self, question: str) -> str:
        """Answer ``question`` using the currently indexed document."""
        if self.vectorstore is None:
            return "Please load some documents first!"

        relevant_sentences = self.retrieve_relevant_documents(question)
        return self.generate_answer(question, relevant_sentences)

    def clear_memory_and_storage(self) -> str:
        """Drop the indexed document, chat memory and any on-disk collection.

        Returns:
            A status string; failures are reported in the string.
        """
        try:
            self.memory = []
            self.vectorstore = None
            self.document_sentences = []
            self.chain = None
            if self.current_collection:
                collection_path = os.path.join(self.persist_directory, self.current_collection)
                if os.path.exists(collection_path):
                    shutil.rmtree(collection_path)
                self.current_collection = None
            return "Successfully cleared current documents and chat history"
        except Exception as e:
            return f"Error clearing data: {str(e)}"

    def create_gradio_interface(self):
        """Build the Gradio UI bound to this instance and return the Blocks app.

        The callbacks close over ``self`` so the models loaded in ``__init__``
        are reused instead of being loaded a second time.
        """

        def process_input(input_type: str, input_value: Union[str, List[str]]) -> str:
            # Only file input is implemented; anything else is rejected.
            # NOTE(review): the path is read from the server's filesystem, so
            # anyone who can reach the UI can index any readable file.
            if input_type == "file":
                return self.process_file(input_value)
            return "Invalid input type"

        def chat(message: str, history: List[List[str]]) -> str:
            # History is supplied by ChatInterface but not used for retrieval.
            return self.query(message)

        with gr.Blocks() as demo:
            gr.Markdown("# RAG Chatbot Application")

            with gr.Tab("Load Data"):
                input_type = gr.Radio(
                    choices=["file"],
                    label="Input Type",
                    value="file"
                )

                input_value = gr.Textbox(
                    label="Input (File Path)",
                    lines=2
                )

                process_btn = gr.Button("Process Input")
                clear_btn = gr.Button("Clear All Data", variant="secondary")
                process_output = gr.Textbox(label="Processing Result")

                process_btn.click(
                    fn=process_input,
                    inputs=[input_type, input_value],
                    outputs=process_output
                )

                clear_btn.click(
                    fn=self.clear_memory_and_storage,
                    inputs=[],
                    outputs=process_output
                )

            with gr.Tab("Chat"):
                gr.ChatInterface(
                    fn=chat,
                    title="Chat with your documents",
                    description="Ask questions about the loaded documents"
                )

        return demo

# Run the Gradio interface (top-level entry point, not part of the class)
if __name__ == "__main__":
    # Load optional settings from a local .env file into the process environment.
    load_dotenv()

    # SECURITY: never commit credentials to a notebook. A key was previously
    # hard-coded on this line; treat it as compromised and revoke/rotate it.
    # The key is read from the environment instead. Nothing in the local
    # GPT-2 / SentenceTransformer pipeline uses it, so it is optional and its
    # absence must not stop the app from starting.
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

    # Create an instance of the RAGApplication class
    rag_app_instance = RAGApplication()

    # Launch the Gradio interface.
    # NOTE(review): share=True plus server_name="0.0.0.0" publishes the app on
    # a public URL and on all network interfaces; the UI can read any file
    # path typed into it, so only share the link with trusted users.
    demo = rag_app_instance.create_gradio_interface()
    demo.launch(share=True, server_name="0.0.0.0")



Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://32a45c43afce2f553b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
