<a href="https://colab.research.google.com/github/paharipratyush/intelunnati/blob/main/pdfchatbotopenvino.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

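***PDF Chatbot using Intel OpenVINO and RAG***

This notebook extracts the text of an uploaded PDF, indexes it with sentence embeddings in a FAISS index, and answers questions about it with a TinyLlama chat model run through OpenVINO, all behind a small Gradio interface.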

*Install Required Packages*

In [None]:
# Install the packages this notebook imports; the first two commands use pip's -q flag.
!pip install -q transformers sentence-transformers faiss-cpu PyPDF2 openvino-nightly
!pip install -q optimum[openvino]
# NOTE(review): the command below repeats several packages from the first command
# (PyPDF2, sentence-transformers, faiss-cpu, transformers) and adds numpy, nltk and
# gradio. No versions are pinned anywhere in this cell — consider pinning them.
!pip install numpy PyPDF2 sentence-transformers faiss-cpu optimum[intel] transformers nltk gradio

Collecting gradio
  Downloading gradio-4.37.2-py3-none-any.whl (12.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m50.0 MB/s[0m eta [36m0:00:00[0m
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.111.0-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==1.0.2 (from gradio)
  Downloading gradio_client-1.0.2-py3-none-any.whl (318 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.2/318.2 kB[0m [31m33.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━

*Import libraries*

In [None]:
import numpy as np
import PyPDF2
from sentence_transformers import SentenceTransformer
import faiss
from transformers import AutoTokenizer
from optimum.intel import OVModelForCausalLM
import gc
import torch
import nltk
import gradio as gr
import tempfile
import os

  from tqdm.autonotebook import tqdm, trange
No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'


*Download NLTK data*

In [None]:
# nltk.sent_tokenize needs the Punkt sentence-tokenizer data. Newer NLTK releases
# load it from the 'punkt_tab' resource instead of 'punkt', so request both.
# The per-resource download status is displayed as the cell output.
punkt_status = {
    resource: nltk.download(resource, quiet=True)
    for resource in ('punkt', 'punkt_tab')
}
punkt_status

True

*Define utility functions*

In [None]:
# Function to read PDF Document

def read_pdf(file_path):
    """Return the text of every page of a PDF, one page per newline-terminated block.

    Args:
        file_path: Path of the PDF file to open (opened in binary mode).

    Returns:
        A single string: the extracted text of each page followed by "\\n",
        concatenated in page order.
    """
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # A page without a text layer may yield no text; treat a missing (None)
        # result as empty text instead of failing on the concatenation.
        page_texts = [(page.extract_text() or "") + "\n" for page in reader.pages]
    return "".join(page_texts)

# Function to create semantic chunks from text

def create_semantic_chunks(text, chunk_size=1000, overlap=200):
    """Group the sentences of ``text`` into chunks of roughly ``chunk_size`` characters.

    Sentences are never split. When adding a sentence would push the current
    chunk past ``chunk_size``, the chunk is closed and the next one starts with
    the last ``overlap`` characters of the closed chunk followed by that sentence.

    Args:
        text: The text to split.
        chunk_size: Character budget that triggers closing the current chunk.
        overlap: Number of trailing characters carried into the next chunk.
            Zero or a negative value disables the carry-over.

    Returns:
        A list of non-empty chunk strings (empty list for empty text).
    """
    sentences = nltk.sent_tokenize(text)
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        if not current_chunk:
            # Nothing accumulated yet: start the chunk with this sentence, even if
            # the sentence alone is over budget (avoids emitting an empty chunk).
            current_chunk = sentence
        elif len(current_chunk) + len(sentence) > chunk_size:
            chunks.append(current_chunk)
            # current_chunk[-0:] would be the *whole* string, so overlap == 0
            # needs an explicit empty tail.
            tail = current_chunk[-overlap:] if overlap > 0 else ""
            # Keep a separator between the carried tail and the new sentence.
            current_chunk = f"{tail} {sentence}".strip()
        else:
            current_chunk += " " + sentence
    if current_chunk:
        chunks.append(current_chunk)
    return chunks

# Function to create vector store from text

def create_vector_store(text, chunk_size=1000, overlap=200):
    """Chunk ``text``, embed the chunks and store the embeddings in a FAISS L2 index.

    Args:
        text: Full document text.
        chunk_size: Forwarded to ``create_semantic_chunks``.
        overlap: Forwarded to ``create_semantic_chunks``.

    Returns:
        A tuple ``(index, chunks, model)``: the populated ``faiss.IndexFlatL2``,
        the list of chunk strings in index order, and the ``SentenceTransformer``
        used to embed them (needed later to embed queries the same way).

    Raises:
        ValueError: If the text contains nothing to index (for example a PDF
            from which no text could be extracted).
    """
    # Drop blank chunks so that index positions only ever map to real text.
    chunks = [
        chunk
        for chunk in create_semantic_chunks(text, chunk_size, overlap)
        if chunk.strip()
    ]
    if not chunks:
        # Fail before loading the embedding model, with a message the UI can show.
        raise ValueError("No text could be extracted from the document, so there is nothing to index.")

    model = SentenceTransformer('all-MiniLM-L6-v2')
    # FAISS works on contiguous float32 matrices; normalise whatever encode() returns.
    embeddings = np.ascontiguousarray(model.encode(chunks), dtype=np.float32)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, chunks, model

*Setup Chatbot*

In [None]:
def setup_chatbot(pdf_file):
    """Build everything needed to answer questions about one PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            upload object exposing the path as its ``name`` attribute.

    Returns:
        A tuple ``(index, chunks, embedding_model, model, tokenizer)``: the
        vector index, chunk list and embedding model from ``create_vector_store``,
        followed by the OpenVINO causal LM and its tokenizer.
    """
    # Plain paths are used as-is; anything else is expected to carry the path in
    # `.name`. Checking for paths first matters because a pathlib.Path also has a
    # `.name`, but it is only the file's base name.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name

    text = read_pdf(pdf_path)
    index, chunks, embedding_model = create_vector_store(text)

    model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # export=True asks optimum to convert the checkpoint while loading it.
    model = OVModelForCausalLM.from_pretrained(model_name, export=True)

    return index, chunks, embedding_model, model, tokenizer

*Chatbot function*

In [None]:
def chatbot(query, index, chunks, embedding_model, llm_model, tokenizer, k=3, max_input_length=1024, max_new_tokens=512):
    """Answer ``query`` using the chunks nearest to it as context for the LLM.

    Args:
        query: The user's question.
        index: Vector index whose ``search(vectors, k)`` returns ``(distances, ids)``.
        chunks: Chunk strings, positioned to match the ids returned by ``index``.
        embedding_model: Object whose ``encode`` embeds a list of strings.
        llm_model: Causal language model exposing ``generate``.
        tokenizer: Tokenizer matching ``llm_model``.
        k: Number of nearest chunks to request from the index.
        max_input_length: Token limit applied to the prompt (truncated beyond it).
        max_new_tokens: Maximum number of tokens to generate.

    Returns:
        The generated text following the prompt, stripped of surrounding whitespace.
    """
    # Find relevant chunks

    query_vector = embedding_model.encode([query])
    _, neighbor_ids = index.search(query_vector, k)
    # FAISS fills missing neighbours with -1 when fewer than k vectors are stored;
    # unfiltered, chunks[-1] would silently add the last chunk to the context.
    relevant_chunks = [chunks[i] for i in neighbor_ids[0] if 0 <= i < len(chunks)]

    # Construct Prompt

    context = "\n".join(relevant_chunks)
    prompt = f"Based on the following information from a PDF, answer the question.\n\nInformation: {context}\n\nQuestion: {query}\n\nAnswer:"

    # Generate Response

    input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=max_input_length)
    output = llm_model.generate(input_ids, max_new_tokens=max_new_tokens)

    # Extract Answer
    # The generated sequence starts with the prompt tokens. Decoding only what
    # comes after them is robust where searching the text for "Answer:" is not:
    # the marker may also occur inside the PDF text, or be lost to truncation.

    prompt_length = input_ids.shape[-1]
    answer = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
    return answer.strip()

*Chatbot state management*

In [None]:
class ChatbotState:
    """Mutable holder for the objects produced by processing one PDF."""

    def __init__(self):
        # Nothing is loaded until a PDF has been processed.
        for attribute in ("index", "chunks", "embedding_model", "llm_model", "tokenizer"):
            setattr(self, attribute, None)
        # Flag the interface callbacks check before answering questions.
        self.pdf_uploaded = False


# Module-level instance shared by the interface callbacks.
chatbot_state = ChatbotState()

*Gradio interface functions*

In [None]:
def upload_pdf(pdf_file):
    """Process an uploaded PDF into the shared chatbot state.

    Returns a status message: a prompt to upload when ``pdf_file`` is None,
    a success message once the state is populated, or the error text if
    processing raised.
    """
    if pdf_file is None:
        return "Please upload a PDF file."

    try:
        components = setup_chatbot(pdf_file)
        index, chunks, embedding_model, llm_model, tokenizer = components
        chatbot_state.index = index
        chatbot_state.chunks = chunks
        chatbot_state.embedding_model = embedding_model
        chatbot_state.llm_model = llm_model
        chatbot_state.tokenizer = tokenizer
        chatbot_state.pdf_uploaded = True
    except Exception as error:
        return f"Error processing PDF: {str(error)}"

    return "PDF uploaded and processed successfully. You can now ask questions about its content."

def answer_question(question):
    """Answer ``question`` from the processed PDF held in the shared state.

    Returns the chatbot's answer, a prompt to upload a PDF if none has been
    processed yet, or the error text if answering raised.
    """
    state = chatbot_state
    if not state.pdf_uploaded:
        return "Please upload a PDF file first."

    try:
        return chatbot(
            question,
            state.index,
            state.chunks,
            state.embedding_model,
            state.llm_model,
            state.tokenizer,
        )
    except Exception as error:
        return f"Error generating answer: {str(error)}"

def restart_chatbot():
    """Clear the shared chatbot state and release memory.

    Returns a 4-tuple: the status message, None, and two empty strings.
    """
    # Drop every reference held by the shared state.
    for attribute in ("index", "chunks", "embedding_model", "llm_model", "tokenizer"):
        setattr(chatbot_state, attribute, None)
    chatbot_state.pdf_uploaded = False

    # Collect the released objects; the CUDA cache is only emptied when CUDA is available.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    status = "Chatbot restarted. Please upload a new PDF."
    return status, None, "", ""

*Gradio interface*

In [None]:
# Build the interface: an upload row with a status box, a question row with an
# answer box, and a restart button. Components are created in display order.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Chatbot")
    # Upload controls: a file picker limited to .pdf and the button that starts processing.
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Upload and Process")
    upload_output = gr.Textbox(label="Status")

    # Question controls and the box that receives the answer.
    with gr.Row():
        question_input = gr.Textbox(label="Ask a question about the PDF content")
        answer_button = gr.Button("Get Answer")
    answer_output = gr.Textbox(label="Answer")

    restart_button = gr.Button("Restart")

    # Wire each button to its callback.
    upload_button.click(upload_pdf, inputs=[pdf_input], outputs=[upload_output])
    answer_button.click(answer_question, inputs=[question_input], outputs=[answer_output])
    # restart_chatbot returns four values, one for each component listed in outputs
    # (status text, file input, question box, answer box).
    restart_button.click(restart_chatbot, inputs=[], outputs=[upload_output, pdf_input, question_input, answer_output])


*Launch the Gradio interface*

In [None]:
# Start the Gradio app built in the previous cell.
# NOTE(review): share=True presumably requests a public link and debug=True
# presumably keeps this cell running while the server is up — confirm against
# the Gradio launch() documentation.
demo.launch(share=True, debug=True)

Compiling the model to CPU ...


In [None]:
# demo.close()

Closing server running on port: 7860
