# Initializing

## Importing necessary Libraries

In [1]:
import gradio as gr
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import qdrant
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, TextStreamer
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient
import os
import torch
import time
from langchain.embeddings import HuggingFaceInstructEmbeddings
import qdrant_client

## Loading the .env File from our directory to access API keys and endpoints

In [2]:
# Load settings from the project's .env file; later cells read QDRANT_HOST and
# QDRANT_API_KEY through os.getenv.
load_dotenv()

True

# Text Preprocessing

### Function to parse through PDF and return text

In [3]:
def get_pdf_text(pdf):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        pdf: The PDF source, passed unchanged to ``PdfReader``.

    Returns:
        str: The text of all pages joined in page order with no separator.
        A page whose ``extract_text()`` result is empty or ``None``
        contributes an empty string.
    """
    pdf_reader = PdfReader(pdf)
    # A page without a text layer may yield nothing from extract_text();
    # `or ""` keeps the join from raising TypeError on a None result.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)

### Function to split full text into text-chunks

In [4]:
def get_text_chunks(text, chunk_size=1000, chunk_overlap=200):
    """Split a document's text into overlapping chunks.

    Args:
        text: The full text to split.
        chunk_size: Value passed as ``chunk_size`` to
            ``RecursiveCharacterTextSplitter``. Defaults to 1000.
        chunk_overlap: Value passed as ``chunk_overlap`` to
            ``RecursiveCharacterTextSplitter``. Defaults to 200.

    Returns:
        Whatever ``split_text`` returns for ``text`` (the list of chunks).
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)

# Creating/fetching Vector Database

### Creating a new vector database

In [5]:
def get_vector_store(chunks, user_id, QDRANT_HOST, QDRANT_API_KEY):
    """Build a Qdrant vector store from text chunks.

    Args:
        chunks: Texts handed to ``Qdrant.from_texts`` as ``texts``.
        user_id: Used as the Qdrant collection name.
        QDRANT_HOST: Passed as the ``url`` of the Qdrant instance.
        QDRANT_API_KEY: Passed as the ``api_key`` for the Qdrant instance.

    Returns:
        The store object returned by ``qdrant.Qdrant.from_texts``.
    """
    # The model named here determines the vectors stored in the collection.
    embedder = HuggingFaceInstructEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L12-v2"
    )
    store_options = {
        "texts": chunks,
        "embedding": embedder,
        "url": QDRANT_HOST,
        "api_key": QDRANT_API_KEY,
        "collection_name": user_id,
    }
    return qdrant.Qdrant.from_texts(**store_options)

### Fetching database if it already exists

In [6]:
def fetch_vector_store(user_id):
    """Return a vector store for an existing, non-empty Qdrant collection.

    Connection settings are read from the ``QDRANT_HOST`` and
    ``QDRANT_API_KEY`` environment variables.

    Args:
        user_id: Name of the Qdrant collection to look up.

    Returns:
        A ``qdrant.Qdrant`` store bound to the collection when it reports at
        least one stored vector/point, otherwise ``None``.
    """
    client = qdrant_client.QdrantClient(
        os.getenv("QDRANT_HOST"),
        api_key=os.getenv("QDRANT_API_KEY")
    )

    try:
        collection_info = client.get_collection(collection_name=user_id)
    except Exception:
        # Best effort: a failed lookup (e.g. the collection does not exist)
        # is treated as "nothing stored" so the caller builds a new store.
        collection_info = None

    # vectors_count may be missing or None; fall back to points_count and
    # finally to 0 so the comparison below never sees None.
    vectors_count = getattr(collection_info, "vectors_count", None)
    if vectors_count is None:
        vectors_count = getattr(collection_info, "points_count", None) or 0

    print(f"Vectors count: {vectors_count}")

    if vectors_count <= 0:
        # Return before loading the embedding model; it is not needed here.
        return None

    # Must be the same model get_vector_store uses when it writes the
    # collection; otherwise query vectors do not match the stored ones.
    embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L12-v2")

    return qdrant.Qdrant(
        client=client,
        collection_name=user_id,
        embeddings=embeddings
    )

# Conversation chain using Huggingface pipelines

### Creating a conversation chain

In [7]:
def get_conversation_chain(vectorstore):
    """Assemble a conversational retrieval chain over a vector store.

    Loads the model and tokenizer named below, wraps them in a
    text-generation pipeline with a ``TextStreamer``, and combines the
    resulting LLM with the store's retriever and a conversation buffer memory.

    Args:
        vectorstore: Object exposing ``as_retriever()``.

    Returns:
        The chain built by ``ConversationalRetrievalChain.from_llm``.
    """
    checkpoint = "TheBloke/Mistral-7B-Instruct-v0.1-AWQ"
    causal_lm = AutoModelForCausalLM.from_pretrained(
        checkpoint,
        low_cpu_mem_usage=True,
        device_map="auto",
    )
    tok = AutoTokenizer.from_pretrained(checkpoint)
    token_streamer = TextStreamer(tok, skip_prompt=True, skip_special_tokens=True)

    # Sampling settings forwarded to the pipeline as keyword arguments.
    sampling_options = dict(
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        top_k=40,
        max_new_tokens=1000,
        repetition_penalty=1.1,
    )

    text_generator = pipeline(
        "text-generation",
        model=causal_lm,
        tokenizer=tok,
        streamer=token_streamer,
        **sampling_options,
    )
    wrapped_llm = HuggingFacePipeline(pipeline=text_generator)
    history = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    return ConversationalRetrievalChain.from_llm(
        llm=wrapped_llm,
        retriever=vectorstore.as_retriever(),
        memory=history,
    )

# Final App Logic

### Functions for processing the PDF and answering questions

In [12]:
def process_pdf_and_initialize_chat(pdf, QDRANT_HOST, QDRANT_API_KEY):
    """Get or build the vector store for a PDF and return a chat chain.

    An existing non-empty collection is reused; otherwise the PDF is parsed,
    chunked and indexed into a new collection.

    Args:
        pdf: The uploaded PDF, forwarded to ``get_pdf_text``.
        QDRANT_HOST: Qdrant URL forwarded to ``get_vector_store``.
        QDRANT_API_KEY: Qdrant API key forwarded to ``get_vector_store``.

    Returns:
        The conversation chain produced by ``get_conversation_chain``.

    Raises:
        ValueError: If a new store must be built but the PDF yields no text
            chunks.
    """
    # Fixed collection name, so every call looks up the same collection.
    # NOTE(review): when that collection already holds vectors it is reused
    # and the uploaded PDF is not indexed - confirm this is intended.
    user_id = "agoaoidg-2944agag"

    vectorstore = fetch_vector_store(user_id)

    if not vectorstore:
        # Parse the PDF only when a store has to be built; the text is not
        # needed when an existing collection is reused.
        raw_text = get_pdf_text(pdf)
        text_chunks = get_text_chunks(raw_text)
        if not text_chunks:
            raise ValueError("No extractable text found in the uploaded PDF.")
        vectorstore = get_vector_store(text_chunks, user_id, QDRANT_HOST, QDRANT_API_KEY)

    # Initialize conversation chain
    return get_conversation_chain(vectorstore)

In [9]:
def chat_with_pdf(user_question, conversation_chain):
    """Send one question through the conversation chain and return its answer.

    Args:
        user_question: The question text; it is wrapped in
            ``<s>[INST] ... [/INST]`` before being sent.
        conversation_chain: Callable that takes a dict with a ``'question'``
            key and returns a mapping containing ``"answer"``.

    Returns:
        The value stored under ``"answer"`` in the chain's result.
    """
    payload = {'question': f"<s>[INST] {user_question} [/INST]"}
    return conversation_chain(payload)["answer"]


### Main Function

In [10]:
def gradio_app():
    """Build the Gradio UI for chatting with an uploaded PDF and launch it.

    The UI has a PDF upload, a "Start Chat" button that builds the
    conversation chain, and a question box whose answer is shown in a
    read-only textbox. Qdrant settings are read from the ``QDRANT_HOST`` and
    ``QDRANT_API_KEY`` environment variables.
    """
    with gr.Blocks() as demo:
        QDRANT_HOST = os.getenv("QDRANT_HOST")
        QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")

        # Set by start_chat and read by handle_question (shared via nonlocal).
        conversation_chain = None

        with gr.Row():
            gr.Markdown("# Conversational AI ")

        with gr.Row():
            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])

        start_chat_button = gr.Button("Start Chat")

        with gr.Row():
            user_question_input = gr.Textbox(label="Your Question")
            submit_button = gr.Button("Submit")
            ai_response = gr.Textbox(label="AI Response", interactive=False)

        def start_chat(pdf):
            """Build the conversation chain for the uploaded PDF."""
            nonlocal conversation_chain
            if pdf is None:
                # Nothing uploaded yet: report it instead of failing while
                # trying to read a missing file.
                return "Please upload a PDF before starting the chat."
            conversation_chain = process_pdf_and_initialize_chat(pdf, QDRANT_HOST, QDRANT_API_KEY)
            return "Chat initialized. You can now ask questions!"

        def handle_question(user_question):
            """Answer a question, or ask the user to start the chat first."""
            if conversation_chain is None:
                # The click handler is wired to a single output component,
                # so exactly one value is returned.
                return "Please upload a PDF and start the chat first."

            return chat_with_pdf(user_question, conversation_chain)

        start_chat_button.click(start_chat, inputs=[pdf_input], outputs=[ai_response])
        submit_button.click(handle_question, inputs=[user_question_input], outputs=[ai_response])

    demo.launch()

### Running the application

In [None]:
# Build the UI and launch the Gradio app defined above.
gradio_app()

Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


load INSTRUCTOR_Transformer
max_seq_length  512
Vectors count: 0
load INSTRUCTOR_Transformer
max_seq_length  512


  warn(
2024-09-19 13:50:05,049 - accelerate.utils.modeling - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
  warn_deprecated(
2024-09-19 13:50:37,171 - httpx - INFO - HTTP Request: POST https://f314c9c2-7b86-4820-9c7f-2ca2816f6e87.europe-west3-0.gcp.cloud.qdrant.io:6333/collections/agoaoidg-2944agag/points/search "HTTP/1.1 200 OK"
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Gross Floor Area (GFA) refers to the total square footage of the interior of a building, excluding areas like elevators, stairs, mechanical and electrical rooms, and loading docks. The calculation of GFA involves adding together the area of all habitable floors in the building, including mezzanine floors, basements, and attic spaces. It also includes the area of any lobbies, corridors, and staircases.

The units used to measure GFA are typically square feet or square meters. In some cases, it may be necessary to convert from one unit to another based on the specific requirements of the project or location. The calculation of GFA should be done carefully, as it is a critical component of determining the size and cost of a building project.


Traceback (most recent call last):
  File "/home/tejasram/miniconda3/envs/langchain_pdf/lib/python3.11/site-packages/gradio/queueing.py", line 521, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tejasram/miniconda3/envs/langchain_pdf/lib/python3.11/site-packages/gradio/route_utils.py", line 276, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tejasram/miniconda3/envs/langchain_pdf/lib/python3.11/site-packages/gradio/blocks.py", line 1935, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tejasram/miniconda3/envs/langchain_pdf/lib/python3.11/site-packages/gradio/blocks.py", line 1513, in call_function
    prediction = await anyio.to_thread.run_sync(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tejasram/miniconda3/envs/langchain_pdf/lib/python3.1