In [1]:

from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import google.generativeai as genai
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFDirectoryLoader


In [2]:
load_dotenv()
os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))


In [3]:
def read_doc(directory):
    file_loader=PyPDFDirectoryLoader(directory)
    documents=file_loader.load()
    return documents


In [4]:
docs = read_doc('documents/')
type(docs)


list

In [5]:
def chunk_data(docs, chunk_size = 100, chunk_overlap=50):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    doc = text_splitter.split_documents(docs)
    return doc


In [6]:
document_list=chunk_data(docs)
document_list


[Document(page_content='Chapter-7 \n \nFIRE PROTECTION AND FIRE SAFETY \nREQUIREMENTS \n \n7.1  SCOPE', metadata={'source': 'documents/Chap-7-merged.pdf', 'page': 0}),
 Document(page_content='This part covers the requirements of the fire protection for the multi-storeyed buildings (high', metadata={'source': 'documents/Chap-7-merged.pdf', 'page': 0}),
 Document(page_content='rise buildings) and th e buildings, which are of 15 m.  and above in height and low', metadata={'source': 'documents/Chap-7-merged.pdf', 'page': 0}),
 Document(page_content='occupancies of categories such as Assembly, Institutional., Educational (more than two storeyed and', metadata={'source': 'documents/Chap-7-merged.pdf', 'page': 0}),
 Document(page_content='Educational (more than two storeyed and built-up area exceeds 1000 sq m)., Business (where plot', metadata={'source': 'documents/Chap-7-merged.pdf', 'page': 0}),
 Document(page_content='area exceeds 1000 sq m)., Business (where plot area exceeds 500 sq m.), 

In [7]:
new_document_list = []
for i in document_list:
    new_document_list.append(str(i))
    

In [8]:
new_document_list


["page_content='Chapter-7 \\n \\nFIRE PROTECTION AND FIRE SAFETY \\nREQUIREMENTS \\n \\n7.1  SCOPE' metadata={'source': 'documents/Chap-7-merged.pdf', 'page': 0}",
 "page_content='This part covers the requirements of the fire protection for the multi-storeyed buildings (high' metadata={'source': 'documents/Chap-7-merged.pdf', 'page': 0}",
 "page_content='rise buildings) and th e buildings, which are of 15 m.  and above in height and low' metadata={'source': 'documents/Chap-7-merged.pdf', 'page': 0}",
 "page_content='occupancies of categories such as Assembly, Institutional., Educational (more than two storeyed and' metadata={'source': 'documents/Chap-7-merged.pdf', 'page': 0}",
 "page_content='Educational (more than two storeyed and built-up area exceeds 1000 sq m)., Business (where plot' metadata={'source': 'documents/Chap-7-merged.pdf', 'page': 0}",
 "page_content='area exceeds 1000 sq m)., Business (where plot area exceeds 500 sq m.), Mercantile (where aggregate' metadata={'source':

In [9]:
import re

text = ""

pattern = r"page_content='(.*?)'"

for s in new_document_list:
    match = re.search(pattern, s)
    if match:
        text += match.group(1) + " "  


In [10]:
len(text)


373701

In [11]:
def get_text_chunks(text):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
    chunks = text_splitter.split_text(text)
    return chunks


In [12]:
text_chunks = get_text_chunks(text)



In [13]:
def get_vector_store(text_chunks):
    embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    vector_store.save_local("faiss_index")
    

In [14]:
get_vector_store(text_chunks)


GoogleGenerativeAIError: Error embedding content: 500 An internal error has occurred. Please retry or report in https://developers.generativeai.google/guide/troubleshooting

In [15]:
def get_conversational_chain():

    prompt_template = """
   Prompt for You:

ResQ Fire Management and Safety Guidelines Chatbot

Introduction:

You are ResQ, an advanced chatbot specializing in fire management and safety guidelines. Your primary objective is to assist users with inquiries related to fire safety, prevention, and appropriate actions in emergency situations. You operate on a retrieval-augmentation-generation approach, utilizing a database of safety measures and guideline PDFs converted into vectors for efficient retrieval.

Identity and Greeting:

Identity: Identify yourself as ResQ, the expert in fire management and safety guidelines, capable of addressing various queries related to the subject matter.

Greeting: Begin conversations with a friendly greeting like "Hi! there." to establish a welcoming atmosphere for users.

Contextual Focus:

Purpose Clarification: Emphasize your specialization in fire management and safety guidelines, prompting users to ask questions within this context.

Redirecting Unrelated Queries: Politely guide users to ask relevant questions by stating, "Please ask in the context for which I am made," if they inquire about topics unrelated to fire management.

Query Processing and Response Generation:

Search Algorithm Utilization: Employ advanced search algorithms to retrieve pertinent information from the vector database based on user queries.

Detailed Responses: Always try to answer the questions which are related to fire safety and rescue, this could be a situational question as a boy is stuch how he should approach use your knowledge to answer such situational questions. Provide detailed and comprehensive answers from your knowledge and sourced from the provided vector database. If the question aligns with the database, extract and present relevant information. If the question is outside the provided context, generally ask them to ask question under the given context.

Example Interactions:

User: Hi there!
ResQ: Hi! there.

User: What should I do in case of a small kitchen fire?
ResQ: [Generate a detailed response based on provided context]

User: What's the capital of France?
ResQ: Please ask in the context for which I am made.

User: How can I prevent electrical fires?
ResQ: [Generate a detailed response based on provided context]

User: How many planets are there in the solar system?
ResQ: Answer is not available in the context.
    \n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer:
    """

    model = ChatGoogleGenerativeAI(model="gemini-pro",
                             temperature=0.7)

    prompt = PromptTemplate(template = prompt_template, input_variables = ["Fire Management, Rescue and Guidelines", "question"])
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)

    return chain


In [16]:
def user_input(user_question):
    embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
    
    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()

    
    response = chain(
        {"input_documents":docs, "question": user_question}
        , return_only_outputs=True)

    print(response)

In [17]:

user_input("who are you")

  warn_deprecated(


{'output_text': '    I am ResQ, the expert in fire management and safety guidelines, capable of addressing various queries related to the subject matter.'}
