In [7]:
import os
from typing import Optional

import PyPDF2
from dotenv import load_dotenv
from langchain.chains import ConversationChain, LLMChain
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_community.vectorstores import FAISS
from langchain_core.caches import InMemoryCache
from langchain_core.globals import set_llm_cache
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [4]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Re-assigning os.environ[K] = os.getenv(K) is a no-op when K is set and raises
# an opaque "str expected, not NoneType" TypeError when it is not. Validate the
# one key this notebook actually reads and fail with an actionable message.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or environment."
    )

# PINECONE_API_KEY is not read by any visible cell (the vector store is FAISS),
# so it is intentionally not required here. load_dotenv() still exposes it
# through os.environ when it is defined.

In [None]:
#load the pdf file and create text chunks
def load_pdf(data):
    """Load the PDF files found under the directory ``data``.

    A ``DirectoryLoader`` is configured with the glob ``*.pdf`` and uses
    ``PyPDFLoader`` for each matching file. The value returned by
    ``loader.load()`` is handed back unchanged.
    """
    pdf_directory_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_directory_loader.load()

# Directory that holds the source PDF files. Set the PDF_DATA_DIR environment
# variable to point somewhere else instead of editing this machine-specific
# default path.
PDF_DATA_DIR = os.getenv("PDF_DATA_DIR", "/Documents/Conversational-Chatbot/Data/")
extracted_pdf = load_pdf(PDF_DATA_DIR)

#Text Chunks

def text_split(extracted_pdf, chunk_size=1000, chunk_overlap=50):
    """Split loaded documents into overlapping text chunks.

    Args:
        extracted_pdf: Documents to split (as returned by ``load_pdf``).
        chunk_size: Maximum chunk size passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 1000.
        chunk_overlap: Overlap between consecutive chunks passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 50.

    Returns:
        The result of ``split_documents`` on ``extracted_pdf``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_pdf)

# Split the loaded PDF documents into chunks; later cells embed these chunks.
text_chunks = text_split(extracted_pdf)

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 32 0 (offset 0)
Ignoring wrong pointing object 34 0 (offset 0)


In [8]:
# Embedding model used to vectorise the document chunks.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

# Chat-completion model used by every chain in this notebook.
llm = ChatOpenAI(
    temperature=0.0,
    model_name='gpt-3.5-turbo-0125',
    openai_api_key=os.environ['OPENAI_API_KEY'],
)

# Install an in-memory cache as the global LLM cache.
llm_cache = InMemoryCache()
set_llm_cache(llm_cache)

In [9]:
# Embed the chunks into a FAISS index and persist it under the "vector_db" folder.
vector_db_dir = "vector_db"
db = FAISS.from_documents(text_chunks, embedding_model)
db.save_local(vector_db_dir)

In [10]:
# Wrap the FAISS index as a retriever (no search options are overridden here).
retriever= db.as_retriever()

In [16]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import MessagesPlaceholder
from langchain.chains.combine_documents import create_stuff_documents_chain
from pydantic import BaseModel, Field, EmailStr
from langchain.chains import create_history_aware_retriever

In [13]:
class PersonalDetails(BaseModel):
    # Contact details gathered from the user before an appointment is booked.
    #
    # Every field defaults to None, so the annotations are Optional[...]:
    # with a plain `str`/`int`/`EmailStr` annotation an explicitly supplied
    # None is rejected by validation even though None is the declared default.
    # (Documented with comments rather than a docstring so the model's
    # generated schema description is left unchanged.)
    name: Optional[str] = Field(
        None,
        description="The human name of the user.",
    )
    # NOTE(review): an int cannot keep a leading "0" or "+" of a phone number;
    # the type is kept for compatibility with existing callers — confirm.
    phone_number: Optional[int] = Field(
        None,
        description="The contact number of the user.",
    )
    email: Optional[EmailStr] = Field(
        None,
        description="an email address that the person associates as theirs",
    )

In [19]:
def check_what_is_empty(user_peronal_details):
    """Return the names of the fields that still hold an "empty" value.

    A field counts as empty when its value equals ``None``, ``""``, ``0`` or
    the placeholder address ``"a@gmail.com"``. A line is printed for every
    empty field.

    Args:
        user_peronal_details: Object exposing ``model_dump()`` (Pydantic v2)
            or ``dict()`` (Pydantic v1) that returns a field-name -> value
            mapping.

    Returns:
        list[str]: Names of the empty fields, in the mapping's order.
    """
    # `.dict()` is deprecated in Pydantic v2 in favour of `.model_dump()`;
    # prefer the new name and fall back so v1-style objects keep working.
    dump = getattr(user_peronal_details, "model_dump", None) or user_peronal_details.dict
    empty_values = (None, "", 0, "a@gmail.com")  # extend with other 'empty' markers as needed

    ask_for = []
    for field, value in dump().items():
        if value in empty_values:
            print(f"Field '{field}' is empty.")
            ask_for.append(f'{field}')
    return ask_for

def add_non_empty_details(current_details: "PersonalDetails", new_details: "PersonalDetails"):
    """Return a copy of ``current_details`` updated with the filled-in fields of ``new_details``.

    A field of ``new_details`` is skipped when its value equals ``None``,
    ``""``, ``0`` or the placeholder ``"a@gmail.com"`` — the same values that
    ``check_what_is_empty`` treats as "not provided". Previously only ``None``
    and ``""`` were skipped, so a placeholder ``0`` / ``"a@gmail.com"`` could
    overwrite a real value.

    Args:
        current_details: Details collected so far; not modified.
        new_details: Newly obtained details, possibly partially filled.

    Returns:
        A copy of ``current_details`` with the non-empty new values applied.
    """
    empty_values = (None, "", 0, "a@gmail.com")

    # Pydantic v2 renamed `.dict()` / `.copy()` to `.model_dump()` / `.model_copy()`;
    # prefer the new names and fall back so v1-style objects keep working.
    dump = getattr(new_details, "model_dump", None) or new_details.dict
    non_empty_details = {k: v for k, v in dump().items() if v not in empty_values}

    make_copy = getattr(current_details, "model_copy", None) or current_details.copy
    return make_copy(update=non_empty_details)

In [24]:
# System prompt for the question-answering task. The `{context}` placeholder
# is a template variable filled in when the prompt is formatted.
# Each fragment ends with sentence punctuation and a space: adjacent string
# literals are concatenated verbatim, so a missing separator runs two
# sentences together in the text sent to the model.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n"
    "{context}"
)
  

In [38]:
def create_chatbot_chain(llm, retriever, user_details):
    """Build the chatbot callable: document Q&A plus appointment-details intake.

    Args:
        llm: Language model used for question reformulation, answering and
            the info-gathering prompt.
        retriever: Retriever over the indexed documents.
        user_details: Details object handed to ``check_what_is_empty``
            whenever the user asks to book an appointment.

    Returns:
        A function ``chatbot_chain(input_question, chat_history)`` that
        returns the reply text for one user turn. It does not modify
        ``chat_history``; the caller is responsible for appending turns.
    """
    # Step 1: Define the history-aware retriever
    retriever_prompt = (
        "Given a chat history and the latest user question which might reference context in the chat history, "
        "formulate a standalone question which can be understood without the chat history. "
        "Do NOT answer the question, just reformulate it if needed and otherwise return it as is."
    )
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", retriever_prompt),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{input}"),
        ]
    )
    history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)

    # Step 2: Define the question-answering chain.
    # The chat history is passed in on every call, so the prompt needs a
    # placeholder for it; without one the history is silently ignored.
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

    # Step 3: Define the info-gathering chain
    def ask_for_info_chain(ask_for):
        # `ask_for` is supplied again when the chain is run; it fills the
        # `{ask_for}` template variable below.
        first_prompt = ChatPromptTemplate.from_template(
            "You are an assistant for question-answering tasks. "
            "If the user asks you to call or book an appointment, there are some things to ask the user in a conversational way. "
            "You should only ask one question at a time even if you don't get all the info. "
            "Don't ask as a list! Don't greet the user! Don't say Hi. Explain you need to get some info. "
            "If the ask_for list is empty, then thank them and ask how you can help them.\n\n"
            "### ask_for list: {ask_for}\n\n"
            "Use three sentences maximum and keep the answer concise. "
            "If you don't know the answer, say that you don't know."
        )
        # NOTE(review): LLMChain / .run are deprecated in recent LangChain
        # releases; kept to limit the change — consider `prompt | llm`.
        return LLMChain(llm=llm, prompt=first_prompt)

    # Step 4: Define the overall chatbot logic
    def chatbot_chain(input_question, chat_history):
        # Step 4a: Handle booking requests first. Their reply never uses the
        # documents, so retrieval and answering would be wasted model calls.
        if "book an appointment" in input_question.lower():
            ask_for = check_what_is_empty(user_details)
            if ask_for:
                info_gathering_chain = ask_for_info_chain(ask_for)
                return info_gathering_chain.run(ask_for=ask_for)
            return "Thank you! How else can I assist you?"

        # Step 4b: Retrieve documents. The history-aware retriever returns the
        # retrieved documents, not a reformulated question string.
        retrieved_docs = history_aware_retriever.invoke(
            {"chat_history": chat_history, "input": input_question}
        )

        # Step 4c: Answer from the retrieved documents. They go in `context`
        # (filling `{context}` in the system prompt) and the user's own
        # question goes in `input`.
        return question_answer_chain.invoke(
            {
                "chat_history": chat_history,
                "input": input_question,
                "context": retrieved_docs,
            }
        )

    return chatbot_chain

In [20]:
# Start from placeholder values ("" / 0 / 'a@gmail.com'); check_what_is_empty
# treats each of these as "not provided yet".
user_details = PersonalDetails(name="", phone_number= 0, email= 'a@gmail.com')
# Conversation history handed to the chatbot on each call; starts empty.
chat_history = [] 

In [39]:
# Build the chatbot callable from the LLM, the retriever and the user details.
chatbot_chain = create_chatbot_chain(llm, retriever, user_details)


In [27]:
# Sample question about the indexed documents.
user_input = "what are the core marketing strategies of Open AI"

In [40]:
# Ask the sample question with the current chat history and print the reply.
response = chatbot_chain(input_question=user_input, chat_history=chat_history)
print(response)

OpenAI is a prominent player in the field of artificial intelligence, known for its innovative technology and significant impact on the market. The company's marketing strategy focuses on open collaboration, sharing research, and models with the community to position itself as a leader in the industry. OpenAI's emphasis on ethical considerations, transparency, and open research sets a benchmark for others in the AI industry.


In [50]:
# The text contains "book an appointment", the phrase chatbot_chain checks for
# before asking for the user's missing personal details.
user_input1 = "i need to book an appointment"
response1 = chatbot_chain(input_question=user_input1, chat_history=chat_history)
print(response1)

Field 'name' is empty.
Field 'phone_number' is empty.
Field 'email' is empty.
What is your name?
