# Setup

In [1]:
# Import necessary modules

import getpass
import os
from enum import Enum
from operator import itemgetter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter


Defaulting to user installation because normal site-packages is not writeable


USER_AGENT environment variable not set, consider setting it to identify your requests.


# Loading / Splitting dataset

In [2]:
# Load every PDF found in pdf_directory into one flat list of documents.
# The two splitters are only configured here; they are passed to the
# ParentDocumentRetriever in a later cell rather than applied in this one.
pdf_directory = "./PDF_FILES"
child_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=25, add_start_index=True)
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100, add_start_index=True)

all_docs = []
# os.listdir() returns entries in arbitrary order; sorting keeps the order of
# all_docs (and everything indexed from it) reproducible between runs.
for filename in sorted(os.listdir(pdf_directory)):
    # Compare the extension case-insensitively so files named "*.PDF" are not skipped.
    if filename.lower().endswith(".pdf"):
        pdfloader = PyPDFLoader(os.path.join(pdf_directory, filename))
        docs = pdfloader.load()
        all_docs.extend(docs)

print(len(all_docs))

AS-BIKE-SHOP-WEB-MANUAL.pdf
dyson_contrarotator.pdf
101


# Embedding Phase


In [4]:

# Create the sentence-transformer embedding function, then the Chroma
# collection that uses it to embed whatever is added to the vector store.
sentence_transformer_ef = SentenceTransformerEmbeddings(
    model_name="all-MiniLM-L6-v2",
)
vectorstore = Chroma(
    embedding_function=sentence_transformer_ef,
    collection_name="full_documents",
)




# Retrieving Phase

In [5]:
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore

# In-memory store handed to the retriever as its docstore, next to the vector store.
store = InMemoryStore()
retriever = ParentDocumentRetriever(
    docstore=store,
    vectorstore=vectorstore,
    parent_splitter=parent_splitter,
    child_splitter=child_splitter,
)

# Index all loaded documents, then show how many keys the docstore now holds.
retriever.add_documents(all_docs)
sum(1 for _ in store.yield_keys())



bike. Again be careful not to push the rivetout of thelink!If someone brings a bike in without a chainor you
1.Inspect the chain forrust, broken, twisted or bentlinks. Anoverly rusted chain that cannot bendneedsto be replaced.Achain with "play" in its links is indicative of wearand the chainmay need to be replaced. Use thechain checkertooltomeasure the amount of play in a suspect multi-speedchain.(Ask a fellow mechanic how to use this tool). Usethe chainbreaker tool to break the chain as stated in theSingleSpeedChain Replacement Tutorial. *Do not push the pin allthe wayout.2.Next, get the correct size chain that correspondswith thenumber of speeds on the rear wheel of the bike.3.Again, new chains will likely have excess chains thatneed to beremoved to match the size of the old chain. Lay thenew andold chain next to each other and align each link toidentify thelink where the new chain must be broken. Remove thelinkwith the same technique used to originally removethe chainfrom the bike. 

# Answer Generation Phase

In [9]:

from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
import gradio as gr


# Only ask for the OpenAI key interactively when the environment has no
# (non-empty) value for it; getpass keeps the typed key out of the cell output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass(prompt="Enter your OpenAI API key: ")

# Chat model used by both the question-rewriting step and the answering step.
llm = ChatOpenAI(
    temperature=0,
    model="gpt-3.5-turbo",
)
### Contextualize question ###
# System instruction: rewrite the latest user question so it stands on its own,
# without answering it.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Message layout: system instruction, prior turns, then the new user input.
contextualize_q_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_q_messages)

# Wrap the document retriever with the LLM and the rewriting prompt above.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)
### Answer question ###
# System instruction for the answering step; "{context}" is the template
# variable where the retrieved documents are substituted.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# Message layout mirrors the contextualize prompt: system, history, user input.
qa_messages = [
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

# Compose the full chain: history-aware retrieval feeding the answering chain.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)
### Statefully manage chat history ###
# Maps session_id -> ChatMessageHistory for that session.
# Deliberately not named `store`: that name is already bound to the
# InMemoryStore used as the retriever's docstore, and rebinding it to a dict
# here would break any later `store.yield_keys()` call on re-execution.
session_histories = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying one conversation.

    Returns:
        The ``ChatMessageHistory`` stored under ``session_id``; a new empty one
        is created and stored if the key has not been seen before.
    """
    if session_id not in session_histories:
        session_histories[session_id] = ChatMessageHistory()
    return session_histories[session_id]


# Wrap the RAG chain with per-session history. The key names line up with the
# rest of the notebook: "input" is the key chat() sends, "chat_history" is the
# MessagesPlaceholder name in both prompts, and "answer" is the key chat() reads.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)


def chat(inp, session_id="abc123"):
    """Answer one user message through the conversational RAG chain.

    Args:
        inp: The user's message. ``None``, an empty string, or a string made
            only of whitespace is treated as "nothing to ask".
        session_id: Key selecting which chat history the chain reads and
            appends to. Defaults to ``"abc123"``, the value that used to be
            hard-coded, so single-argument callers behave as before.

    Returns:
        ``None`` when there is nothing to ask (the chain is not invoked);
        otherwise a transcript string containing the user's message and the
        chain's answer. The answer line keeps its existing ``Bard:`` label.
    """
    # Skip the retrieval + LLM round trip for blank input, including
    # whitespace-only strings, which are truthy and previously slipped through.
    if not inp or not str(inp).strip():
        return None
    user_input = inp
    answer = conversational_rag_chain.invoke(
        {"input": user_input},
        config={"configurable": {"session_id": session_id}},
    )["answer"]
    return f"\nYou: {user_input}\nBard: {answer}"


# Build the Gradio UI: one text input wired to chat(), one text output.
question_box = gr.Textbox(placeholder="Type your question here...")
answer_box = gr.Textbox(value="")

interface = gr.Interface(
    fn=chat,
    inputs=question_box,
    outputs=answer_box,
    title="Chat with me",
    description="Ask me anything!",
    elem_id="chat-container",
)

# Launch the interface.
interface.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




Parent run dd91b487-0c95-4c7b-aaed-57bb6682f709 not found for run 479b7502-e4c5-4bcb-abfb-5e10012960a3. Treating as a root run.
Parent run 350875c7-e493-48e0-a6f3-ff0d5ce20cc4 not found for run 07cf0aa0-908f-47d5-bba1-9427f4f86c4d. Treating as a root run.
Parent run 285b458e-ba15-4cd7-891a-328e1ab5d647 not found for run 5586c974-8ac5-408c-a7c0-8d2c49f41198. Treating as a root run.
