# Setup

In [25]:
# Import necessary modules
import getpass
import os
from enum import Enum
from operator import itemgetter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
import tkinter as tk
from tkinter import scrolledtext
import gradio as gr
from langchain_community.chat_models import ChatAnthropic

# Loading / Splitting dataset

In [67]:
# Load and split documents
pdf_directory = "./PDF_FILES"
recursive_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000, add_start_index=True)

all_docs = []
for filename in os.listdir(pdf_directory):
    if filename.endswith(".pdf"):
        print(filename)
        pdfloader = PyPDFLoader(os.path.join(pdf_directory, filename))
        docs = pdfloader.load_and_split(text_splitter=recursive_splitter)
        print(type(docs),type(docs[0]))
        docs = [doc.page_content for doc in docs]
        print(type(docs),type(docs[0]))
        all_docs.extend(docs)
        

AS-BIKE-SHOP-WEB-MANUAL.pdf
<class 'list'> <class 'langchain_core.documents.base.Document'>
<class 'list'> <class 'str'>
dyson_contrarotator.pdf
<class 'list'> <class 'langchain_core.documents.base.Document'>
<class 'list'> <class 'str'>


# Embedding Phase


In [72]:

# Embedding Phase
sentence_transformer_ef = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = Chroma(collection_name="pdf", embedding_function=sentence_transformer_ef)
vectorstore.add_texts(all_docs)

# Other embedding models are available in:
# - lang_chain_embedding_models
# - sentence-transformers
# - Kaggle (includes all ML models, not just embedding ones)



['b7d9168a-b470-45bb-9306-bc36155698d5',
 '8079bec7-2d19-4c97-9028-1f7c115c63b7',
 '52852bc4-3b0a-4dd3-99f7-5b40f65d49e6',
 'ae8c5064-f642-4245-934a-d049d54d8d28',
 'b4335063-aa61-40e2-af43-1a2aa7c18139',
 '9cb8fff2-b198-4ace-ac3b-f02c16ff96f7',
 '9acb651d-aeb4-419b-87cf-28ff0284eea0',
 'c47497a6-d6c3-45d0-822a-9afa35860bbe',
 '8bca8fa0-e255-41bb-8bf4-9164df994f07',
 '422863b8-7fa2-4a9c-b842-9df5abd02ffa',
 'df564fa1-b058-4576-ace3-2ea7d9f1cec1',
 'abdc596a-2fd2-4c62-b24f-ac2544fae960',
 '86ca2a9f-9b81-40f8-aa5b-9e698b2694c9',
 '14031f1e-fef9-4e08-9fa4-39833eb44568',
 '70b42039-e3ac-4c0d-b62e-b772af1b0f82',
 'a846dbf3-fb83-4f9f-9673-1fa5fbe325aa',
 '3abfe816-200b-4bfc-929c-99e53592fc07',
 '325a5b26-e107-4be7-b8e1-5671c09b5bd3',
 '07a24dc8-af7b-4e1b-89e5-4e0a34975aa7',
 'bb01aa88-136f-47ab-b3f7-0e4514431dd4',
 'ec68cfab-61bc-4ef1-9de8-9d2a6b451ca1',
 '7fa45328-cf9a-4799-a1cd-3a8aee0de907',
 '818c0923-3195-474d-b10f-1a8bcb98a3dc',
 '3022c76c-886a-4167-ac48-ca7a35675788',
 '7f735ca6-d2e0-

# Retrieving Phase

In [94]:
# Retrieving Phase
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

# Retrieve test
retrieved_docs = retriever.invoke("what does happen to bracket components over time?")
print("Context retrieved: ")
for retrieved_doc in retrieved_docs:
    print(retrieved_doc.page_content)

Context retrieved: 
bracket bearings. During rotation, the bearings roll on the bottombracket cup surfaces. Over time, the bottom bracketsystem canbecome loose and this can accelerate the wear on thebearings andcup surfaces. It is important to properly maintainbottom bracketcomponents to extend their lifetime.Typically, inthe shop weseepress-fittedone-piece bottom brackets(common onbeach cruisers),English threaded three-piece bottom brackets, andEnglishthreadedcartridge bottom brackets.
One-Piece Bottom Brackets
In one-piece bottom brackets, one S-shaped piece ofsteel forms thecrank arms and goes from the pedal, through the bottombracket, tothe other pedal. This crank also acts as the bottombracket bearingaxle. The crank arm is threaded in the middle andacts as the bottombracket’s spindle. The crank drive side threadingis right-handthreaded, and the non-drive side (left side) is left-handthreaded. It isnecessary to first remove the left side pedal in orderto remove thecrank.
One-Piece 

# Answer generation phase

In [105]:

from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory

import gradio as gr

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

### Contextualize question ###
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)
### Answer question ###
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
### Statefully manage chat history ###
store = {}
def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)


def chat(inp):
  """Function to handle user input and return Bard's response"""
  user_input = inp
  if not user_input:
    return None
  answer = conversational_rag_chain.invoke(
      {"input": user_input},
      config={"configurable": {"session_id": "abc123"}},
  )["answer"]
  return f"\nYou: {user_input}\nBard: {answer}"


# Define the chat interface
interface = gr.Interface(
  fn=chat,
  inputs=gr.Textbox(placeholder="Type your question here..."),
  outputs=gr.Textbox(value=""),
  title="Chat with me",
  description="Ask me anything!",
  elem_id="chat-container",
)

# Display the interface
interface.launch()

Running on local URL:  http://127.0.0.1:7876

To create a public link, set `share=True` in `launch()`.




Parent run 341bd1c3-404b-4d8b-98d2-f4c2dea53c2f not found for run 257f8542-844f-4337-b870-f6309312f4ac. Treating as a root run.
Parent run 5a10ac8a-ab02-4804-8476-0288c1206c40 not found for run e48aef15-fdbe-48aa-85a3-5e9f0d814a81. Treating as a root run.
Parent run 3352e945-52db-4b44-921e-f573e49c98fc not found for run 4293bae0-abc5-413c-9a92-9f2199ab5d0c. Treating as a root run.
