# Setup

In [28]:
# Import necessary modules
import getpass
import os
from enum import Enum
from operator import itemgetter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter


# Loading / Splitting dataset

In [27]:
# Load every PDF found in `pdf_directory` and split it into overlapping chunks.
pdf_directory = "./"
# Chunks are at most 10,000 characters with 1,000 characters of overlap between
# neighbours; add_start_index=True asks the splitter to record each chunk's offset.
recursive_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000, add_start_index=True)

all_docs = []
# os.listdir() returns names in arbitrary order; sorting keeps the chunk order
# (and everything derived from it) stable across kernel restarts and machines.
for filename in sorted(os.listdir(pdf_directory)):
    # Compare the extension case-insensitively so files such as "MANUAL.PDF" are not skipped.
    if filename.lower().endswith(".pdf"):
        pdfloader = PyPDFLoader(os.path.join(pdf_directory, filename))
        docs = pdfloader.load_and_split(text_splitter=recursive_splitter)
        all_docs.extend(docs)
        

# Embedding Phase


In [29]:

# Embedding Phase
# Embed every chunk with a local sentence-transformers model and index it in Chroma.
sentence_transformer_ef = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Chroma.from_documents() adds to the default collection instead of replacing it,
# so re-running this cell in the same kernel indexes every chunk a second time
# (the duplicated chunks in the retrieval output further down are consistent with that).
# Drop any existing default collection first so the cell is idempotent.
Chroma(embedding_function=sentence_transformer_ef).delete_collection()
vectorstore = Chroma.from_documents(documents=all_docs, embedding=sentence_transformer_ef)


# Other embedding models are available in:
# - lang_chain_embedding_models
# - sentence-transformers
# - Kaggle (includes all ML models, not just embedding ones)



# Retrieving Phase

In [30]:
# Retrieving Phase
# Expose the vector store as a similarity retriever returning the 6 closest chunks.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6},
)

# Smoke test: run one query and print the text of every chunk that comes back.
retrieved_docs = retriever.invoke("what did the dyson engineers discovery?")
print("Context retrieved: ")
for retrieved_doc in retrieved_docs:
    chunk_text = retrieved_doc.page_content
    print(chunk_text)

Context retrieved: 
UK  0870 600 2701 
Open 7 days a week, 8am – 8pm
ROI  (01)475 7109
Dyson Ltd  Tetbury Hill  Malmesbury
Wiltshire  SN16 ORP
www.dyson .com 
JN.6596 30.01.02 PN.50351-01-02
UK  0870 600 2701 
Open 7 days a week, 8am – 8pm
ROI  (01)475 7109
Dyson Ltd  Tetbury Hill  Malmesbury
Wiltshire  SN16 ORP
www.dyson .com 
JN.6596 30.01.02 PN.50351-01-02
This user guide also contains tips on effective
washing and important safety notes.Please read this user guide carefully before use.User guide
TM
www.dyson.com
This user guide also contains tips on effective
washing and important safety notes.Please read this user guide carefully before use.User guide
TM
www.dyson.com
The only 2-drum wash action.Dyson ContrarotatorTM
Conventional washing machines may seem convenient, 
but their poor performance lets you down. So James Dysonasked his engineers to experiment with every imaginable wayof washing to design a better washing machine.
Along the way, Dyson engineers made a surprising disco

# Answer generation phase

In [31]:

# Ask for the OpenAI API key with a hidden-input prompt (getpass does not echo what is typed).
# Only prompt when the key is not already configured, so re-running the cell does not
# overwrite a working key and a pre-set environment variable allows unattended runs.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# Allowed grade labels for an evaluated answer.
# Mixing in `str` makes each member compare equal to its plain string value
# (e.g. gradeEnum.correct == "correct").
# NOTE(review): documented with comments rather than a docstring because this enum
# is part of the schema given to the JSON parser; a docstring may end up in the
# format instructions sent to the model -- confirm before adding one.
class gradeEnum(str, Enum):
    correct = "correct"
    incorrect = "incorrect"

# Schema for the evaluator's verdict on one question/answer pair.
# It is passed to JsonOutputParser as `pydantic_object` later in this cell, and the
# chain treats the parsed result as a plain dict (it is indexed with ['grade'] etc.).
# NOTE(review): documented with comments rather than a docstring because the class
# feeds the parser's format instructions; a docstring may change the text sent to
# the model -- confirm before adding one.
class LLMEvalResult(BaseModel):
    # Verdict label; restricted to the gradeEnum values.
    grade: gradeEnum = Field(description="Final grade label. Accepted labels: correct, incorrect")
    # Short justification for the verdict.
    description: str = Field(description="Explanation of why the specific grade was assigned. Must be concise. Not more than 2 sentences")
    # Answer that was graded; defaults to "" and is filled in by process_eval_output.
    llm_answer: str = Field(description="The original LLM answer evaluated", default="")
    # Context the grading was based on; defaults to "" and is filled in by process_eval_output.
    context: str = Field(description="The context used for the evaluation", default="")

# Parser for the evaluator's JSON reply, using LLMEvalResult as its schema.
json_parser = JsonOutputParser(pydantic_object=LLMEvalResult)


# Two chat models: `llm` writes the answer, `llm_selfeval` grades it.
# The grader is pinned to temperature 0.
llm = ChatOpenAI(
    model="gpt-3.5-turbo-0125",
)
llm_selfeval = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
)

# Prompt definitions.

# Question-answering prompt: a single human message carrying the question and the retrieved context.
qa_prompt_text = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Question: {question} Context: {context} Answer:"
)
qa_prompt = ChatPromptTemplate.from_messages([("human", qa_prompt_text)])

# Both grading prompts embed the same JSON format instructions, so compute them once.
eval_format_instructions = json_parser.get_format_instructions()

# Grading prompt that sees only the question and the answer.
qa_eval_prompt_text = (
    "\n"
    "You are a teacher evaluating a test. \n"
    "You are provided with a question along with an answer for the question written by a student. Evaluate the question-answer pair and provide feedback.\n"
    "{format_instructions}\n"
    "Question: {question}\n"
    "Answer: {answer}\n"
)

qa_eval_prompt = PromptTemplate(
    template=qa_eval_prompt_text,
    input_variables=["question", "answer"],
    partial_variables={"format_instructions": eval_format_instructions},
)

# Grading prompt that additionally sees the retrieved context and must grade against it.
qa_eval_prompt_with_context_text = (
    "\n"
    "You are a teacher evaluating a test. \n"
    "You are provided with a question along with an answer for the question written by a student. Evaluate the question-answer pair using the provided context and provide feedback. Only mark the answer as correct if it agrees with the provided context.\n"
    "{format_instructions}\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "Answer: {answer}\n"
)

qa_eval_prompt_with_context = PromptTemplate(
    template=qa_eval_prompt_with_context_text,
    input_variables=["question", "answer", "context"],
    partial_variables={"format_instructions": eval_format_instructions},
)

# Define RAG Chain
def format_docs(docs):
    """Concatenate the `page_content` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a `page_content` string attribute.

    Returns:
        A single string; empty when `docs` yields nothing.
    """
    page_texts = []
    for document in docs:
        page_texts.append(document.page_content)
    separator = "\n\n"
    return separator.join(page_texts)

def retrieve_answer(output):
    """Return the `content` attribute of a chat-model reply.

    Args:
        output: Object exposing a `content` attribute (here, the reply produced by `llm`).

    Returns:
        Whatever `output.content` holds.
    """
    answer_text = output.content
    return answer_text

def process_eval_output(output, llm_answer, context):
    """Attach the graded answer and its context to a parsed evaluation result.

    The answer is recorded for every grade. Previously it was only stored when the
    grade was "correct", so an "incorrect" verdict left `llm_answer` either missing
    (a KeyError for callers reading it) or set to whatever the evaluator model had
    written into that field itself.

    Args:
        output: Parsed evaluator verdict as a mutable mapping (e.g. a dict with
            "grade" and "description" keys).
        llm_answer: The answer text that was evaluated.
        context: The retrieved context the evaluation was based on.

    Returns:
        The same `output` object, updated in place with "llm_answer" and "context".
    """
    # Always overwrite: the real answer is authoritative over anything the
    # evaluator may have echoed back in its JSON.
    output['llm_answer'] = llm_answer
    output['context'] = context
    return output

# Stage 1: turn the raw question into {context, question}. The retriever's
# documents are flattened to text by format_docs; the question passes through.
retrieval_stage = RunnableParallel(
    context=retriever | format_docs,
    question=RunnablePassthrough(),
)

# Stage 2: generate the answer while carrying the question and context forward.
answer_stage = RunnableParallel(
    answer=qa_prompt | llm | retrieve_answer,
    question=itemgetter("question"),
    context=itemgetter("context"),
)

# Stage 3: grade the answer against the context, again forwarding the inputs.
evaluation_stage = RunnableParallel(
    eval_result=qa_eval_prompt_with_context | llm_selfeval | json_parser,
    context=itemgetter("context"),
    question=itemgetter("question"),
    answer=itemgetter("answer"),
)

# Full pipeline: retrieve -> answer -> evaluate -> merge answer/context into the verdict.
rag_chain = (
    retrieval_stage
    | answer_stage
    | evaluation_stage
    | (lambda x: process_eval_output(x['eval_result'], x['answer'], x['context']))
)

# Run the full RAG chain on one question and print the graded result.
user_prompt = "what did the dyson engineers discovery?"
json_answer = rag_chain.invoke(user_prompt)

# 'llm_answer' is read with .get(): the chain's post-processing does not guarantee
# the key for every grade, and a plain lookup would raise KeyError instead of
# showing the evaluation.
print(
    f"Prompt: {user_prompt}\n\n"
    f"Context: {json_answer['context']}\n\n"
    f"Answer: {json_answer.get('llm_answer', '')}\n\n"
    f"Evaluation: {json_answer['grade']}\n\n"
    f"Answer description: {json_answer['description']}"
)


Prompt: what did the dyson engineers discovery?

Context: UK  0870 600 2701 
Open 7 days a week, 8am – 8pm
ROI  (01)475 7109
Dyson Ltd  Tetbury Hill  Malmesbury
Wiltshire  SN16 ORP
www.dyson .com 
JN.6596 30.01.02 PN.50351-01-02

UK  0870 600 2701 
Open 7 days a week, 8am – 8pm
ROI  (01)475 7109
Dyson Ltd  Tetbury Hill  Malmesbury
Wiltshire  SN16 ORP
www.dyson .com 
JN.6596 30.01.02 PN.50351-01-02

This user guide also contains tips on effective
washing and important safety notes.Please read this user guide carefully before use.User guide
TM
www.dyson.com

This user guide also contains tips on effective
washing and important safety notes.Please read this user guide carefully before use.User guide
TM
www.dyson.com

The only 2-drum wash action.Dyson ContrarotatorTM
Conventional washing machines may seem convenient, 
but their poor performance lets you down. So James Dysonasked his engineers to experiment with every imaginable wayof washing to design a better washing machine.
Along the wa