In [1]:
import getpass
import os
from langchain_openai import ChatOpenAI
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
import bs4

os.environ["OPENAI_API_KEY"] = getpass.getpass()
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [49]:
llm = ChatOpenAI(model="gpt-4o-mini")
rag_prompt = hub.pull("rlm/rag-prompt")

In [50]:
# task1
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

loader = WebBaseLoader(
    web_paths=urls,
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)

docs = loader.load()

In [51]:
#task2

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(documents=docs)

In [52]:
#task3
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings(
    model = "text-embedding-3-small"
))
retriever = vectorstore.as_retriever(
    type = "similarity",
    kwargs={'k':6}
)

In [53]:
#task4

user_query = 'agent memory'
retrieved_chunk = retriever.invoke(user_query)

In [54]:
#task5
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate

def evaluate_relevance(query, retrieved_chunk):
    parser = JsonOutputParser()

    evaluating_prompt = """
    You are a system designed to evaluate the relevance of retrieved information with respect to a user query.
    Based on the provided user query and the retrieved chunk, determine if the retrieved chunk is relevant to the query.
    Respond in the JSON format: {{"relevance": "yes"}} or {{"relevance": "no"}}.

    Input:
        • User Query: {user_query}
        • Retrieved Chunk: {retrieved_chunk}

    Output:
    """

    prompt = PromptTemplate(
        template=evaluating_prompt,
        input_variables=["user_query", "retrieved_chunk"]
    )

    chain = prompt | llm | parser

    return chain.invoke({"user_query": query, "retrieved_chunk": retrieved_chunk})


evaluate_relevance(user_query, retriever.invoke(user_query))

{'relevance': 'yes'}

In [69]:
#task6 task7

yes_query = "what is few-shot?"
print(evaluate_relevance(yes_query, retriever.invoke(yes_query)))

no_query = "my name is jacob"
print(evaluate_relevance(no_query, retriever.invoke(no_query)))

{'relevance': 'yes'}
{'relevance': 'no'}


In [71]:
#task8
from langchain_core.runnables import RunnableMap

task4_query = 'agent memory'

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

def chat(query, retrieved_chunk):
    chunk_to_use = retrieved_chunk
    if evaluate_relevance(query, retrieved_chunk)['relevance'] == 'yes':
        chunk_to_use = retriever.invoke(task4_query)

    rag_chain = (
        RunnableMap({
            "context": lambda docs: format_docs(chunk_to_use),
            "question": RunnablePassthrough()       
        })
        | rag_prompt
        | llm
        | StrOutputParser()
    )

    return rag_chain.invoke({"context": chunk_to_use, "question": query})


question = "what is few-shot?"
chat(question, retriever.invoke(question))

'The provided context does not define "few-shot." Therefore, I don\'t know the answer.'

In [72]:
#task9

def evaluate_hallucination(generated_answer):
    parser = JsonOutputParser()

    evaluate_hallucination_prompt = """
        You are an AI evaluator tasked with identifying hallucinations in generated answers. 
        A hallucination occurs when the generated answer includes information 
        that is factually incorrect, fabricated, or unsupported by the given context.

        Your task:
        1. Analyze the generated answer based on the provided context.
        2. Determine if the generated answer includes hallucinated content.

        Respond in JSON format:
        - If hallucination is detected: {{"hallucination": "yes"}}
        - If no hallucination is detected: {{"hallucination": "no"}}

        Input:
        - Generated Answer: {generated_answer}

    Output:
    """

    prompt = PromptTemplate(
        template=evaluate_hallucination_prompt,
        input_variables=["generated_answer"]
    )

    chain = prompt | llm | parser

    return chain.invoke({"generated_answer": generated_answer})

evaluate_hallucination(chat(question, retriever.invoke(question)))

{'hallucination': 'no'}

In [77]:
#task10

def task10_chat(query):
    answer = chat(query, retriever.invoke(query))

    if evaluate_hallucination(answer)['hallucination'] == 'yes':
        answer = chat(query)
    
    print(answer)


task10_chat("zero-shot")
        

The term "zero-shot" refers to the ability of a model, such as a large language model (LLM), to perform tasks without specific training on those tasks. It relies on the model's general understanding and reasoning capabilities, allowing it to generate relevant outputs based on its extensive training data. This concept is significant in the context of LLM-powered autonomous agents, where they can tackle various problems without prior explicit examples.
