## Setup and Import Libraries

In [1]:

import os
from langchain_community.document_loaders import WikipediaLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma, FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts.chat import SystemMessagePromptTemplate, ChatPromptTemplate
from langchain.chains.hyde.base import HypotheticalDocumentEmbedder
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chat_models import init_chat_model
from dotenv import load_dotenv

In [2]:
# Load key/value pairs from a .env file into the process environment.
# override=False is the library default: variables already set in the
# environment win over values from the file.
load_dotenv(override=False)

True

In [3]:
# Fail fast with a readable message when an API key is missing.
# The previous `os.environ[k] = os.getenv(k)` form raised an opaque
# "TypeError: str expected, not NoneType" for an unset variable and was a
# no-op when the variable was already set.
REQUIRED_ENV_VARS = ("GROQ_API_KEY", "HUGGINGFACE_API_KEY")

# Empty strings are treated as missing: an empty key cannot authenticate.
missing_env_vars = [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]
if missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Add them to your .env file or export them before running this notebook."
    )

In [5]:
# Embedding model for the Wikipedia vector store.
# `model_name` is the keyword the second embeddings instance in this notebook
# is built with; use it here too so both are constructed the same way.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Chat model shared by the HyDE generation step and the RAG answer step.
# The "groq:" prefix selects the provider; the Groq client reads GROQ_API_KEY
# from the environment.
llm = init_chat_model(model="groq:gemma2-9b-it")

## Document Loading and Splitting

In [7]:
# Search Wikipedia for the topic and keep at most five matching pages.
WIKI_QUERY = "Steve Jobs"
WIKI_MAX_DOCS = 5

loader = WikipediaLoader(query=WIKI_QUERY, load_max_docs=WIKI_MAX_DOCS)
documents = loader.load()

In [8]:
# Small chunks with a generous overlap so a fact that straddles a chunk
# boundary still appears whole in at least one chunk.
chunk_size = 300
chunk_overlap = 100

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)

chunks = text_splitter.split_documents(documents=documents)

# Show a count and a short preview instead of dumping every chunk (each one
# repeats the full page summary in its metadata, which floods the output).
print(f"{len(chunks)} chunks")
chunks[:3]

[Document(metadata={'title': 'Steve Jobs', 'summary': 'Steven Paul Jobs (February 24, 1955 – October 5, 2011) was an American businessman, inventor, and investor best known for co-founding the technology company Apple Inc. Jobs was also the founder of NeXT and chairman and majority shareholder of Pixar. He was a pioneer of the personal computer revolution of the 1970s and 1980s, along with his early business partner and fellow Apple co-founder Steve Wozniak.\nJobs was born in San Francisco in 1955 and adopted shortly afterwards. He attended Reed College in 1972 before withdrawing that same year. In 1974, he traveled through India, seeking enlightenment before later studying Zen Buddhism. He and Wozniak co-founded Apple in 1976 to further develop and sell Wozniak\'s Apple I personal computer. Together, the duo gained fame and wealth a year later with production and sale of the Apple II, one of the first highly successful mass-produced microcomputers. \nJobs saw the commercial potential 

## Build Vector Store and Retriever

In [None]:
# Deterministic ids make this cell idempotent. The collection is persisted on
# disk, so without explicit ids every re-run would add the same chunks again
# under freshly generated ids and retrieval would start returning duplicates.
# NOTE: if a later run produces fewer chunks, entries with higher indexes from
# the earlier run remain in the store; delete the directory to start clean.
chunk_ids = [f"steve-jobs-{index}" for index in range(len(chunks))]

vector_store = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory="output/steve_jobs_for_hyde.db",
)

# Retriever that returns the five nearest chunks for each query.
base_retriever = vector_store.as_retriever(search_kwargs={"k": 5})

## Prompt for Generating HyDE

In [10]:
def get_hyde_doc(query, verbose=True):
    """Generate a hypothetical answer document for ``query`` with the chat model.

    The returned text is meant to be embedded and used as the search key in
    place of the raw question (the HyDE technique).

    Args:
        query: The user's question or topic; must not be blank.
        verbose: When True (the default, matching the original behaviour),
            print the formatted prompt messages before calling the model.
            Pass False when the function is used as a silent retrieval helper.

    Returns:
        The ``content`` of the model's response.

    Raises:
        ValueError: If ``query`` is empty or only whitespace.
    """
    if not str(query).strip():
        raise ValueError("query must be a non-empty string")

    template = """Imagine you are an expert writing a detailed explanation on the topic: '{query}'
    create a hypothetical answer for the topic"""

    # The whole instruction is sent as a single system message.
    system_message_prompt = SystemMessagePromptTemplate.from_template(template=template)
    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt])
    messages = chat_prompt.format_prompt(query=query).to_messages()

    if verbose:
        print(messages)

    # Uses the module-level `llm` chat model.
    response = llm.invoke(messages)
    return response.content

In [11]:
# Example question; the generated text is the hypothetical document, not a
# verified answer.
query = 'When was Steve Jobs fired from Apple?'
hypothetical_doc = get_hyde_doc(query=query)

print(hypothetical_doc)

[SystemMessage(content="Imagine you are an expert writing a detailed explanation on the topic: 'When was Steve Jobs fired from Apple?'\n    create a hypothetical answer for the topic", additional_kwargs={}, response_metadata={})]
## The Fall and Rebirth: Steve Jobs' Tumultuous Departure from Apple

Steve Jobs' relationship with Apple was a complex and tumultuous one, marked by both brilliance and conflict. While he is rightfully celebrated as the visionary behind Apple's success, his departure from the company in 1985 remains a significant chapter in the company's history.  

**The Events Leading to the Firing:**

Jobs' departure wasn't a sudden event, but rather the culmination of several factors:

* **Clashing Personalities:** Jobs, known for his demanding and often abrasive management style, clashed frequently with John Sculley, Apple's CEO, whom Jobs himself had hired in 1983. 
* **The Macintosh's Performance:** While the Macintosh was revolutionary and groundbreaking, its initial 

In [None]:
# HyDE retrieval: search with the generated hypothetical answer rather than
# with the raw question.
hypothetical_doc = get_hyde_doc(query)
matched_doc = base_retriever.invoke(hypothetical_doc)

print(matched_doc)

[SystemMessage(content="Imagine you are an expert writing a detailed explanation on the topic: 'When was Steve Jobs fired from Apple?'\n    create a hypothetical answer for the topic", additional_kwargs={}, response_metadata={})]
[Document(metadata={'summary': 'Steven Paul Jobs (February 24, 1955 – October 5, 2011) was an American businessman, inventor, and investor best known for co-founding the technology company Apple Inc. Jobs was also the founder of NeXT and chairman and majority shareholder of Pixar. He was a pioneer of the personal computer revolution of the 1970s and 1980s, along with his early business partner and fellow Apple co-founder Steve Wozniak.\nJobs was born in San Francisco in 1955 and adopted shortly afterwards. He attended Reed College in 1972 before withdrawing that same year. In 1974, he traveled through India, seeking enlightenment before later studying Zen Buddhism. He and Wozniak co-founded Apple in 1976 to further develop and sell Wozniak\'s Apple I personal 

## Using LangChain Hypothetical Document Embedder

### Document Loading and Text Splitting

In [None]:
# Plain-text dataset, resolved relative to the notebook's working directory.
DATASET_PATH = "langchain_crewai_dataset.txt"

loader = TextLoader(file_path=DATASET_PATH)
documents = loader.load()

In [15]:
# Same chunk size as the Wikipedia section, with a smaller overlap.
chunk_size, chunk_overlap = 300, 50

splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
chunks = splitter.split_documents(documents)

### Base Embedding

In [16]:
# Underlying embedding model that the HyDE embedder wraps.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
base_embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

### HyDE Embedding

In [17]:
# Build the HyDE embedder from the chat model and the base embeddings;
# prompt_key picks one of the prompts that ship with the embedder.
hyde_settings = {
    "llm": llm,
    "base_embeddings": base_embeddings,
    "prompt_key": "web_search",
}
hyde_embedding = HypotheticalDocumentEmbedder.from_llm(**hyde_settings)

### Vector Store

In [18]:
# Deterministic ids keep this cell idempotent: the collection is persisted on
# disk, so without explicit ids each re-run would add the same chunks again
# under freshly generated ids and searches would return duplicated chunks.
chunk_ids = [f"langchain-crewai-{index}" for index in range(len(chunks))]

# The HyDE embedder is passed as the store's embedding function, so
# similarity_search() on this store embeds queries through it.
vector_store = Chroma.from_documents(
    documents=chunks,
    embedding=hyde_embedding,
    ids=chunk_ids,
    persist_directory="output/langchain",
)

### Prompt and RAG Chain

In [19]:
# Answering prompt: {context} receives the retrieved chunks and {input} the
# user's question.
RAG_TEMPLATE = """
Use the context below to answer the question.

Context:
{context}

Question: {input}
"""

rag_prompt = PromptTemplate.from_template(RAG_TEMPLATE)

In [20]:
# "Stuff" chain: the retrieved documents are inserted into the prompt's
# {context} slot and the filled prompt is sent to the chat model.
rag_chain = create_stuff_documents_chain(llm, rag_prompt)

### RAG Pipeline

In [21]:
def hyde_rag_pipeline(query, k=4, verbose=True):
    """Retrieve chunks for ``query`` and answer it with the RAG chain.

    Reads the module-level ``vector_store`` and ``rag_chain`` at call time.

    Args:
        query: The user's question.
        k: Number of chunks to retrieve (default 4, the previously
            hard-coded value).
        verbose: When True (the default, matching the original behaviour),
            print the retrieved documents before answering.

    Returns:
        Whatever ``rag_chain.invoke`` returns for the question and the
        retrieved context.
    """
    matched_documents = vector_store.similarity_search(query, k=k)
    if verbose:
        print(matched_documents)

    response = rag_chain.invoke({
        "input": query,
        "context": matched_documents,
    })

    return response

### Run Query

In [22]:
# End-to-end run: retrieve from the HyDE-backed store, then answer.
query = "What memory modules does LangChain provide?"
answer = hyde_rag_pipeline(query=query)

print("✅ Final Answer:\n", answer)

[Document(metadata={'source': 'langchain_crewai_dataset.txt'}, page_content='LangChain offers memory modules like ConversationBufferMemory and ConversationSummaryMemory. These allow the LLM to maintain awareness of previous conversation turns or summarize long interactions to fit within token limits. (v10)'), Document(metadata={'source': 'langchain_crewai_dataset.txt'}, page_content='LangChain offers memory modules like ConversationBufferMemory and ConversationSummaryMemory. These allow the LLM to maintain awareness of previous conversation turns or summarize long interactions to fit within token limits. (v2)'), Document(metadata={'source': 'langchain_crewai_dataset.txt'}, page_content='LangChain offers memory modules like ConversationBufferMemory and ConversationSummaryMemory. These allow the LLM to maintain awareness of previous conversation turns or summarize long interactions to fit within token limits. (v5)'), Document(metadata={'source': 'langchain_crewai_dataset.txt'}, page_cont

### Using a Custom Prompt Instead of a Prompt Key

In [23]:
# Supply our own HyDE prompt instead of selecting a built-in one by key.
# {query} is the single input variable the embedder fills with the user query.
CUSTOM_HYDE_TEMPLATE = "Generate a concise hypothetical answer for this topic: {query}"
custom_prompt = PromptTemplate.from_template(CUSTOM_HYDE_TEMPLATE)

hyde_settings = {
    "llm": llm,
    "base_embeddings": base_embeddings,
    "custom_prompt": custom_prompt,
}
hyde_embedding = HypotheticalDocumentEmbedder.from_llm(**hyde_settings)

In [24]:
# Deterministic ids keep this cell idempotent: the collection is persisted on
# disk, so without explicit ids each re-run would add the same chunks again
# under freshly generated ids and searches would return duplicated chunks.
chunk_ids = [f"langchain-crewai-custom-{index}" for index in range(len(chunks))]

# Separate persist directory so this store does not mix with the one built
# for the prompt-key variant.
vector_store = Chroma.from_documents(
    documents=chunks,
    embedding=hyde_embedding,
    ids=chunk_ids,
    persist_directory="output/langchain_custom_prompt",
)

In [25]:
# Answering prompt for the custom-prompt variant: {context} receives the
# retrieved chunks and {input} the user's question.
RAG_TEMPLATE = """
Use the context below to answer the question.

Context:
{context}

Question: {input}
"""

rag_prompt = PromptTemplate.from_template(template=RAG_TEMPLATE)

In [26]:
# Rebuild the stuff-documents chain from the chat model and the answering
# prompt defined for this variant.
rag_chain = create_stuff_documents_chain(llm, rag_prompt)

In [27]:
def hyde_rag_pipeline(query, k=4, verbose=True):
    """Retrieve chunks for ``query`` and answer it with the RAG chain.

    Reads the module-level ``vector_store`` and ``rag_chain`` at call time, so
    it always uses whichever store and chain were assigned most recently.

    Args:
        query: The user's question.
        k: Number of chunks to retrieve (default 4, the previously
            hard-coded value).
        verbose: When True (the default, matching the original behaviour),
            print the retrieved documents before answering.

    Returns:
        Whatever ``rag_chain.invoke`` returns for the question and the
        retrieved context.
    """
    matched_documents = vector_store.similarity_search(query, k=k)
    if verbose:
        print(matched_documents)

    response = rag_chain.invoke({
        "input": query,
        "context": matched_documents,
    })

    return response

In [28]:
# Same question as before, now answered through the custom-prompt HyDE store.
query = "What memory modules does LangChain provide?"
answer = hyde_rag_pipeline(query=query)

print("✅ Final Answer:\n", answer)

[Document(metadata={'source': 'langchain_crewai_dataset.txt'}, page_content='LangChain offers memory modules like ConversationBufferMemory and ConversationSummaryMemory. These allow the LLM to maintain awareness of previous conversation turns or summarize long interactions to fit within token limits. (v10)'), Document(metadata={'source': 'langchain_crewai_dataset.txt'}, page_content='LangChain offers memory modules like ConversationBufferMemory and ConversationSummaryMemory. These allow the LLM to maintain awareness of previous conversation turns or summarize long interactions to fit within token limits. (v7)'), Document(metadata={'source': 'langchain_crewai_dataset.txt'}, page_content='LangChain offers memory modules like ConversationBufferMemory and ConversationSummaryMemory. These allow the LLM to maintain awareness of previous conversation turns or summarize long interactions to fit within token limits. (v2)'), Document(metadata={'source': 'langchain_crewai_dataset.txt'}, page_cont