In [59]:
from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings

In [60]:
# Configure a Wikipedia loader for the search term "openai"; load_max_docs
# limits how many articles are fetched.
doc = WikipediaLoader(
    query="openai",
    load_max_docs=10,
)

# load() performs the fetch; the result is indexed below as documents[0].
documents = doc.load()

In [61]:
# Sanity-check the first loaded document.
# Jupyter renders only the final expression of a cell, so the text preview
# is printed explicitly; as a bare expression it would be evaluated and
# silently discarded.
print(documents[0].page_content[:300])

# Final expression: rendered as the cell output.
documents[0].metadata

{'title': 'OpenAI',
 'summary': 'OpenAI is an American artificial intelligence (AI) organization headquartered in San Francisco, California. It aims to develop "safe and beneficial" artificial general intelligence (AGI), which it defines as "highly autonomous systems that outperform humans at most economically valuable work". As a leading organization in the ongoing AI boom, OpenAI is known for the GPT family of large language models, the DALL-E series of text-to-image models, and a text-to-video model named Sora. Its release of ChatGPT in November 2022 has been credited with catalyzing widespread interest in generative AI.\nThe organization has a complex corporate structure. As of October 2025, it is led by the non-profit OpenAI Foundation, founded in 2015 and registered in Delaware, which holds a 26% equity stake in OpenAI Group PBC, a for-profit public benefit corporation which commercializes its products. Microsoft invested over $13 billion into OpenAI, and provides Azure cloud com

In [62]:
# Split the loaded articles into chunks of at most ~500 characters with a
# 30-character overlap between neighbouring chunks.
#
# Separators are tried in list order, so they go from coarsest (paragraph
# break) to finest. Newlines must be written with a backslash: "/n" is a
# literal slash followed by "n" and never matches a line break. The
# apostrophe is kept from the original separator set, but only as the
# last-resort split point.
spliter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", ",", "'"],
    chunk_size=500,
    chunk_overlap=30,
)
split = spliter.split_documents(documents)

In [84]:
from langchain_openai import OpenAIEmbeddings
import os

from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so the
# os.getenv() lookups below work on a fresh kernel. Variables already set in
# the environment are left untouched, and a missing .env file is not an error.
load_dotenv()

# Base embedding model; the key and endpoint come from the environment
# rather than being hardcoded in the notebook.
embed = OpenAIEmbeddings(
    api_key=os.getenv("api_key"),
    model="text-embedding-3-small",
    base_url=os.getenv("endpoints"),
)

In [85]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

# load_dotenv was imported but never called, so a fresh kernel had no way to
# pick up "api_key" / "endpoints" from the .env file. Calling it is safe to
# repeat: variables already present in the environment are not overwritten.
load_dotenv()

# Chat model used both for HyDE's hypothetical-document generation and for
# answering over the retrieved context.
llm = ChatOpenAI(
    model="gpt-5-nano-2025-08-07",
    api_key=os.getenv("api_key"),
    openai_api_base=os.getenv("endpoints"),
)

In [86]:
# Display the chat model's repr as the cell output to inspect its configuration.
llm

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x00000267D8553F00>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x00000267DB150160>, root_client=<openai.OpenAI object at 0x00000267DB200E60>, root_async_client=<openai.AsyncOpenAI object at 0x00000267DAD54230>, model_name='gpt-5-nano-2025-08-07', model_kwargs={}, openai_api_key=SecretStr('**********'), openai_api_base='https://api.euron.one/api/v1/euri')

In [87]:
from langchain.chains.hyde.base import HypotheticalDocumentEmbedder

# Wrap the base embedding model in a HyDE embedder driven by the chat model,
# using the built-in "web_search" prompt.
hyd_embedding_function = HypotheticalDocumentEmbedder.from_llm(
    llm=llm,
    base_embeddings=embed,
    prompt_key="web_search",
)

In [88]:
from langchain.vectorstores import Chroma

# Embed the chunks with the HyDE embedder and index them in a Chroma store
# persisted under "output/langchain".
# NOTE(review): re-running this cell against the same persist_directory
# presumably adds the same chunks again — confirm, and clear the directory
# between runs if duplicates appear in search results.
vector = Chroma.from_documents(
    documents=split,
    embedding=hyd_embedding_function,
    persist_directory="output/langchain",
)

In [89]:
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain

# Prompt with two placeholders: {context} for the retrieved documents and
# {input} for the user's question.
template = PromptTemplate.from_template(
    """use the context below to  answer the question.
    context:{context}
    input:{input}"""
)

# Chain that fills the prompt with the supplied documents and sends it to the LLM.
rag_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=template,
)

In [90]:
def hyd_rag_pipeline(query, k=4):
    """Answer ``query`` using documents retrieved from the vector store.

    Relies on the notebook-level ``vector`` (store to search) and
    ``rag_chain`` (chain that answers over the retrieved documents).

    Args:
        query: The question to answer; also used as the similarity-search text.
        k: Number of documents to retrieve. Defaults to 4, the value that was
            previously hard-coded.

    Returns:
        Whatever ``rag_chain.invoke`` returns for the given input and context.
    """
    matched_docs = vector.similarity_search(query, k=k)
    return rag_chain.invoke({
        "input": query,
        "context": matched_docs,
    })


In [None]:
# Run the full retrieve-then-answer pipeline on a sample question.
query = "Why is OpenAI the most important company in the world?"
answer = hyd_rag_pipeline(query)

In [92]:
# Show the pipeline's response as the cell output.
answer

'From the provided context, OpenAI is considered highly important because:\n\n- It is described as a leading organization in the ongoing AI boom.\n- It aims to develop safe and beneficial artificial general intelligence (AGI).\n- It has produced influential technologies, notably the GPT family of large language models, the DALL-E series of text-to-image models, and a text-to-video model named Sora.\n\nNote: The context calls it a leading organization, but it does not explicitly claim it is the single most important company in the world.'