In [57]:
!pip install -q langchain_nvidia_ai_endpoints langchain-community langchain-text-splitters faiss-cpu

In [58]:
from google.colab import userdata
import os
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import chain

In [59]:
os.environ['NVIDIA_API_KEY'] = userdata.get('NVIDIA_API_KEY')

In [60]:
loader = WebBaseLoader("https://docs.smith.langchain.com/")
docs = loader.load()

In [61]:
docs

[Document(metadata={'source': 'https://docs.smith.langchain.com/', 'title': 'Get started with LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content="\n\n\n\n\nGet started with LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith\n\n\n\n\n\n\n\n\nSkip to main contentOur Building Ambient Agents with LangGraph course is now available on LangChain Academy!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformationsRegions FAQsdk_referenceGet StartedOn this pageGet started with LangSmith\nLangSmith is a platform for building production-grade LLM applications.\nIt allows you to closely monitor and evaluate your application, so you can ship quickly and w

In [62]:
embeddings = NVIDIAEmbeddings(model="nvidia/nv-embedqa-e5-v5")

In [63]:
embeddings

NVIDIAEmbeddings(base_url='https://integrate.api.nvidia.com/v1', model='nvidia/nv-embedqa-e5-v5', truncate='NONE', dimensions=None, max_batch_size=50)

In [64]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
documents = text_splitter.split_documents(docs)
vector = FAISS.from_documents(documents, embeddings)
retriever = vector.as_retriever()

In [65]:
retriever

VectorStoreRetriever(tags=['FAISS', 'NVIDIAEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7b00abbfaa90>, search_kwargs={})

In [66]:
model = ChatNVIDIA(model="openai/gpt-oss-20b")

In [67]:
model

ChatNVIDIA(base_url='https://integrate.api.nvidia.com/v1', model='openai/gpt-oss-20b')

In [68]:
model.invoke("tell me more about you?")

AIMessage(content='I’m ChatGPT, a large‑language‑model (LLM) built by OpenAI. Here’s a quick rundown of what that means and what I can do for you:\n\n| What I am | How I work | What I can do | What I can’t do |\n|-----------|------------|---------------|-----------------|\n| **AI language model** | Trained on a diverse mix of books, websites, code, and other text up to **June\u202f2024**. | • Answer questions, explain concepts, or provide background info.<br>• Help draft emails, essays, poems, or code snippets.<br>• Summarize articles or documents.<br>• Offer study or research assistance.<br>• Simulate conversation, role‑play, or brainstorming. | • I don’t have personal experiences or feelings.<br>• I can’t browse the web or fetch real‑time data.<br>• I can’t store or recall personal data beyond the current session. |\n| **No consciousness** | I generate responses by predicting the next word in a sequence, based on patterns learned during training. | • I can adapt my tone and style to 

In [69]:
hyde_template = """Even if you do not know the full answer, generate a one-paragraph hypothetical answer to the below question:

{question}"""

In [70]:
hyde_prompt = ChatPromptTemplate.from_template(hyde_template)

In [71]:
hyde_query_transformer = hyde_prompt | model | StrOutputParser()

In [72]:
@chain
def hyde_retriever(question):
    hypothetical_document = hyde_query_transformer.invoke({"question": question})
    return retriever.invoke(hypothetical_document)


In [73]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

In [74]:
prompt = ChatPromptTemplate.from_template(template)

In [75]:
answer_chain = prompt | model | StrOutputParser()

In [76]:
@chain
def final_chain(question):
    documents = hyde_retriever.invoke(question)

    # FIXED: Check if any document has non-empty page_content
    if not documents or not any(getattr(doc, "page_content", "").strip() for doc in documents):
        raise ValueError("No valid documents retrieved for the question.")

    for s in answer_chain.stream({"question": question, "context": documents}):
        yield s

In [77]:
print("Retrieved documents:", documents)

Retrieved documents: [Document(metadata={'source': 'https://docs.smith.langchain.com/', 'title': 'Get started with LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content='Get started with LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith'), Document(metadata={'source': 'https://docs.smith.langchain.com/', 'title': 'Get started with LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content='Skip to main contentOur Building Ambient Agents with LangGraph course is now available on LangChain Academy!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDa

In [82]:
for s in final_chain.stream("how can langsmith help with testing"):
    print(s, end="")

LangSmith helps with testing by giving you a full set of observability and evaluation tools:

* **Tracing & dashboards** – Add tracing to your app and create dashboards that show key metrics such as RPS, error rates, and costs.  
* **Evals** – Use LangSmith’s evaluation framework to build and run high‑quality evaluation datasets and metrics. It lets you score your application’s performance, run tests on production traffic, and gather human feedback on the data.  
* **Prompt engineering** – Iterate on prompts with automatic version control and collaboration features, making it easier to test and refine prompt logic.  

Together, these features let you monitor, evaluate, and improve your LLM application throughout development and production.

In [79]:
import gradio as gr

In [80]:
def ask_langsmith(question):
    return "".join(final_chain.stream(question))

with gr.Blocks() as demo:
    gr.Markdown("## 🧠 GPT-OSS RAG")

    with gr.Row():
        inp = gr.Textbox(placeholder="Ask your question here...", label="Your Question")
    out = gr.Textbox(label="Answer", lines=10)

    btn = gr.Button("Submit")
    btn.click(fn=ask_langsmith, inputs=inp, outputs=out)

In [81]:
demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1e9251b4f8bd4953ea.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


