In [8]:
from langchain import hub
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_ollama import OllamaEmbeddings
from typing import Optional
import os
from dotenv import load_dotenv
load_dotenv()

True

# Routing

In [2]:
from typing import Literal
from pydantic import BaseModel, Field

class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""

    datasource: Literal["python_docs", "js_docs", "golang_docs"] = Field(
        ...,
        description="Given a user question choose which datasource would be most relevant for answering their question",
    )

In [17]:
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
structured_llm = llm.with_structured_output(RouteQuery)

In [18]:
# Prompt 
system = """You are an expert at routing a user question to the appropriate data source.

Based on the programming language the question is referring to, route it to the relevant data source."""


prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

# Define router 
router = prompt | structured_llm

In [19]:
question = """Why doesn't the following code work:

from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(["human", "speak in {language}"])
prompt.invoke("french")
"""

result = router.invoke({"question": question})

In [21]:
def choose_route(result):
    match result.datasource.lower():
        case "python_docs":
            return "chain for python_docs"
        case "js_docs":
            return "chain for js_docs"
        case _:
            return "chain for golang_docs"

In [22]:
from langchain_core.runnables import RunnableLambda

full_chain = router | RunnableLambda(choose_route)

In [23]:
full_chain.invoke({"question": question})

'chain for python_docs'

## Semantic routing

In [4]:
math_template = """ You are a smart math teacher and explain every concept in very detail. Please answer the question: {question} and state what kind of teacher you are: """
physics_template = """ You are a smart physics teacher and explain every concept by examples in the real world. Please answer the question: {question} and state what kind of teacher you are: """

from langchain.utils.math import cosine_similarity
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

def logical_router(query):
    prompt_embedding = embeddings.embed_query(query["question"])
    templates = [math_template, physics_template]
    template_embeddings = embeddings.embed_documents(templates)
    teacher_idx = cosine_similarity([prompt_embedding], template_embeddings).argmax()
    print("Using teacher: ", "Physics" if teacher_idx else "Math", "Idx: ", teacher_idx)
    return PromptTemplate.from_template(templates[teacher_idx])

routed_chain = {"question": RunnablePassthrough()} | RunnableLambda(logical_router) | ChatOpenAI() | StrOutputParser()
routed_chain.invoke("What is gravity?")

Using teacher:  Physics Idx:  1


'I am a hands-on physics teacher who loves using real-world examples to explain concepts. Gravity is the force that pulls objects towards each other. This force is responsible for keeping us grounded on the Earth and for the movement of planets around the sun. \n\nImagine you are standing on a rooftop and you drop a ball. The ball falls to the ground because of gravity pulling it towards the Earth. Gravity is what makes us feel heavier when we are on a planet with a larger mass, like Jupiter, and lighter when we are on a planet with a smaller mass, like Mars. Gravity is a fundamental force in the universe that affects everything around us.'

In [6]:
routed_chain.invoke("What is the mathematical background of PI?")

Using teacher:  Math Idx:  0


"I am a secondary math teacher with a passion for exploring the deep mathematical concepts behind the numbers we use every day. In terms of the mathematical background of pi, let's delve into this fascinating topic.\n\nPi, denoted by the Greek letter π, is a mathematical constant that represents the ratio of a circle's circumference to its diameter. It is an irrational number, meaning that it cannot be expressed as a simple fraction and its decimal representation goes on infinitely without repeating.\n\nHistorically, the concept of pi has been studied and used in mathematics for thousands of years. The ancient Egyptians and Babylonians both approximated pi to be around 3.125, while the ancient Greek mathematician Archimedes used inscribed and circumscribed polygons to approximate pi as 22/7.\n\nIn modern mathematics, pi is commonly approximated as 3.14159, but its exact value is infinitely complex and cannot be represented precisely in decimal form. It is a transcendental number, meani

# Query construction

In [None]:
from langchain_community.document_loaders import YoutubeLoader

from yt_dlp import YoutubeDL
from youtube_transcript_api import YouTubeTranscriptApi
import json


def get_video_details(video_url):
    """Extract video details using yt-dlp."""
    with YoutubeDL() as ydl:
        info = ydl.extract_info(video_url, download=False)
    return info

def get_video_transcript(video_id, lang="en"):
    """Fetch transcript using YouTube Transcript API."""
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang])
        return transcript  # Returns a list of dicts: [{'text': ..., 'start': ..., 'duration': ...}, ...]
    except Exception as e:
        return str(e)

# Extract video details
# video_info = get_video_details(VIDEO_URL)

# # Extract transcript
# video_id = video_info.get("id")  # Get video ID
# transcript = get_video_transcript(video_id)

# # Print results
# print(json.dumps(video_info, indent=2))  # Video details
# print(json.dumps(transcript, indent=2))  # Transcript


docs = get_video_details("https://www.youtube.com/watch?v=pPStdjuYzSI")

In [None]:
print(json.dumps(docs, indent=2))  # Video details

In [10]:
import datetime
from typing import Literal, Optional, Tuple
from langchain_core.pydantic_v1 import BaseModel, Field

class TutorialSearch(BaseModel):
    """Search over a database of tutorial videos about a software library."""

    content_search: str = Field(
        ...,
        description="Similarity search query applied to video transcripts.",
    )
    title_search: str = Field(
        ...,
        description=(
            "Alternate version of the content search query to apply to video titles. "
            "Should be succinct and only include key words that could be in a video "
            "title."
        ),
    )
    min_view_count: Optional[int] = Field(
        None,
        description="Minimum view count filter, inclusive. Only use if explicitly specified.",
    )
    max_view_count: Optional[int] = Field(
        None,
        description="Maximum view count filter, exclusive. Only use if explicitly specified.",
    )
    earliest_publish_date: Optional[datetime.date] = Field(
        None,
        description="Earliest publish date filter, inclusive. Only use if explicitly specified.",
    )
    latest_publish_date: Optional[datetime.date] = Field(
        None,
        description="Latest publish date filter, exclusive. Only use if explicitly specified.",
    )
    min_length_sec: Optional[int] = Field(
        None,
        description="Minimum video length in seconds, inclusive. Only use if explicitly specified.",
    )
    max_length_sec: Optional[int] = Field(
        None,
        description="Maximum video length in seconds, exclusive. Only use if explicitly specified.",
    )

    def pretty_print(self) -> None:
        for field in self.__fields__:
            if getattr(self, field) is not None and getattr(self, field) != getattr(
                self.__fields__[field], "default", None
            ):
                print(f"{field}: {getattr(self, field)}")


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


In [12]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

system = """You are an expert at converting user questions into database queries. \
You have access to a database of tutorial videos about a software library for building LLM-powered applications. \
Given a question, return a database query optimized to retrieve the most relevant results.

If there are acronyms or words you are not familiar with, do not try to rephrase them."""
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
structured_llm = llm.with_structured_output(TutorialSearch)
query_analyzer = prompt | structured_llm
query_analyzer.invoke({"question": "rag from scratch"}).pretty_print()



content_search: rag from scratch
title_search: rag
min_length_sec: 0
max_length_sec: 600


In [16]:
query_analyzer.invoke({"question": "Videos from year 2025 about modern LLM architectures that are trending and watched many times"}).pretty_print()

content_search: modern LLM architectures trending
title_search: 2025
min_view_count: 10000
earliest_publish_date: 2025-01-01
latest_publish_date: 2026-01-01
