In [5]:
%pip install --quiet --upgrade langchain-text-splitters langchain-community langgraph

Note: you may need to restart the kernel to use updated packages.


In [6]:
%pip install -qU "langchain[openai]"
%pip install -U langchain langchain-openai
%pip install -qU langchain-core

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [1]:
import getpass
import os

# Turn on LangSmith tracing for this kernel.
os.environ["LANGSMITH_TRACING"] = "true"

# Prompt for the LangSmith key only when it is missing or empty, so a key that is
# already configured is not overwritten and a full re-run does not stop at a prompt.
# This mirrors how the OpenAI key is handled later in the notebook.
if not os.getenv("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass("Enter API key for LangSmith: ")

 ········


In [2]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:

# Load the blog post, parsing only the elements whose CSS class is
# "post-content", "post-title", or "post-header".
post_sections = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_sections},
)
docs = loader.load()

# Split the loaded documents into chunks (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)


In [39]:
# Inspect the loader's instance attributes (its configuration after construction).
vars(loader)

{'web_paths': ['https://lilianweng.github.io/posts/2023-06-23-agent/'],
 'requests_per_second': 2,
 'default_parser': 'html.parser',
 'requests_kwargs': {},
 'raise_for_status': False,
 'show_progress': True,
 'bs_get_text_kwargs': {},
 'bs_kwargs': {'parse_only': <SoupStrainer name=[] attrs=defaultdict(<class 'list'>, {'class': [<AttributeValueMatchRule string=post-content pattern=None function=None present=None>, <AttributeValueMatchRule string=post-title pattern=None function=None present=None>, <AttributeValueMatchRule string=post-header pattern=None function=None present=None>]}) string=[]>},
 'session': <requests.sessions.Session at 0x10d439a90>,
 'continue_on_failure': False,
 'autoset_encoding': True,
 'encoding': None,
 'trust_env': False}

In [47]:
# Sanity check: container type returned by the loader and the character count
# of the first document's text.
first_doc_length = len(docs[0].page_content)
print(type(docs), "Number of char", first_doc_length)

<class 'list'> Number of char 43130


In [44]:
# Report how many chunks the splitter produced.
split_count = len(all_splits)
print(f"Split blog post into {split_count} sub-documents.")

Split blog post into 66 sub-documents.


In [None]:
# select the LLM

In [6]:
import getpass
import os

# Ask for the OpenAI key interactively only when the variable is unset or empty.
if not os.getenv("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

# from langchain.chat_models import init_chat_model
# llm = init_chat_model("gpt-4o-mini", model_provider="openai")

Enter API key for OpenAI:  ········


In [None]:
# Create the chat model; its calls are traced in LangSmith because LANGSMITH_TRACING is enabled above

In [7]:
from langchain_openai import ChatOpenAI

# Chat model used by the generate step: gpt-4o-mini with temperature 0
# and at most two retries per request.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    max_retries=2,
)

In [None]:
# Select the Embeddings

In [8]:
import getpass
import os

# Make sure an OpenAI key is present before the embeddings client is created;
# the prompt is skipped when the variable already holds a non-empty value.
if not os.getenv("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

from langchain_openai import OpenAIEmbeddings

# Embedding model that is handed to the vector store.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [None]:
# Select the vector store

In [9]:
from langchain_core.vectorstores import InMemoryVectorStore

# In-memory vector store backed by the embedding model selected above.
vector_store = InMemoryVectorStore(embedding=embeddings)

In [14]:
# vector_store.__dict__

In [18]:
# Exploratory check: print the signature and docstring of hub.pull,
# which is used further down to fetch the RAG prompt.
help(hub.pull)

Help on function pull in module langchain.hub:

pull(
    owner_repo_commit: 'str',
    *,
    include_model: 'Optional[bool]' = None,
    api_url: 'Optional[str]' = None,
    api_key: 'Optional[str]' = None
) -> 'Any'
    Pull an object from the hub and returns it as a LangChain object.

    :param owner_repo_commit: The full name of the prompt to pull from in the format of
        `owner/prompt_name:commit_hash` or `owner/prompt_name`
        or just `prompt_name` if it's your own prompt.
    :param api_url: The URL of the LangChain Hub API. Defaults to the hosted API service
        if you have an api key set, or a localhost instance if not.
    :param api_key: The API key to use to authenticate with the LangChain Hub API.



In [15]:

# Index chunks.
# Each chunk gets a stable, position-based id so that re-running this cell
# overwrites the entries already in the store instead of appending duplicate
# copies, which would otherwise crowd the similarity-search results.
chunk_ids = [f"chunk-{position}" for position in range(len(all_splits))]
_ = vector_store.add_documents(documents=all_splits, ids=chunk_ids)

# Define prompt for question-answering: pull the "rlm/rag-prompt" template from the hub.
prompt = hub.pull("rlm/rag-prompt")


In [29]:
# The "rlm/rag-prompt" template pulled from the hub defines two input variables in its text: "Question: {question}" and "Context: {context}".

In [21]:

# Define state for application
class State(TypedDict):
    """State dictionary passed between the steps of the graph."""

    # The user's question; read by both retrieve and generate.
    question: str
    # Documents returned by retrieve; generate joins their text into the prompt context.
    context: List[Document]
    # Answer text produced by generate.
    answer: str


# Define application steps
def retrieve(state: State):
    """Search the vector store for documents similar to the question.

    Args:
        state: Current graph state; only ``state["question"]`` is read.

    Returns:
        A partial state update ``{"context": ...}`` holding the search results.
    """
    question = state["question"]
    matches = vector_store.similarity_search(question)
    return {"context": matches}


def generate(state: State):
    """Answer the question using the documents stored in the state.

    The page content of every document in ``state["context"]`` is joined with
    blank lines, substituted into the prompt together with the question, and
    the resulting messages are sent to ``llm``.

    Args:
        state: Current graph state; ``question`` and ``context`` are read.

    Returns:
        A partial state update ``{"answer": ...}`` holding the ``content`` of
        the model's response.
    """
    context_text = "\n\n".join([doc.page_content for doc in state["context"]])
    prompt_inputs = {"question": state["question"], "context": context_text}
    reply = llm.invoke(prompt.invoke(prompt_inputs))
    return {"answer": reply.content}


In [22]:
# Compile application and test
# Register the two steps in order (retrieve, then generate), make "retrieve"
# the entry point, and compile the builder into a runnable graph.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [36]:
# Show the classes of the compiled graph and of the builder it was compiled from.
(type(graph), type(graph_builder))

(langgraph.graph.state.CompiledStateGraph, langgraph.graph.state.StateGraph)

In [31]:
# help(StateGraph)

In [18]:
# API Reference:hub | WebBaseLoader | Document | RecursiveCharacterTextSplitter | StateGraph

In [23]:
# Run the retrieve -> generate graph for one question and print only the answer text.
initial_state = {"question": "What is Task Decomposition?"}
response = graph.invoke(initial_state)
print(response["answer"])

Task Decomposition is the process of breaking down a complex task into smaller, more manageable steps. This can be achieved through techniques like Chain of Thought (CoT) prompting, which encourages models to think step by step, or by using task-specific instructions. It allows for better planning and execution by clarifying the logical relationships between tasks.


In [24]:
# Ask a broad summarization question through the same graph and print only the answer text.
initial_state = {
    "question": "Summarize the topics being discussed, and mention 3 important points from each topic"
}
response = graph.invoke(initial_state)
print(response["answer"])

The conversation discusses the development of a Super Mario game in Python, focusing on the MVC architecture and keyboard controls. 

**Important points:**
1. The game will be structured using MVC components, which need to be split into separate files.
2. Clarification is needed on the specifics of the game, including level design, characters, and gameplay mechanics.
3. Details regarding the implementation of keyboard controls, such as key mappings and input handling, are also required.


In [37]:
# Ask for a length-constrained summary of the article and print only the answer text.
initial_state = {
    "question": "I want an answer in 10 lines. Summarize what is article is for."
}
response = graph.invoke(initial_state)
print(response["answer"])

The article provides detailed instructions for writing code, focusing on implementing a software architecture step by step. It emphasizes the importance of laying out core classes, functions, and methods, along with their purposes, before presenting the complete code in a structured markdown format. The goal is to ensure that every aspect of the architecture is translated into functional code, starting from the entry point and moving through all related files.
