In [1]:
from typing import Annotated, Literal, Sequence, TypedDict
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.tools.retriever import create_retriever_tool
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, END, StateGraph
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition

USER_AGENT environment variable not set, consider setting it to identify your requests.

For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


In [2]:
from langchain_huggingface import HuggingFaceEmbeddings

In [3]:
# Sentence-embedding model used to vectorise text for the vector store.
embeddings = HuggingFaceEmbeddings(
    model="all-MiniLM-L6-v2",
)

In [4]:
from langchain_groq import ChatGroq

# Chat model served through Groq; the trailing invoke is a quick smoke test
# whose response is rendered as the cell output.
llm = ChatGroq(
    model="gemma2-9b-it",
)
llm.invoke("Hey how are you doing?")


AIMessage(content="As an AI, I don't have feelings or experiences like humans do. However, I'm here and ready to assist you! How can I help you today?\n", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 38, 'prompt_tokens': 15, 'total_tokens': 53, 'completion_time': 0.069090909, 'prompt_time': 0.00189711, 'queue_time': 0.11147514500000001, 'total_time': 0.070988019}, 'model_name': 'gemma2-9b-it', 'system_fingerprint': 'fp_10c08bf97d', 'finish_reason': 'stop', 'logprobs': None}, id='run--9ae34765-b8df-4753-8185-ed26c2b36d0a-0', usage_metadata={'input_tokens': 15, 'output_tokens': 38, 'total_tokens': 53})

#### Data and Data Source(s)

- Bring in the data sources using the appropriate loaders
- Get the text from the data loader(s)
- Run a recursive character text splitter to chunk the text into pieces
- Store the chunks in a vector DB (Chroma)

In [8]:
# Web pages that make up the retrieval corpus, fetched in this order.
urls = [
    # Lil'Log posts
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    # OpenAI Cookbook example
    "https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken",
]

In [9]:
# NOTE: requires the `bs4` package to be installed.
# Each URL gets its own loader; `load()` returns a list of Documents, so the
# result is a list of lists (one inner list per URL).
docs = [loader.load() for loader in map(WebBaseLoader, urls)]

In [10]:
# Inspect the raw loader output: a nested list, one inner list of Documents per URL.
docs

[[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final res

In [11]:
# Metadata of the first Document loaded from the first URL.
docs[0][0].metadata

{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
 'title': "LLM Powered Autonomous Agents | Lil'Log",
 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\n\n\nMemory\

In [12]:
# Flatten the per-URL lists into one flat list of Documents, preserving order.
docs_list = [
    document
    for documents_for_url in docs
    for document in documents_for_url
]

#### Data and Data Source(s)

- Bring in the data sources using the appropriate loaders
- Get the text from the data loader(s)
- Run a recursive character text splitter to chunk the text into pieces

In [13]:
# Inspect the flattened list of Documents.
docs_list

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final resu

In [14]:
# Recursive splitter built via the tiktoken-encoder constructor:
# chunks of 100 with an overlap of 5 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=5,
    chunk_size=100,
)

In [15]:
# Chunk every loaded Document, then preview the first three chunks.
docs_split = text_splitter.split_documents(docs_list)
docs_split[0:3]

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final resu

In [16]:
import os

# Opt out of Chroma's anonymized product telemetry. Chroma's documented opt-out
# variable is ANONYMIZED_TELEMETRY; "CHROMA_TELEMETRY_ENABLED" is not a
# documented Chroma setting. This must run before the Chroma client is created.
# NOTE(review): "Failed to send telemetry event ..." log lines may still appear
# if they stem from a chromadb/posthog version mismatch — confirm after re-run.
os.environ["ANONYMIZED_TELEMETRY"] = "False"

In [19]:
# Embed the chunks and index them in a Chroma collection using the default
# client. To use an explicit client instead, build
# `chromadb.PersistentClient(path="./chroma_db")` and pass it as `client=`.
vectorstore = Chroma.from_documents(
    embedding=embeddings,
    collection_name='rag-chrome',
    documents=docs_split,
)

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


In [21]:
# Expose the vector store through the retriever interface (no search kwargs
# are passed), then display its repr as the cell output.
retriever = vectorstore.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x30ad79310>, search_kwargs={})

In [22]:
# Create the tool the agent can call to search the indexed pages.
# The description is what the LLM reads when deciding whether to call the tool,
# so it must match what is actually indexed: two Lilian Weng posts (LLM agents,
# prompt engineering) and the OpenAI Cookbook tiktoken guide. The previous text
# also advertised "adversarial attacks on LLMs", which is not among the URLs.
retriever_tool = create_retriever_tool(
    retriever,
    "retrieve_blog_posts",
    "Search and return information from Lilian Weng's blog posts on LLM agents "
    "and prompt engineering, and from the OpenAI Cookbook guide on counting "
    "tokens with tiktoken.",
)

In [None]:
# Tools handed to `llm.bind_tools(...)` in the agent node.
tools = [retriever_tool]  # kept as a list even though there is only one tool

#### Define node functions

In [None]:
def AIAssistant(state):
    """Agent node: run the tool-aware LLM over the conversation so far.

    Args:
        state: Graph state mapping; only its ``'messages'`` entry is read.

    Returns:
        A dict with a single ``"messages"`` key whose value is a one-element
        list containing the model's response.
    """
    print("--- CALL AGENT ---")
    history = state['messages']
    # Tools are bound on every call, using the module-level `llm` and `tools`.
    tool_aware_llm = llm.bind_tools(tools)
    reply = tool_aware_llm.invoke(history)
    return {"messages": [reply]}

In [10]:
def retreive(state):
    """Placeholder for the retrieval node; not implemented yet.

    Ignores ``state`` and returns ``None``.
    """
    return None

In [11]:
def rewrite(state):
    """Placeholder for the query-rewriting node; not implemented yet.

    Ignores ``state`` and returns ``None``.
    """
    return None

In [12]:
def generate(state):
    """Placeholder for the answer-generation node; not implemented yet.

    Ignores ``state`` and returns ``None``.
    """
    return None

In [14]:
class AgentState(TypedDict):
    """State schema shared by every node in the graph.

    The agent node reads ``state['messages']`` and returns
    ``{"messages": [response]}``, so the schema must declare that channel.
    An empty class gives StateGraph no channels to read from or write to.
    """

    # `add_messages` is the reducer: node updates are appended to the existing
    # history instead of replacing it.
    messages: Annotated[Sequence[BaseMessage], add_messages]

In [None]:
# Build the graph over the shared AgentState schema and register its nodes.
workflow = StateGraph(AgentState)
workflow.add_node("ai_assistant", AIAssistant)
# The retrieval step has to execute the tool calls emitted by the assistant.
# Previously the ToolNode was constructed and discarded, and this node was
# bound to a no-op stub, so the retriever tool would never have run.
workflow.add_node("retreiver", ToolNode([retriever_tool]))
workflow.add_node("rewriter", rewrite)
workflow.add_node("generator", generate)

In [None]:
# `add_edge()` / `add_conditional_edges()` with no arguments raise TypeError;
# wire the nodes registered on `workflow` explicitly.

# Entry point: every run starts at the assistant.
workflow.add_edge(START, "ai_assistant")

# `tools_condition` routes to "tools" when the assistant's last message
# contains tool calls, otherwise to END; map "tools" onto the retrieval node.
workflow.add_conditional_edges(
    "ai_assistant",
    tools_condition,
    {"tools": "retreiver", END: END},
)

# TODO(review): there is no relevance-grading function yet, so retrieval always
# flows to generation. Once a grader exists, replace this edge with a
# conditional edge that chooses between "generator" and "rewriter".
workflow.add_edge("retreiver", "generator")
workflow.add_edge("generator", END)
# A rewritten question goes back to the assistant for another attempt.
workflow.add_edge("rewriter", "ai_assistant")

In [None]:
# Call compile(): without the parentheses `app` is just the bound method and
# has no `.invoke`.
app = workflow.compile()

In [None]:
# The graph input must be a state mapping, not a bare string: nodes read
# `state['messages']`, so seed the conversation with a single human message.
app.invoke({"messages": [HumanMessage(content="?")]})