In [None]:
!pip install langchain
!pip install cassio
!pip install langchain_text_splitters
!pip install langchain_community
!pip install langchain_huggingface

In [None]:
import os
from getpass import getpass

import cassio

## Connection to Astra DB
# Credentials must never be committed with the notebook: the application token
# is read from the environment, or prompted for without echo when it is unset.
# NOTE: any token that was ever stored in this notebook should be revoked and
# rotated in the Astra console.
ASTRA_DB_APP_TOKENS = os.environ.get("ASTRA_DB_APPLICATION_TOKEN") or getpass(
    "Astra DB application token (AstraCS:...): "
)
# The database ID is an identifier rather than a secret; it can be overridden
# through the ASTRA_DB_ID environment variable.
ASTRA_DB_ID = os.environ.get("ASTRA_DB_ID", "5d66db91-381d-49fa-a5a4-ef6081e0d88f")
cassio.init(token=ASTRA_DB_APP_TOKENS, database_id=ASTRA_DB_ID)

In [None]:
### Build Index

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader

## Pages to index
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# Load every page, keeping only the first Document each loader returns.
docs = [WebBaseLoader(page_url).load()[0] for page_url in urls]

# Split the pages into chunks sized with the tiktoken encoder (size 500, no overlap).
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(docs)

In [None]:

from langchain_huggingface import HuggingFaceEmbeddings
# Embedding model that is handed to the Cassandra vector store in the next cell.
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

In [None]:
from langchain_community.vectorstores import Cassandra
# Vector store backed by the "qa_mini_demo" table.
# NOTE(review): session/keyspace are left as None — presumably they are resolved
# from the earlier cassio.init(...) call; confirm.
astra_vector_store = Cassandra(
    embedding=embeddings,
    table_name="qa_mini_demo",
    session=None,
    keyspace=None,
)

In [None]:
# Embed the chunks and write them to the vector store.
# NOTE(review): re-running this cell likely inserts the same chunks again — confirm.
astra_vector_store.add_documents(texts)
# `texts` holds the split document chunks, so report them as such.
print("Inserted %i document chunks." % len(texts))
# Expose the vector store through the retriever interface.
astra_retriever = astra_vector_store.as_retriever()

In [None]:
# Smoke test: run one sample query through the retriever; the cell displays the result.
astra_retriever.invoke("what is a agent")

In [None]:
## LangGraph application
from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel , Field

In [None]:
## Data model
class RouteQuery(BaseModel):
  """Route a user query to the most relevant datasource.

  Used as the structured-output schema for the routing LLM.

  Attributes:
    datasource: Either "vectorstore" or "wiki_search".
  """
  # Must be spelled `datasource`: `route_question` reads `source.datasource`.
  datasource: Literal["vectorstore" , "wiki_search"] = Field(
      description = "Given a user question choose to route it to a wikipedia or a vectorstore"
  )

  @property
  def datasoucre(self):
    """Read-only alias kept for code that still uses the old misspelled name."""
    return self.datasource

In [None]:
from langchain_groq import ChatGroq
from google.colab import userdata
import os
# Fetch the Groq API key through google.colab's `userdata` (entry 'groq_api_key')
# instead of hardcoding it. NOTE(review): this only works inside Colab — confirm.
groq_api_key = userdata.get('groq_api_key')


In [None]:
# Chat model used by the routing chain.
llm = ChatGroq(groq_api_key=groq_api_key, model_name="llama-3.1-8b-instant")

In [None]:
# Bind the RouteQuery schema to the LLM so its answer comes back as structured output.
structured_llm_router = llm.with_structured_output(RouteQuery)

In [None]:
# Routing prompt: the system message describes the two data sources and the
# human message carries the user's question.
system = """ You are an expert at routing a user question to a vectorstore or wikipedia.
The vectorstore contains documents related to agents , prompt engineering , adversarial attacks.
Use the vectorstore to answer questions on the topics. otherwise , Use wiki-search,"""
route_prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{question}")])

# Chain: fill in the prompt, then let the structured-output LLM choose the route.
question_router = route_prompt | structured_llm_router


In [None]:
# Sanity check: route one sample question and print the router's decision.
print(question_router.invoke({"question" : "what is a agent"}))

In [None]:
!pip install wikipedia

In [None]:
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools import WikipediaQueryRun
# Wikipedia tool: the API wrapper is plugged into the query-run tool, which
# `wiki_search` calls through `wiki.invoke(...)`.
wiki_wrap = WikipediaAPIWrapper()
wiki = WikipediaQueryRun(api_wrapper=wiki_wrap)

In [None]:
## AI agent application using LangGraph
from typing import List
from typing_extensions import TypedDict

class GraphState(TypedDict):
  """
  Represent the state of our graph.

  Attributes:
     question: the user's question
     generation: LLM generation
     documents: list of documents
  """
  question: str
  generation: str
  # NOTE(review): annotated as List[str], but wiki_search stores a Document
  # object under this key — confirm the intended type.
  documents: List[str]

In [None]:
from langchain_core import documents
from langchain_core.tools import retriever
from langchain.schema import Document

def retrieve(state):
  """
  Retrieve documents for the question from the vector-store retriever.

  Args:
    state(dict) : The current graph state; must contain "question"

  Return:
    state(dict) : "documents" set to the retrieved documents, plus the
                  unchanged "question"

  """

  print("---Retrieve---")
  question = state["question"]

  ## Retrieval
  # Query the notebook's retriever object (`astra_retriever`). The bare name
  # `retriever` is the imported `langchain_core.tools.retriever` module, which
  # cannot be invoked.
  docs = astra_retriever.invoke(question)
  # Return the retrieved docs, not `documents` (an imported module).
  return {"documents": docs, "question":question}


In [None]:
def wiki_search(state):
  """
  Look the question up on Wikipedia.

  Args:
    state(dict) : The current graph state; its "question" entry is the query

  Return:
    state(dict) : "documents" set to a single Document wrapping the Wikipedia
                  result, plus the unchanged "question"

  """

  for banner in ("---Wikipedia---", "---Hello---"):
    print(banner)
  user_question = state["question"]
  print(user_question)

  ## wiki search
  wiki_text = wiki.invoke({"query": user_question})
  return {
      "documents": Document(page_content=wiki_text),
      "question": user_question,
  }

In [None]:
### Edges ###


def route_question(state):
    """
    Route question to wiki search or RAG.

    Args:
        state (dict): The current graph state; must contain "question"

    Returns:
        str: Next node to call, either "wiki_search" or "vectorstore"

    Raises:
        ValueError: If the router does not return one of the two routes above
    """

    print("---ROUTE QUESTION---")
    question = state["question"]
    source = question_router.invoke({"question": question})
    # getattr guards against a router result that carries no `datasource`.
    route = getattr(source, "datasource", None)
    if route == "wiki_search":
        print("---ROUTE QUESTION TO Wiki SEARCH---")
        return "wiki_search"
    if route == "vectorstore":
        print("---ROUTE QUESTION TO RAG---")
        return "vectorstore"
    # Fail loudly instead of implicitly returning None, which is not a valid route.
    raise ValueError(f"Unexpected datasource from router: {route!r}")

In [None]:
# StateGraph, START and END are exported by `langgraph.graph`.
from langgraph.graph import END , START , StateGraph

workflow = StateGraph(GraphState)
## Define the nodes
# Node names must match the targets used by the edges below. The callable is
# passed positionally because add_node has no `function` keyword.
workflow.add_node("wiki_search" , wiki_search)
workflow.add_node("retrieve" , retrieve)

## Build Graph
# route_question returns "wiki_search" or "vectorstore"; map each to its node.
workflow.add_conditional_edges(
    START ,
    route_question,
    {"wiki_search":"wiki_search"
     ,"vectorstore": "retrieve"},

)
workflow.add_edge("retrieve",END)
workflow.add_edge("wiki_search",END)

## compile
app = workflow.compile()

In [None]:
from IPython.display import Image, display

# Rendering the graph needs optional extra dependencies, so a failure here is
# reported and skipped instead of stopping the notebook.
try:
    display(Image(app.get_graph().draw_mermaid_png()))
except Exception as exc:
    print(f"Skipping graph rendering: {exc!r}")

In [None]:
from pprint import pprint

# Run the graph on a sample question and report each node as it executes.
inputs = {
    "question": "Avengers"
}
# Holds the state update of the most recently reported node; stays None if the
# graph streams nothing, so the final print cannot hit an undefined name.
value = None
for output in app.stream(inputs):
    # Each streamed item is iterated as node-name -> value pairs.
    for key, value in output.items():
        # Node
        pprint(f"Node '{key}':")
    pprint("\n---\n")

# Documents returned by the last node that ran
if value is None:
    print("The graph produced no output.")
else:
    pprint(value['documents'])