## Utils

In [None]:
!pip install --upgrade langchain_community langchain_core langchain_openai langgraph pypdf langchain_groq fastembed chromadb

In [None]:
import shutil

# Archive shipped alongside the notebook, and the folder its contents go into.
zip_path, extract_to = "data.zip", "./data"

# With no explicit format, unpack_archive chooses the unpacker from the file extension.
shutil.unpack_archive(filename=zip_path, extract_dir=extract_to)

print("Files extracted successfully!")


In [None]:
from data.helper_functions import *

# Basics

## LangChain and LangGraph

### [LangChain Introduction](https://python.langchain.com/docs/introduction/)

LangChain implements a standard interface for large language models and related technologies, such as embedding models and vector stores, and integrates with hundreds of providers.


```python
    from langchain.chat_models import init_chat_model
    model = init_chat_model("llama3-8b-8192", model_provider="groq")
    model.invoke("Hello, world!")
```


### [LangGraph Introduction](https://langchain-ai.github.io/langgraph/)

LangGraph is an orchestration framework for combining LangChain components into production-ready applications, with persistence, streaming, and other key features.

```python
    from typing import Annotated
    from typing_extensions import TypedDict
    from langgraph.graph import StateGraph, START, END
    from langgraph.graph.message import add_messages
    from langchain_anthropic import ChatAnthropic
    llm = ChatAnthropic(model="claude-3-5-sonnet-20240620")
    class State(TypedDict):
        messages: Annotated[list, add_messages]
    def chatbot(state: State):
        return {"messages": [llm.invoke(state["messages"])]}
    graph_builder = StateGraph(State)
    graph_builder.add_node("chatbot", chatbot)
    graph_builder.add_edge(START, "chatbot")
    graph_builder.add_edge("chatbot", END)
```

![Basic LangGraph](./data/imgs/langgraph_basic.png)

In [None]:
import os
# Publish the Groq API key through the process environment.
# NOTE(review): `groq_key` is not defined in any visible cell — presumably it comes from the
# star import of data.helper_functions; confirm before running.
os.environ["GROQ_API_KEY"] = groq_key

## AI Agents

![AI Agents](./data/imgs/AI_Agents.jpg)



In [None]:
from typing import Annotated
from typing_extensions import TypedDict
from langgraph.graph.message import AnyMessage, add_messages

class State(TypedDict):
    """State handed to each graph node; it holds only the conversation messages."""

    # `add_messages` is attached as the reducer for this key: per the LangGraph docs,
    # node updates are merged into the existing list instead of replacing it.
    messages: Annotated[list[AnyMessage], add_messages]

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import Runnable, RunnableConfig


class Assistant:
    """Graph node that runs a runnable on the state and re-prompts on empty replies.

    The wrapped runnable is invoked with the whole state. A reply counts as
    empty when it has no tool calls and either no content at all or a list
    content whose first item has no "text".
    """

    def __init__(self, runnable: Runnable):
        """Store the runnable that is invoked on every call."""
        self.runnable = runnable

    def __call__(self, state: State, config: RunnableConfig):
        """Invoke the runnable until it gives a usable reply.

        Args:
            state: Current graph state; ``state["messages"]`` is the conversation.
            config: Run configuration supplied by the graph. It is part of the
                node signature but is not read here.

        Returns:
            ``{"messages": result}`` where ``result`` is the first non-empty reply.
        """
        # NOTE(review): the number of retries is not bounded; a model that keeps
        # answering with nothing would keep this loop running.
        while True:
            result = self.runnable.invoke(state)
            content = result.content
            is_empty_reply = not result.tool_calls and (
                not content
                or (isinstance(content, list) and not content[0].get("text"))
            )
            if not is_empty_reply:
                break
            # Nudge the model on a local copy of the state; the caller's state
            # object is not mutated.
            messages = state["messages"] + [("user", "Respond with a real output.")]
            state = {**state, "messages": messages}
        return {"messages": result}

In [None]:

from langchain_core.tools import tool

@tool
def get_traffic_conditions(config: RunnableConfig, city: str):
    """
    Get traffic conditions for a given city.
    """
    # The docstring above doubles as the tool description given to the model,
    # so its wording is left exactly as it was.
    traffic_conditions = {
        'berlin': 'normal',
        'munich': 'very busy',
        'frankfurt': 'very busy'
    }
    key = city.strip().lower()
    if key not in traffic_conditions:
        # Answer with a readable message instead of raising a bare KeyError,
        # so the model can tell the user which cities are covered.
        known = ", ".join(sorted(traffic_conditions))
        return f"No traffic data available for '{city}'. Known cities: {known}."
    return traffic_conditions[key]

@tool
def get_weather_conditions(config: RunnableConfig, city: str):
    """
    Get weather conditions for a given city.
    """
    # The docstring above doubles as the tool description given to the model,
    # so its wording is left exactly as it was.
    weather_conditions = {
        'berlin': '5C, rainy',
        'munich': '2C, windy',
        'frankfurt': '-1C, snowy'
    }
    key = city.strip().lower()
    if key not in weather_conditions:
        # Answer with a readable message instead of raising a bare KeyError,
        # so the model can tell the user which cities are covered.
        known = ", ".join(sorted(weather_conditions))
        return f"No weather data available for '{city}'. Known cities: {known}."
    return weather_conditions[key]


@tool()
def retrieve(config: RunnableConfig, query: str):
    """Retrieve information related to a query."""
    # Look up the two closest documents in the module-level `vector_store`.
    matches = vector_store.similarity_search(query, k=2)
    # One "Source/Content" paragraph per hit, separated by a blank line.
    paragraphs = [
        f"Source: {match.metadata['source']}\nContent: {match.page_content}"
        for match in matches
    ]
    return "\n\n".join(paragraphs)


In [None]:
from langchain_groq import ChatGroq
llm = ChatGroq(model="llama-3.3-70b-versatile")

# System instructions followed by the running conversation ("{messages}").
primary_assistant_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # Adjacent string literals are concatenated as-is, so the first one
            # ends with ". " to keep the two sentences apart.
            "You are a helpful assistant who provides traffic and weather information. "
            "Use the provided tools to get the traffic and weather conditions for a city."
        ),
        ("placeholder", "{messages}"),
    ]
)

# NOTE(review): `retrieve` reads the global `vector_store`, which this notebook
# only creates in the RAG section further down — confirm that cell has been run
# before the model is able to call this tool.
tools = [
    get_traffic_conditions,
    get_weather_conditions,
    retrieve
]

# Prompt piped into the model, with the tools bound to the model.
assistant_runnable = primary_assistant_prompt | llm.bind_tools(tools)

In [None]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import END, StateGraph, START
from langgraph.prebuilt import tools_condition

builder = StateGraph(State)


# Define nodes: these do the work
builder.add_node("assistant", Assistant(assistant_runnable))
# NOTE(review): `create_tool_node_with_fallback` is not defined in any visible cell —
# presumably it comes from the star import of data.helper_functions.
builder.add_node("tools", create_tool_node_with_fallback(tools))
# Define edges: these determine how the control flow moves
builder.add_edge(START, "assistant")
# After every assistant turn, `tools_condition` chooses the next step
# (no explicit path map is passed here).
builder.add_conditional_edges(
    "assistant",
    tools_condition,
)
# Whatever the tools node produces is handed back to the assistant.
builder.add_edge("tools", "assistant")

# The checkpointer lets the graph persist its state
# this is a complete memory for the entire graph.
memory = MemorySaver()
graph = builder.compile(checkpointer=memory)

In [None]:
from IPython.display import Image, display

try:
    display(Image(graph.get_graph(xray=True).draw_mermaid_png()))
except Exception as exc:
    # Rendering requires some extra dependencies and is optional, so a failure
    # must not stop the notebook — but say why the picture is missing.
    print(f"Graph visualisation skipped: {exc!r}")

In [None]:
import uuid

# A fresh random id per run; checkpoints are accessed by thread_id.
thread_id = str(uuid.uuid4())
config = dict(configurable=dict(thread_id=thread_id))

# Asked one after another on the same thread.
questions = [
    "Can you tell me the traffic conditions in Berlin?",
    "I want to travel from munich to frankfurt, do I need an umbrella?",
]

In [None]:
# Shared across all questions and handed to the print helper on every call;
# presumably it records what has already been printed — confirm in the helper.
_printed = set()
for question in questions:
    # Every question is streamed with the same `config`, i.e. the same thread_id.
    events = graph.stream(
        {"messages": ("user", question)}, config, stream_mode="values"
    )
    for event in events:
        # NOTE(review): `_print_event` is not defined in any visible cell — presumably
        # it comes from the star import of data.helper_functions.
        _print_event(event, _printed)

## Retrieval-Augmented Generation (RAG)

![Retrieval-Augmented Generation](./data/imgs/RAG.jpg)

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
import os
from glob import glob
from langchain_groq import ChatGroq

In [None]:
import os
# Publish the Groq API key through the process environment.
# NOTE(review): `groq_key` is not defined in any visible cell — presumably it comes from the
# star import of data.helper_functions.
os.environ["GROQ_API_KEY"] = groq_key

llm = ChatGroq(model="llama-3.3-70b-versatile")

# Embedding model handed to the vector store below.
embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-base-en-v1.5")

# Module-level store: `load_documents` adds pages to it and `retrieve` searches it.
vector_store = InMemoryVectorStore(embeddings)

In [None]:
def load_documents(document_path):
    """Read a PDF and add its pages to the module-level `vector_store`.

    Args:
        document_path: Path handed to `PyPDFLoader`.

    Returns:
        Always 0; the page count is only printed.
    """
    # Materialise the lazy loader so the documents can be both stored and counted.
    pages = list(PyPDFLoader(document_path).lazy_load())
    vector_store.add_documents(pages)
    print(f"Loaded {len(pages)} pages from {document_path}")
    return 0

In [None]:
# NOTE(review): hard-coded absolute path — looks like a Google Colab upload location;
# adjust it when running the notebook anywhere else.
load_documents('/content/rag_document.pdf')

In [None]:

from langchain_core.tools import tool

@tool()
def retrieve(config: RunnableConfig, query: str):
    """Retrieve information related to a query."""
    # Look up the two closest documents in the module-level `vector_store`.
    matches = vector_store.similarity_search(query, k=2)
    # One "Source/Content" paragraph per hit, separated by a blank line.
    paragraphs = [
        f"Source: {match.metadata['source']}\nContent: {match.page_content}"
        for match in matches
    ]
    return "\n\n".join(paragraphs)


In [None]:
from langchain_groq import ChatGroq
llm = ChatGroq(model="llama-3.3-70b-versatile")

# System instructions followed by the running conversation ("{messages}").
primary_assistant_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # Adjacent string literals are concatenated as-is, so the first one
            # ends with a space to keep the two sentences apart.
            "You also answer user's queries based only on the context that you get from the tools. "
            "Include the source of the retrived context"
        ),
        ("placeholder", "{messages}"),
    ]
)

# Only the document-retrieval tool is exposed in this section.
tools = [
    retrieve
]

# Prompt piped into the model, with the tools bound to the model.
assistant_runnable = primary_assistant_prompt | llm.bind_tools(tools)

In [None]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import END, StateGraph, START
from langgraph.prebuilt import tools_condition

builder = StateGraph(State)


# Define nodes: these do the work
builder.add_node("assistant", Assistant(assistant_runnable))
# NOTE(review): `create_tool_node_with_fallback` is not defined in any visible cell —
# presumably it comes from the star import of data.helper_functions.
builder.add_node("tools", create_tool_node_with_fallback(tools))
# Define edges: these determine how the control flow moves
builder.add_edge(START, "assistant")
# After every assistant turn, `tools_condition` chooses the next step
# (no explicit path map is passed here).
builder.add_conditional_edges(
    "assistant",
    tools_condition,
)
# Whatever the tools node produces is handed back to the assistant.
builder.add_edge("tools", "assistant")

# The checkpointer lets the graph persist its state
# this is a complete memory for the entire graph.
memory = MemorySaver()
graph = builder.compile(checkpointer=memory)

In [None]:
from IPython.display import Image, display

try:
    display(Image(graph.get_graph(xray=True).draw_mermaid_png()))
except Exception as exc:
    # Rendering requires some extra dependencies and is optional, so a failure
    # must not stop the notebook — but say why the picture is missing.
    print(f"Graph visualisation skipped: {exc!r}")

In [None]:
import uuid

# A fresh random id per run; checkpoints are accessed by thread_id.
thread_id = str(uuid.uuid4())
config = dict(configurable=dict(thread_id=thread_id))

# Single question answered from the loaded document.
questions = [
    "WHat are machine learning algorithms?",
]

In [None]:
# Shared across all questions and handed to the print helper on every call;
# presumably it records what has already been printed — confirm in the helper.
_printed = set()
for question in questions:
    # Every question is streamed with the same `config`, i.e. the same thread_id.
    events = graph.stream(
        {"messages": ("user", question)}, config, stream_mode="values"
    )
    for event in events:
        # NOTE(review): `_print_event` is not defined in any visible cell — presumably
        # it comes from the star import of data.helper_functions.
        _print_event(event, _printed)

# Improving Agent with RAG

![AI Agents with RAG](./data/imgs/RAG_Agent.jpg)

We bring all of the above together, with the following use case.


## Use Case: AI Agent Assistant for modifying/booking flight tickets
The agent we build should be able to do the following:
- book tickets;
- when rescheduling or cancelling a flight, consult the documentation to check the applicable terms and conditions;
- inform the user about any refund or additional charges, and ask them to confirm the changes.

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
import os
from glob import glob
from langchain_groq import ChatGroq

In [None]:
import os
# Publish the Groq API key through the process environment.
# NOTE(review): `groq_key` is not defined in any visible cell — presumably it comes from the
# star import of data.helper_functions.
os.environ["GROQ_API_KEY"] = groq_key

llm = ChatGroq(model="llama-3.3-70b-versatile")

# Embedding model handed to the vector store below.
embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-base-en-v1.5")

# Module-level store: `load_documents` adds pages to it and `retrieve_policy` searches it.
vector_store = InMemoryVectorStore(embeddings)

In [None]:
def load_documents(document_path):
    """Read a PDF and add its pages to the module-level `vector_store`.

    Args:
        document_path: Path handed to `PyPDFLoader`.

    Returns:
        Always 0; the page count is only printed.
    """
    # Materialise the lazy loader so the documents can be both stored and counted.
    pages = list(PyPDFLoader(document_path).lazy_load())
    vector_store.add_documents(pages)
    print(f"Loaded {len(pages)} pages from {document_path}")
    return 0

In [None]:
from typing import Annotated
from typing_extensions import TypedDict
from langgraph.graph.message import AnyMessage, add_messages

class State(TypedDict):
    """State handed to each graph node; it holds only the conversation messages."""

    # `add_messages` is attached as the reducer for this key: per the LangGraph docs,
    # node updates are merged into the existing list instead of replacing it.
    messages: Annotated[list[AnyMessage], add_messages]

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import Runnable, RunnableConfig


class Assistant:
    """Graph node that runs a runnable on the state and re-prompts on empty replies.

    The wrapped runnable is invoked with the whole state. A reply counts as
    empty when it has no tool calls and either no content at all or a list
    content whose first item has no "text".
    """

    def __init__(self, runnable: Runnable):
        """Store the runnable that is invoked on every call."""
        self.runnable = runnable

    def __call__(self, state: State, config: RunnableConfig):
        """Invoke the runnable until it gives a usable reply.

        Args:
            state: Current graph state; ``state["messages"]`` is the conversation.
            config: Run configuration supplied by the graph. It is part of the
                node signature but is not read here.

        Returns:
            ``{"messages": result}`` where ``result`` is the first non-empty reply.
        """
        # NOTE(review): the number of retries is not bounded; a model that keeps
        # answering with nothing would keep this loop running.
        while True:
            result = self.runnable.invoke(state)
            content = result.content
            is_empty_reply = not result.tool_calls and (
                not content
                or (isinstance(content, list) and not content[0].get("text"))
            )
            if not is_empty_reply:
                break
            # Nudge the model on a local copy of the state; the caller's state
            # object is not mutated.
            messages = state["messages"] + [("user", "Respond with a real output.")]
            state = {**state, "messages": messages}
        return {"messages": result}

In [None]:
from langchain_core.tools import tool
import sqlite3

from datetime import datetime


# Path of the SQLite database file that the ticket and flight tools below connect to.
db = 'flights.db'

@tool
def get_ticket_info(config: RunnableConfig, ticket_details: dict):
    """
    Get ticket information together with the details of the booked flight.

    ticket_details must contain one of:
    - 'ticket_number': look up that ticket, whatever its status.
    - 'passenger_name': list that passenger's tickets whose status is BOOKED.
    Both lookups are case-insensitive; 'ticket_number' wins if both are given.
    """
    # Choose the filter first, so a request with neither key fails with a clear
    # error before any connection is opened.
    if 'ticket_number' in ticket_details:
        where_clause = "LOWER(t.ticket_number) = ?"
        params = (ticket_details['ticket_number'].lower(),)
    elif 'passenger_name' in ticket_details:
        where_clause = "LOWER(t.passenger_name) = ? AND t.status = 'BOOKED'"
        params = (ticket_details['passenger_name'].lower(),)
    else:
        raise ValueError(
            "ticket_details must contain 'ticket_number' or 'passenger_name'"
        )

    # Only the fixed WHERE text above is interpolated; caller-supplied values are
    # bound through the `?` placeholder and never become part of the SQL text.
    query = f"""
    SELECT
        f.flight_number,
        f.departure_airport,
        f.arrival_airport,
        f.departure,
        f.arrival,
        t.passenger_name,
        t.ticket_number,
        t.status,
        t.booking_ts
    FROM
        tickets t
        JOIN flights f ON t.flight_number = f.flight_number
    WHERE
        {where_clause}
    """

    conn = sqlite3.connect(db)
    try:
        cursor = conn.cursor()
        cursor.execute(query, params)
        rows = cursor.fetchall()
        column_names = [column[0] for column in cursor.description]
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # One dict per row, keyed by column name.
    return [dict(zip(column_names, row)) for row in rows]

@tool
def get_flight_info(config: RunnableConfig, ticket_details: dict):
    """
    Get flight information

    ticket_details must contain 'from' (departure airport) and 'to' (arrival
    airport). Both are matched case-insensitively.
    """
    departure_airport = ticket_details['from']
    arrival_airport = ticket_details['to']

    # Values are bound through `?` placeholders and never become part of the SQL text.
    query = """
    SELECT *
    FROM flights
    WHERE LOWER(departure_airport) = ? AND LOWER(arrival_airport) = ?
    """
    params = (departure_airport.lower(), arrival_airport.lower())

    conn = sqlite3.connect(db)
    try:
        cursor = conn.cursor()
        cursor.execute(query, params)
        rows = cursor.fetchall()
        column_names = [column[0] for column in cursor.description]
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # One dict per row, keyed by column name.
    return [dict(zip(column_names, row)) for row in rows]


@tool
def book_ticket(config: RunnableConfig, ticket_details: dict):
    """
    Book a ticket.

    ticket_details must contain 'flight_number', 'passenger_name' and
    'ticket_type' ('economy', 'business' or 'flexi').
    """
    ticket_prefix = {
        "economy": "ECO",
        "business": "BUS",
        "flexi": "FLX"
    }
    flight_number = ticket_details['flight_number']
    ticket_type = ticket_details['ticket_type']
    passenger_name = ticket_details['passenger_name']

    # Ignore case and surrounding whitespace so "Business" maps to BUS; any other
    # class is still booked, marked with the "UNK" prefix.
    prefix = ticket_prefix.get(str(ticket_type).strip().lower(), "UNK")
    # NOTE(review): `generate_alphanumeric_code` is not defined in any visible cell —
    # presumably it comes from the star import of data.helper_functions.
    ticket_number = prefix + "_" + generate_alphanumeric_code(8).upper()

    # Format once, so the stored timestamp and the reported one cannot differ.
    booking_time = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")

    # Values are bound through `?` placeholders and never become part of the SQL text.
    query = """
    INSERT INTO tickets (flight_number, passenger_name, ticket_number, status, booking_ts)
        VALUES (?, ?, ?, 'BOOKED', ?)
    """
    conn = sqlite3.connect(db)
    try:
        conn.execute(query, (flight_number, passenger_name, ticket_number, booking_time))
        conn.commit()
    finally:
        # Release the connection even when the insert fails.
        conn.close()

    return {
        "flight_number": flight_number,
        "ticket_number": ticket_number,
        "booking_time": booking_time
    }

@tool
def cancel_ticket(config: RunnableConfig, ticket_details: dict):
    """
    Cancel a ticket.

    ticket_details must contain 'ticket_number' (exact match). The returned
    status is 'CANCELLED', or 'NOT_FOUND' when no ticket has that number.
    """
    ticket_number = ticket_details['ticket_number']

    # The value is bound through a `?` placeholder and never becomes part of the SQL text.
    query = """
    UPDATE tickets
        SET status = 'CANCELLED'
        WHERE ticket_number = ?
    """
    conn = sqlite3.connect(db)
    try:
        cursor = conn.execute(query, (ticket_number,))
        conn.commit()
        # Number of rows the UPDATE touched; 0 means the ticket does not exist.
        updated_rows = cursor.rowcount
    finally:
        # Release the connection even when the update fails.
        conn.close()

    # Do not claim a cancellation that never happened.
    return {
        "ticket_number": ticket_number,
        "status": "CANCELLED" if updated_rows else "NOT_FOUND"
    }

@tool()
def retrieve_policy(config: RunnableConfig, query: str):
    """Retrieve information related to a query."""
    # Look up the two closest documents in the module-level `vector_store`.
    matches = vector_store.similarity_search(query, k=2)
    # One "Source/Content" paragraph per hit, separated by a blank line.
    paragraphs = [
        f"Source: {match.metadata['source']}\nContent: {match.page_content}"
        for match in matches
    ]
    return "\n\n".join(paragraphs)


In [None]:
from langchain_groq import ChatGroq
llm = ChatGroq(model="llama-3.3-70b-versatile")

# Exercise cell: the system prompt and the tool list are left for the reader to fill in.
primary_assistant_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # TODO: Write your prompt here. Until a string is added, this tuple holds
            # only the role ("system",) and no message text.
        ),
        ("placeholder", "{messages}"),
    ]
)

tools = [
    # TODO: Pass the appropriate tools here. The tools defined for this use case are
    # get_ticket_info, get_flight_info, book_ticket, cancel_ticket and retrieve_policy.
]

# Prompt piped into the model, with the tools bound to the model.
assistant_runnable = primary_assistant_prompt | llm.bind_tools(tools)

In [None]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import END, StateGraph, START
from langgraph.prebuilt import tools_condition

builder = StateGraph(State)


# Define nodes: these do the work
builder.add_node("assistant", Assistant(assistant_runnable))
# NOTE(review): `create_tool_node_with_fallback` is not defined in any visible cell —
# presumably it comes from the star import of data.helper_functions.
builder.add_node("tools", create_tool_node_with_fallback(tools))
# Define edges: these determine how the control flow moves
builder.add_edge(START, "assistant")
# After every assistant turn, `tools_condition` chooses the next step
# (no explicit path map is passed here).
builder.add_conditional_edges(
    "assistant",
    tools_condition,
)
# Whatever the tools node produces is handed back to the assistant.
builder.add_edge("tools", "assistant")

# The checkpointer lets the graph persist its state
# this is a complete memory for the entire graph.
memory = MemorySaver()
graph = builder.compile(checkpointer=memory)

In [None]:
from IPython.display import Image, display

try:
    display(Image(graph.get_graph(xray=True).draw_mermaid_png()))
except Exception as exc:
    # Rendering requires some extra dependencies and is optional, so a failure
    # must not stop the notebook — but say why the picture is missing.
    print(f"Graph visualisation skipped: {exc!r}")

In [None]:
import uuid

# A fresh random id per run; checkpoints are accessed by thread_id.
thread_id = str(uuid.uuid4())
config = dict(configurable=dict(thread_id=thread_id))

# Asked one after another on the same thread. The last entry contains a
# placeholder to be replaced with a real ticket number before running.
questions = [
    "My name is Charlie Davis, can you show me my flight bookings?",
    "Show me flights from Frankfurt to New York",
    "Book a ticket in flight no FL1007, under the name Charlie Davis in Business class",
    "Show me the bookings for Charlie Davis now.",
    "Can I cancel my ticket with ticket number: {ENTER YOUR TICKET NUMBER HERE}"
]

In [None]:
# Shared across all questions and handed to the print helper on every call;
# presumably it records what has already been printed — confirm in the helper.
_printed = set()
for question in questions:
    # Every question is streamed with the same `config`, i.e. the same thread_id.
    events = graph.stream(
        {"messages": ("user", question)}, config, stream_mode="values"
    )
    for event in events:
        # NOTE(review): `_print_event` is not defined in any visible cell — presumably
        # it comes from the star import of data.helper_functions.
        _print_event(event, _printed)