In [2]:
import getpass
import os

if not os.environ.get("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")

In [3]:
import getpass
import os

if not os.environ.get("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [4]:
from langchain_core.vectorstores import InMemoryVectorStore

vector_store = InMemoryVectorStore(embeddings)

In [5]:
os.environ["LANGSMITH_TRACING"] = "true"
if not os.environ.get("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass()

In [6]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from typing_extensions import List, TypedDict

from langchain_community.document_loaders import JSONLoader
import json
from pathlib import Path


path = "web_scrape/web_scrape_output/csci_courses_with_descriptions.json"

#data = json.loads(Path(path).read_text())
loader = JSONLoader(
         file_path=path,
         jq_schema='.courses',
         text_content=False)

docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)

# Index chunks
_ = vector_store.add_documents(documents=all_splits)

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [7]:
from langgraph.graph import MessagesState, StateGraph

graph_builder = StateGraph(MessagesState)

In [8]:
from langchain_core.tools import tool


@tool(response_format="content_and_artifact")
def retrieve(query: str):
    """Retrieve information related to a query."""
    retrieved_docs = vector_store.similarity_search(query, k=2)
    serialized = "\n\n".join(
        (f"Source: {doc.metadata}\n" f"Content: {doc.page_content}")
        for doc in retrieved_docs
    )
    return serialized, retrieved_docs

In [9]:
from langchain_core.messages import SystemMessage
from langgraph.prebuilt import ToolNode


# Step 1: Generate an AIMessage that may include a tool-call to be sent.
def query_or_respond(state: MessagesState):
    """Generate tool call for retrieval or respond."""
    llm_with_tools = llm.bind_tools([retrieve])
    response = llm_with_tools.invoke(state["messages"])
    # MessagesState appends messages to state instead of overwriting
    return {"messages": [response]}


# Step 2: Execute the retrieval.
tools = ToolNode([retrieve])


# Step 3: Generate a response using the retrieved content.
def generate(state: MessagesState):
    """Generate answer."""
    # Get generated ToolMessages
    recent_tool_messages = []
    for message in reversed(state["messages"]):
        if message.type == "tool":
            recent_tool_messages.append(message)
        else:
            break
    tool_messages = recent_tool_messages[::-1]

    # Format into prompt
    docs_content = "\n\n".join(doc.content for doc in tool_messages)
    system_message_content = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know. Use three sentences maximum and keep the "
        "answer concise."
        "\n\n"
        f"{docs_content}"
    )
    conversation_messages = [
        message
        for message in state["messages"]
        if message.type in ("human", "system")
        or (message.type == "ai" and not message.tool_calls)
    ]
    prompt = [SystemMessage(system_message_content)] + conversation_messages

    # Run
    response = llm.invoke(prompt)
    return {"messages": [response]}

In [10]:
from langgraph.graph import END
from langgraph.prebuilt import ToolNode, tools_condition

graph_builder.add_node(query_or_respond)
graph_builder.add_node(tools)
graph_builder.add_node(generate)

graph_builder.set_entry_point("query_or_respond")
graph_builder.add_conditional_edges(
    "query_or_respond",
    tools_condition,
    {END: END, "tools": "tools"},
)
graph_builder.add_edge("tools", "generate")
graph_builder.add_edge("generate", END)

graph = graph_builder.compile()

In [13]:
input_message = "Tell me good courses on machine learning"

for step in graph.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()


Tell me good courses on machine learning
Tool Calls:
  retrieve (call_rjUmhU7zY8nZTh5sndQ6R4Hz)
 Call ID: call_rjUmhU7zY8nZTh5sndQ6R4Hz
  Args:
    query: good courses on machine learning
Name: retrieve

Source: {'source': '/Users/maxwirattawut/Documents/GitHub/cabAI/web_scrape/web_scrape_output/csci_courses_with_descriptions.json', 'seq_num': 1}
Content: course is designed to help you understand the underlying concepts as well as the promise and pitfalls of deep learning. It also aims at providing hands-on practice of implementing and applying deep learning methods in Python."}, {"key": "2095", "code": "CSCI 1491", "title": "Fairness in Automated Decision Making", "crn": "27672", "no": "S01", "total": "1", "schd": "S", "stat": "F", "hide": "", "isCancelled": "", "meets": "TTh 1-2:20p", "mpkey": "2020", "meetingTimes": "[{\"meet_day\":\"1\",\"start_time\":\"1300\",\"end_time\":\"1420\"},{\"meet_day\":\"3\",\"start_time\":\"1300\",\"end_time\":\"1420\"}]", "instr": "Venkatasubramanian/

In [14]:
input_message = "Can you only filter out for courses with course code that is in the 2000-level?"

for step in graph.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()


Can you only filter out for courses with course code that is in the 2000-level?
Tool Calls:
  retrieve (call_3cep3cmzntl8IViMSu1UOusG)
 Call ID: call_3cep3cmzntl8IViMSu1UOusG
  Args:
    query: courses with 2000-level course code
Name: retrieve

Source: {'source': '/Users/maxwirattawut/Documents/GitHub/cabAI/web_scrape/web_scrape_output/csci_courses_with_descriptions.json', 'seq_num': 1}
Content: <a href=\"/search/?p=CSCI%200112\" data-action=\"result-detail\" data-group=\"code:CSCI 0112\"  class=\"notoffered\">CSCI 0112</a>, <a href=\"/search/?p=CSCI%200150\" data-action=\"result-detail\" data-group=\"code:CSCI 0150\"  class=\"notoffered\">CSCI 0150</a>, <a href=\"/search/?p=CSCI%200170\" data-action=\"result-detail\" data-group=\"code:CSCI 0170\"  class=\"notoffered\">0170</a>, or <a href=\"/search/?p=CSCI%200190\" data-action=\"result-detail\" data-group=\"code:CSCI 0190\"  class=\"notoffered\">CSCI 0190</a>. In addition, <a href=\"/search/?p=CSCI%200111\" data-action=\"result-deta

In [15]:
input_message = "Is D. Ritchie teaching any course this semester?"

for step in graph.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()


Is D. Ritchie teaching any course this semester?
Tool Calls:
  retrieve (call_mhCEHm3KK7W5JLXO01JKWjac)
 Call ID: call_mhCEHm3KK7W5JLXO01JKWjac
  Args:
    query: D. Ritchie course schedule this semester
Name: retrieve

Source: {'source': '/Users/maxwirattawut/Documents/GitHub/cabAI/web_scrape/web_scrape_output/csci_courses_with_descriptions.json', 'seq_num': 1}
Content: to take the course should contact the instructor."}, {"key": "2077", "code": "CSCI 1340", "title": "Introduction to Software Engineering", "crn": "27035", "no": "S01", "total": "2", "schd": "S", "stat": "F", "hide": "", "isCancelled": "", "meets": "TTh 1-2:20p", "mpkey": "2020", "meetingTimes": "[{\"meet_day\":\"1\",\"start_time\":\"1300\",\"end_time\":\"1420\"},{\"meet_day\":\"3\",\"start_time\":\"1300\",\"end_time\":\"1420\"}]", "instr": "T. Nelson", "start_date": "2025-01-22", "end_date": "2025-05-16", "permreq": "N", "rpt": "N", "cart_opts": "{\"grade_mode\":{\"cart_field\":\"p_gmod\",\"enabled\":true,\"options\":

In [17]:
input_message = "Is there any other course he's teaching asides from CSCI 2240?"

for step in graph.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()


Is there any other course he's teaching asides from CSCI 2240?
Tool Calls:
  retrieve (call_HLu3kuNEDeykTeWCgEjbt5Hv)
 Call ID: call_HLu3kuNEDeykTeWCgEjbt5Hv
  Args:
    query: courses taught by CSCI 2240 instructor
Name: retrieve

Source: {'source': '/Users/maxwirattawut/Documents/GitHub/cabAI/web_scrape/web_scrape_output/csci_courses_with_descriptions.json', 'seq_num': 1}
Content: data-action=\"result-detail\" data-group=\"code:CSCI 0320\" >0320</a>, <a href=\"/search/?p=CSCI%200330\" data-action=\"result-detail\" data-group=\"code:CSCI 0330\"  class=\"notoffered\">CSCI 0330</a>, <a href=\"/search/?p=CSCI%201310\" data-action=\"result-detail\" data-group=\"code:CSCI 1310\" >CSCI 1310</a>, OR <a href=\"/search/?p=CSCI%201330\" data-action=\"result-detail\" data-group=\"code:CSCI 1330\"  class=\"notoffered\">CSCI 1330</a> and recommended: one of <a href=\"/search/?p=CSCI%200530\" data-action=\"result-detail\" data-group=\"code:CSCI 0530\"  class=\"notoffered\">CSCI 0530</a>, <a href=\