In [22]:
import os
import getpass
from langchain_ollama import OllamaEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore

import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
from langchain_docling import DoclingLoader

from langchain_together import ChatTogether
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain

from langgraph.graph import MessagesState, StateGraph
from langchain_core.tools import tool

from langchain_core.messages import SystemMessage
from langgraph.prebuilt import ToolNode

from langgraph.graph import END
from langgraph.prebuilt import ToolNode, tools_condition

from langgraph.checkpoint.memory import MemorySaver

from flask import Flask, request, jsonify
from flask_cors import CORS

In [24]:
os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_API_KEY"] = getpass.getpass("Enter API key for langchain")

Enter API key for langchain ········


In [26]:
if not os.environ.get("TOGETHER_API_KEY"):
  os.environ["TOGETHER_API_KEY"] = getpass.getpass("Enter API key for Together AI: ")

llm = ChatTogether(
    model="meta-llama/Llama-3-70b-chat-hf",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    api_key=os.environ["TOGETHER_API_KEY"],
    # other params...
)

Enter API key for Together AI:  ········


In [18]:
root_dir = os.getcwd()
FILE_PATH = root_dir + "\Input.docx"

loader = DoclingLoader(file_path=FILE_PATH)
docs = loader.load()

  FILE_PATH = root_dir + "\Input.docx"
Token indices sequence length is longer than the specified maximum sequence length for this model (551 > 512). Running this sequence through the model will result in indexing errors


In [28]:
embeddings = OllamaEmbeddings(model="llama3")
vector_store = InMemoryVectorStore(embeddings)

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)

# Index chunks
_ = vector_store.add_documents(documents=all_splits)

# Define prompt for question-answering
prompt = hub.pull("rlm/rag-prompt")


# Define state for application
class State(TypedDict):
    question: str
    context: List[Document]
    answer: str


graph_builder = StateGraph(MessagesState)



@tool(response_format="content_and_artifact")
def retrieve(query: str):
    """Retrieve information related to a query."""
    retrieved_docs = vector_store.similarity_search(query, k=2)
    serialized = "\n\n".join(
        (f"Source: {doc.metadata}\n" f"Content: {doc.page_content}")
        for doc in retrieved_docs
    )
    return serialized, retrieved_docs


# Step 1: Generate an AIMessage that may include a tool-call to be sent.
def query_or_respond(state: MessagesState):
    """Generate tool call for retrieval or respond."""
    llm_with_tools = llm.bind_tools([retrieve])
    response = llm_with_tools.invoke(state["messages"])
    # MessagesState appends messages to state instead of overwriting
    return {"messages": [response]}


# Step 2: Execute the retrieval.
tools = ToolNode([retrieve])


# Step 3: Generate a response using the retrieved content.
def generate(state: MessagesState):
    """Generate answer."""
    # Get generated ToolMessages
    recent_tool_messages = []
    for message in reversed(state["messages"]):
        if message.type == "tool":
            recent_tool_messages.append(message)
        else:
            break
    tool_messages = recent_tool_messages[::-1]

    # Format into prompt
    docs_content = "\n\n".join(doc.content for doc in tool_messages)
    system_message_content = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know."
        "\n\n"
        f"{docs_content}"
    )
    conversation_messages = [
        message
        for message in state["messages"]
        if message.type in ("human", "system")
        or (message.type == "ai" and not message.tool_calls)
    ]
    prompt = [SystemMessage(system_message_content)] + conversation_messages

    # Run
    response = llm.invoke(prompt)
    return {"messages": [response]}


In [30]:
graph_builder.add_node(query_or_respond)
graph_builder.add_node(tools)
graph_builder.add_node(generate)

graph_builder.set_entry_point("query_or_respond")
graph_builder.add_conditional_edges(
    "query_or_respond",
    tools_condition,
    {END: END, "tools": "tools"},
)
graph_builder.add_edge("tools", "generate")
graph_builder.add_edge("generate", END)

memory = MemorySaver()
graph = graph_builder.compile(checkpointer=memory)

# Specify an ID for the thread
config = {"configurable": {"thread_id": "abc123"}}

In [32]:
input_message = "Hi when is a CPA a business associate?"

for step in graph.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
    config=config,
):
    step["messages"][-1].pretty_print()



Hi when is a CPA a business associate?

Under the Health Insurance Portability and Accountability Act (HIPAA), a Certified Public Accountant (CPA) can be considered a Business Associate (BA) in certain circumstances.

A Business Associate is a person or organization that performs certain functions or activities that involve the use or disclosure of protected health information (PHI) on behalf of a Covered Entity (CE). Covered Entities include healthcare providers, health plans, and healthcare clearinghouses.

A CPA may be considered a Business Associate if they provide services to a Covered Entity that involve the use or disclosure of PHI. Here are some examples:

1. **Accounting and auditing services**: If a CPA firm provides accounting or auditing services to a healthcare provider or health plan, and in the course of those services, they have access to PHI, they may be considered a Business Associate.
2. **Financial analysis and consulting**: If a CPA firm provides financial analysi

In [34]:
def stream_graph_response(input_message):
    result = ""
    for step in graph.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
    config=config,
    ):
        result += step["messages"][-1].pretty_repr()

    return result

In [36]:
app = Flask(__name__)
CORS(app)  # Allows requests from any origin

@app.route('/get-ai-message', methods=['POST'])
def get_ai_message():
    data = request.json
    if not data:
        return jsonify({"error": "Invalid request"}), 400

    user_query = data.get('userQuery')
    response = stream_graph_response(user_query)
    
    return jsonify({"role": "assistant", "content": response})

if __name__ == '__main__':
    app.run(port=5000)

 * Serving Flask app '__main__'
 * Debug mode: off
