In [None]:
import os
import json
import ast
from typing_extensions import TypedDict, List

from langgraph.graph import START, StateGraph, END
from langchain_core.documents import Document
from langchain_qdrant import FastEmbedSparse, QdrantVectorStore, RetrievalMode
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.chat_models import ChatOllama
from langchain_core.prompts import PromptTemplate

from scope_prompt import query_transformation_prompt, scope_prompt
from configs import QDRANT_URL, QDRANT_API_KEY, OLLAMA_URL

In [None]:
class ChatState(TypedDict):
    """State dictionary shared by the nodes of the chat pipeline graph."""
    # Question to answer; supplied by the caller in the initial state.
    user_query: str
    # Query string used for the vector search in `retrieve_documents`.
    # NOTE(review): presumably written by the `query_transformation` node,
    # which is not visible here -- confirm.
    transformed_query: str
    # `metadata` dict of each retrieved document, in hit order.
    metadata: List[dict]
    # `page_content` of each retrieved document, parallel to `metadata`.
    summaries: List[str]
    # Score returned with each hit by the vector store, parallel to `metadata`.
    similarity_scores: List[float]
    # Output of `extract_scope`: either normalised JSON text or the raw LLM reply.
    scope: str

In [None]:
# Template filled by `extract_scope`; `context` is its declared input variable.
scope_template = PromptTemplate(
    template=scope_prompt,
    input_variables=["context"],
)

# Template built directly from the imported `query_transformation_prompt` text;
# `from_template` derives the input variables from the placeholders it finds.
query_template = PromptTemplate.from_template(query_transformation_prompt)

In [None]:
def retrieve_documents(state: ChatState) -> dict:
    """Run a scored similarity search for the transformed query.

    Queries the module-level ``vector_store`` with
    ``state["transformed_query"]`` and asks for the top five hits.

    Args:
        state: Current pipeline state; only ``transformed_query`` is read.

    Returns:
        A partial state update holding three parallel lists, one entry per
        hit in the order the store returned them: ``metadata`` (each
        document's metadata), ``summaries`` (each document's
        ``page_content``) and ``similarity_scores``.
    """
    # Materialise the (document, score) pairs once so they can be walked
    # separately for each output column.
    scored_docs = list(
        vector_store.similarity_search_with_score(state["transformed_query"], k=5)
    )
    return {
        "metadata": [doc.metadata for doc, _ in scored_docs],
        "summaries": [doc.page_content for doc, _ in scored_docs],
        "similarity_scores": [score for _, score in scored_docs],
    }

In [None]:
def _strip_code_fence(text: str) -> str:
    """Return ``text`` without a surrounding Markdown ``` fence, if it has one."""
    if len(text) < 6 or not (text.startswith("```") and text.endswith("```")):
        return text
    inner = text[3:-3]
    # Drop an optional language tag (e.g. "json") on the opening fence line.
    tag, newline, rest = inner.partition("\n")
    if newline and (not tag.strip() or tag.strip().isalnum()):
        inner = rest
    return inner.strip()


def extract_scope(state: ChatState) -> dict:
    """Ask the LLM for the scope of the retrieved texts.

    Joins ``state["summaries"]`` with blank lines, fills ``scope_template``
    with the result, prints the filled prompt, and sends it to the
    module-level ``llm``.

    Args:
        state: Current pipeline state; only ``summaries`` is read.

    Returns:
        ``{"scope": text}``. When the reply (optionally wrapped in a Markdown
        code fence) parses to a dict or list -- as JSON or as a Python
        literal -- ``text`` is that structure re-serialised as indented JSON.
        Otherwise ``text`` is the stripped raw reply, unchanged.
    """
    context_text = "\n\n".join(state["summaries"])
    filled_prompt = scope_template.format(context=context_text)
    # Echo the exact prompt sent to the model so the run can be inspected.
    print("=== Filled scope prompt ===")
    print(filled_prompt)

    resp = llm.invoke(filled_prompt)
    output = resp.content.strip()
    # Parse a fence-free copy, but keep `output` intact for the raw fallback.
    candidate = _strip_code_fence(output)

    # JSON first: `ast.literal_eval` alone rejects true/false/null, so valid
    # JSON replies used to fall through un-normalised. The Python-literal
    # parser stays as a second attempt for single-quoted dict/list replies.
    for parse in (json.loads, ast.literal_eval):
        try:
            parsed = parse(candidate)
            if isinstance(parsed, (dict, list)):
                return {"scope": json.dumps(parsed, indent=2)}
        except (ValueError, SyntaxError, TypeError, RecursionError, MemoryError):
            # Not parseable by this parser, or (for Python literals) not
            # JSON-serialisable: try the next parser, then fall back below.
            continue

    return {"scope": output}

In [None]:
# Build the pipeline as a state graph whose shared state is a ChatState dict.
chat_builder = StateGraph(ChatState)
# Register the three processing steps as named nodes.
# NOTE(review): `query_transformation` is not defined in the cells visible
# here; it must already exist in the kernel when this cell runs.
chat_builder.add_node("query_transformation", query_transformation)
chat_builder.add_node("retrieve_documents", retrieve_documents)
chat_builder.add_node("extract_scope", extract_scope)

# Wire the nodes into a strictly linear flow:
# START -> query_transformation -> retrieve_documents -> extract_scope -> END.
chat_builder.add_edge(START, "query_transformation")
chat_builder.add_edge("query_transformation", "retrieve_documents")
chat_builder.add_edge("retrieve_documents", "extract_scope")
chat_builder.add_edge("extract_scope", END)

# Compile the builder into the runnable object invoked by the driver cell.
chat_pipeline = chat_builder.compile()

In [None]:
if __name__ == "__main__":
    # Start from a fully populated state: only `user_query` carries input,
    # every other key is an empty placeholder for the nodes to fill in.
    init: ChatState = {
        "user_query": "Based on the limitations in this paper, what future research gaps can you identify?",
        "transformed_query": "",
        "metadata": [],
        "summaries": [],
        "similarity_scores": [],
        "scope": "",
    }

    result = chat_pipeline.invoke(init)

    print("\n=== Paper Scope ===")
    print(result["scope"])
    print("\n=== Similarity Scores ===")
    print(result["similarity_scores"])
    print("\n=== Top Document Metadata ===")
    # The search may return no hits; indexing [0] on an empty list would
    # raise IndexError after the whole pipeline has already run.
    if result["metadata"]:
        print(json.dumps(result["metadata"][0], indent=2))
    else:
        print("No documents were retrieved.")