## ModuleRAG

In [1]:
from langchain_core.prompts import PromptTemplate
from langchain.schema.runnable import RunnableLambda
from my_utils import extract_json_from_string
from langchain_community.tools.tavily_search import TavilySearchResults


class ModuleRAG:
    """Factory and holder for the LLM chains used by the contract-review pipeline.

    Every chain has the same shape: a ChatML-formatted ``PromptTemplate`` piped
    into ``vllm_model`` and then into ``RunnableLambda(extract_json_from_string)``.
    ``extract_json_from_string`` comes from ``my_utils``; presumably it pulls the
    JSON object out of the raw model text -- confirm against that module.

    All chains are built eagerly in ``__init__`` and exposed through the
    ``get_*`` accessors (and as plain attributes of the same name).
    """

    def __init__(self, vllm_model):
        """Build every chain once.

        Args:
            vllm_model: the runnable language model placed in the middle of
                each chain (anything that supports the ``|`` pipe operator).
        """
        # These two must be set first: every create_* call below reads them.
        self.vllm_model = vllm_model
        self.runnable = RunnableLambda(extract_json_from_string)

        self.extract_reference_generator = self.create_extract_reference_generator()
        self.sematic_reference_generator = self.create_sematic_reference_generator()
        self.rewrite_clause_generator = self.create_rewrite_clause_generator()
        self.proposition_generator = self.create_proposition_generator()
        self.retrieval_grader = self.create_retrieval_grader()
        self.rag_chain_both = self.create_rag_chain_both()
        self.rag_chain_doc = self.create_rag_chain_doc()
        self.query_transformation_generator = self.create_query_transformation_generator()
        self.hallucination_grader = self.create_hallucination_grader()
        self.answer_grader = self.create_answer_grader()
        self.web_search_tool = self.create_web_search_tool()
        self.grade_web_search_docs = self.create_grade_web_search_docs()

    def _build_chain(self, template):
        """Return ``PromptTemplate(template) | vllm_model | json-extractor``.

        ``template`` uses f-string style placeholders, so literal braces in the
        prompt text must be written as ``{{`` / ``}}``.
        """
        prompt = PromptTemplate.from_template(template)
        return prompt | self.vllm_model | self.runnable

    def create_extract_reference_generator(self):
        """Chain that extracts and resolves cross-references in a contract passage.

        Prompt inputs: ``document``, ``metadata``.
        """
        # The JSON schema shown to the model is kept comma-correct so the model
        # is not nudged towards emitting unparsable JSON.
        extract_reference_template = """<|im_start|>system
You will receive:
1. A passage from a contract that may include references to other sections, articles, clauses, or parts of the contract (e.g., “as specified in Section 2”, “according to the clause above”, “refer to Article 3 below”).
2. Metadata providing context for the current passage, including details such as the current Chapter, Article, and Clause.

### Task:
#### Step 1: Extract References
- Identify and extract all phrases in the contract text that refer to other sections, articles, or clauses.

#### Step 2: Resolve References
- Use the provided metadata to determine the specific Chapter, Article, or Clause that each reference points to.
- Examples:
  -第5項 means clause 5
  - If the metadata indicates that the current passage is in Article 1:
    - "the above article" refers to Article 0 (or the last article of the previous section, if applicable).
    - "clause 3 below" refers to Clause 3 in the subsequent article or section.
- If a reference cannot be precisely resolved, set its "resolved" value to -1.
-"has_reference": "yes" if there is at least one reference, otherwise "no".
-"is_extractable": "yes" if the reference can be extracted and resolved, otherwise "no".
-"references": A list of extracted references, each containing:
-"text": The extracted reference phrase.
-"resolved": The resolved location, with -1 if it cannot be determined.

The response should be in following JSON format. The answer contain json only:
{{
  "has_reference" : "yes" | "no",
  "is_extractable" : "yes" | "no",
  "references": [
    {{
      "text": str,
      "resolved": {{
        "Chapter": int| -1,
        "Article": int| -1,
        "clause": int| -1,
        "sub_clause": int|-1
      }}
    }}
  ]
}}

<|im_end|>

<|im_start|>user
contract:{document}
metadata:{metadata}
<|im_end|>

<|im_start|>assistant
"""
        return self._build_chain(extract_reference_template)

    def get_extract_reference_generator(self):
        """Return the reference-extraction chain."""
        return self.extract_reference_generator

    def create_sematic_reference_generator(self):
        """Chain that judges whether a referenced text adds meaning to a main text.

        Prompt inputs: ``main_text``, ``referenced_text``.
        """
        sematic_reference_template = """<|im_start|>system
You are a language model specializing in analyzing relationships between texts. Your task is to determine whether a referenced text (Referenced Text) provides additional meaning that helps in understanding the main text (Main Text).

Task:
Check whether the referenced text provides necessary additional information to better understand the main text.

If the referenced text contains important supplementary information that clarifies, explains, or provides essential context for understanding the main text, respond with "yes".
If the referenced text does not add significant meaning or merely repeats existing information, respond with "no".

The response must be returned in following JSON format, return json only:
{{
"analysis": "A brief explanation of the relationship between the texts, must not include `"`.",
"provide_additional_meaning": "yes"|"no"

}}
<|im_end|>

<|im_start|>user
Main Text: {main_text}
Referenced Text: {referenced_text}
<|im_end|>

<|im_start|>assistant
"""
        return self._build_chain(sematic_reference_template)

    def get_sematic_reference_generator(self):
        """Return the semantic-reference chain."""
        return self.sematic_reference_generator

    def create_rewrite_clause_generator(self):
        """Chain that merges a main section with the sections it refers to.

        Prompt inputs: ``main_section``, ``refer_sections``.
        """
        # Requirement 5 names the same key ("generated_section") as the JSON
        # schema below, so the instruction and the expected output agree.
        rewrite_clause_template = """<|im_start|>system
You are a legal text processing expert. Your task is to merge two contract sections into a single coherent text while ensuring clarity, accuracy, and legal consistency.

main_section: This is the primary contract section, which may reference specific clauses from another section.
refer_section: This section contains detailed information referenced in main_section.
Requirements:
1.If the main_section references a specific clause in the refer_section, replace the reference with the actual content from the refer_section.
2.Completely remove any reference phrases such as "as defined in clause X", "according to article Y", or any similar wording. The final text should only contain the actual content without referring back to any section or clause.
3.Preserve the original meaning of the contract without altering its legal intent.
4.Ensure the final merged text maintains a clear, structured, and legally sound format.
5.The final "generated_section" must be self-contained and must not contain any reference phrases even if reference phrases are present in the refer_sections.
6.All newlines are represented using "\\n" instead of actual line breaks.
The response must be in following JSON format.:
{{
    "analysis": "Logical reasoning behind how the generated clause was created, including how references were replaced.",
    "generated_section": "The fully rewritten section with all references replaced by full content, properly formatted and structured, without any reference phrases.Ensure that in the JSON response"
}}
<|im_end|>

<|im_start|>user
main section:
{main_section}
refer sections:
{refer_sections}
<|im_end|>
<|im_start|>assistant
"""
        return self._build_chain(rewrite_clause_template)

    def get_rewrite_clause_generator(self):
        """Return the clause-rewriting chain."""
        return self.rewrite_clause_generator

    def create_proposition_generator(self):
        """Chain that splits a contract passage into self-contained propositions.

        Prompt input: ``document``.
        """
        proposition_template = """<|im_start|>system
The following input is a legal document or contract. Your task is to break down the document into simple, self-contained propositions that are easy to understand. Follow these steps:

1. Identify key sentences: 
Read the entire document and extract factual information, rules, or conditions. Ignore introductory or transitional phrases that do not contain substantial content.

2. Ensure completeness of definitions:  
- If a proposition defines a term (for example: "Xとは…"), ensure it contains a complete definition.  
- Do not generate incomplete propositions such as:  
    "A worker is defined as follows." → This does not provide a definition.  
    Instead, include the full definition within the same proposition:  
    "A worker is a person who joins the company according to the procedure set forth in Chapter 2 and meets the following criteria: [list of criteria]."  

3. Make each proposition self-contained:  
- Ensure the proposition is understandable without additional context.  
- Replace pronouns or vague references with explicit terms.  

4. Maintain accuracy and include all necessary details:  
- Retain legal references, dates, and conditions to avoid ambiguity.  
- If a rule contains multiple conditions, split them into separate propositions while preserving logical relationships.  

5. Avoid overlapping or redundant propositions:  
- Do not generate broad statements that are just incomplete versions of other propositions.  

6. Use appropriate legal or technical terms: Write all propositions in Japanese and use proper legal or technical terminology.
The response should be in Japanese and returned in following JSON format:
{{
"propositions": 
    [
    {{"proposition":"Generated proposition in japanese"}}
    ]
}}
<|im_end|>

<|im_start|>user
contract:{document}
<|im_end|>

<|im_start|>assistant
"""
        return self._build_chain(proposition_template)

    def get_proposition_generator(self):
        """Return the proposition-generation chain."""
        return self.proposition_generator

    def create_retrieval_grader(self):
        """Chain that grades whether a retrieved law is direct evidence for a contract segment.

        Prompt inputs: ``context`` (the law), ``contract``.
        """
        retrieval_grader_template = """<|im_start|>system
You are an evaluator assessing the relevance of a retrieved law to a given contract segment.
Evaluation Criteria:
Answer "yes" only if the retrieved law can be used as direct evidence to determine whether the contract segment complies with the law or violates the law.
In all other cases, including where the law is not directly applicable or does not provide clear evidence of compliance or non-compliance, answer "no".
The law must have a direct and actionable link to the contract segment's compliance or violation of legal standards.

Your response should focus on whether the law can serve as direct evidence of the contract’s legal compliance or non-compliance.

Provide a binary score to indicate relevance:
"yes": The law provides direct evidence of the contract segment’s compliance or violation of the law.
"no": The law does not provide direct evidence regarding compliance or violation.

The response should be returned in following JSON format:
{{
"binary_score": "yes" |"no"
}}
<|im_end|>

<|im_start|>user
Law: [{context}].
Contract Segment: [{contract}]

<|im_end|>

<|im_start|>assistant
"""
        return self._build_chain(retrieval_grader_template)

    def get_retrieval_grader(self):
        """Return the retrieval-grading chain."""
        return self.retrieval_grader

    def create_rag_chain_both(self):
        """Chain that evaluates a clause against laws, given the clause and its rewrite.

        Prompt inputs: ``context`` (laws), ``contract``, ``new_query`` (the
        rewritten clause).
        """
        # The response-format example is wrapped in a complete ```json fence.
        template_both = """<|im_start|>system
You are a legal AI specializing in contract evaluation. Your task is to analyze a given contract clause and determine whether it *complies with, violates, or lacks sufficient information* to conclude compliance based on the provided legal provisions.

### *Inputs:*  
- *Laws:* Relevant legal provisions related to the contract.  
- *Contract Clause:* The specific contract segment to be evaluated.  
- *Rewritten Contract Clause:* An alternative, clearer version of the contract clause for better understanding.

### *Evaluation Criteria:*  
1. *Compliance:* The contract fully adheres to the relevant legal provisions.  
2. *Violation:* The contract contradicts or does not meet the legal requirements.  
3. *Insufficient Information:* The provided contract clause lacks enough details to determine compliance or violation.  

### *Response Format (in Japanese, JSON format):*  
```json
{{
"evaluation": "compliance" | "violation" | "insufficient_information",
"explanation": "A concise explanation of why the contract complies, violates, or lacks sufficient information."
}}
```

<|im_end|>

<|im_start|>user
Laws: [{context}].  
Contract Clause: [{contract}].  
Rewritten Contract Clause: [{new_query}].  
<|im_end|>

<|im_start|>assistant
"""
        return self._build_chain(template_both)

    def get_rag_chain_both(self):
        """Return the evaluation chain that takes both clause versions."""
        return self.rag_chain_both

    def create_rag_chain_doc(self):
        """Chain that evaluates a clause against laws using only the original clause.

        Prompt inputs: ``context`` (laws), ``contract``.
        """
        # The ```json fence around the response-format example is closed
        # before the end of the system turn.
        template_doc = """<|im_start|>system
You are a legal AI specializing in contract evaluation. Your task is to analyze a given contract clause and determine whether it *complies with, violates, or lacks sufficient information* to conclude compliance based on the provided legal provisions.  

### *Inputs:*  
- *Laws:* Relevant legal provisions related to the contract.  
- *Contract Clause:* The specific contract segment to be evaluated.  

### *Evaluation Criteria:*  
1. *Compliance:* The contract fully adheres to the relevant legal provisions.  
2. *Violation:* The contract contradicts or does not meet the legal requirements.  
3. *Insufficient Information:* The provided contract clause lacks enough details to determine compliance or violation.  

### *Response Format (in Japanese, JSON format):*  
```json
{{
"evaluation": "compliance" | "violation" | "insufficient_information",
"explanation": "A concise explanation of why the contract complies, violates, or lacks sufficient information."
}}
```
<|im_end|>

<|im_start|>user
Laws: [{context}].
Contract Segment: [{contract}]

<|im_end|>

<|im_start|>assistant
"""
        return self._build_chain(template_doc)

    def get_rag_chain_doc(self):
        """Return the evaluation chain that takes only the original clause."""
        return self.rag_chain_doc

    def create_query_transformation_generator(self):
        """Chain that produces paraphrased/detailed/generalized/expanded query variants.

        Prompt input: ``query``.
        """
        query_transformation_template = """<|im_start|>system
You are a natural language processing expert. Your task is to transform the input query into different variations based on the following criteria:  
### Instructions:  
1. **Paraphrasing (1 variations):** Rewrite the query in a different way while keeping the meaning unchanged.  
2. **Granularity Adjustment - More Detailed (1 variations):** Rewrite the query with a higher level of detail by adding specific elements without changing its meaning.  
3. **Granularity Adjustment - Less Detailed (1 variations):** Rewrite the query in a more generalized way by removing details while maintaining the overall meaning.  
4. **Query Expansion (1 variations):** Expand the query by adding relevant information to make it more comprehensive.  

The response must be returned in the following JSON format:
{{
"paraphrased_queries": [
    "Paraphrased query"
],
"detailed_queries": [
    "More detailed query"
],
"generalized_queries": [
    "More generalized query"
],
"expanded_queries": [
    "Expanded query"
]
}}
<|im_end|>

<|im_start|>user
Input Query:{query}  
<|im_end|> 
<|im_start|>assistant
"""
        return self._build_chain(query_transformation_template)

    def get_query_transformation_generator(self):
        """Return the query-transformation chain."""
        return self.query_transformation_generator

    def create_hallucination_grader(self):
        """Chain that checks a generated explanation relies only on the given laws and contract.

        Prompt inputs: ``laws``, ``contract``, ``explanation``.
        """
        hallucination_grader_template = """<|im_start|>system
The input consists of laws, a contract, and a generated explanation.  
Your task is to determine whether the generated explanation strictly relies on the provided laws and contract without introducing new context.  

### Evaluation criteria:  
- If the explanation only uses information directly from the laws and contract, return "yes".  
- The explanation may include a conclusion (e.g., stating whether the contract is correct, incorrect, or lacks enough information), and this is **acceptable** as long as the reasoning **before** the conclusion strictly follows the laws and contract.  
- Return "no" **only if** the explanation introduces new context, makes assumptions, or includes information not explicitly stated in the laws and contract.  

### Examples:

#### Example 1 
**Laws:** "Employers must give at least 30 days’ notice before termination."  
**Contract:** "Employees must be given at least 40 days notice before termination."  
**Generated Explanation:**  
"The contract states that employees must be given at least 40 days' notice before termination, which is stricter than the law requiring 30 days. This means the contract is legally valid but more protective than the minimum legal requirement."  

*Analysis: The explanation strictly follows the laws and contract without adding new information. The conclusion is based entirely on the provided text.*  
*is_supported: yes
Your response must be in the following JSON format:  
{{
  "analysis": "A brief explanation of whether the generated explanation strictly follows the laws and contract.",
  "is_supported": "yes" or "no"
}}
<|im_end|>

<|im_start|>user
laws: [{laws}]  
contract: [{contract}]  
generated explanation: [{explanation}]  
<|im_end|>  
<|im_start|>assistant

"""
        return self._build_chain(hallucination_grader_template)

    def get_hallucination_grader(self):
        """Return the hallucination-grading chain."""
        return self.hallucination_grader

    def create_answer_grader(self):
        """Chain that checks whether a generated explanation actually explains the contract.

        Prompt inputs: ``contract``, ``explanation``.
        """
        answer_grader_template = """<|im_start|>system
The input consists of a contract and a generated explanation.
Your task is to determine whether the generated explanation truly explains the contract.

### Evaluation criteria:
- If the explanation correctly interprets or clarifies the contract, return "yes".
- If the explanation does not accurately explain the contract, return "no".

The response must be returned in the following JSON format:
{{
"is_valid_explanation": "yes" or "no"
}}
<|im_end|>

<|im_start|>user
contract: [{contract}]  
generated explanation: [{explanation}]  
<|im_end|> 
<|im_start|>assistant
"""
        return self._build_chain(answer_grader_template)

    def get_answer_grader(self):
        """Return the answer-grading chain."""
        return self.answer_grader

    def get_answer_grander(self):
        """Misspelled legacy name of ``get_answer_grader``; kept for existing callers."""
        return self.get_answer_grader()

    def create_web_search_tool(self):
        """Create the Tavily web-search tool (one result per query, no images)."""
        return TavilySearchResults(max_results=1, include_images=False)

    def get_web_search_tool(self):
        """Return the web-search tool."""
        return self.web_search_tool

    def create_grade_web_search_docs(self):
        """Chain that grades whether a web-search result is direct legal evidence.

        Prompt inputs: ``context`` (the reference), ``contract``.
        """
        websearch_grader_template = """<|im_start|>system
You are an evaluator assessing whether a retrieved reference can serve as **direct legal evidence** to determine if a given contract segment complies with legal standards.

### **Evaluation Criteria:**
- Answer **"yes"** **only if** the retrieved reference is a law, regulation, or legally binding document that provides **direct evidence** for assessing the contract segment’s compliance or violation.
- Answer **"no"** in all other cases, including:
  - The reference is **not an actual law** (e.g., a general explanation, opinion, or guideline).
  - The reference **does not directly address** the criteria mentioned in the contract segment.
  - The reference is **too vague or general** to be used as clear legal evidence.

Your response should be **strictly based on whether the reference can serve as legal proof**.

### **Reasoning Steps:**
Follow these steps to evaluate the relevance of the reference context:

**Step 1: Understand the Contract Segment**
- Read the contract segment carefully to identify its key terms, obligations, rights, or conditions.
- Highlight the main requirements or claims in the segment. For example:
  - What is being promised or agreed upon?
  - Are there specific conditions, deadlines, or obligations?
  - What legal standards or regulations might apply?

**Step 2: Understand the Reference Context**
- Read the reference context carefully to understand the information it provides.
- Identify any legal provisions, rules, or facts that relate to the contract segment. For example:
  - Does the reference context mention laws, regulations, or standards that apply to the contract segment?
  - Does it provide evidence or context that supports or challenges the segment's compliance?

**Step 3: Compare and Analyze**
- Compare the reference context with the contract segment. Look for:
  - **Alignment**: Does the reference context confirm or support the contract segment's compliance?
  - **Contradiction**: Does the reference context contradict or disprove the contract segment's compliance?
  - **Gaps**: Is the reference context insufficient or irrelevant to the contract segment?
- Evaluate whether the reference context provides **direct and actionable evidence** to determine compliance or violation.

### Expected Output:
Provide your response in the following JSON format:
{{
  "analysis": "your step-by-step reasoning and analysis",
  "binary_score": "yes" | "no"
}}

### Examples:
#### Example 1:
- Reference Context: "According to Article 12 of the Labor Law, employees must be provided with a minimum of 12 days of paid annual leave."
- Contract Segment: "The company agrees to provide employees with 10 days of paid annual leave."
- Analysis: "The reference context states that the Labor Law requires a minimum of 12 days of paid annual leave, while the contract segment provides only 10 days. This directly contradicts the legal requirement, proving non-compliance."
- Binary Score: "yes"

#### Example 2:
- Reference Context: "According to Article 15 of the Tax Code, businesses must file their tax returns by March 31st each year."
- Contract Segment: "The company agrees to deliver the product by January 15, 2024."
- Analysis: "The reference context discusses tax filing deadlines, which is unrelated to the contract segment about product delivery. There is no direct link between the two."
- Binary Score: "no"

#### Example 3:
- Reference Context: "According to Article 20 of the Consumer Protection Law, sellers must provide a 30-day return policy for defective products."
- Contract Segment: "The seller agrees to provide a 14-day return policy for defective products."
- Analysis: "The reference context states that the Consumer Protection Law requires a 30-day return policy, while the contract segment provides only 14 days. This directly contradicts the legal requirement, proving non-compliance."
- Binary Score: "yes"

#### Example 4:
- Reference Context: "According to Article 5 of the Environmental Protection Act, companies must reduce carbon emissions by 20% by 2030."
- Contract Segment: "The company agrees to reduce carbon emissions by 15% by 2030."
- Analysis: "The reference context states that the Environmental Protection Act requires a 20% reduction in carbon emissions, while the contract segment commits to only 15%. This directly contradicts the legal requirement, proving non-compliance."
- Binary Score: "yes"

#### Example 5:
- Reference Context: "According to Article 8 of the Data Privacy Law, companies must obtain explicit consent before collecting personal data."
- Contract Segment: "The company agrees to deliver the product within 7 days of purchase."
- Analysis: "The reference context discusses data privacy requirements, which is unrelated to the contract segment about product delivery. There is no direct link between the two."
- Binary Score: "no"

<|im_end|>

<|im_start|>user
Reference Context: [{context}].
Contract Segment: [{contract}].
<|im_end|>

<|im_start|>assistant"""
        return self._build_chain(websearch_grader_template)

    def get_grade_web_search_docs(self):
        """Return the web-search grading chain."""
        return self.grade_web_search_docs

## ModuleRetrieval

In [2]:
import json
import os
import hashlib
from langchain.vectorstores import Chroma
from langchain_core.documents import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

class ModuleRetrieval:
    """Build or load a persisted Chroma vector store and expose it as a retriever.

    If ``is_embedding`` is false and ``vector_db_path`` already exists, the
    persisted store is opened. Otherwise the JSON file at ``data_path`` is read,
    de-duplicated, split into chunks, embedded, and written to
    ``vector_db_path``.
    """

    def __init__(
        self,
        data_path,
        is_embedding=False,
        vector_db_path="vectorDB",
        embedding_model_name="intfloat/multilingual-e5-large",
        top_k=10,
    ):
        """Create ``self.retriever``.

        Args:
            data_path: path to a JSON file holding a list of records, each with
                ``"content"`` and ``"metadata"`` keys. Only read when the store
                has to be (re)built.
            is_embedding: when true, always rebuild the store from ``data_path``.
            vector_db_path: Chroma persistence directory.
            embedding_model_name: HuggingFace model used for both indexing and
                querying.
            top_k: number of documents the retriever returns per query.
        """
        self.vector_db_path = vector_db_path

        # The same embedding function is used for indexing and querying. It is
        # also passed when re-opening a persisted store, so that incoming
        # queries are embedded with the model the stored vectors came from.
        embedding_function = HuggingFaceEmbeddings(model_name=embedding_model_name)
        search_kwargs = {"k": top_k}

        # Reuse the persisted store unless a rebuild was requested.
        if not is_embedding and os.path.exists(vector_db_path):
            vector_store = Chroma(
                persist_directory=vector_db_path,
                embedding_function=embedding_function,
            )
            self.retriever = vector_store.as_retriever(search_kwargs=search_kwargs)

            print('TRUNGGGG CHUNK 1', len(vector_store))
            print("Loaded existing vectorDB.")
            return

        # Read the raw records from the JSON file.
        with open(data_path, "r", encoding="utf-8") as f:
            data_docs = json.load(f)

        # Drop records whose content and metadata are both identical.
        unique_documents = [
            Document(page_content=record["content"], metadata=record["metadata"])
            for record in self._unique_records(data_docs)
        ]

        print(f"Unique documents: {len(unique_documents)}")

        # Split the documents into overlapping chunks.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
        chunks = text_splitter.split_documents(unique_documents)

        print('TRUNGGGG CHUNK 2', len(chunks))
        # Embed the chunks and persist the store so it can be reloaded later.
        vector_store = Chroma.from_documents(
            documents=chunks,
            embedding=embedding_function,
            persist_directory=vector_db_path,
        )

        print('TRUNGGGGG    CHUNK 3', len(vector_store))
        print("Vector database created.")
        self.retriever = vector_store.as_retriever(search_kwargs=search_kwargs)

    @staticmethod
    def _unique_records(records):
        """Return ``records`` without exact duplicates, keeping first occurrences in order.

        Two records are duplicates when their key-sorted JSON serialisations are
        identical, so key order inside a record does not matter.
        """
        seen = set()
        unique = []
        for record in records:
            serialised = json.dumps(record, sort_keys=True)
            digest = hashlib.md5(serialised.encode('utf-8')).hexdigest()
            if digest in seen:
                continue
            seen.add(digest)
            unique.append(record)
        return unique

    def get_module(self):
        """Return the retriever built in ``__init__``."""
        return self.retriever


## Graph RAG

In [3]:
from copy import deepcopy
from my_utils import retry_failed_batches
from typing_extensions import TypedDict
from langchain_core.documents import Document
from langgraph.graph import END, StateGraph
from typing import List, Dict, Tuple
from collections import defaultdict
from langchain.text_splitter import RecursiveCharacterTextSplitter

class Proposition(TypedDict):
    """A single proposition plus the documents attached to it."""
    proposition_content: str # TODO: revise once metadata is extracted
    documents: List[Document]  # Document objects are used here rather than List[str]
    filtered_documents: List[Document]  # presumably the subset of `documents` kept after grading -- confirm

class reference(TypedDict):
    """A reference phrase and a list of integer indices associated with it."""
    text: str
    index_list: List[int]  # NOTE(review): what the indices point into is not shown here -- confirm
    
class Question(TypedDict):
    """Per-question working state: the question, its propositions, references and results."""
    question_content: Document
    rewrite_question_content: str # rewrite with ref
    propositions: List[Proposition]
    ref_propositions: List[Proposition]
    direct_reference:List[Document]
    direct_reference_text:List[reference]
    direct_reference_index: List[int]
    merged_reference_text: str
    generation: str
    all_docs: List[Document] # NOTE(review): original comment was a placeholder; purpose not documented
    web_search_docs: List[Document]
    filtered_web_search_docs: List[Document]
    # regen_count: int


class propositions_to_rewrite(TypedDict):
    """Locates one proposition by index and keeps its original value."""
    question_index: int
    proposition_index: int
    ref_propositions_index: int
    original_proposition: Proposition
    
class RootText(TypedDict):
    """Text lookup tables for chapters, articles and clauses."""
    Chapter: Dict[int, str]  # keyed by an int, presumably the chapter number -- confirm
    Article: Dict[int, str]  # keyed by an int, presumably the article number -- confirm
    clause: Dict[Tuple[int, int], str]  # keyed by an int pair, presumably (article, clause) -- confirm

class GraphState(TypedDict):
    """State schema handed to ``StateGraph`` when the workflow is built."""
    questions: List[Question]  # holds a list of questions rather than a single question
    processed_question_index: List[int]
    current_question_index: List[int]
    propositions_to_rewrite : List[propositions_to_rewrite]
    rewrite_count :int # rewrite proposition
    regen_count :int #
    question_to_regen_index: List[int]#
    root_text: RootText


class GraphRAG:
    def __init__(self, extract_reference_generator, sematic_reference_generator, rewrite_clause_generator, proposition_generator, retriever, retrieval_grader, rag_chain_both, rag_chain_doc, query_transformation_generator, hallucination_grader, answer_grader, web_search_tool, grade_web_search_docs):
        """Keep the injected chains/tools and compile the workflow graph.

        The compiled graph is stored in ``self.app``. Flow, as wired below:
        direct_reference -> grade_direct_reference -> rewrite_question_content
        -> loopback_node; from loopback_node either END or create_propositions
        -> retrieve -> grade_documents -> check_empty_propositions; from there
        either query_transformation (back to grade_documents) or web_search ->
        grade_web_search_docs -> generation -> answer_grade_node, which routes
        back to generation or to loopback_node.
        """
        # Injected dependencies.
        self.extract_reference_generator = extract_reference_generator
        self.sematic_reference_generator = sematic_reference_generator
        self.rewrite_clause_generator = rewrite_clause_generator
        self.proposition_generator = proposition_generator
        self.retriever = retriever
        self.retrieval_grader = retrieval_grader
        self.rag_chain_both = rag_chain_both
        self.rag_chain_doc = rag_chain_doc
        self.query_transformation_generator = query_transformation_generator
        self.hallucination_grader = hallucination_grader
        self.answer_grader = answer_grader
        self.web_search_tool = web_search_tool
        # NOTE(review): the ``grade_web_search_docs`` argument is never stored;
        # the node registered below binds ``self.grade_web_search_docs``,
        # presumably a method defined further down this class -- confirm.

        def check_related_laws(state):
            # One query-transformation attempt is allowed while propositions
            # still need rewriting; every other case goes to web search.
            pending = len(state["propositions_to_rewrite"]) != 0
            if pending and state["rewrite_count"] < 1:
                return "CANNOT FIND RELATED LAWS"
            return "web_search"

        def check_answer(state):
            # Regenerate while some answers were rejected and the retry budget
            # is not used up; otherwise hand control back to the loop node.
            retry = state["question_to_regen_index"] != [] and state["regen_count"] <= 1
            return "UNSATISFACTORY ANSWER" if retry else "loopback_node"

        graph = StateGraph(GraphState)

        # Node name -> handler, registered in this order.
        node_table = (
            ("direct_reference", self.direct_reference),
            ("grade_direct_reference", self.grade_direct_reference),
            ("rewrite_question_content", self.rewrite_question_content),
            ("loopback_node", self.loopback_node),
            ("create_propositions", self.create_propositions),
            ("retrieve", self.retrieve),
            ("grade_documents", self.grade_documents),
            ("generation", self.generate),
            ("answer_grade_node", self.answer_grade_node),
            ("query_transformation", self.query_transformation),
            ("check_empty_propositions", self.check_empty_propositions),
            ("web_search", self.web_search),
            ("grade_web_search_docs", self.grade_web_search_docs),
        )
        for node_name, handler in node_table:
            graph.add_node(node_name, handler)

        graph.set_entry_point("direct_reference")
        for source, target in (
            ("direct_reference", "grade_direct_reference"),
            ("grade_direct_reference", "rewrite_question_content"),
            ("rewrite_question_content", "loopback_node"),
        ):
            graph.add_edge(source, target)

        graph.add_conditional_edges(
            "loopback_node",
            self.should_create_proposition,
            {
                "STILL HAVE INDEPENDENT CHUNK": "create_propositions",
                "NO INDEPENDENT CHUNK": END,
            },
        )
        for source, target in (
            ("create_propositions", "retrieve"),
            ("retrieve", "grade_documents"),
            ("grade_documents", "check_empty_propositions"),
        ):
            graph.add_edge(source, target)

        graph.add_conditional_edges(
            "check_empty_propositions",
            check_related_laws,
            {
                "CANNOT FIND RELATED LAWS": "query_transformation",
                "web_search": "web_search",
            },
        )
        graph.add_edge("web_search", "grade_web_search_docs")
        graph.add_edge("grade_web_search_docs", "generation")

        graph.add_conditional_edges(
            "answer_grade_node",
            check_answer,
            {
                "UNSATISFACTORY ANSWER": "generation",
                "loopback_node": "loopback_node",
            },
        )
        graph.add_edge("query_transformation", "grade_documents")
        graph.add_edge("generation", "answer_grade_node")

        self.app = graph.compile()

    def get_app(self):
        """Return the compiled workflow stored in ``self.app``."""
        return self.app

    def direct_reference(self, state):
        """Resolve explicit cross-references between the contract passages.

        Every passage in ``state["questions"]`` is sent, together with a short
        description of its position (chapter / article / clause / sub-clause),
        to ``self.extract_reference_generator``. Each reference the model
        resolved is matched against the passages in ``state["questions"]`` and
        turned into reference-text objects. A reference whose full text is 512
        characters or longer is replaced by its next-level children when such
        children exist.

        The chapter number is ignored whenever an article number is known,
        because the chapter resolved by the model may be wrong.

        Args:
            state: graph state. Reads ``state["questions"]`` (each item has a
                ``question_content`` exposing ``page_content`` and
                ``metadata``) and ``state["root_text"]``.

        Returns:
            A deep copy of ``state`` in which every question carries
            ``direct_reference`` (matched passages), ``direct_reference_index``
            (their positions) and ``direct_reference_text`` (reference-text
            objects with ``full_text`` and ``index_list``), and in which
            ``processed_question_index`` is reset to ``[]``.
        """
        level_order = ["Chapter", "Article", "clause", "sub_clause"]
        level_dict = {
            "Chapter": "chapter_number",
            "Article": "article_number",
            "clause": "clause_number",
            "sub_clause": "sub_clause_number",
        }
        max_length = 512

        def is_unset(value):
            """True when a level number is the "not specified" marker.

            Compared as text so that both ``-1`` and ``"-1"`` are recognised.
            """
            return str(value) == '-1'

        def get_min_level(refer):
            """Return the deepest level of ``refer`` that is specified."""
            for level in reversed(level_order):
                if not is_unset(refer[level]):
                    return level
            return "Chapter"

        def find_sub_refer(refer):
            """List the distinct references one level below ``refer``.

            Returns ``[]`` when ``refer`` is already at the deepest level.
            """
            min_pos = level_order.index(get_min_level(refer))
            next_pos = min_pos + 1
            if next_pos >= len(level_order):
                # A sub-clause has nothing below it to split into.
                return []
            # The chapter is not compared once the article is known.
            first_pos = 0 if is_unset(refer['Article']) else 1
            compared_levels = level_order[first_pos:min_pos + 1]

            # A dict keyed by the item tuple removes duplicates while keeping
            # first-seen order, so the result is deterministic.
            sub_refer = {}
            for doc in state["questions"]:
                doc_metadata = doc["question_content"].metadata
                mismatch = any(
                    level_dict[level] in doc_metadata
                    and doc_metadata[level_dict[level]] != str(refer[level])
                    for level in compared_levels
                )
                if mismatch:
                    continue
                dict_refer = {}
                for level in level_order[:next_pos + 1]:
                    dict_refer[level] = doc_metadata[level_dict[level]]
                # Levels below the child level are left unspecified.
                for level in level_order[next_pos + 1:]:
                    dict_refer[level] = -1
                sub_refer.setdefault(tuple(dict_refer.items()), dict_refer)
            return list(sub_refer.values())

        def refer2refer_text(refer):
            """Build the reference-text object for ``refer``.

            Returns ``{}`` when the reference cannot be resolved. Entries of
            ``state["root_text"]`` are copied, never modified in place.
            """
            min_level = get_min_level(refer)

            if min_level == "sub_clause":
                if is_unset(refer["Article"]) or is_unset(refer["clause"]):
                    print("---CAN'T FIND SUB_CLAUSE---")
                    return {}
                refer_obj = {}
                # Sub-clauses are not in root_text; take the text from the
                # matching passage (the last match wins).
                for ref_q in state["questions"]:
                    content = ref_q["question_content"]
                    ref_meta = content.metadata
                    if (ref_meta["article_number"] == str(refer["Article"])
                            and ref_meta["clause_number"] == str(refer["clause"])
                            and ref_meta["sub_clause_number"] == str(refer["sub_clause"])):
                        refer_obj = {
                            "chapter": f"""第{ref_meta["chapter_number"]}章　{ref_meta["chapter_title"]} \n """,
                            "article": f"""第{ref_meta["article_number"]}条（{ref_meta["article_title"]}）\n """,
                            "clause": f"""{ref_meta["clause_number"]}．""",
                            "sub_clause": f"""{content.page_content}""",
                            "text": "",
                        }
                        refer_obj["full_text"] = (
                            refer_obj["chapter"] + refer_obj["article"]
                            + refer_obj["clause"] + refer_obj["sub_clause"]
                            + refer_obj["text"]
                        )
                return refer_obj

            if min_level == "clause":
                if is_unset(refer["Article"]):
                    print("---CAN'T FIND CLAUSE---")
                    return {}
                key = (str(refer["Article"]), str(refer["clause"]))
                parts = ("chapter", "article", "clause", "text")
            elif min_level == "Article":
                key = str(refer["Article"])
                parts = ("chapter", "article", "text")
            else:
                key = str(refer["Chapter"])
                parts = ("chapter", "text")

            stored = state["root_text"][min_level].get(key)
            if stored is None:
                # The model may resolve a reference to a unit that does not
                # exist in this contract; skip it instead of crashing.
                print(f"---CAN'T FIND {min_level.upper()}---")
                return {}
            refer_obj = dict(stored)
            refer_obj["full_text"] = "".join(refer_obj[part] for part in parts)
            return refer_obj

        def get_list_index_from_refer(refer):
            """Return the positions of the passages that ``refer`` points to."""
            # The chapter is not compared once the article is known.
            compared_levels = level_order if is_unset(refer["Article"]) else level_order[1:]
            matches = []
            for ref_q_index, ref_q in enumerate(state["questions"]):
                ref_meta = ref_q["question_content"].metadata
                if all(
                    is_unset(refer[level]) or ref_meta[level_dict[level]] == str(refer[level])
                    for level in compared_levels
                ):
                    matches.append(ref_q_index)
            return matches

        def get_reftext(refer):
            """Return the reference-text objects for ``refer``.

            Short references are returned whole; long ones are replaced by
            their children, recursively. Unresolvable references yield ``[]``.
            """
            refer_obj = refer2refer_text(refer)
            if "full_text" not in refer_obj:
                return []
            refer_obj["index_list"] = get_list_index_from_refer(refer)
            if len(refer_obj["full_text"]) < max_length:
                return [refer_obj]

            depth = level_order.index(get_min_level(refer))
            # Only descend into children that are strictly more specific; a
            # passage without a deeper level number would otherwise reproduce
            # ``refer`` itself and recurse forever.
            sub_refers = [
                sub_refer for sub_refer in find_sub_refer(refer)
                if level_order.index(get_min_level(sub_refer)) > depth
            ]
            ref_texts = []
            for sub_refer in sub_refers:
                ref_texts.extend(get_reftext(sub_refer))
            # Fall back to the whole text when nothing could be split off.
            return ref_texts or [refer_obj]

        print("---DIRECT REFERENCE---")
        new_graph_state = deepcopy(state)

        # One LLM input per passage: the text plus its position in the contract.
        batch_inputs = []
        for question in new_graph_state["questions"]:
            question_content = question["question_content"]
            question_metadata = getattr(question_content, "metadata", None) or {}
            chapter = question_metadata.get("chapter_number")
            metadata_parts = [
                f"current chapter: {chapter}" if chapter else 'current chapter: ""'
            ]
            for label, key in (
                ("article", "article_number"),
                ("clause", "clause_number"),
                ("sub-clause", "sub_clause_number"),
            ):
                value = question_metadata.get(key)
                if value:
                    metadata_parts.append(f"current {label}: {value}")
            entry = {
                "document": question_content.page_content,
                "metadata": ", ".join(metadata_parts),
            }
            print("entry+++++++", entry)
            batch_inputs.append(entry)

        responses = self.extract_reference_generator.batch(batch_inputs) if batch_inputs else []
        responses = retry_failed_batches(batch_inputs, responses, self.extract_reference_generator)
        print("responses+++++++", responses)

        questions = []
        for q_idx, question in enumerate(new_graph_state["questions"]):
            new_question = question.copy()
            new_question["direct_reference"] = []
            new_question["direct_reference_index"] = []
            new_question["direct_reference_text"] = []
            response = responses[q_idx]
            if response['has_reference'] == "yes" and response["is_extractable"] == "yes":
                metadata = question["question_content"].metadata
                seen_texts = set()
                for found in response["references"]:
                    resolved = found["resolved"]
                    for level in level_order:
                        resolved.setdefault(level, -1)
                    if all(is_unset(resolved[level]) for level in level_order):
                        continue

                    # A bare "clause N" / "item N" refers to the passage's own
                    # article (and clause), so fill the missing parent levels.
                    if not is_unset(resolved["sub_clause"]):
                        if is_unset(resolved["clause"]):
                            resolved["clause"] = metadata["clause_number"]
                        if is_unset(resolved["Article"]):
                            resolved["Article"] = metadata["article_number"]
                    if not is_unset(resolved["clause"]) and is_unset(resolved["Article"]):
                        resolved["Article"] = metadata["article_number"]

                    matched = get_list_index_from_refer(resolved)
                    if not matched:
                        continue
                    for ref_q_index in matched:
                        new_question["direct_reference"].append(
                            state["questions"][ref_q_index]["question_content"]
                        )
                        new_question["direct_reference_index"].append(ref_q_index)
                    # Accumulate the texts of every reference (the same text is
                    # kept once) instead of keeping only the last reference.
                    for ref_text in get_reftext(resolved):
                        if ref_text["full_text"] not in seen_texts:
                            seen_texts.add(ref_text["full_text"])
                            new_question["direct_reference_text"].append(ref_text)
            questions.append(new_question)

        # Reset once, also when there are no questions at all.
        new_graph_state["processed_question_index"] = []
        new_graph_state["questions"] = questions
        return new_graph_state
    
    def grade_direct_reference(self, state):
        """Filter the direct references of each question and merge their text.

        - Questions with more than one entry in ``direct_reference_text`` have
          every entry checked by ``self.sematic_reference_generator``; only
          entries answered ``provide_additional_meaning == "yes"`` are kept.
        - ``merged_reference_text`` is always the merged text of the kept
          entries ("" when there are none), also for questions with a single
          reference.
        - When at least one question was graded, ``direct_reference_index`` of
          every question is rebuilt from the ``index_list`` of its entries.

        Args:
            state: graph state; it is not modified.

        Returns:
            An updated deep copy of ``state``.
        """
        print("---GRADE REFERENCE---")

        def nested_defaultdict():
            return {"values": [], "sub": defaultdict(nested_defaultdict)}

        def merge_filtered_list(filtered_list):
            """Merge reference objects into one text grouped by their headers.

            Entries are grouped chapter -> article -> clause -> sub_clause so
            that a shared header is emitted only once; a level that is missing
            or equal to "-1" is treated as absent.
            """
            merged_dict = defaultdict(nested_defaultdict)

            for item in filtered_list:
                node = merged_dict[item["chapter"]]
                for level in ("article", "clause", "sub_clause"):
                    header = item.get(level, "-1")
                    if header == "-1":
                        break
                    node = node["sub"][header]
                node["values"].append(item["text"].strip())

            def format_dict(d):
                result = list(d["values"])
                for key, sub_d in d["sub"].items():
                    result.append(key)
                    result.extend(format_dict(sub_d))
                return result

            merged_result = []
            for chapter, content in merged_dict.items():
                merged_result.append(chapter)
                merged_result.extend(format_dict(content))
            return "\n".join(merged_result)

        new_graph_state = deepcopy(state)  # never modify the incoming state

        batch_inputs = []
        batch_indicates = []  # (question index, reference index) per batch input
        question_have_ref_count = 0
        for q_idx, question in enumerate(new_graph_state["questions"]):
            refs = question.get("direct_reference_text", [])
            if refs:
                question_have_ref_count += 1
            if len(refs) > 1:
                for r_idx, direct_reference_text in enumerate(refs):
                    batch_inputs.append({
                        "main_text": question["question_content"].page_content,
                        "referenced_text": direct_reference_text["full_text"]
                    })
                    batch_indicates.append((q_idx, r_idx))
            else:
                # Zero or one reference: nothing to grade, merge directly.
                question["merged_reference_text"] = merge_filtered_list(refs)
        print(f"question_have_ref_count {question_have_ref_count}")

        # No question has more than one reference: skip the model call.
        if not batch_inputs:
            print("NOTHING TO GRADE")
            return new_graph_state

        responses = self.sematic_reference_generator.batch(batch_inputs)
        responses = retry_failed_batches(batch_inputs, responses, self.sematic_reference_generator)
        result_map = {
            batch_indicates[i]: responses[i]["provide_additional_meaning"].strip().lower() == "yes"
            for i in range(len(responses))
        }

        question_have_ref_count = 0
        for q_idx, question in enumerate(new_graph_state["questions"]):
            refs = question.get("direct_reference_text", [])
            if len(refs) > 1:
                # Keep only the references the model judged useful.
                refs = [
                    ref for r_idx, ref in enumerate(refs)
                    if result_map.get((q_idx, r_idx), False)
                ]
                question["direct_reference_text"] = refs
            if refs:
                question_have_ref_count += 1
            question["merged_reference_text"] = merge_filtered_list(refs)
            # Dependencies follow the references that survived the filter.
            new_index_list = []
            for direct_reference_text in refs:
                new_index_list.extend(direct_reference_text["index_list"])
            question["direct_reference_index"] = new_index_list
        print(f"question_have_ref_count {question_have_ref_count}")

        return new_graph_state
    
    def rewrite_question_content(self, state):
        """Rewrite each passage that has reference text attached.

        For every question whose ``merged_reference_text`` is not "", an input
        ``{"main_section": <passage text>, "refer_sections": <merged text>}``
        is sent to ``self.rewrite_clause_generator``; the ``generated_section``
        of the response is stored as ``rewrite_question_content`` (a string).
        Questions without reference text get ``rewrite_question_content = ""``.

        Args:
            state: graph state; it is not modified.

        Returns:
            An updated deep copy of ``state``. The copy is returned on every
            path so that ``rewrite_question_content`` is always initialised.
        """
        print("---REWRITE QUESTION CONTENT---")
        new_graph_state = deepcopy(state)

        batch_inputs = []
        batch_indicates = []  # question index of each batch input
        for q_idx, question in enumerate(new_graph_state["questions"]):
            question["rewrite_question_content"] = ""
            merged_reference_text = question.get("merged_reference_text", "")
            if merged_reference_text != "":
                batch_inputs.append({
                    "main_section": question["question_content"].page_content,
                    "refer_sections": merged_reference_text
                })
                batch_indicates.append(q_idx)

        # Nothing to send to the model.
        if not batch_inputs:
            print("NO QUESTIONS TO REWRITE")
            return new_graph_state

        responses = self.rewrite_clause_generator.batch(batch_inputs)
        responses = retry_failed_batches(batch_inputs, responses, self.rewrite_clause_generator)

        for i, q_idx in enumerate(batch_indicates):
            new_graph_state["questions"][q_idx]["rewrite_question_content"] = responses[i]["generated_section"]
        return new_graph_state
    
    def loopback_node(self, graph_state):
        """Pick the questions that can be handled in the next pass.

        A question that is not yet in ``processed_question_index`` is selected
        when it has no ``direct_reference_text``, or when every index in its
        ``direct_reference_index`` has already been processed. The selection is
        stored in ``current_question_index``; ``question_to_regen_index`` and
        ``regen_count`` are reset.

        Returns:
            An updated deep copy of ``graph_state``.
        """
        print("--- LOOPBACK NODE ---")

        updated_state = deepcopy(graph_state)  # leave the incoming state untouched
        already_done = graph_state["processed_question_index"]

        selected = []
        independent_total = 0
        resolved_total = 0
        for position, item in enumerate(graph_state["questions"]):
            if position in already_done:
                continue
            if len(item["direct_reference_text"]) == 0:
                # No references: can run right away.
                selected.append(position)
                independent_total += 1
                continue
            if all(dep in already_done for dep in item["direct_reference_index"]):
                # Every referenced question has been handled.
                selected.append(position)
                resolved_total += 1
            # Otherwise the question waits for a later pass.

        print(f"direct count {independent_total}")
        print(f"ref count {resolved_total}")

        updated_state["current_question_index"] = selected
        updated_state["question_to_regen_index"] = []
        updated_state["regen_count"] = 0
        return updated_state
    
    def answer_grade_node(self, state):   #$%^&*
        """Grade generated explanations for grounding, then for validity.

        Pass 1 (hallucination): every selected question is sent to
        ``self.hallucination_grader`` with its filtered documents, its contract
        text and its generation. When ``question_to_regen_index`` is non-empty
        only those questions are selected; otherwise every question in
        ``current_question_index`` whose generation is not the
        "CANNOT FIND ANY RELEVANT DOCUMENT" placeholder is selected.
        ``question_to_regen_index`` is then rebuilt from the questions graded
        ``is_supported == "no"``.

        Pass 2 (answer): current questions that passed pass 1 and are not the
        placeholder are sent to ``self.answer_grader``; those graded
        ``is_valid_explanation == "no"`` are appended to
        ``question_to_regen_index``.

        Args:
            state: Graph state dict. Reads ``questions``,
                ``current_question_index`` and ``question_to_regen_index``.

        Returns:
            A deep copy of ``state`` with ``question_to_regen_index`` replaced
            by the indices of the questions that failed either pass.
        """
        def docs2text(docs):
            return '\n'.join([doc.page_content for doc in docs])
        def question2text(question):
            return f"""第{question.metadata["chapter_number"]}章　{question.metadata["chapter_title"]} \n 第{question.metadata["article_number"]}条（{question.metadata["article_title"]}）\n{question.page_content}"""

        # The placeholder generation with every whitespace character removed.
        no_document_answer = (
            '{"evaluation":"insufficient_information",'
            '"explanation":"CANNOTFINDANYRELEVANTDOCUMENT"}'
        )

        def is_no_document_answer(generation):
            # Compared ignoring whitespace: the placeholder is a multi-line
            # literal, so its exact bytes depend on the indentation of the line
            # that wrote it and an exact ``==`` silently stops matching.
            return (
                isinstance(generation, str)
                and "".join(generation.split()) == no_document_answer
            )

        def collect_filtered_docs(question):
            # Keep the first document seen for each distinct page_content.
            # (Feeding str(list_of_docs) to set.update() would add the single
            # characters of the repr instead of the documents.)
            proposition_groups = [question["propositions"]]
            if len(question["direct_reference_text"]) != 0:
                proposition_groups.append(question["ref_propositions"])
            unique_docs = {}
            for group in proposition_groups:
                for proposition in group:
                    for doc in proposition["filtered_documents"] or []:
                        unique_docs.setdefault(doc.page_content, doc)
            return list(unique_docs.values())

        print("---HALLUCINATION GRADE---")
        new_graph_state = deepcopy(state)
        regen_targets = new_graph_state["question_to_regen_index"]
        regrading = len(regen_targets) != 0
        batch_inputs = []
        batch_indices = []
        for q_idx, question in enumerate(new_graph_state["questions"]):
            if regrading:
                # NOTE(review): regenerated questions are graded even when their
                # generation is the placeholder - confirm this is intended.
                selected = q_idx in regen_targets
            else:
                selected = (
                    q_idx in new_graph_state["current_question_index"]
                    and not is_no_document_answer(question["generation"])
                )
            if not selected:
                continue
            batch_inputs.append({
                "laws": docs2text(collect_filtered_docs(question)),
                "contract": question2text(question["question_content"]),
                "explanation": question["generation"],
            })
            batch_indices.append(q_idx)

        responses = self.hallucination_grader.batch(batch_inputs)
        responses = retry_failed_batches(batch_inputs, responses, self.hallucination_grader)
        # Start over: only questions failing the checks below need regeneration.
        new_graph_state["question_to_regen_index"] = []
        for q_idx, response in zip(batch_indices, responses):
            if response["is_supported"].lower() == "no":
                new_graph_state["question_to_regen_index"].append(q_idx)

        print("---GRADE ANSWER---")
        grade_batch_inputs = []
        grade_batch_indices = []
        for q_idx, question in enumerate(new_graph_state["questions"]):
            if q_idx not in new_graph_state["current_question_index"]:
                continue
            if q_idx in new_graph_state["question_to_regen_index"]:
                continue
            if is_no_document_answer(question["generation"]):
                continue
            grade_batch_inputs.append({
                "contract": question2text(question["question_content"]),
                "explanation": question["generation"],
            })
            grade_batch_indices.append(q_idx)

        grade_responses = self.answer_grader.batch(grade_batch_inputs)
        grade_responses = retry_failed_batches(grade_batch_inputs, grade_responses, self.answer_grader)
        for q_idx, response in zip(grade_batch_indices, grade_responses):
            if response["is_valid_explanation"].lower() == "no":
                new_graph_state["question_to_regen_index"].append(q_idx)
        return new_graph_state
    
    def create_propositions(self, graph_state):   #$%^&*
        """Split the current questions into propositions.

        Questions in ``current_question_index`` are handled in two groups:

        * no ``direct_reference_text``: the raw chunk text is sent to
          ``self.proposition_generator`` and the result is stored under
          ``propositions``, followed by one extra proposition holding the
          formatted chunk itself;
        * with references, all of which are already listed in
          ``processed_question_index``: ``rewrite_question_content`` is sent
          instead and the result is stored under ``ref_propositions``.

        Any other current question only triggers a "HAVE SOME TROUBLES" print.

        Args:
            graph_state: Graph state dict (not modified).

        Returns:
            A deep copy of ``graph_state`` with the new propositions attached
            and the handled indices appended to ``processed_question_index``.
        """
        def question2text(question):
            return f"""第{question.metadata["chapter_number"]}章　{question.metadata["chapter_title"]} \n 第{question.metadata["article_number"]}条（{question.metadata["article_title"]}）\n{question.page_content}"""

        def blank_propositions(generator_output):
            # One record per generated proposition, with no documents yet.
            return [
                {
                    "proposition_content": entry["proposition"],
                    "documents": [],
                    "filtered_documents": []
                }
                for entry in generator_output['propositions']
            ]

        print("---CREATE PROPOSITIONS---")
        updated_state = deepcopy(graph_state)  # never mutate the caller's state
        plain_inputs, plain_targets = [], []
        referencing_inputs, referencing_targets = [], []
        print(f"""len(graph_state["current_question_index"]) {len(graph_state["current_question_index"])}""")

        for position, item in enumerate(graph_state["questions"]):
            if position not in graph_state["current_question_index"]:
                continue
            if len(item["direct_reference_text"]) == 0:
                plain_inputs.append({"document": item["question_content"].page_content})
                plain_targets.append(position)
                continue
            references_ready = all(
                ref_q in graph_state["processed_question_index"]
                for ref_q in item["direct_reference_index"]
            )
            if references_ready:
                referencing_inputs.append({"document": item["rewrite_question_content"]})
                referencing_targets.append(position)
            else:
                print("HAVE SOME TROUBLES")

        print(f"len(direct ) {len(plain_inputs)}")
        print(f"len(ref ) {len(referencing_inputs)}")
        print(f"count {len(referencing_targets)}")

        # Run the proposition generator batch by batch.
        plain_outputs = self.proposition_generator.batch(plain_inputs)
        plain_outputs = retry_failed_batches(plain_inputs, plain_outputs, self.proposition_generator)

        referencing_outputs = self.proposition_generator.batch(referencing_inputs)
        referencing_outputs = retry_failed_batches(referencing_inputs, referencing_outputs, self.proposition_generator)

        for position, generator_output in zip(plain_targets, plain_outputs):
            target = updated_state["questions"][position]
            records = blank_propositions(generator_output)
            # The whole formatted chunk is kept as one additional proposition.
            records.append({
                "proposition_content": question2text(target["question_content"]),
                "documents": [],
                "filtered_documents": []
            })
            target["propositions"] = records
            updated_state["processed_question_index"].append(position)

        for position, generator_output in zip(referencing_targets, referencing_outputs):
            updated_state["questions"][position]["ref_propositions"] = blank_propositions(generator_output)
            updated_state["processed_question_index"].append(position)

        return updated_state
    
    def retrieve(self, state):
        """Retrieve documents for every proposition of the current questions.

        For each question listed in ``current_question_index`` the retriever is
        invoked once per entry of ``propositions`` and then once per entry of
        ``ref_propositions``; the result replaces that entry's ``documents``.

        Args:
            state: Graph state dict (not modified).

        Returns:
            A deep copy of ``state`` with refreshed ``documents`` and
            ``rewrite_count`` reset to 0.
        """
        print("---RETRIEVE---")
        updated_state = deepcopy(state)  # never mutate the caller's state
        retrieved_total = 0
        for position, item in enumerate(updated_state["questions"]):
            if position not in updated_state["current_question_index"]:
                continue
            for group_key in ("propositions", "ref_propositions"):
                for proposition in item[group_key]:
                    proposition["documents"] = self.retriever.invoke(
                        proposition["proposition_content"]
                    )
                    retrieved_total += 1
        print(f"---have retrieved for {retrieved_total} propositions---")
        updated_state["rewrite_count"] = 0
        return updated_state

    def grade_documents(self, state):   #$%^&*
        """Grade retrieved documents and keep only the relevant ones.

        Only propositions (regular and reference) whose ``filtered_documents``
        is empty are graded; propositions that already hold filtered documents
        are left untouched. Every document is graded against the question's
        contract text (``rewrite_question_content`` when it is non-empty,
        otherwise the formatted original chunk), not against the single
        proposition.

        Args:
            state (Dict): Graph state holding the questions and propositions.

        Returns:
            Dict: A deep copy of ``state`` in which each graded proposition's
            ``filtered_documents`` lists the documents scored "yes".
        """
        def question2text(question):
            return f"""第{question.metadata["chapter_number"]}章　{question.metadata["chapter_title"]} \n 第{question.metadata["article_number"]}条（{question.metadata["article_title"]}）\n{question.page_content}"""

        print("---GRADE DOCUMENTS---")

        new_graph_state = deepcopy(state)  # never mutate the caller's state
        batch_inputs = []
        # batch_targets[i] is the (proposition, document) pair graded by
        # batch_inputs[i]; pairing them directly keeps regular and reference
        # propositions from being confused when the scores are applied.
        batch_targets = []

        for q_idx, question in enumerate(new_graph_state["questions"]):
            if q_idx not in new_graph_state["current_question_index"]:
                continue
            if question.get("rewrite_question_content", "") != "":
                contract = question["rewrite_question_content"]
            else:
                contract = question2text(question["question_content"])

            proposition_groups = (
                question["propositions"],
                question.get("ref_propositions", []),
            )
            for group in proposition_groups:
                for proposition in group:
                    if proposition.get("filtered_documents", []):
                        continue  # already graded earlier
                    proposition["filtered_documents"] = []
                    for doc in proposition["documents"]:
                        batch_inputs.append({
                            "context": doc.page_content,
                            "contract": contract
                        })
                        batch_targets.append((proposition, doc))

        # Skip the grader call entirely when there is nothing to grade.
        responses = self.retrieval_grader.batch(batch_inputs) if batch_inputs else []
        responses = retry_failed_batches(batch_inputs, responses, self.retrieval_grader)

        count = 0
        for (proposition, doc), response in zip(batch_targets, responses):
            if response["binary_score"].strip().lower() == "yes":
                proposition["filtered_documents"].append(doc)
                count += 1
        print(f"---have filtered and have {count} docs left---")
        return new_graph_state
    
    def generate(self, state):    #$%^&*
        """Generate an explanation for each targeted question.

        Targets are the questions in ``question_to_regen_index`` when that list
        is non-empty and ``regen_count`` is at most 2 (``regen_count`` is then
        incremented); otherwise the questions in ``current_question_index``.

        For each target the context is ``all_docs`` when non-empty, else
        ``filtered_web_search_docs`` when non-empty. With neither, the
        generation is set to the "CANNOT FIND ANY RELEVANT DOCUMENT"
        placeholder and no model call is made. Questions with a non-empty
        ``direct_reference_text`` go through ``self.rag_chain_both`` (which
        also receives ``rewrite_question_content``); the others go through
        ``self.rag_chain_doc``.

        Args:
            state: Graph state dict (not modified).

        Returns:
            A deep copy of ``state`` with ``generation`` set on every target.
        """
        def docs2text(docs):
            return '\n'.join([doc.page_content for doc in docs])

        def question2text(question):
            return f"""第{question.metadata["chapter_number"]}章　{question.metadata["chapter_title"]} \n 第{question.metadata["article_number"]}条（{question.metadata["article_title"]}）\n{question.page_content}"""

        # Spelled out explicitly so the bytes (a newline followed by 32 spaces
        # between the two fields) do not change when this code is re-indented.
        no_document_answer = (
            '{"evaluation":"insufficient_information",\n'
            + " " * 32
            + '"explanation":"CANNOT FIND ANY RELEVANT DOCUMENT"}'
        )

        print("---GENERATE---")
        new_graph_state = deepcopy(state)
        batch_doc_inputs = []
        batch_both_inputs = []
        batch_doc_indices = []
        batch_both_indices = []
        print(f"""current_question_index {new_graph_state["current_question_index"]}""")

        regenerating = (
            len(new_graph_state["question_to_regen_index"]) != 0
            and new_graph_state["regen_count"] <= 2
        )
        if regenerating:
            target_indices = new_graph_state["question_to_regen_index"]
        else:
            target_indices = new_graph_state["current_question_index"]

        for q_idx, question in enumerate(new_graph_state["questions"]):
            if q_idx not in target_indices:
                continue

            # Retrieved documents win over web-search results.
            if question["all_docs"] != []:
                context_docs = question["all_docs"]
            elif question["filtered_web_search_docs"] != []:
                context_docs = question["filtered_web_search_docs"]
            else:
                question["generation"] = no_document_answer
                continue

            if len(question["direct_reference_text"]) != 0:
                batch_both_inputs.append({
                    "context": docs2text(context_docs),
                    "contract": question2text(question["question_content"]),
                    "new_query": question["rewrite_question_content"]
                })
                batch_both_indices.append(q_idx)
            else:
                batch_doc_inputs.append({
                    "context": docs2text(context_docs),
                    "contract": question2text(question["question_content"])
                })
                batch_doc_indices.append(q_idx)

        if regenerating:
            new_graph_state["regen_count"] += 1

        print("start both")
        both_responses = self.rag_chain_both.batch(batch_both_inputs)
        both_responses = retry_failed_batches(batch_both_inputs, both_responses, self.rag_chain_both)
        print("end both")
        print("start doc")

        doc_responses = self.rag_chain_doc.batch(batch_doc_inputs)
        doc_responses = retry_failed_batches(batch_doc_inputs, doc_responses, self.rag_chain_doc)
        print("end doc")

        for q_idx, response in zip(batch_both_indices, both_responses):
            new_graph_state["questions"][q_idx]["generation"] = response

        for q_idx, response in zip(batch_doc_indices, doc_responses):
            new_graph_state["questions"][q_idx]["generation"] = response
        return new_graph_state
    
    def query_transformation(self, state):
        """Rewrite the propositions that found no document and retrieve again.

        Each entry of ``state["propositions_to_rewrite"]`` is sent to
        ``self.query_transformation_generator``. All returned paraphrased,
        detailed, generalized and expanded queries are run through
        ``self.retriever`` one at a time, and the hits are de-duplicated by
        ``page_content``. Hits that were not among the previously retrieved
        documents replace the proposition's ``documents``; a proposition with
        no new hit keeps its old ``documents``.

        Args:
            state: Graph state dict (not modified).

        Returns:
            A deep copy of ``state`` with updated ``documents``,
            ``rewrite_count`` incremented by one (starting from 0 when it is
            missing or falsy) and ``propositions_to_rewrite`` emptied.
        """
        print("---QUERY TRANSFORMATION---")
        pending = state["propositions_to_rewrite"]

        # 1. Build the generator input: one query per proposition to rewrite.
        query_transformation_input = [
            {"query": prop_data["original_proposition"]["proposition_content"]}
            for prop_data in pending
        ]

        # 2. Generate the alternative queries.
        transformed_queries = self.query_transformation_generator.batch(query_transformation_input)
        transformed_queries = retry_failed_batches(query_transformation_input, transformed_queries, self.query_transformation_generator)
        print("---trieve---")

        # 3. Retrieve sequentially, keyed by the original proposition text and
        #    then by page_content so identical hits collapse into one entry.
        prop_documents = {item["query"]: {} for item in query_transformation_input}
        for query_input, transformed_query in zip(query_transformation_input, transformed_queries):
            queries = (
                transformed_query["paraphrased_queries"]
                + transformed_query["detailed_queries"]
                + transformed_query["generalized_queries"]
                + transformed_query["expanded_queries"]
            )
            hits = prop_documents[query_input["query"]]
            for query in queries:
                for doc in self.retriever.invoke(query):
                    hits[doc.page_content] = doc

        # 4. Write the new documents into a copy of the state.
        updated_state = deepcopy(state)
        # NOTE(review): this set is shared by all propositions, so a document
        # previously retrieved for one proposition is also treated as "old"
        # for every other one - confirm that this is intended.
        old_docs_keys = {
            doc.page_content
            for prop_data in pending
            for doc in prop_data["original_proposition"].get('documents', [])
        }

        count = 0
        for prop_data in updated_state["propositions_to_rewrite"]:
            q_idx = prop_data["question_index"]
            p_idx = prop_data["proposition_index"]
            ref_p_idx = prop_data["ref_propositions_index"]
            proposition_text = prop_data["original_proposition"]["proposition_content"]

            new_documents = [
                doc for key, doc in prop_documents[proposition_text].items()
                if key not in old_docs_keys
            ]
            count += len(new_documents)
            if not new_documents:
                continue  # nothing new: keep the previously retrieved documents
            question = updated_state["questions"][q_idx]
            # proposition_index == -1 marks an entry of ref_propositions.
            if p_idx != -1:
                question["propositions"][p_idx]["documents"] = new_documents
            else:
                question["ref_propositions"][ref_p_idx]["documents"] = new_documents

        updated_state["rewrite_count"] = (updated_state.get("rewrite_count", 0) or 0) + 1
        updated_state["propositions_to_rewrite"] = []
        print(f"---new docs num: {count}")
        return updated_state

    def check_empty_propositions(self, state):
        """List the propositions of the current questions that have no filtered documents.

        Each such proposition is recorded as a dict holding ``question_index``,
        ``proposition_index``, ``ref_propositions_index`` (the index that does
        not apply is -1) and ``original_proposition`` (the proposition taken
        from the incoming ``state``).

        Args:
            state: Graph state dict (not modified).

        Returns:
            A deep copy of ``state`` whose ``propositions_to_rewrite`` is the
            list of records described above.
        """
        print("---CHECK EMPTY PROPOSITIONS---")
        result_state = deepcopy(state)  # never mutate the caller's state
        pending = []

        for q_pos, item in enumerate(state["questions"]):
            if q_pos not in result_state["current_question_index"]:
                continue
            # Regular propositions first ...
            pending.extend(
                {
                    "question_index": q_pos,
                    "proposition_index": pos,
                    "ref_propositions_index": -1,
                    "original_proposition": entry,
                }
                for pos, entry in enumerate(item["propositions"])
                if not entry["filtered_documents"]
            )
            # ... then the propositions derived from referenced clauses.
            pending.extend(
                {
                    "question_index": q_pos,
                    "proposition_index": -1,
                    "ref_propositions_index": ref_pos,
                    "original_proposition": entry,
                }
                for ref_pos, entry in enumerate(item["ref_propositions"])
                if not entry["filtered_documents"]
            )

        print(f"---{len(pending)}--- propositions that can not find documents")
        result_state["propositions_to_rewrite"] = pending
        return result_state
    
    def should_create_proposition(self, graph_state):
        """Report whether any question is still waiting in ``current_question_index``.

        Returns:
            "STILL HAVE INDEPENDENT CHUNK" when ``current_question_index`` is
            non-empty, otherwise "NO INDEPENDENT CHUNK". The returned label is
            also printed.
        """
        has_pending = len(graph_state["current_question_index"]) != 0
        verdict = "STILL HAVE INDEPENDENT CHUNK" if has_pending else "NO INDEPENDENT CHUNK"
        print(verdict)
        return verdict

    def web_search(self, state):   ####
        """Collect each current question's filtered documents, else search the web.

        For every question in ``current_question_index`` the filtered documents
        of its propositions (and of its reference propositions when
        ``direct_reference_text`` is non-empty) are merged, de-duplicated by
        ``page_content`` and stored in ``all_docs``. A question with no
        filtered document at all is sent to ``self.web_search_tool`` instead,
        using the first 100 characters of ``rewrite_question_content`` (or of
        the original chunk when that is empty) as the query; the returned
        contents are appended to ``web_search_docs``, which is then re-split
        into chunks.

        Args:
            state: Graph state dict (not modified).

        Returns:
            A deep copy of ``state`` with ``all_docs`` or ``web_search_docs``
            updated on the current questions.
        """
        def collect_filtered_docs(question):
            # Keep the first document seen for each distinct page_content.
            # (Feeding str(list_of_docs) to set.update() would add the single
            # characters of the repr instead of the documents.)
            proposition_groups = [question["propositions"]]
            if len(question["direct_reference_text"]) != 0:
                proposition_groups.append(question["ref_propositions"])
            unique_docs = {}
            for group in proposition_groups:
                for proposition in group:
                    for doc in proposition["filtered_documents"] or []:
                        unique_docs.setdefault(doc.page_content, doc)
            return list(unique_docs.values())

        print("---WEB SEARCH---")
        new_graph_state = deepcopy(state)
        input_batch = []
        batch_indices = []

        for q_idx, question in enumerate(new_graph_state["questions"]):
            if q_idx not in new_graph_state["current_question_index"]:
                continue
            doc_list = collect_filtered_docs(question)
            if doc_list:
                question["all_docs"] = doc_list
                continue
            # Nothing relevant was retrieved: fall back to a web search.
            if question.get("rewrite_question_content", "") != "":
                query = question["rewrite_question_content"][:100]
            else:
                query = question["question_content"].page_content[:100]
            input_batch.append({"query": query})
            batch_indices.append(q_idx)

        responses = self.web_search_tool.batch(input_batch)
        print(f"websearch input {input_batch}")

        if responses:
            # One splitter is enough for all the results.
            text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=512, chunk_overlap=30)
        for q_idx, response in zip(batch_indices, responses):
            question = new_graph_state["questions"][q_idx]
            for d in response:
                question["web_search_docs"].append(Document(page_content=d["content"]))
            question["web_search_docs"] = text_splitter.split_documents(question["web_search_docs"])

        print(f"HAVE SEARCH DOCS FOR: {len(input_batch)} CHUNKS")
        return new_graph_state

    def grade_web_search_docs(self, state):
        """Grade web-search documents and keep the ones scored "yes".

        Every document in ``web_search_docs`` of each active question (index
        listed in ``state["current_question_index"]``) is sent to
        ``self.retrieval_grader`` together with the contract text: the
        rewritten question content when it is non-empty, otherwise a text
        rendering of the original question. Documents whose ``binary_score``
        is "yes" (case/whitespace-insensitive) are appended to the question's
        ``filtered_web_search_docs``.

        Args:
            state: Graph state dict; it is deep-copied, never mutated.

        Returns:
            The updated copy of the graph state.
        """
        def question2text(doc):
            # Render chapter / article headers followed by the clause text.
            meta = doc.metadata
            return f"""第{meta["chapter_number"]}章　{meta["chapter_title"]} \n 第{meta["article_number"]}条（{meta["article_title"]}）\n{doc.page_content}"""

        print("---GRADE WEB DOCUMENTS---")

        # Work on a copy so the incoming state is left untouched.
        new_graph_state = deepcopy(state)
        batch_inputs = []
        batch_map = []  # one (q_idx, doc_idx) pair per entry of batch_inputs

        for q_idx, question in enumerate(new_graph_state["questions"]):
            if q_idx not in new_graph_state["current_question_index"]:
                continue

            rewritten = question.get("rewrite_question_content", "")
            if rewritten != "":
                contract = rewritten
            else:
                contract = question2text(question["question_content"])

            for doc_idx, web_doc in enumerate(question["web_search_docs"]):
                batch_inputs.append({
                    "context": web_doc.page_content,
                    "contract": contract
                })
                batch_map.append((q_idx, doc_idx))

        print(f"len(batch_map) {len(batch_map)}")

        # Only call the grader when there is something to grade.
        if batch_inputs:
            responses = self.retrieval_grader.batch(batch_inputs)
        else:
            responses = []
        responses = retry_failed_batches(batch_inputs, responses, self.retrieval_grader)

        count = 0  # number of documents that passed grading

        for map_idx, (q_idx, doc_idx) in enumerate(batch_map):
            if not responses:
                # No grader output at all: nothing can be accepted.
                continue
            verdict = responses[map_idx]["binary_score"].strip().lower()
            if verdict != "yes":
                continue
            # Keep only accepted documents.
            target = new_graph_state["questions"][q_idx]
            target["filtered_web_search_docs"].append(target["web_search_docs"][doc_idx])
            count += 1

        print(f"---Have filtered and have {count} docs left---")
        return new_graph_state



## RAGPipeline

In [4]:
import json
import os
from my_utils import normalize_dict
from processor import label_and_parse_text_from_content
from langchain_core.documents import Document
from langchain_community.llms.vllm import VLLM
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnableConfig
# from .GraphRAG import GraphRAG
# from .ModuleRAG import ModuleRAG
# from .ModuleRetrieval import ModuleRetrieval

class RAGPipeline:
    """Wire the model, retrieval module and graph together and run them.

    ``run`` parses raw text, builds the initial graph state, invokes the
    compiled graph app and converts its final state into a JSON-friendly
    list of per-question results.
    """

    def __init__(self, vllm_model_path, data_path_retrieval):
        """Build the vLLM model, the RAG/retrieval modules and the graph app.

        Args:
            vllm_model_path: Value passed as ``model`` to ``VLLM``.
            data_path_retrieval: Value passed to ``ModuleRetrieval``.
        """
        self.config = RunnableConfig(recursion_limit=50)
        # Create the vLLM model.
        vllm_model = VLLM(
            model=vllm_model_path,
            tensor_parallel_size=1,
            n=1,
            presence_penalty=0.0,
            frequency_penalty=0.0,
            temperature=0.7,
            top_p=0.6,
            top_k=20,
            stop=None,
            ignore_eos=False,
            max_new_tokens=2048,
            logprobs=None,
            download_dir=None,
            vllm_kwargs={
                "quantization":"gptq",
                "max_model_len": 4096,
                "gpu_memory_utilization":0.5
            }
        )
        # Create the RAG and retrieval modules.
        moduleRAG = ModuleRAG(vllm_model)
        moduleRetrieval = ModuleRetrieval(data_path_retrieval, is_embedding=True)

        # Build the graph app from the individual components.
        # NOTE(review): ``get_answer_grander`` is spelled exactly as the
        # original call; the getter is defined outside this class - confirm
        # before "fixing" the spelling.
        graphRAG = GraphRAG(
            extract_reference_generator = moduleRAG.get_extract_reference_generator(),
            sematic_reference_generator = moduleRAG.get_sematic_reference_generator(),
            rewrite_clause_generator = moduleRAG.get_rewrite_clause_generator(),
            proposition_generator = moduleRAG.get_proposition_generator(),
            retriever = moduleRetrieval.get_module(),
            retrieval_grader = moduleRAG.get_retrieval_grader(),
            rag_chain_both = moduleRAG.get_rag_chain_both(),
            rag_chain_doc = moduleRAG.get_rag_chain_doc(),
            query_transformation_generator = moduleRAG.get_query_transformation_generator(),
            hallucination_grader = moduleRAG.get_hallucination_grader(),
            answer_grader=moduleRAG.get_answer_grander(),
            web_search_tool = moduleRAG.get_web_search_tool(),
            grade_web_search_docs= moduleRAG.get_grade_web_search_docs()
        )
        self.app = graphRAG.get_app()

    def processing(self, file_content):
        """Parse raw file content.

        Returns:
            The ``(json_output, root_text)`` pair produced by
            ``label_and_parse_text_from_content``.
        """
        json_output, root_text = label_and_parse_text_from_content(file_content)
        return json_output, root_text

    def create_graph_state(self, json_output, root_text):
        """Build the initial graph state from the parsed document.

        One ``Document`` is created per sub-clause (or per clause when it has
        no sub-clauses), the documents are split into chunks of at most 500
        tokens (30-token overlap), and each chunk becomes one question entry
        with empty ``propositions``, ``ref_propositions``, ``all_docs``,
        ``web_search_docs`` and ``filtered_web_search_docs`` lists.

        Args:
            json_output: Parsed structure handed to ``normalize_dict``.
            root_text: Stored unchanged under ``"root_text"``.

        Returns:
            Dict with the keys ``"questions"`` and ``"root_text"``.
        """
        # Normalize the data; the result is decoded with json.loads below.
        normalized_data = normalize_dict(json_output)
        normalized_data_json = json.loads(normalized_data)
        # Report how many chapters will be processed.
        print("Docs have ",len(normalized_data_json['chapters']),"chapters")
        useful_data = {'chapters': normalized_data_json['chapters']}
        docs_list = [
            Document(
                page_content=sub_clause.get("sub_clause_content") if sub_clause else clause["clause_title"],
                metadata={
                    "chapter_title": chapter['chapter_title'],
                    "chapter_number": chapter['chapter_number'],
                    "article_title": article['article_title'],
                    "article_number": article['article_number'],
                    "clause_number": clause['clause_number'],
                    "clause_title": clause["clause_title"],
                    "sub_clause_number": sub_clause.get("sub_clause_number") if sub_clause else "",
                    "sub_clause_content": sub_clause.get("sub_clause_content") if sub_clause else "",
                }
            )
            for chapter in useful_data['chapters']
            for article in chapter['articles']
            for clause in article['clauses']
            # A clause without sub-clauses still yields one entry (None).
            for sub_clause in (clause["sub_clauses"] or [None])
        ]
        # Split into chunks.
        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=500, chunk_overlap=30
        )

        doc_splits = text_splitter.split_documents(docs_list)
        for chunk_id, doc in enumerate(doc_splits, start=1):
            doc.metadata['chunk_id'] = chunk_id  # 1-based chunk id

        questions = [
            {
                "question_content": Document(
                    # Prefix the clause title only for chunks that come from a sub-clause.
                    page_content=doc_split.metadata['clause_title'] +"\n"+ doc_split.page_content if doc_split.metadata['sub_clause_content'] else doc_split.page_content,
                    metadata={
                        "chapter_title": doc_split.metadata['chapter_title'],
                        "chapter_number": doc_split.metadata['chapter_number'],
                        "article_title": doc_split.metadata['article_title'],
                        "article_number": doc_split.metadata['article_number'],
                        "clause_number": doc_split.metadata['clause_number'],
                        "clause_title": doc_split.metadata['clause_title'],
                        "sub_clause_number": doc_split.metadata['sub_clause_number'],
                        "sub_clause_content": doc_split.metadata['sub_clause_content'],
                    }),
                "propositions": [],
                "ref_propositions": [],
                "all_docs": [],
                "web_search_docs": [],
                "filtered_web_search_docs": [],
            }
            for doc_split in doc_splits
        ]
        graph_state = {"questions": questions, "root_text": root_text}
        return graph_state

    def processsing_final_state(self, final_state):
        """Convert the graph's final state into a JSON-friendly list.

        Args:
            final_state: Dict with a ``"questions"`` list; each question
                must provide ``question_content``, ``propositions``,
                ``ref_propositions``, ``web_search_docs``,
                ``merged_reference_text`` and ``generation``.

        Returns:
            One dict per question with the keys ``question``,
            ``direct_documents``, ``question_reference``,
            ``reference_documents``, ``message``, ``web_search_docs``,
            ``documents`` and ``response``.

        Raises:
            json.JSONDecodeError: If ``generation`` is a string that is not
                valid JSON.
        """
        response_json = []
        for question in final_state["questions"]:
            # Documents retrieved directly for the question, without duplicates.
            docs = []
            for direct_proposition in question["propositions"]:
                for doc in direct_proposition["filtered_documents"]:
                    doc_dict = {
                        "metadata": doc.metadata,
                        "page_content": doc.page_content
                    }
                    if doc_dict not in docs:
                        docs.append(doc_dict)

            # Documents retrieved through references. Skip anything already
            # listed as a direct document AND anything already collected
            # here, so ``documents`` below never contains duplicates.
            docs_ref = []
            for ref_proposition in question["ref_propositions"]:
                for doc in ref_proposition["filtered_documents"]:
                    doc_dict = {
                        "metadata": doc.metadata,
                        "page_content": doc.page_content
                    }
                    if doc_dict not in docs and doc_dict not in docs_ref:
                        docs_ref.append(doc_dict)

            doc_web = [
                {"metadata": doc.metadata, "page_content": doc.page_content}
                for doc in question.get("filtered_web_search_docs", [])
            ]

            full_docs = docs + docs_ref
            generation = question["generation"]

            response_json.append({
                # The original question after processing.
                "question": {
                        "metadata": question["question_content"].metadata,
                        "page_content": question["question_content"].page_content
                    },
                # Documents found directly from the original question.
                "direct_documents": docs,
                # The text the original question refers to.
                "question_reference": question["merged_reference_text"],
                # Documents found via the referenced / rewritten question.
                "reference_documents": docs_ref,
                # Whether web search was used.
                "message": "Using websearch" if question["web_search_docs"]!=[] else "Using vectorDB",
                "web_search_docs": doc_web,
                "documents": full_docs,
                # A string generation is decoded as JSON; anything else is passed through.
                "response": json.loads(generation) if isinstance(generation, str) else generation
            })

        return response_json

    def run(self, file_content):
        """Run the whole pipeline on raw text and return the result list.

        The graph state, the final state and the result are printed along
        the way.
        """
        # Parse the input into the JSON structure and root_text.
        json_output, root_text = self.processing(file_content)

        # Build the initial graph state.
        graph_state = self.create_graph_state(json_output, root_text)
        print("graph_state", graph_state)
        final_state = self.app.invoke(graph_state,config=self.config)
        print("final_state",final_state)
        response_json = self.processsing_final_state(final_state)
        print("response_json",response_json)
        return response_json

if __name__=="__main__":
    print("Hello World")
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    # SECURITY: the Tavily API key must be supplied through the environment
    # and never committed to source control.
    # NOTE(review): a literal key used to be committed at this spot - treat
    # it as leaked and rotate it.
    if not os.environ.get("TAVILY_API_KEY"):
        raise SystemExit("TAVILY_API_KEY is not set; export it before running this script.")
    # Paths needed to build the pipeline.
    vllm_model_path = "/home/trung/RAG_ADVANCED/Qwen2.5-14B-Instruct-GPTQ-Int4"
    data_path_retrieval = "/home/trung/RAG_ADVANCED/full_corpus_110225_with_metadata.json"
    # Build the RAG pipeline.
    rag_pipeline = RAGPipeline(vllm_model_path=vllm_model_path, data_path_retrieval=data_path_retrieval)
    # Read the text file to process.
    input_file = r"/home/trung/Paper2/gpt_api_update/Full_Module/text_file1_3chapters.txt"
    with open(input_file, 'r', encoding='utf-8') as infile:
        file_content = infile.read()

    # Feed the text into the pipeline.
    output_data = rag_pipeline.run(file_content)

Hello World


  from .autonotebook import tqdm as notebook_tqdm
2025-03-14 07:04:39,647	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


INFO 03-14 07:04:40 __init__.py:207] Automatically detected platform cuda.
INFO 03-14 07:04:46 config.py:549] This model supports multiple tasks: {'embed', 'reward', 'generate', 'score', 'classify'}. Defaulting to 'generate'.
INFO 03-14 07:04:47 gptq_marlin.py:147] Detected that the model can run with gptq_marlin, however you specified quantization=gptq explicitly, so forcing gptq. Use quantization=gptq_marlin for faster inference
INFO 03-14 07:04:47 llm_engine.py:234] Initializing a V0 LLM engine (v0.7.3) with config: model='/home/trung/RAG_ADVANCED/Qwen2.5-14B-Instruct-GPTQ-Int4', speculative_config=None, tokenizer='/home/trung/RAG_ADVANCED/Qwen2.5-14B-Instruct-GPTQ-Int4', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=4096, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=gp

Loading safetensors checkpoint shards:   0% Completed | 0/3 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:  33% Completed | 1/3 [00:01<00:02,  1.19s/it]
Loading safetensors checkpoint shards:  67% Completed | 2/3 [00:02<00:01,  1.29s/it]
Loading safetensors checkpoint shards: 100% Completed | 3/3 [00:03<00:00,  1.02s/it]
Loading safetensors checkpoint shards: 100% Completed | 3/3 [00:03<00:00,  1.08s/it]



INFO 03-14 07:04:52 model_runner.py:1115] Loading model weights took 9.3794 GB
INFO 03-14 07:04:54 worker.py:267] Memory profiling takes 1.37 seconds
INFO 03-14 07:04:54 worker.py:267] the current vLLM instance can use total_gpu_memory (23.65GiB) x gpu_memory_utilization (0.50) = 11.82GiB
INFO 03-14 07:04:54 worker.py:267] model weights take 9.38GiB; non_torch_memory takes 0.08GiB; PyTorch activation peak memory takes 1.43GiB; the rest of the memory reserved for KV Cache is 0.94GiB.
INFO 03-14 07:04:54 executor_base.py:111] # cuda blocks: 320, # CPU blocks: 1365
INFO 03-14 07:04:54 executor_base.py:116] Maximum concurrency for 4096 tokens per request: 1.25x
INFO 03-14 07:04:58 model_runner.py:1434] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI. If out-of-memory error occurs during cudagraph capture, consider decreasing `gpu_memory_utiliz

Capturing CUDA graph shapes: 100%|██████████| 35/35 [00:22<00:00,  1.54it/s]

INFO 03-14 07:05:20 model_runner.py:1562] Graph capturing finished in 23 secs, took 1.91 GiB
INFO 03-14 07:05:20 llm_engine.py:436] init engine (profile, create kv cache, warmup model) took 27.99 seconds





Unique documents: 4609
TRUNGGGG CHUNK 2 4945


  embedding_function = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")


TRUNGGGGG    CHUNK 3 4945
Vector database created.
type(data) <class 'str'>
Docs have  1 chapters
graph_state {'questions': [{'question_content': Document(metadata={'chapter_title': '', 'chapter_number': '', 'article_title': '目的', 'article_number': '1', 'clause_number': '1', 'clause_title': 'この規程は、株式会社●●●●の賃金規程第21条に基づき、会社の労働者の退職金に関する事項を定めたものである。', 'sub_clause_number': '', 'sub_clause_content': ''}, page_content='この規程は、株式会社●●●●の賃金規程第21条に基づき、会社の労働者の退職金に関する事項を定めたものである。'), 'propositions': [], 'ref_propositions': [], 'all_docs': [], 'web_search_docs': [], 'filtered_web_search_docs': []}], 'root_text': {'Chapter': {'': {'chapter': '\n', 'article': '-1', 'clause': '-1', 'sub_clause': '-1', 'text': '1\u3000この規程は、株式会社●●●●の賃金規程第21条に基づき、会社の労働者の退職金に関する事項を定めたものである。'}}, 'Article': {'1': {'chapter': '\n', 'article': '第1条\u3000目的\n', 'clause': '-1', 'sub_clause': '-1', 'text': '1\u3000この規程は、株式会社●●●●の賃金規程第21条に基づき、会社の労働者の退職金に関する事項を定めたものである。'}}, 'clause': {('1', '1'): {'chapter': '\n', 'article': '第1条\u3

Processed prompts: 100%|██████████| 1/1 [00:01<00:00,  1.22s/it, est. speed input: 403.72 toks/s, output: 75.64 toks/s]


responses+++++++ [{'has_reference': 'yes', 'is_extractable': 'yes', 'references': [{'text': '株式会社●●●●の賃金規程第21条', 'resolved': {'Chapter': -1, 'Article': -1, 'clause': 21, 'sub_clause': -1}}]}]
---GRADE REFERENCE---
---GRADE REFERENCE---
question_have_ref_count 0
NOTHING TO GRADE
---REWRITE QUESTION CONTENT---
NO QUESTIONS TO REWRITE
--- LOOPBACK NODE ---
direct count 1
ref count 0
STILL HAVE INDEPENDENT CHUNK
---CREATE PROPOSITIONS---
len(graph_state["current_question_index"]) 1
len(direct ) 1
len(ref ) 0
count 0


Processed prompts: 100%|██████████| 1/1 [00:00<00:00,  1.23it/s, est. speed input: 484.49 toks/s, output: 75.20 toks/s]


---RETRIEVE---
---have retrieved for 2 propositions---
---GRADE DOCUMENTS---


Processed prompts: 100%|██████████| 20/20 [00:01<00:00, 14.94it/s, est. speed input: 5565.32 toks/s, output: 149.42 toks/s]


---have filtered and have 0 docs left---
---CHECK EMPTY PROPOSITIONS---
---2--- propositions that can not find documents
---QUERY TRANSFORMATION---


Processed prompts: 100%|██████████| 2/2 [00:03<00:00,  1.78s/it, est. speed input: 155.50 toks/s, output: 143.95 toks/s]


---trieve---
---new docs num: 14
---GRADE DOCUMENTS---


Processed prompts: 100%|██████████| 14/14 [00:01<00:00, 12.73it/s, est. speed input: 5174.05 toks/s, output: 127.26 toks/s]


---have filtered and have 0 docs left---
---CHECK EMPTY PROPOSITIONS---
---2--- propositions that can not find documents
---WEB SEARCH---
websearch input [{'query': 'この規程は、株式会社●●●●の賃金規程第21条に基づき、会社の労働者の退職金に関する事項を定めたものである。'}]
HAVE SEARCH DOCS FOR: 1 CHUNKS
---GRADE WEB DOCUMENTS---
len(batch_map) 1


Processed prompts: 100%|██████████| 1/1 [00:00<00:00,  4.77it/s, est. speed input: 1541.32 toks/s, output: 48.01 toks/s]


---Have filtered and have 1 docs left---
---GENERATE---
current_question_index [0]
start both
end both
start doc


Processed prompts: 100%|██████████| 1/1 [00:01<00:00,  1.35s/it, est. speed input: 245.25 toks/s, output: 80.26 toks/s]


end doc
---HALLUCINATION GRADE---


Processed prompts: 100%|██████████| 1/1 [00:01<00:00,  1.14s/it, est. speed input: 442.63 toks/s, output: 78.16 toks/s]


---GRADE ANSWER---
---GENERATE---
current_question_index [0]
start both
end both
start doc


Processed prompts: 100%|██████████| 1/1 [00:01<00:00,  1.31s/it, est. speed input: 252.43 toks/s, output: 80.32 toks/s]


end doc
---HALLUCINATION GRADE---


Processed prompts: 100%|██████████| 1/1 [00:01<00:00,  1.32s/it, est. speed input: 378.57 toks/s, output: 79.34 toks/s]


---GRADE ANSWER---
---GENERATE---
current_question_index [0]
start both
end both
start doc


Processed prompts: 100%|██████████| 1/1 [00:01<00:00,  1.27s/it, est. speed input: 261.39 toks/s, output: 80.00 toks/s]


end doc
---HALLUCINATION GRADE---


Processed prompts: 100%|██████████| 1/1 [00:00<00:00,  1.03it/s, est. speed input: 511.29 toks/s, output: 77.16 toks/s]

---GRADE ANSWER---
--- LOOPBACK NODE ---
direct count 0
ref count 0
NO INDEPENDENT CHUNK
final_state {'questions': [{'question_content': Document(metadata={'chapter_title': '', 'chapter_number': '', 'article_title': '目的', 'article_number': '1', 'clause_number': '1', 'clause_title': 'この規程は、株式会社●●●●の賃金規程第21条に基づき、会社の労働者の退職金に関する事項を定めたものである。', 'sub_clause_number': '', 'sub_clause_content': ''}, page_content='この規程は、株式会社●●●●の賃金規程第21条に基づき、会社の労働者の退職金に関する事項を定めたものである。'), 'propositions': [{'proposition_content': 'この規程は、株式会社●●●●の賃金規程第21条に基づき、会社の労働者の退職金に関する事項を定める。', 'documents': [Document(metadata={'article_name': '金品の返還', 'article_number': '第二十三条', 'chapter_name': '労働契約', 'chapter_number': '第二章', 'division_name': '', 'division_number': '', 'document_source': 'https://laws.e-gov.go.jp/law/322AC0000000049', 'enactment_year': '令和6年5月31日 施行', 'law_name': '労働基準法', 'law_number': '昭和二十二年法律第四十九号'}, page_content='昭和二十二年法律第四十九号\u3000労働基準法/第二章\u3000労働契約/第二十三条\u3000金品の返還/使用者は、労働者の死亡又は退職の場合において、権利者の請求があつた場合において


