In [None]:
# Install LangExtract
%pip install -q langextract

/bin/bash: line 1: nvidia-smi: command not found


In [None]:
# Set up your Gemini API key
# Get your key from: https://aistudio.google.com/app/apikey
import os
from getpass import getpass

# Prompt when the key is missing *or* blank: an exported-but-empty variable
# (e.g. `GEMINI_API_KEY=`) would otherwise skip the prompt and only fail later,
# at request time. Surrounding whitespace from a copy/paste is stripped.
if not os.environ.get('GEMINI_API_KEY', '').strip():
    os.environ['GEMINI_API_KEY'] = getpass('Enter your Gemini API key: ').strip()

In [None]:
import operator
from typing import Annotated, List, Dict, Any, Union
from typing_extensions import TypedDict

from langgraph.graph import StateGraph, END
import langextract as lx
import textwrap

# 1. System Instruction: Define the Schema and ID Linking Logic
# This text is the base `prompt_description` passed to `lx.extract`; the
# extraction node appends a "PREVIOUS ERRORS" section to it when a retry is needed.
# `textwrap.dedent` removes the common leading indentation of the literal.
# NOTE(review): instruction 3 asks for RELATIONSHIPS, but the schema section
# below has no Relationship entry — confirm whether one is intended.
SYSTEM_PROMPT = textwrap.dedent("""\
    You are an expert forensic analyst building a Knowledge Graph from criminal testimonies.
    
    YOUR GOAL: Extract entities and, crucially, the LINKS (relationships) between them.
    
    ### INSTRUCTIONS:
    1.  **ASSIGN IDs**: Every extracted entity (People, Location, Time, Action) must have a unique `id` (e.g., "p_1", "act_2", "loc_3").
    2.  **LINKING**: 
        -   **Actions** are the "Hubs". They must link to `actor_ids` (People), `location_id`, and `time_id`.
        -   **Emotions** must link to the `experiencer_id` (People) and `trigger_event_id` (Action).
    3.  **RELATIONSHIPS**: Extract static connections (family, ownership) separately from actions.

    ### SCHEMA DEFINITIONS:
    -   **People**: `id`, `name`, `role` (suspect/witness/victim).
    -   **Location**: `id`, `name`, `type`, `container_id` (if inside another location).
    -   **Time**: `id`, `value` (normalized string).
    -   **Action**: `id`, `description`, `actor_ids` (list), `recipient_ids` (list), `location_id`, `time_id`, `cause_event_id` (if caused by another action).
    -   **Emotion**: `id`, `type`, `intensity`, `experiencer_id` (Who felt it?), `trigger_event_id` (What caused it?).
    """)

# 2. Complex Few-Shot Example
# Scenario: A witness observes a drug deal at a bar and flees.
#
# The attribute keys below follow the schema stated in SYSTEM_PROMPT
# (People: id/name/role, Location: id/name/type/container_id, Time: id/value,
# Action: id/description/actor_ids/..., Emotion: id/type/intensity/...), so the
# example and the written instructions teach the model the same shape.
# Every `extraction_text` is a verbatim span of `input_text`.
# NOTE(review): extractions are grouped by class (entities first, so IDs are
# introduced before they are referenced) rather than listed in order of
# appearance, which the LangExtract README recommends — confirm that the
# grouping does not hurt source alignment.

input_text = """
Around 10 PM at the Blue Bar, I saw Hung hand a packet of white powder to Long 'The Scar'. 
Seeing that scene, I felt extremely anxious because I was afraid of getting implicated, so I sneaked out the back door immediately.
"""
example = [
    lx.data.ExampleData(
        text=input_text,
        extractions=[
            # --- 1. ENTITIES (The Nodes) ---

            # People (explicit IDs; `role` uses the suspect/witness/victim vocabulary)
            lx.data.Extraction(
                extraction_class="people",
                extraction_text="I",
                attributes={"id": "p_me", "name": "unknown_speaker", "role": "witness"}
            ),
            lx.data.Extraction(
                extraction_class="people",
                extraction_text="Hung",
                attributes={"id": "p_hung", "name": "Hung", "role": "suspect"}
            ),
            lx.data.Extraction(
                extraction_class="people",
                extraction_text="Long 'The Scar'",
                attributes={"id": "p_long", "name": "Long 'The Scar'", "role": "suspect"}
            ),

            # Location
            lx.data.Extraction(
                extraction_class="location",
                extraction_text="Blue Bar",
                attributes={"id": "loc_bar", "name": "Blue Bar", "type": "entertainment_venue"}
            ),
            lx.data.Extraction(
                extraction_class="location",
                extraction_text="back door",
                attributes={
                    "id": "loc_backdoor",
                    "name": "back door",
                    "type": "exit",
                    "container_id": "loc_bar"  # Linked to parent location
                }
            ),

            # Time (`value` is the key named by the schema)
            lx.data.Extraction(
                extraction_class="time",
                extraction_text="10 PM",
                attributes={"id": "t_10pm", "value": "22:00:00"}
            ),
            lx.data.Extraction(
                extraction_class="time",
                extraction_text="immediately",
                attributes={"id": "t_immediate", "value": "immediately", "relative_to": "act_leave"}
            ),

            # --- 2. ACTIONS (The Edges/Hubs) ---

            # Action 1: The Transaction (Links 2 suspects, object, location, and time)
            lx.data.Extraction(
                extraction_class="action",
                extraction_text="hand a packet of white powder",
                attributes={
                    "id": "act_give",
                    "description": "Hung hands a packet of white powder to Long",
                    "actor_ids": ["p_hung"],       # Actor: Hung
                    "recipient_ids": ["p_long"],   # Recipient: Long
                    "object": "packet of white powder",
                    "location_id": "loc_bar",
                    "time_id": "t_10pm"
                }
            ),

            # Action 2: Witnessing (Links Witness to the Transaction Event)
            lx.data.Extraction(
                extraction_class="action",
                extraction_text="saw",
                attributes={
                    "id": "act_see",
                    "description": "The speaker sees the handover",
                    "actor_ids": ["p_me"],
                    "target_event_id": "act_give",  # Witness saw the specific 'giving' act
                    "location_id": "loc_bar"
                }
            ),

            # Action 3: Fleeing (Causal Link)
            lx.data.Extraction(
                extraction_class="action",
                extraction_text="sneaked out",
                attributes={
                    "id": "act_leave",
                    "description": "The speaker sneaks out through the back door",
                    "actor_ids": ["p_me"],
                    "from_location_id": "loc_bar",
                    "to_location_id": "loc_backdoor",
                    "time_id": "t_immediate",
                    "cause_event_id": "act_see"  # Fled BECAUSE of seeing the act
                }
            ),

            # --- 3. EMOTIONS (Internal State Links) ---

            lx.data.Extraction(
                extraction_class="emotion",
                extraction_text="extremely anxious",
                attributes={
                    "id": "emo_anxiety",
                    "type": "anxiety",
                    "intensity": "high",
                    "experiencer_id": "p_me",        # Who: Me
                    "trigger_event_id": "act_give"   # Why: The drug deal
                }
            ),
            lx.data.Extraction(
                extraction_class="emotion",
                extraction_text="afraid",
                attributes={
                    "id": "emo_fear",
                    "type": "fear",
                    # No `intensity`: the testimony does not state one for the fear.
                    "experiencer_id": "p_me",
                    "trigger_event_id": "act_see",   # Afraid because of having witnessed the deal
                    "reason": "getting implicated"
                }
            ),
        ]
    )
]

In [None]:


# Assumes `SYSTEM_PROMPT` and the few-shot `example` list from the previous cell
# have already been defined (the variable is named `example`).

# --- 2. DEFINE STATE (the graph's shared state) ---
class AgentState(TypedDict):
    """State dictionary handed to, and updated by, the graph's nodes."""
    input_text: str
    extracted_results: Any # Holds the result returned by langextract
    error_log: str         # Records ID-linking errors, if any
    attempt_count: int     # Counts attempts so the loop cannot run forever

# --- 3. DEFINE NODES (the processing steps) ---

def extraction_node(state: AgentState):
    """Run one LangExtract pass over the input text and count the attempt.

    Reads `input_text` (and, on a retry, `error_log`) from the state and
    returns a partial state update containing `extracted_results` and the
    incremented `attempt_count`.
    """
    attempt_number = state.get("attempt_count", 0) + 1
    print(f"--- EXTRACTING (Attempt: {attempt_number}) ---")

    # On a retry, append the validator's findings so the model can repair them.
    feedback = ""
    if state.get("error_log"):
        feedback = f"\n\n### PREVIOUS ERRORS (FIX THESE):\n{state['error_log']}\nEnsure all referenced IDs actually exist in the extracted entities."

    # Note: `model_id` must be a model your API key is allowed to use.
    extraction_request = dict(
        text_or_documents=state["input_text"],
        prompt_description=SYSTEM_PROMPT + feedback,
        examples=example,  # few-shot examples defined in an earlier cell
        model_id="gemini-2.5-flash",
    )

    return {
        "extracted_results": lx.extract(**extraction_request),
        "attempt_count": attempt_number,
    }

# Attributes that point at another extraction's `id`, keyed by extraction class.
# Covers every link field named in SYSTEM_PROMPT's schema plus the extra link
# fields used by the few-shot example (target_event_id, from/to_location_id).
_LINK_FIELDS = {
    "action": (
        "actor_ids",
        "recipient_ids",
        "location_id",
        "from_location_id",
        "to_location_id",
        "time_id",
        "cause_event_id",
        "target_event_id",
    ),
    "emotion": ("experiencer_id", "trigger_event_id"),
    "location": ("container_id",),
}


def _as_reference_list(value):
    """Normalise a link attribute (absent, a single ID, or a list of IDs) to a list of IDs."""
    if value is None or value == "":
        return []  # treated as "no link given", not as a dangling link
    if isinstance(value, (list, tuple, set)):
        return [ref for ref in value if ref is not None and ref != ""]
    # A bare string must not be iterated character by character.
    return [value]


def validation_node(state: AgentState):
    """Check the integrity of the extracted graph (ID linking).

    Collects the `id` of every extraction, then verifies that each link
    attribute listed in `_LINK_FIELDS` refers to one of those IDs.

    Args:
        state: Graph state; `state["extracted_results"]` must expose an
            `extractions` iterable (or None) whose items have
            `extraction_class` and `attributes`.

    Returns:
        A partial state update `{"error_log": <str>}`: one line per dangling
        link, or an empty string when every link resolves.
    """
    print("--- VALIDATING GRAPH INTEGRITY ---")
    results = state["extracted_results"]
    extractions = results.extractions or []  # `extractions` may be None

    # 1. Collect every ID that was actually created
    existing_ids = set()
    for item in extractions:
        entity_id = (item.attributes or {}).get("id")
        if isinstance(entity_id, (str, int)):
            existing_ids.add(entity_id)

    errors = []

    # 2. Check that every link points at an existing ID
    for item in extractions:
        attrs = item.attributes or {}
        owner = f"{str(item.extraction_class).capitalize()} '{attrs.get('id')}'"
        for field in _LINK_FIELDS.get(item.extraction_class, ()):
            # List fields ("actor_ids") are reported in the singular ("actor_id").
            label = field[:-1] if field.endswith("_ids") else field
            for ref in _as_reference_list(attrs.get(field)):
                if not isinstance(ref, (str, int)) or ref not in existing_ids:
                    errors.append(f"{owner} links to non-existent {label}: '{ref}'")

    if errors:
        error_msg = "\n".join(errors)
        print(f"Validation Failed:\n{error_msg}")
        return {"error_log": error_msg}
    else:
        print("Validation Passed!")
        return {"error_log": ""}

# --- 4. DEFINE EDGES (the control flow) ---

def should_retry(state: AgentState):
    """Route after validation: "retry" to extract again, "end" to stop.

    A retry is requested only while `error_log` is non-empty and fewer than
    3 extraction attempts have been made; in every other case the run ends.
    """
    has_errors = bool(state.get("error_log", ""))

    # Guard clause: nothing to fix, or the attempt budget (3 in total) is spent.
    if not has_errors or state.get("attempt_count", 0) >= 3:
        return "end"
    return "retry"

# --- 5. BUILD GRAPH ---

workflow = StateGraph(AgentState)

# Register the nodes
workflow.add_node("extractor", extraction_node)
workflow.add_node("validator", validation_node)

# Set the entry point
workflow.set_entry_point("extractor")

# Plain edge: every extraction is followed by validation
workflow.add_edge("extractor", "validator")

# Conditional edge (the error-correction loop): `should_retry` returns
# "retry" or "end", which the mapping below turns into the next node
workflow.add_conditional_edges(
    "validator",
    should_retry,
    {
        "retry": "extractor", # Errors found -> go back and extract again
        "end": END            # No errors, or attempts exhausted -> finish
    }
)

# Compile the application
app = workflow.compile()

In [None]:
# --- 6. RUN (smoke test) ---
test_text = """
I am Nana. I witnessed Steve sitting at the table near the window and using his laptop.
I saw Steve pick up a pencil for me.
I saw Steve receive a phone call and turn away.
I saw Steve look at a brown envelope.
"""

final_state = app.invoke({"input_text": test_text})

# The retry loop gives up after a fixed number of attempts even when validation
# is still failing, so surface any leftover errors instead of presenting a
# broken graph as final.
unresolved_errors = final_state.get("error_log")
if unresolved_errors:
    print(
        f"\n!!! WARNING: validation still failing after "
        f"{final_state.get('attempt_count')} attempt(s):\n{unresolved_errors}"
    )

# Print the final result
print("\n=== FINAL KNOWLEDGE GRAPH ===")
final_results = final_state["extracted_results"]
for extraction in final_results.extractions or []:  # `extractions` may be None
    print(f"• [{extraction.extraction_class.upper()}] {extraction.extraction_text}")
    print(f"  Attributes: {extraction.attributes}")