In [1]:
import nest_asyncio
import asyncio

# Apply the patch to allow nested event loops
nest_asyncio.apply()
from dotenv import load_dotenv
from minference.threads.inference import InferenceOrchestrator, RequestLimits
from minference.threads.models import ChatMessage, ChatThread, LLMConfig, CallableTool, LLMClient,ResponseFormat, SystemPrompt, StructuredTool, Usage,GeneratedJsonObject
from typing import Literal, List
from minference.ecs.caregistry import CallableRegistry
import time
from minference.clients.utils import msg_dict_to_oai, msg_dict_to_anthropic, parse_json_string
from minference.ecs.entity import EntityRegistry
import os
import logging
import json
import polars as pl

In [2]:
load_dotenv()
EntityRegistry()
CallableRegistry()

<minference.ecs.caregistry.CallableRegistry at 0x1150faa80>

In [3]:
oai_request_limits = RequestLimits(max_requests_per_minute=10000, max_tokens_per_minute=200000000)
# lite_llm_request_limits = RequestLimits(max_requests_per_minute=500, max_tokens_per_minute=200000)
anthropic_request_limits = RequestLimits(max_requests_per_minute=500, max_tokens_per_minute=80000)
vllm_request_limits = RequestLimits(max_requests_per_minute=500000, max_tokens_per_minute=200000000)

In [4]:
anthropic_model = "claude-3-5-sonnet-latest"
vllm_model = "Qwen/Qwen2.5-7B-Instruct"
openai_model = "gpt-4o-mini"

In [5]:
orchestrator = InferenceOrchestrator(oai_request_limits=oai_request_limits, vllm_request_limits=vllm_request_limits)
EntityRegistry.set_inference_orchestrator(orchestrator)
EntityRegistry.set_tracing_enabled(False)

In [6]:
system_string = """ # Narrative Action Extraction System

You are an expert system designed to extract structured actions from narrative text. Your task is to parse narratives and identify concrete physical interactions between entities, mapping them to a structured TextAnalysis model with nested Action objects.

## Extraction Process

1. First, identify all entities (characters, objects, locations) in the text
2. Identify all potential locations where actions take place
3. Extract a list of action names (verbs) that describe physical interactions
4. Create detailed Action objects for each concrete physical interaction
5. Assemble all components into a comprehensive TextAnalysis object

## TextAnalysis Model Structure

The TextAnalysis model contains:
- narrative_entities: List of all named entities in the text
- locations: List of all hierarchical locations in the text
- action_names: List of all action verbs identified in the text
- actions: Dictionary mapping action names to concrete Action objects

## NarrativeEntity Requirements:
- Include only physical entities (people, animals, objects, locations)
- Use specific names when available
- Do not include abstract concepts

## Location Requirements:
- Structure as hierarchical paths from global to local
- Include all distinct locations mentioned in the text
- Format as complete paths (e.g., ["Greece", "Sparta", "royal palace", "vestibule"])

## Action Name Requirements:
- Use simple verb forms (e.g., "speak", "enter", "approach")
- Include only physical actions between concrete entities
- Avoid metaphorical or abstract actions

## Action Object Requirements:
- Both source and target must be physical entities
- Source and target must come from the narrative_entities list
- Location must come from the locations list
- Action name must come from the action_names list
- Assign correct temporal ordering based on narrative sequence

## Do not extract actions where:
- Source or target is abstract or non-physical
- The action is metaphorical rather than literal
- Multiple actions are combined in a complex way

Extract the complete TextAnalysis object with all components according to the Pydantic model specifications and format as Python code."""

In [7]:
from pydantic import BaseModel, Field
from typing import List, Dict, Optional

class Action(BaseModel):
    """
    Represents a concrete physical action between entities in a narrative text.
    
    This model captures the structure of actions in narratives by identifying who 
    performed an action (source), who/what received the action (target), the nature
    of the action itself, and its consequence. Both source and target must be concrete
    physical entities (people, animals, objects, locations).
    
    The model also captures spatial context through hierarchical location information
    and temporal sequence through a temporal order ID.
    
    Example:
        Action(
            source="Telemachus",
            source_type="person",
            source_is_character=True,
            target="horses",
            target_type="animals",
            target_is_character=False,
            action="stop",
            consequence="waiting at vestibule",
            text_describing_the_action="And Telemachus stopped his horses at the vestibule",
            text_describing_the_consequence="The horses waited at the entrance",
            location=["Greece", "Sparta", "royal palace", "vestibule"],
            temporal_order_id=4
        )
    """
    
    source: str = Field(
        ...,
        description="The entity performing the action. Must be a concrete physical entity (person, animal, object, location), not an abstraction. Use specific names for characters when available. Example: 'Telemachus' not 'the visitor'."
    )
    
    source_type: str = Field(
        ...,
        description="The category of the source entity. Common values include: 'person', 'people', 'animal', 'animals', 'object', 'location'. Should match the physical nature of the source. Example: if source is 'Telemachus', source_type should be 'person'."
    )
    
    source_is_character: bool = Field(
        ...,
        description="Indicates whether the source is a named character in the narrative. Set to True for proper characters with names or clearly defined roles in the story, False for generic entities like 'servants' or 'horses'."
    )
    
    target: str = Field(
        ...,
        description="The entity receiving the action. Must be a concrete physical entity (person, animal, object, location), not an abstraction. Use specific names for characters when available. Example: 'royal palace' not 'shelter'."
    )
    
    target_type: str = Field(
        ...,
        description="The category of the target entity. Common values include: 'person', 'people', 'animal', 'animals', 'object', 'location'. Should match the physical nature of the target. Example: if target is 'horses', target_type should be 'animals'."
    )
    
    target_is_character: bool = Field(
        ...,
        description="Indicates whether the target is a named character in the narrative. Set to True for proper characters with names or clearly defined roles in the story, False for generic entities or objects."
    )
    
    action: str = Field(
        ...,
        description="The action performed by the source on the target. Should be a single verb or short phrase that clearly describes the physical interaction. Keep it concise and use base forms of verbs when possible. Example: 'approach', 'speak', 'take', 'stop'."
    )
    
    consequence: str = Field(
        ...,
        description="The immediate outcome or result of the action. Should be concise and focus on the direct effect. Not the broader narrative implications. Example: 'proximity', 'communication established', 'object acquired'."
    )
    
    text_describing_the_action: str = Field(
        ...,
        description="The exact text fragment from the narrative that describes the action. This should be a direct quote that contains the action being performed. Include enough context to understand the action but focus on the specific action fragment."
    )
    
    text_describing_the_consequence: str = Field(
        ...,
        description="A description of the consequence derived from the narrative context. This may be explicit in the text or implied. Should explain the immediate result of the action in plain language."
    )
    
    location: List[str] = Field(
        ...,
        description="Hierarchical location information from global to local, represented as a list of strings. Start with the broadest geographical context (country/region) and narrow down to the specific location of the action. Example: ['Greece', 'Sparta', 'royal palace', 'vestibule']."
    )
    
    temporal_order_id: int = Field(
        ...,
        description="A sequential identifier indicating the chronological order of the action within the analyzed text segment. Actions that occur earlier in the narrative should have lower temporal_order_id values. This helps maintain the narrative sequence when actions are extracted."
    )
    
    def __str__(self) -> str:
        """
        String representation of the Action for human-readable output.
        
        Returns a formatted description of the action that captures the essence of who did what to whom,
        where, and with what result.
        """
        return f"{self.source} ({self.source_type}) {self.action} {self.target} ({self.target_type}) at {self.location[-1]}, resulting in {self.consequence}"


class NarrativeEntity(BaseModel):
    """
    Represents a physical entity identified in the narrative text.
    
    NarrativeEntities are the participants (sources or targets) in actions within the narrative.
    They must be concrete physical entities, not abstractions.
    
    Example:
        NarrativeEntity(
            name="Telemachus",
            entity_type="person",
            is_character=True,
            mentions=["Telemachus", "Telemaco prode", "the son of Odysseus"]
        )
    """
    
    name: str = Field(
        ...,
        description="The canonical name of the entity. For characters, use their primary name. For objects or locations, use the most specific descriptor. Example: 'Telemachus', 'royal palace', 'horses'."
    )
    
    entity_type: str = Field(
        ...,
        description="The category of the entity. Common values include: 'person', 'people', 'animal', 'animals', 'object', 'location'. Should match the physical nature of the entity."
    )
    
    is_character: bool = Field(
        ...,
        description="Indicates whether the entity is a named character in the narrative. Set to True for proper characters with names or clearly defined roles in the story, False for generic entities or objects."
    )
    
    mentions: List[str] = Field(
        ...,
        description="Different textual references to this entity within the narrative. Include variations of names, pronouns when clearly identifiable, or descriptive phrases used to refer to this entity."
    )


class Location(BaseModel):
    """
    Represents a hierarchical location where actions take place in the narrative.
    
    Locations are structured as paths from global to local, providing spatial
    context for the actions in the narrative.
    
    Example:
        Location(
            path=["Greece", "Sparta", "royal palace", "vestibule"],
            description="The entrance area of King Menelaus's palace in Sparta, Greece"
        )
    """
    
    path: List[str] = Field(
        ...,
        description="Hierarchical location information from global to local, represented as a list of strings. Start with the broadest geographical context (country/region) and narrow down to the specific location. Example: ['Greece', 'Sparta', 'royal palace', 'vestibule']."
    )
    
    description: str = Field(
        ...,
        description="A brief description of the location based on information in the narrative. Should provide context about the nature or significance of the location within the story."
    )


class TextAnalysis(BaseModel):
    """
    Comprehensive analysis of narrative text, identifying entities, locations,
    and actions within the text.
    
    This model serves as a container for the complete analysis of a narrative text,
    providing structured access to all components needed for understanding the
    physical interactions between entities in the story.
    
    Example:
        TextAnalysis(
            text_id="odyssey_book4_excerpt",
            narrative_entities=[NarrativeEntity(...)],
            locations=[Location(...)],
            action_names=["arrive", "speak", "approach"],
            actions={"arrive": Action(...), "speak": Action(...)}
        )
    """
    
    text_id: str = Field(
        ...,
        description="A unique identifier for the analyzed text segment. Can be a title, chapter reference, or arbitrary ID to distinguish this analysis from others."
    )
    
    text_content: str = Field(
        ...,
        description="The original text content that was analyzed. Including the full text allows for context verification and reference."
    )
    
    narrative_entities: List[NarrativeEntity] = Field(
        ...,
        description="List of all distinct physical entities identified in the text. Each entity should appear exactly once in this list, even if referenced multiple times in the text."
    )
    
    locations: List[Location] = Field(
        ...,
        description="List of all distinct hierarchical locations identified in the text. Each unique location path should appear exactly once in this list."
    )
    
    action_names: List[str] = Field(
        ...,
        description="List of all action verbs identified in the text that describe physical interactions between entities. Each action name should be a simple verb or short phrase in base form."
    )
    
    actions: Dict[str, List[Action]] = Field(
        ...,
        description="Dictionary mapping action names to lists of Action objects. Each key is an action name from action_names, and each value is a list of all instances of that action occurring in the text. This structure allows easy retrieval of all instances of a particular type of action."
    )
    
    def get_actions_by_entity(self, entity_name: str) -> List[Action]:
        """
        Retrieve all actions where the specified entity is either the source or target.
        
        This method enables filtering actions based on entity participation, making it
        easy to track the involvement of specific characters or objects throughout the narrative.
        
        Args:
            entity_name: The canonical name of the entity to filter by
            
        Returns:
            A list of Action objects where the entity appears as either source or target
        """
        result = []
        for action_list in self.actions.values():
            for action in action_list:
                if action.source == entity_name or action.target == entity_name:
                    result.append(action)
        return sorted(result, key=lambda x: x.temporal_order_id)
    
    def get_actions_at_location(self, location_path: List[str]) -> List[Action]:
        """
        Retrieve all actions occurring at the specified location path.
        
        This method enables spatial filtering of actions, making it easy to
        understand what happened at a particular location in the narrative.
        
        Args:
            location_path: A hierarchical location path to filter by
            
        Returns:
            A list of Action objects occurring at the specified location
        """
        result = []
        path_str = str(location_path)  # Convert to string for comparison
        for action_list in self.actions.values():
            for action in action_list:
                if str(action.location) == path_str:
                    result.append(action)
        return sorted(result, key=lambda x: x.temporal_order_id)
    
    def get_actions_in_sequence(self) -> List[Action]:
        """
        Retrieve all actions in temporal sequence order.
        
        This method returns the complete narrative sequence of actions,
        enabling a chronological walkthrough of the events in the text.
        
        Returns:
            A list of all Action objects sorted by temporal_order_id
        """
        all_actions = []
        for action_list in self.actions.values():
            all_actions.extend(action_list)
        return sorted(all_actions, key=lambda x: x.temporal_order_id)
    
    class Config:
        """Configuration for the TextAnalysis model"""
        
        schema_extra = {
            "example": {
                "text_id": "odyssey_book4_excerpt",
                "text_content": "A Lacedèmone giunser cosí, ne la valle rocciosa...",
                "narrative_entities": [
                    {
                        "name": "Telemachus",
                        "entity_type": "person",
                        "is_character": True,
                        "mentions": ["Telemaco prode", "one of the visitors"]
                    },
                    {
                        "name": "Menelaus",
                        "entity_type": "person",
                        "is_character": True, 
                        "mentions": ["Menelao", "re Menelao", "pastore di genti"]
                    }
                ],
                "locations": [
                    {
                        "path": ["Greece", "Sparta", "royal palace", "vestibule"],
                        "description": "The entrance area of King Menelaus's palace"
                    },
                    {
                        "path": ["Greece", "Sparta", "royal palace", "interior"],
                        "description": "Inside King Menelaus's palace"
                    }
                ],
                "action_names": ["arrive", "speak", "approach", "command"],
                "actions": {
                    "speak": [
                        {
                            "source": "Eteone",
                            "source_type": "person",
                            "source_is_character": True,
                            "target": "Menelaus",
                            "target_type": "person",
                            "target_is_character": True,
                            "action": "speak",
                            "consequence": "communication",
                            "text_describing_the_action": "A lui fattosi presso, veloci parole gli volse",
                            "text_describing_the_consequence": "He conveyed information to Menelaus about the visitors",
                            "location": ["Greece", "Sparta", "royal palace", "interior"],
                            "temporal_order_id": 9
                        }
                    ]
                }
            }
        }

* 'schema_extra' has been renamed to 'json_schema_extra'


In [8]:
action_extractor = StructuredTool.from_pydantic(TextAnalysis)

system_prompt = SystemPrompt(name="Narrative Action Extraction System", content=system_string)

In [9]:
llm_config_vllm_modal = LLMConfig(client=LLMClient.vllm, model=vllm_model, response_format=ResponseFormat.tool,max_tokens=4000)
vllm_thread = ChatThread(
    system_prompt=system_prompt,
    new_message="",
    llm_config=llm_config_vllm_modal,
    forced_output=action_extractor,
    use_schema_instruction=True
)

In [10]:
novels = pl.read_parquet("/Users/tommasofurlanello/Documents/Dev/MarketInference/data/gutenberg_en_novels.parquet")

In [11]:
chunk = novels["TEXT"][0][121:1500]

In [12]:
vllm_thread.new_message = chunk

In [13]:
outs = await orchestrator.run_parallel_ai_completion([vllm_thread])

INFO:root:Starting request #0, with provider https://hk3-lab-team--noparser-vllm-openai-compatible-serve.modal.run/v1/chat/completions
INFO:root:Parallel processing complete. Results saved to /Users/tommasofurlanello/Documents/Dev/MarketInference/outputs/inference_cache/vllm_results_c8b25691-fa5e-45e4-ae92-8ccf97966607_2025-03-24_14-31-56.jsonl
INFO:EntityRegistry:Parsing results from /Users/tommasofurlanello/Documents/Dev/MarketInference/outputs/inference_cache/vllm_results_c8b25691-fa5e-45e4-ae92-8ccf97966607_2025-03-24_14-31-56.jsonl
INFO:EntityRegistry:Processed 1 results from /Users/tommasofurlanello/Documents/Dev/MarketInference/outputs/inference_cache/vllm_results_c8b25691-fa5e-45e4-ae92-8ccf97966607_2025-03-24_14-31-56.jsonl
INFO:EntityRegistry:Processing 1 LLM outputs
INFO:EntityRegistry:
                    START PROCESSING OUTPUTS 

INFO:EntityRegistry:
=== PROCESSING OUTPUT FOR CHAT THREAD ===
Current Thread ID: aefc5837-f81d-4619-b89c-6f604852bc5d
Current History Length: 1

In [14]:
outs[0].json_object

GeneratedJsonObject(id=UUID('47072c01-2d3d-492e-9648-97ffbb31f448'), live_id=UUID('c3df679e-0523-4628-88ff-6f1620ff9180'), created_at=datetime.datetime(2025, 3, 24, 13, 32, 2, 648029), parent_id=None, lineage_id=UUID('7d9b30a0-2f30-443f-a719-7528e4d72bac'), old_ids=[], name='textanalysis', object={'text_id': 'alice_in_wonderland_chapter1', 'text_content': 'Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, “and what is the use of a book,” thought Alice “without pictures or conversations?” So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her. There was nothing so very remarkable in that; no

In [15]:
outs[0].json_object.object

{'text_id': 'alice_in_wonderland_chapter1',
 'text_content': 'Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, “and what is the use of a book,” thought Alice “without pictures or conversations?” So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her. There was nothing so very remarkable in that; nor did Alice think it so very much out of the way to hear the Rabbit say to itself, “Oh dear! Oh dear! I shall be late!” (when she thought it over afterwards, it occurred to her that she ought to have wondered at this, but at the time it all seemed quite natural to her.)',
 'narrative_entities': [