In [12]:
from __future__ import annotations
from pydantic import BaseModel, Field, model_validator, ConfigDict
from typing import Literal, Any, Generic, TypeVar, Optional
from dotenv import load_dotenv
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, HumanMessagePromptTemplate, FewShotChatMessagePromptTemplate, AIMessagePromptTemplate, MessagesPlaceholder
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
from typing import List, Annotated, Dict
import pandas as pd
from typing_extensions import Self
import json
import os


In [2]:
# Load variables from a .env file into the process environment; OLLAMA_HOST is read from it further down.
load_dotenv()

True

In [3]:
# Location of the folktale corpus, relative to the notebook's working directory.
data_dir = "./data"
file = "folk_tales_deduplicated.csv"
path = os.path.join(data_dir, file)

# Read the whole CSV into a DataFrame.
# NOTE(review): the "Unnamed: 0" column in the preview suggests the file was saved with its
# index; passing index_col=0 would drop it — confirm before changing.
df = pd.read_csv(path)

# Preview the first rows to check the loaded columns.
display(df.head())

Unnamed: 0,source,nation,title,text
0,https://fairytalez.com/momotaro/,japanese,Momotaro,If you’ll believe me there was a time when the...
1,https://fairytalez.com/the-birdcatcher/,serbian,The Birdcatcher,Near Constantinople there lived a man who knew...
2,https://fairytalez.com/sharing-joy-and-sorrow/,german,Sharing Joy and Sorrow,"There was once a tailor, who was a quarrelsome..."
3,https://fairytalez.com/the-punishment-of-gangana/,french,The Punishment of Gangana,Once upon a time there lived a king and queen ...
4,https://fairytalez.com/the-peace-with-the-snakes/,north_american_native,The Peace with the Snakes,In those days there was a Piegan chief named O...


In [4]:
# Print the complete text of the first tale in the corpus (prints nothing if the frame is empty).
for tale_text in df["text"].head(1):
	print(tale_text)

If you’ll believe me there was a time when the fairies were none so shy as they are now. That was the time when beasts talked to men, when there were spells and enchantments and magic every day, when there was great store of hidden treasure to be dug up, and adventures for the asking.At that time, you must know, an old man and an old woman lived alone by themselves. They were good and they were poor and they had no children at all.One fine day, “What are you doing this morning, good man?” says the old woman.“Oh,” says the old man, “I’m off to the mountains with my billhook to gather a faggot of sticks for our fire. And what are you doing, good wife?”“Oh,” says the old woman, “I’m off to the stream to wash clothes. It’s my washing day,” she adds.So the old man went to the mountains and the old woman went to the stream.Now, while she was washing the clothes, what should she see but a fine ripe peach that came floating down the stream? The peach was big enough, and rosy red on both sides.

In [5]:
def pad_or_truncate(lst: List, length: int, pad_value: Any = None) -> List:
	"""Return a new list holding exactly ``length`` items taken from ``lst``.

	Args:
		lst: Source list; it is never modified.
		length: Required size of the result. Must be zero or positive.
		pad_value: Value appended as many times as needed when ``lst`` is too short.

	Returns:
		A new list: ``lst`` followed by padding when it is shorter than ``length``,
		otherwise its first ``length`` items.

	Raises:
		ValueError: If ``length`` is negative. A negative value would otherwise be
			interpreted by slicing as "drop items from the end".
	"""
	if length < 0:
		raise ValueError(f"length must be non-negative, got {length}")

	missing = length - len(lst)
	if missing > 0:
		return lst + [pad_value] * missing
	return lst[:length]
	
# One or more capitalised words separated by optional whitespace, e.g. "Momotaro" or "Old Man".
# Anchored with ^...$ because pydantic's `pattern` constraint searches inside the string;
# without anchors any value that merely contains an upper-case letter would be accepted.
name_regex = r"^([A-Z][a-z]*\s*)+$"
# Lower-case words joined by single underscores, e.g. "hero_house".
snake_case_regex = r"^[a-z]+(_[a-z]+)*$"

In [6]:
# T = TypeVar("T", bound=str)

# class InstanceBase(BaseModel, Generic[T]):
# 	class_name: T
# 	instance_name: str = Field(..., pattern=snake_case_regex)

# class InstanceBase(BaseModel):
# 	model_config = ConfigDict(str_strip_whitespace=True)

# 	instance_name: str = Field(
# 		...,
# 		description= "Instance of the class, which describes the entity more accurately.",
# 		pattern=snake_case_regex)
	
class Role(BaseModel):
	# Narrative role played by an agent (used as the type of Agent.has_role).
	model_config = ConfigDict(str_strip_whitespace=True)

	# Every role is its own entry. "antagonist" and "villain" must be separated by a comma:
	# adjacent string literals are silently concatenated into "antagonistvillain".
	# NOTE(review): "prisioner" looks like a misspelling of "prisoner"; left unchanged in case
	# existing data or the ontology relies on this exact value — confirm.
	class_name: Literal["main_character", "hero", "antagonist", "villain", "false_hero", "helper", "magical_helper", "prisioner", "princess", "quest_giver", "hero_family"]

class Agent(BaseModel):
	# An acting entity of the folktale: the kind of being it is, its role and its traits.
	# Strip leading/trailing whitespace from string values.
	model_config = ConfigDict(str_strip_whitespace=True)

	# Kind of being this agent is.
	class_name: Literal["human_being", "anthropomorphic_animal", "magical_creature", "group_of_agents"]
	
	# Narrative role of the agent (see Role).
	has_role: Role
	gender: Literal["male", "female"]
	age_category: Literal["children", "young", "adult", "senior"]
	# Zero or more personality traits from the fixed vocabulary.
	has_personality: List[Literal["sociable", "joy", "active", "assertive", "anxious", "depressive", "tense", "aggressive", "cold", "egotism", "impersonal", "impulsive"]]
	# Optional integer. Folktale.check_folktale compares it with len(places), so it is
	# presumably a position in Folktale.places — confirm.
	lives_in: Optional[int] = Field(None)
	# Optional proper name, constrained by name_regex.
	name: Optional[str] = Field(None, pattern=name_regex)

class Place(BaseModel):
	'''A location that appears within the folktale.'''

	# Strip leading/trailing whitespace from string values.
	model_config = ConfigDict(str_strip_whitespace=True)

	# Required identifier of this particular place; must satisfy snake_case_regex.
	# The description and examples are part of the field metadata.
	instance_name: str = Field(
		...,
		description= "An specified identifier for the place, written in snake case. It must provide a clear, descriptive name for the location instance.",
		examples=["hero_house", "royal_ballroom", "race_track", "straw_house", "stick_house", "brick_house", "near_forest"],
		pattern=snake_case_regex)

	# Required category of the place, restricted to the fixed vocabulary below.
	class_name: Literal["mountain", "forest", "river", "field", "castle", "palace", "house", "hut", "farmhouse", "tower", "shop", "school", "tavern", "village", "town", "city", "kingdom"] = Field(
		description="The category of place this instant represents. Must be one of the predifined types."
	)

class Object(BaseModel):
	# A non-agent item of the folktale, described only by its category.
	# Strip leading/trailing whitespace from string values.
	model_config = ConfigDict(str_strip_whitespace=True)
	
	# Category of the object, restricted to the fixed vocabulary below.
	class_name: Literal["non_anthropomorphic_animal", "magical_object", "natural_object", "crafted_object"]

class Event(BaseModel):
	# Something that happens in the folktale, with the agents and objects taking part in it.
	# Strip leading/trailing whitespace from string values.
	model_config = ConfigDict(str_strip_whitespace=True)

	# Type of the event, restricted to the fixed vocabulary below.
	class_name: Literal["setup", "initial_situation", "hero_interdiction", "villainy", "false_matrimony", "expulsion", "kidnapping", "murder", "lack", "lack_of_bride", "lack_of_money", "hero_departure", "struggle", "fight", "branding", "receive_mark", "receive_injury", "connective_incident", "call_for_help", "departure_decision", "villain_gains_information", "conflict", "absentation", "breaking_interdiction", "acquisition", "get_present", "guidance", "return", "make_contact_with_enemy", "mediation", "trickery", "beginning_of_counteraction", "helper_move", "receipt_object", "liquidation_of_lack", "release_from_captivity", "pursuit_and_rescue", "false_hero_make_unfounded_claim", "attempt_at_reconnaissance", "victory", "villain_defeated", "unrecognised_arrival", "home_arrival", "difficult_task_with_solution", "difficult_task", "solution_difficult_task", "recognition", "punishment", "reward", "exposure_of_villain", "transfiguration", "physical_transformation", "psychological_transformation", "wedding_or_throne", "wedding", "get_throne"]

	# Integers that Folktale.check_folktale compares with len(agents) and len(objects)
	# respectively, i.e. they are used as positions in those lists.
	has_agent: List[int]
	has_object: List[int]

class Folktale(BaseModel):
	# Complete structured annotation of one folktale. Agents and events refer to other
	# items by their 0-based position in the lists declared here.
	model_config = ConfigDict(str_strip_whitespace=True)

	has_genre: Literal["fable", "fairy_tale", "legend", "myth", "tall_tale"]
	agents: List[Agent]
	places: List[Place]
	objects: List[Object]
	events: List[Event]

	@model_validator(mode='after')
	def check_folktale(self) -> Self:
		"""Verify that every stored index points at an existing item.

		Checks ``Agent.lives_in`` against ``places`` and every entry of
		``Event.has_agent`` / ``Event.has_object`` against ``agents`` / ``objects``.
		An agent whose ``lives_in`` is ``None`` is skipped.

		Returns:
			The validated model itself.

		Raises:
			ValueError: If an index is negative or not smaller than the length of
				the list it refers to. The message lists the existing items with
				their 0-based indices.
		"""
		def describe(item: Any) -> str:
			# Only Place declares instance_name; every model declares class_name.
			return getattr(item, "instance_name", item.class_name)

		def require_index(owner: str, field: str, index: int, kind: str, items: List[Any]) -> None:
			if 0 <= index < len(items):
				return
			# Built outside the f-string: reusing the enclosing quote character inside an
			# f-string expression only parses on Python 3.12+.
			existing = "\n".join(f"- {kind} {position}: {describe(item)}" for position, item in enumerate(items))
			raise ValueError(
				f"In {owner}. {field}: {index} is out of bounds and must be one of the indices listed below. "
				f"The {kind.lower()}s that exist are:\n{existing}"
			)

		for position, agent in enumerate(self.agents):
			# lives_in is optional; comparing None with an int would raise TypeError.
			if agent.lives_in is not None:
				require_index(f"agents[{position}] ({describe(agent)})", "lives_in", agent.lives_in, "Place", self.places)

		for position, event in enumerate(self.events):
			owner = f"events[{position}] ({describe(event)})"
			for agent_index in event.has_agent:
				require_index(owner, "has_agent", agent_index, "Agent", self.agents)
			for object_index in event.has_object:
				require_index(owner, "has_object", object_index, "Object", self.objects)

		return self
	
class Places(BaseModel):
	"A collection of locations that appear within the folktale."
	# Used as the structured-output schema of the place-extraction chain.
	# Required list of Place entries; the description is part of the field metadata.
	places: List[Place] = Field(
		description="A list of all locations explicitly mentioned in the folktale."
	)

class AnnotatedFolktale(Folktale):	
	# A Folktale annotation extended with three required string metadata fields.
	# NOTE(review): presumably filled from the corpus columns (source/nation/title) — confirm.
	uri: str
	nation: str
	title: str

In [8]:
# Chat model served by Ollama; the chains defined in later cells are built on this object.
model = ChatOllama(
	# Server URL taken from the OLLAMA_HOST environment variable; .get() yields None when it is unset.
	base_url=os.environ.get("OLLAMA_HOST"),
	model="llama3.1:8b",
	# NOTE(review): -1 presumably leaves the GPU offload decision to Ollama — confirm in the Ollama docs.
	num_gpu=-1,
	validate_model_on_init=True,
	# NOTE(review): fairly high sampling temperature for structured extraction — confirm it is intended.
	temperature=0.8
)

In [47]:
# Prompt for extracting the places of a folktale.
# The system text is passed as a ("system", template) tuple so that {max_places} is filled in
# at invoke time; a ready-made SystemMessage object is forwarded verbatim and its placeholders
# are never substituted.
# The taxonomy mirrors the categories accepted by Place.class_name (including "river").
places_prompt = ChatPromptTemplate.from_messages(
	[
		("system", '''You are an AI that extracts locations from a folktale. Your task is to analyze the folktale and identify the locations that appear in it. Each location must include:
- a category choose strictly from the taxonomy below
- an instance, which you create to describe the specific place in the story

TAXONOMY (choose the most specific applicable category):

- natural_place: a geographic or environmental area that exists in nature without being constructed.
    - mountain: a large natural elevation of the earth’s surface.
    - forest: a large area covered mainly by trees.
    - river: a natural stream of flowing water.
    - field: a wide, open area of grass and crops.
- building: a structure built by people.
    - dwelling: a place where someone could live or stay.
        - castle: a fortified, large residence, typically for royalty or nobility.
        - palace: a large and luxurious house belonging to royalty or high-ranking figures.
        - house: a typical residential building for a family or individual.
        - hut: a very small, simple dwelling, often made of natural materials like wood, straw, or mud.
        - farmhouse: a house associated with agricultural life, usually rural.
        - tower
    - community_building: buildings for work, trade, education, or public gatherings.
        - shop: a place where goods are sold.
        - school: a building where teaching and learning happen.
        - tavern: a place where travelers rest, eat or drink.
- settlement: a community where groups of people live.
    - village: a small settlement, usually rural, with few buildings and population.
    - town: a settlement larger than a village, with more buildings and services.
    - city: a large, densely populated settlement with many buildings, districts, and institutions.
    - kingdom: a territory ruled by a king or a queen.

GUIDELINES:
- Include only locations explicitly mentioned in the story, ignore generic or irrelevant places.
- Select the most specific category from the taxonomy.
- Provide a concise, story-appropriate name for each instance.
- Do not invent locations, use only the places described in the story.
- List each place only once.
- Identify the minimal set of places needed to represent the story accurately, with a maximum of {max_places}.'''),

		HumanMessagePromptTemplate.from_template(template='''Generate a list of the places that appear in the folktale presented below. For each place, select the most specific category from the taxonomy and create an appropriate instance name.

Folktale:
{folktale}''')
	]
)

# The model is asked to answer with an object matching the Places schema.
places_chain = places_prompt | model.with_structured_output(Places)

# Text of the tale to annotate, taken from the loaded corpus so that this cell does not
# depend on a variable that no visible cell defines.
momotaro_foltkale = df.loc[df["title"] == "Momotaro", "text"].iloc[0]

places = places_chain.invoke({"folktale": momotaro_foltkale,
                              "max_places": 5})
print(len(places.places))
print(places)

4
places=[Place(instance_name='the_old_man_and_woman_s_dwelling', class_name='house'), Place(instance_name='the_mountains', class_name='mountain'), Place(instance_name='the_stream', class_name='field'), Place(instance_name='the_ogres_island_castle', class_name='castle')]


In [None]:
# Read the event hierarchy from disk and parse it into Python objects.
path = "./event_tree.json"

with open(path, mode="r", encoding="utf-8") as f:
    data = json.loads(f.read())

# Report the type of the parsed top-level value.
print(type(data))
# Uncomment to dump the whole structure:
# print(json.dumps(data, indent=4))

<class 'dict'>


In [16]:
class TreeNode(BaseModel):
	# One node of the tree built by EvaluatorTree.
	# Identifier of the node (EvaluatorTree passes the dictionary key here).
	id: str
	# Free-text description (EvaluatorTree passes "" when the source has none).
	description: str
	# Link to the parent node; None for a node without a parent.
	parent: Optional[TreeNode] = None
	# Direct child nodes; empty for a leaf.
	children: List[TreeNode] = []
	# List of strings, empty by default. NOTE(review): not written anywhere in the visible code — confirm intended use.
	thoughts: List[str] = []

class EvaluatorTree:
	"""Tree of ``TreeNode`` objects built from a nested dictionary.

	Each key of the dictionary becomes a node id; the mapped value may carry a
	``"description"`` string and a ``"children"`` dictionary of the same shape.
	"""
	root_node: TreeNode
	nodes: List[TreeNode]

	def __init__(self, data: Dict):
		"""Build all nodes from ``data`` and remember the first top-level node as root."""
		top_level = self.create_internal(data)
		self.nodes = top_level
		self.root_node = top_level[0]

	def create_internal(self, data: Dict, parent: TreeNode | None = None):
		"""Recursively turn one dictionary level into a list of nodes attached to ``parent``."""
		built = []
		for key, spec in data.items():
			current = TreeNode(
				id=key,
				description=spec.get("description", ""),
				parent=parent,
			)
			built.append(current)

			# Descend only when this entry actually lists children.
			child_specs = spec.get("children", {})
			if len(child_specs) > 0:
				current.children = self.create_internal(child_specs, current)

		return built

	def print_internal(self, nodes: List[TreeNode], n_tabs: int):
		"""Print ``nodes`` and their descendants, one tab of indentation per depth level."""
		indent = "\t" * n_tabs
		for entry in nodes:
			print(f"{indent}- {entry.id}")
			if len(entry.children) > 0:
				self.print_internal(entry.children, n_tabs + 1)

	def print(self):
		"""Print the whole tree as an indented outline, starting at depth 0."""
		self.print_internal(self.nodes, 0)

# Build the tree from the dictionary loaded from event_tree.json and print its outline.
evaluator_tree = EvaluatorTree(data)
evaluator_tree.print()

- root
	- move
		- setup
			- initial_situation
		- conflict
			- hero_interdiction
				- villainy
				- false_matrimony
				- expulsion
				- kidnapping
				- murder
			- lack
				- lack_of_bride
				- lack_of_money
			- hero_departure
			- struggle
				- fight
			- branding
				- receive_mark
				- receive_injury
			- connective_incident
				- call_for_help
				- departure_decision
			- villain_gains_information
		- preparation
			- absentation
			- breaking_interdiction
			- acquisition
				- get_present
			- guidance
			- return_event
			- make_contact_with_enemy
			- mediation
			- trickery
		- beginning_of_counteraction
		- helper_move
			- receipt_object
			- liquidiation_of_lack
				- release_from_captivity
			- pursuit_and_rescue
		- false_hero_make_unfounded_claim
		- attempt_at_reconnaissance
	- resolution
		- victory
			- villain_defeated
		- arrival
			- unrecognised_arrival
				- home_arrival
		- difficult_task_with_solution
			- difficult_task
			- solution_difficult_tas

In [None]:
# Few-shot examples. The "answers" join the operands as text ("2+2" -> "22") instead of adding them.
examples = [
	{"input": "2+2", "output": "22"},
	{"input": "2+3", "output": "23"},
]

# Each example is rendered as a human question followed by the AI answer.
example_prompt = ChatPromptTemplate.from_messages(
	[
		HumanMessagePromptTemplate.from_template(template="What is {input}?"),
		AIMessagePromptTemplate.from_template(template="{output}")
	]
)

few_shot_prompt = FewShotChatMessagePromptTemplate(
	example_prompt=example_prompt,
	examples=examples,
)

# Full prompt: system message, the rendered examples, then the running conversation
# supplied through the "messages" variable.
final_prompt = ChatPromptTemplate.from_messages(
	[
		# The content itself contains the double-quote characters.
		SystemMessage(content='''"You are a wizard of math."'''),
		few_shot_prompt,
		MessagesPlaceholder(variable_name="messages")
	]
)

# Conversation so far; extended below with the model replies and a follow-up question.
messages = [
	HumanMessage(content="What is is 4+4?"),
]

# Render the prompt to text to inspect what would be sent.
formatted_prompt = final_prompt.format(messages=messages)
print(formatted_prompt)

# Schema offered to the model as a tool.
# NOTE(review): the recorded tool call used the argument name "result", not "response" — confirm the schema is being followed.
class Response(BaseModel):
	thinking: str
	response: int

model_output = model.bind_tools([Response])

chain = final_prompt | model_output
response = chain.invoke({"messages": messages})

print(response)

# Append the model reply and ask a second question in the same conversation.
messages += [
	response,
	HumanMessage(content="What is Pedro + Matt?")
]

response = chain.invoke({"messages": messages})

messages += [
	response,
]

# Show the whole exchange in readable form.
for m in messages:
	m.pretty_print()

System: "You are a wizard of math."
Human: What is 2+2?
AI: 22
Human: What is 2+3?
AI: 23
Human: What is is 4+4?
content='' additional_kwargs={} response_metadata={'model': 'llama3.1:8b', 'created_at': '2025-11-27T23:57:43.9711925Z', 'done': True, 'done_reason': 'stop', 'total_duration': 4047702000, 'load_duration': 217242100, 'prompt_eval_count': 200, 'prompt_eval_duration': 817289000, 'eval_count': 23, 'eval_duration': 2973485100, 'logprobs': None, 'model_name': 'llama3.1:8b', 'model_provider': 'ollama'} id='lc_run--a6b3c79f-824c-4b35-a925-ada8875dbcca-0' tool_calls=[{'name': 'Response', 'args': {'result': 8, 'thinking': 'calculation'}, 'id': 'f9209dd6-c313-4bb5-9706-974657eba10c', 'type': 'tool_call'}] usage_metadata={'input_tokens': 200, 'output_tokens': 23, 'total_tokens': 223}

What is is 4+4?
Tool Calls:
  Response (f9209dd6-c313-4bb5-9706-974657eba10c)
 Call ID: f9209dd6-c313-4bb5-9706-974657eba10c
  Args:
    result: 8
    thinking: calculation

What is Pedro + Matt?
Tool Call