In [25]:
from typing import Optional
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama
from langchain.schema.runnable import RunnableLambda, RunnableSequence
import re

# -----------------------------
# Define schema
# -----------------------------
class Person(BaseModel):
    """Information about a person."""
    # Schema passed to `llm.with_structured_output`. Every field is an optional
    # string defaulting to None, so a value absent from the input text can be
    # left unset.
    name: Optional[str] = Field(default=None, description="The name of the person.")
    hair_color: Optional[str] = Field(default=None, description="The person's hair color (e.g. brown, black, etc).")
    # Kept as a string: the prompt asks the model for raw text such as '6 feet',
    # which feet_to_meters_converter later rewrites as a meters value.
    height_in_meters: Optional[str] = Field(default=None, description="The person's height in meters or feet.")
    # Kept as a string: the prompt asks the model for raw text such as '20 years',
    # which years_to_months_converter later rewrites as a months value.
    age_in_months: Optional[str] = Field(default=None, description="The person's age in months.")

# -----------------------------
# Define prompt
# -----------------------------
# Chat prompt: a system message carrying the extraction rules, followed by the
# caller's raw text substituted into the ``{text}`` placeholder.
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # Adjacent string literals are concatenated verbatim, so every
            # fragment except the last must end with a space to keep the
            # sentences separated in the final prompt.
            "You are an expert data extraction system. "
            "Extract ALL of the following fields if mentioned: name, hair_color, height_in_meters and age_in_months. "
            "If any value is not found, output null. "
            "If height is in feet or inches, just return the raw text like '6 feet' — a tool will convert it later to meters. "
            "If age is in years, just return the raw text like '20 years' — a tool will convert it later to months."
        ),
        ("human", "{text}"),
    ]
)
# -----------------------------
# Create model and structured output
# -----------------------------
# Chat model client from langchain_ollama; "gemma3:4b" is the model requested.
llm = ChatOllama(model="gemma3:4b")
# Bind the Person schema so the model's reply comes back as structured data
# matching Person (the converters below receive it as a Person).
structured_llm = llm.with_structured_output(schema=Person)

# -----------------------------
# Define tool: feet → meters
# -----------------------------
# A number followed by a feet unit, e.g. "6 feet", "5.5ft".
_FEET_PATTERN = re.compile(r"(\d+(?:\.\d+)?)\s*(?:feet|foot|ft)")
# A number followed by an inches unit, e.g. "2 inches", "11inch".
_INCHES_PATTERN = re.compile(r"(\d+(?:\.\d+)?)\s*(?:inches|inch)")
_METERS_PER_FOOT = 0.3048
_METERS_PER_INCH = 0.0254


def feet_to_meters_converter(person: Person) -> Person:
    """Convert an imperial height in ``person.height_in_meters`` to meters.

    Recognises a feet value ("6 feet", "5.5 ft"), an inches value
    ("72 inches"), or feet followed by inches ("6 feet 2 inches"). The field
    is overwritten in place with the total in meters, rounded to two
    decimals and stored as a string (e.g. "1.88").

    Args:
        person: Extracted record whose ``height_in_meters`` holds the raw
            height text, or None/empty when no height was found.

    Returns:
        The same ``person`` object. The height is left untouched when it is
        empty or contains no feet/inches unit (for example already "1.75").
    """
    if not person.height_in_meters:
        return person

    text = person.height_in_meters.lower()
    feet_match = _FEET_PATTERN.search(text)
    # When a feet value is present, only inches written after it are treated
    # as the remainder; an inches figure before it is not added on top.
    inches_start = feet_match.end() if feet_match else 0
    inches_match = _INCHES_PATTERN.search(text, inches_start)

    if feet_match is None and inches_match is None:
        return person

    meters = 0.0
    if feet_match:
        meters += float(feet_match.group(1)) * _METERS_PER_FOOT
    if inches_match:
        meters += float(inches_match.group(1)) * _METERS_PER_INCH

    person.height_in_meters = str(round(meters, 2))
    return person

# Wrap the plain function in a RunnableLambda so it can be chained with `|`
# in extract_pipeline.
convert_height_tool = RunnableLambda(feet_to_meters_converter)

# -----------------------------
# Define tool: years → months
# -----------------------------
# A number followed by a years unit, e.g. "20 years", "22yrs".
_YEARS_PATTERN = re.compile(r"(\d+(?:\.\d+)?)\s*(?:year|years|yrs|yr)")
# A number followed by a months unit, e.g. "6 months", "3 mos".
_MONTHS_PATTERN = re.compile(r"(\d+(?:\.\d+)?)\s*(?:months?|mos?\b)")
_MONTHS_PER_YEAR = 12


def years_to_months_converter(person: Person) -> Person:
    """Convert an age given in years in ``person.age_in_months`` to months.

    Recognises "year", "years", "yrs" and "yr". A months figure written
    after the years ("2 years 6 months") is added to the total instead of
    being dropped. The field is overwritten in place with the total number
    of months, rounded to two decimals and stored as a string (e.g. "264.0").

    Args:
        person: Extracted record whose ``age_in_months`` holds the raw age
            text, or None/empty when no age was found.

    Returns:
        The same ``person`` object. The age is left untouched when it is
        empty or contains no years unit (for example "18 months").
    """
    if not person.age_in_months:
        return person

    text = person.age_in_months.lower()
    years_match = _YEARS_PATTERN.search(text)
    if years_match is None:
        return person

    months = float(years_match.group(1)) * _MONTHS_PER_YEAR
    # Only a months figure that follows the years value is a remainder.
    extra_match = _MONTHS_PATTERN.search(text, years_match.end())
    if extra_match:
        months += float(extra_match.group(1))

    person.age_in_months = str(round(months, 2))
    return person

# Wrap the plain function in a RunnableLambda so it can be chained with `|`
# in extract_pipeline.
convert_age_tool = RunnableLambda(years_to_months_converter)

# -----------------------------
# Build chain using pipes (|)
# -----------------------------
# Data flow: {"text": ...} -> prompt -> structured LLM output (Person)
# -> height conversion -> age conversion.
extract_pipeline = prompt_template | structured_llm | convert_height_tool | convert_age_tool

# -----------------------------
# Test it
# -----------------------------
#text = "Alan Smith is 6 feet tall and has blond hair."
# Sample sentence that exercises both converters: a height in feet and an age in years.
text = "Alan Smith is 6 feet tall and has blond hair, and 22 yrs old."
# "text" is the prompt's only input variable.
result = extract_pipeline.invoke({"text": text})

# Show the final Person after both converters have run.
print(result)


name='Alan Smith' hair_color='blond' height_in_meters='1.83' age_in_months='264.0'
