## Typed
This notebook demonstrates another approach to the knowledge graph, using strongly typed information.
The idea is that if the entities are known up front, we can work in the domain language directly and don't have to use generic 'nodes' and 'edges'.
The result is closer to a relational model than to a graph database.

In [3]:
import os
from langchain_openai import AzureChatOpenAI
from dotenv import load_dotenv

# Load variables from a local .env file (if present) so the os.environ lookups below succeed.
load_dotenv()


# Chat model used for the structured extraction later in the notebook.
# All connection settings come from the environment; nothing is hard-coded.
llm = AzureChatOpenAI(
    # The underlying OpenAI client interprets `timeout` in SECONDS.
    # The previous value (3*60*1000) was written as milliseconds and therefore
    # meant ~50 hours; three minutes is 3 * 60.
    timeout=3 * 60,
    api_version="2025-02-01-preview",
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    verbose=True,
    reasoning_effort="medium",
)

Use Azure Document Intelligence to read the source document.

In [4]:
from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
from azure.core.credentials import AzureKeyCredential

# Async Document Intelligence client, authenticated with an API key.
# Endpoint and key are read from the environment.
# NOTE(review): no close()/`async with` is visible in this notebook for this client — confirm
# whether it should be closed once the analysis is done.
document_analysis_client = DocumentIntelligenceClient(
    os.environ["DOC_INTELLIGENCE_ENDPOINT"],
    credential=AzureKeyCredential(os.environ["DOC_INTELLIGENCE_KEY"]),
)

# Short alias for the same client object; both names stay bound.
client = document_analysis_client

In [5]:
from azure.ai.documentintelligence.models import AnalyzeResult, DocumentContentFormat, AnalyzeDocumentRequest

# You can add features like DocumentAnalysisFeature.OCR_HIGH_RESOLUTION, DocumentAnalysisFeature.BARCODES, etc.
features = []

file = "./data/Dog-Breed-Characteristics-Behavior.pdf"

with open(file, "rb") as f:
    analyze_request = AnalyzeDocumentRequest(bytes_source=f.read())
    poller = await client.begin_analyze_document(
        model_id="prebuilt-layout",
        body=analyze_request,
        output_content_format=DocumentContentFormat.MARKDOWN,
        features=features
    )
    result = await poller.result()

Create a typed model of the data.

In [6]:
from enum import Enum
from typing import Any, Dict, List, Optional, Set

from pydantic import BaseModel, ConfigDict, Field


class BreedingGroupEnum(str, Enum):
    """Enum for the different breeding groups defined by AKC, plus an 'Other' fallback."""
    HERDING = "Herding Group"
    HOUND = "Hound Group"
    NON_SPORTING = "Non-Sporting Group"
    SPORTING = "Sporting Group"
    TERRIER = "Terrier Group"
    TOY = "Toy Group"
    WORKING = "Working Group"
    # Fallback member: the DogBreed.breeding_group field description tells the
    # model to answer 'Other' when the group is unknown, so that value must be
    # accepted here or validation of the extracted data fails.
    OTHER = "Other"


class BreedingGroupCharacteristics(BaseModel):
    """Model for the characteristics of a breeding group."""

    # Pydantic v2 configuration. Replaces the nested `class Config`, which is
    # deprecated in v2. The example is attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "group": "Herding Group",
                "characteristics": ["Alert", "Smart", "Independent", "Confident", "Trainable", "Loyal", "Affectionate"]
            }
        }
    )

    # Required: which breeding group the characteristics belong to.
    group: BreedingGroupEnum = Field(..., description="The breeding group")
    # Optional: defaults to a fresh empty list per instance.
    characteristics: List[str] = Field(default_factory=list, description="List of characteristics associated with the breeding group")


class BreedFamily(str, Enum):
    """Enum for breed families within groups."""

    # --- families listed under the hound group ---
    SIGHT_HOUND = "Sight Hound"
    SCENT_HOUND = "Scent Hound"

    # --- families listed under the sporting group ---
    RETRIEVER = "Retriever"
    SPANIEL = "Spaniel"
    POINTER_SETTER = "Pointer and Setter"

    # --- families listed under the working group ---
    NORTHERN_BREED = "Northern Breed"
    PROTECTION_BREED = "Protection Breed"
    FLOCK_GUARD = "Flock Guard"
    MOUNTAIN_DOG = "Mountain Dog"

    # Catch-all member; extend the enum with further families when required.
    OTHER = "Other"


class Origin(BaseModel):
    """Model for the origin of a breed."""

    # Pydantic v2 configuration. Replaces the nested `class Config`, which is
    # deprecated in v2. The example is attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "country": "Germany",
                "region": "Bavaria",
                "purpose": "Herding livestock"
            }
        }
    )

    # Required field (no default).
    country: str = Field(..., description="Country of origin, or unknown")
    # These two default to None, so the annotation must allow None as well;
    # a plain `str` annotation contradicted the default.
    region: Optional[str] = Field(None, description="Specific region within the country if applicable, or unknown")
    purpose: Optional[str] = Field(None, description="Original purpose for which the breed was developed, or unknown")


class Trait(BaseModel):
    """Model for individual traits that can be associated with breeds."""

    # Pydantic v2 configuration. Replaces the nested `class Config`, which is
    # deprecated in v2. The example is attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "name": "Intelligent",
                "description": "Able to learn commands quickly and solve problems"
            }
        }
    )

    # Required field.
    name: str = Field(..., description="Name of the trait")
    # Defaults to None, so the annotation must allow None as well.
    description: Optional[str] = Field(None, description="Description of the trait. If missing, use 'n/a'")


class DogBreed(BaseModel):
    """Model for a dog breed."""

    # Pydantic v2 configuration. Replaces the nested `class Config`, which is
    # deprecated in v2. The example is attached to the generated JSON schema.
    # The stray "required" key was removed from the example: it is not a
    # DogBreed field, and required-ness already follows from the field
    # definitions below.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "id": "german_shepherd",
                "name": "German Shepherd",
                "breeding_group": "Herding Group",
                "family": None,
                "traits": ["Intelligent", "Loyal", "Confident", "Courageous", "Steady"],
                "characteristics": ["Alert", "Watchful", "Obedient", "Curious", "Protective"],
                "origin": {
                    "country": "Germany",
                    "purpose": "Herding and guarding sheep"
                },
                "size": "large"
            }
        }
    )

    # Required fields.
    id: str = Field(..., description="Unique identifier for the breed")
    name: str = Field(..., description="Name of the breed")
    breeding_group: BreedingGroupEnum = Field(..., description="The AKC breeding group the breed belongs to. If unknown, use 'Other'")
    # Defaults to None, so the annotation must allow None as well.
    family: Optional[BreedFamily] = Field(None, description="The family within the breeding group, if applicable")
    # List fields default to a fresh empty list per instance.
    traits: List[str] = Field(default_factory=list, description="List of traits associated with this breed")
    characteristics: List[str] = Field(default_factory=list, description="List of characteristic behaviors of this breed")
    # Both default to None, so the annotations must allow None as well.
    origin: Optional[Origin] = Field(None, description="Origin information about the breed")
    size: Optional[str] = Field(None, description="Size category (small, medium, large). If missing, use 'n/a'")
    # additional_properties: Dict[str, str] = Field(default_factory=dict, description="Any other properties associated with the breed")


class DogKnowledge(BaseModel):
    """Model for a knowledge graph of dog breeds."""

    # All breeds found in the document; an empty list when none are supplied.
    breeds: List[DogBreed] = Field(
        description="List of dog breeds in the knowledge graph",
        default_factory=list,
    )

    # One entry per breeding group with its characteristics; an empty list
    # when none are supplied.
    breeding_groups: List[BreedingGroupCharacteristics] = Field(
        description="List of breeding groups and their characteristics",
        default_factory=list,
    )
    
    

Use the schema to extract structured information from the document.

In [7]:
from langchain_core.prompts import ChatPromptTemplate 

# Two-message chat template: a fixed system instruction followed by the human
# turn, whose `{input}` placeholder is filled in when the chain is invoked.
messages = [
    ("system", "Please extract structured information from following text"),
    ("human", "{input}"),
]

prompt = ChatPromptTemplate.from_messages(messages)

In [8]:
# Ask the chat model to answer in the shape of the DogKnowledge schema.
structured_llm = llm.with_structured_output(DogKnowledge)

# Pipeline: fill the prompt template, then send it to the schema-bound model.
chain = prompt | structured_llm

# Run the extraction on the text content returned by the document analysis.
response = chain.invoke(dict(input=result.content))

In [2]:


# `response` is produced by the extraction cell above, so run that cell first.
# The NameError recorded under this cell comes from executing it before
# `response` existed.
# Indented JSON is much easier to scan than the single-line dump that
# print(response) produces for a nested model.
print(response.model_dump_json(indent=2))

NameError: name 'response' is not defined