# LangGraphSemantic Demo Notebook

This notebook demonstrates the integration of Pydantic models with SHACL shapes in RDF stores using the LangGraphSemantic library. It shows how to connect LangChain and LangGraph with semantic data validation and storage.

## Setup

First, let's import the necessary libraries and set up our environment.

In [None]:
import os
import sys
import time
from typing import List, Optional
from pydantic import BaseModel, Field, validator
import rdflib
from rdflib import Graph, Namespace, URIRef, Literal
import requests

# Import LangGraphSemantic
from langgraphsemantic import LangGraphSemantic
from langgraphsemantic.core import ShapeGenerator
from langgraphsemantic.store import FusekiStore

## Check Fuseki Connection

Let's make sure our Fuseki server is running and accessible.

In [None]:
# Define Fuseki connection parameters.
# Both values can be overridden through environment variables (FUSEKI_URL and
# FUSEKI_DATASET) so the notebook can target a different server or dataset;
# the defaults are unchanged. A trailing slash is stripped because the URL is
# later joined with paths that start with "/".
FUSEKI_URL = os.environ.get("FUSEKI_URL", "http://fuseki:3030").rstrip("/")
DATASET = os.environ.get("FUSEKI_DATASET", "langgraphsemantic")

# Check if Fuseki is running
def check_fuseki(timeout=5):
    """Report whether the Fuseki server answers at ``FUSEKI_URL``.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True if the server responded with HTTP 200; False if it responded
        with another status, could not be reached, or did not answer in time.
    """
    try:
        # requests waits forever by default; a timeout keeps the cell from
        # hanging when the host is unreachable or the server is stalled.
        response = requests.get(FUSEKI_URL, timeout=timeout)
    except requests.exceptions.ConnectionError:
        print("Could not connect to Fuseki server.")
        return False
    except requests.exceptions.Timeout:
        print(f"Fuseki server did not respond within {timeout} seconds.")
        return False

    if response.status_code == 200:
        print("Fuseki server is running.")
        return True

    print(f"Fuseki server returned status code {response.status_code}.")
    return False

# Check if dataset exists
def check_dataset(timeout=5):
    """Report whether the dataset named ``DATASET`` exists on the Fuseki server.

    Args:
        timeout: Seconds to wait for the admin endpoint before giving up.

    Returns:
        True if the dataset appears in the listing returned by
        ``{FUSEKI_URL}/$/datasets``; False if it is missing, the endpoint did
        not answer with HTTP 200, or any error occurred while fetching or
        parsing the listing.
    """
    try:
        response = requests.get(f"{FUSEKI_URL}/$/datasets", timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to get datasets: {response.status_code}")
            return False

        # Fuseki's admin API reports dataset names with a leading slash
        # (e.g. "/langgraphsemantic"), so strip it from both sides before
        # comparing; otherwise an existing dataset is never recognised.
        wanted = DATASET.lstrip("/")
        found = any(
            ds["ds.name"].lstrip("/") == wanted
            for ds in response.json()["datasets"]
        )
    except Exception as e:
        # Best-effort check for a demo notebook: report and treat as missing.
        print(f"Error checking dataset: {e}")
        return False

    if found:
        print(f"Dataset '{DATASET}' exists.")
        return True

    print(f"Dataset '{DATASET}' does not exist.")
    return False

# Create dataset if it doesn't exist
def create_dataset(timeout=10):
    """Ask the Fuseki admin endpoint to create the dataset named ``DATASET``.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True if the server reported success (HTTP 200 or 201), False on any
        other status or if the request raised an error.
    """
    try:
        response = requests.post(
            f"{FUSEKI_URL}/$/datasets",
            # Passing a dict lets requests form-encode the body (escaping any
            # special characters in the dataset name) and set the
            # application/x-www-form-urlencoded Content-Type header itself.
            data={"dbName": DATASET, "dbType": "tdb"},
            timeout=timeout,
        )
    except Exception as e:
        # Best-effort setup for a demo notebook: report instead of raising.
        print(f"Error creating dataset: {e}")
        return False

    # NOTE(review): 200 is what the original code expected; 201 Created is
    # accepted as well in case the server version answers with it — confirm.
    if response.status_code in (200, 201):
        print(f"Dataset '{DATASET}' created successfully.")
        return True

    print(f"Failed to create dataset: {response.status_code}")
    return False

# Check and setup Fuseki
# Guard first: nothing else can be done without a reachable server.
if not check_fuseki():
    print("Please make sure the Fuseki server is running.")
elif not check_dataset():
    # Server is up but the dataset is missing, so create it.
    create_dataset()

## Define Pydantic Models

Let's define some Pydantic models that we'll use to demonstrate the LangGraphSemantic library.

In [None]:
class Address(BaseModel):
    """A physical address."""
    street: str
    city: str
    # Exactly two characters; the validator below also requires uppercase.
    state: str = Field(..., min_length=2, max_length=2)
    # Five digits, optionally followed by a dash and four more digits.
    zip_code: str = Field(..., regex=r'^\d{5}(-\d{4})?$')
    country: str = Field(default="US")

    @validator('state')
    def state_must_be_uppercase(cls, v):
        # Accept the value unchanged when str.isupper() holds; reject otherwise.
        if v.isupper():
            return v
        raise ValueError('State code must be uppercase')

class Person(BaseModel):
    """A person with contact information."""
    id: str
    # Name must be non-empty.
    name: str = Field(..., min_length=1)
    # Age is constrained to 0 <= age < 150.
    age: int = Field(..., ge=0, lt=150)
    email: Optional[str] = None
    addresses: List[Address] = []
    tags: List[str] = []

    @validator('email')
    def email_must_contain_at(cls, v):
        # A missing email is fine; a present one must contain "@".
        if v is None or '@' in v:
            return v
        raise ValueError('Email must contain @')

# Build the example instances.
# The two addresses differ only in their street, so they are produced from a
# single generator expression (its loop variable does not leak into the
# notebook namespace).
address1, address2 = (
    Address(street=street, city="San Francisco", state="CA", zip_code="94105")
    for street in ("123 Main St", "456 Market St")
)

# John has both addresses, Jane only the first one.
person1 = Person(
    id="p1", name="John Doe", age=30,
    email="john.doe@example.com",
    addresses=[address1, address2],
    tags=["developer", "python"],
)
person2 = Person(
    id="p2", name="Jane Smith", age=28,
    email="jane.smith@example.com",
    addresses=[address1],
    tags=["manager", "product"],
)

print(f"Created example models: {person1.name} and {person2.name}")

## Initialize LangGraphSemantic

Now let's initialize the LangGraphSemantic library and connect it to our Fuseki server.

In [None]:
# Create the LangGraphSemantic entry point, pointing it at the Fuseki server
# and dataset configured earlier; resources live under the example.org namespace.
semantic = LangGraphSemantic(
    base_namespace="http://example.org/",
    dataset=DATASET,
    fuseki_url=FUSEKI_URL,
)
print("LangGraphSemantic initialized.")

## Register Models and Generate SHACL Shapes

Let's register our Pydantic models with LangGraphSemantic, which will generate SHACL shapes for them.

In [None]:
# Register models
# Address is registered before Person, which embeds it through its `addresses`
# field. NOTE(review): presumably the order matters to the library — confirm.
print("Registering Address model...")
address_registered = semantic.register_model(Address)
print(f"Address model registered: {address_registered}")

print("\nRegistering Person model...")
person_registered = semantic.register_model(Person)
print(f"Person model registered: {person_registered}")

# Get the generated SHACL shapes
# Shapes are looked up by model class name. The returned objects are
# serialized with `.serialize(format="turtle")` below, so they are presumably
# rdflib Graphs — TODO confirm against the store API.
address_shape = semantic.store.get_shape("Address")
person_shape = semantic.store.get_shape("Person")

print("\nAddress SHACL Shape:")
print(address_shape.serialize(format="turtle"))

print("\nPerson SHACL Shape:")
print(person_shape.serialize(format="turtle"))

## Store and Validate Instances

Now let's store our model instances in the RDF store and validate them against the SHACL shapes.

In [None]:
# Store instances
# Each call's return value is printed as-is; its exact type is not visible
# here (presumably a success flag or identifier — TODO confirm).
print("Storing person1...")
person1_stored = semantic.store_instance(person1)
print(f"Person1 stored: {person1_stored}")

print("\nStoring person2...")
person2_stored = semantic.store_instance(person2)
print(f"Person2 stored: {person2_stored}")

# Validate instances
# Validation runs after both instances have been stored.
# NOTE(review): presumably this checks the instance against the SHACL shapes
# generated at registration time — confirm against the library.
print("\nValidating person1...")
person1_validation = semantic.validate_instance(person1)
print(f"Person1 validation: {person1_validation}")

print("\nValidating person2...")
person2_validation = semantic.validate_instance(person2)
print(f"Person2 validation: {person2_validation}")

## Create Invalid Instance

Let's create an invalid instance to see how validation works.

In [None]:
# Part 1: Pydantic itself rejects a lowercase state code at construction time,
# so `invalid_address` is never bound; only the error message is shown.
try:
    # This should fail Pydantic validation
    invalid_address = Address(
        street="789 Broadway",
        city="New York",
        state="ny",  # Should be uppercase
        zip_code="10001"
    )
except ValueError as e:
    print(f"Pydantic validation error: {e}")

# Let's create an invalid instance by modifying a valid one
# This bypasses Pydantic validation but should fail SHACL validation
# (this relies on attribute assignment not being re-validated by the model).
# NOTE(review): the copy keeps id "p1"; if the store keys instances by id,
# storing it below may overwrite or mix with person1's data — confirm.
invalid_person = person1.copy()
invalid_person.age = -5  # Age should be >= 0

# Store and validate the invalid instance
print("\nStoring invalid person...")
invalid_stored = semantic.store_instance(invalid_person)
print(f"Invalid person stored: {invalid_stored}")

print("\nValidating invalid person...")
invalid_validation = semantic.validate_instance(invalid_person)
print(f"Invalid person validation: {invalid_validation}")

## Integration with LangChain

Now let's demonstrate how to integrate LangGraphSemantic with LangChain.

In [None]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.llms.base import LLM

# Create a semantic memory component
semantic_memory = semantic.create_memory()

# Create a semantic retriever
semantic_retriever = semantic.create_retriever()

# Define a simple LangChain chain that uses semantic memory.
# Only `question` is passed to the chain below.
# NOTE(review): `semantic_memory` is presumably supplied by the memory
# component under that key — confirm.
prompt_template = """You are an assistant that helps with managing contact information.
The following people are in the database:
{semantic_memory}

User question: {question}
Answer: """

prompt = PromptTemplate(
    input_variables=["question", "semantic_memory"],
    template=prompt_template
)

# Note: In a real application, you would use an actual LLM here
# For this demo, we'll just simulate the LLM response
class MockLLM(LLM):
    """Canned-response stand-in for a real LLM.

    It subclasses LangChain's ``LLM`` base class because ``LLMChain``
    validates its ``llm`` argument and rejects a plain callable object.
    """

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses to label this LLM implementation."""
        return "mock"

    def _call(self, prompt: str, stop: Optional[List[str]] = None,
              run_manager=None, **kwargs) -> str:
        """Return the same fixed answer regardless of the prompt."""
        return "I found John Doe and Jane Smith in the database. John is a developer and Jane is a manager."

mock_llm = MockLLM()

chain = LLMChain(
    llm=mock_llm,
    prompt=prompt,
    memory=semantic_memory
)

# Run the chain
response = chain.run(question="Who is in the database?")
print("LangChain response:")
print(response)

## Querying the RDF Store

Let's demonstrate how to query the RDF store directly using SPARQL.

In [None]:
# Query all people in the store
# The pattern is wrapped in GRAPH ?g, so it matches triples held in named
# graphs. Name and age are required; email is OPTIONAL, so people without one
# are still returned.
query = """
SELECT ?person ?name ?age ?email
WHERE {
  GRAPH ?g {
    ?person a <http://example.org/Person> ;
            <http://example.org/name> ?name ;
            <http://example.org/age> ?age .
    OPTIONAL { ?person <http://example.org/email> ?email }
  }
}
"""

results = semantic.store.query.execute_select(query)

# Each row is read like a mapping keyed by SPARQL variable name; `.get` covers
# rows where the optional email is absent.
print("People in the RDF store:")
for result in results:
    print(f"Person: {result['name']}")
    print(f"  Age: {result['age']}")
    print(f"  Email: {result.get('email', 'N/A')}")
    print()

## Conclusion

In this notebook, we've demonstrated how to use the LangGraphSemantic library to:

1. Convert Pydantic models to SHACL shapes
2. Store and validate model instances in an RDF store
3. Integrate with LangChain for semantic memory and retrieval
4. Query the RDF store using SPARQL

This provides a powerful foundation for building semantically aware applications with LangChain and LangGraph.