In [32]:
from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain_core.callbacks.manager import CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts.base import BasePromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from pydantic import Field

# Prompt text sent to the LLM to turn a natural-language question into a
# SPARQL SELECT query.
#
# Placeholders filled in at invoke time:
#   {schema} - textual description of the graph (node types, relationships).
#   {prompt} - the user's natural-language question.
#
# The doubled braces ({{ and }}) in the example query are escapes for literal
# "{" / "}" so that only {schema} and {prompt} are treated as placeholders.
#
# NOTE(review): the "Examples:" heading below has no content under it and no
# placeholder is visible in this cell. Presumably examples are meant to be
# substituted in elsewhere; confirm, or drop the heading.
SPARQL_GENERATION_TEMPLATE = """
Task: Generate a SPARQL SELECT statement for querying a graph database.
For instance, to find all email addresses of John Doe, the following 
query in backticks would be suitable:
```
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?email
WHERE {{
    ?person foaf:name "John Doe" .
    ?person foaf:mbox ?email .
}}
```
Instructions:
Use only the node types and properties provided in the schema.
Do not use any node types and properties that are not explicitly provided.
Include all necessary prefixes.

Examples:

Schema:
{schema}
Note: Be as concise as possible.
Do not include any explanations or apologies in your responses.
Do not respond to any questions that ask for anything else than 
for you to construct a SPARQL query.
Do not include any text except the SPARQL query generated.

The question is:
{prompt}"""

# Wrap the raw template text in a PromptTemplate, declaring the two
# variables the template expects: the graph schema and the user question.
SPARQL_GENERATION_PROMPT = PromptTemplate(
    template=SPARQL_GENERATION_TEMPLATE,
    input_variables=["schema", "prompt"],
)

In [33]:
import os

from dotenv import load_dotenv
from langchain_aws import ChatBedrock

# Bedrock model identifier used for query generation.
MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"

# Load variables from a local .env file into the process environment so the
# os.getenv lookups below can see them.
load_dotenv()

# Region and credentials come from the environment, never from literals in
# the notebook. os.getenv returns None for an unset variable.
# NOTE(review): how ChatBedrock treats a None credential is not visible
# here; confirm it falls back to the default AWS credential chain.
llm = ChatBedrock(
    model_id=MODEL_ID,
    region_name=os.getenv('AWS_REGION'),
    aws_access_key_id=os.getenv('AWS_KEY_ID'),
    aws_secret_access_key=os.getenv('AWS_SECRET_KEY'),
)

# Pipe the filled-in prompt straight into the model.
chain = SPARQL_GENERATION_PROMPT | llm

In [None]:
# Textual description of the graph handed to the LLM as {schema}: the node
# types, the relationship/property IRIs, and how the types connect.
#
# The data model section must only mention type and relationship names that
# are declared in the lists above it, because the prompt instructs the model
# to use nothing that is not explicitly provided.
# NOTE(review): the data model previously referred to a type "Levee" that is
# not declared; it is written as "LeveeArea" here on the assumption that this
# is the intended declared type. Confirm against the registry.
# NOTE(review): the edges are written without a direction ("-[Rel]-"), so the
# model has to guess subject/object order; consider making this explicit.
schema = """
In the following, each IRI is followed by the local name and optionally its description in parentheses. 
The graph supports the following node types:
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#RecreationArea> (RecreationArea), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#UsaceRecreationArea> (UsaceRecreationArea), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#ChannelArea> (ChannelArea), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#LeveeArea> (LeveeArea), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#School> (School), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#Project> (Project), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#Reservoir> (Reservoir), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#ChannelReach> (ChannelReach), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#RealProperty> (RealProperty), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#River> (River), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#Hospital> (Hospital), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#SchoolZone> (SchoolZone), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#Watershed> (Watershed), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#LeveedArea> (LeveedArea)

The graph supports the following relationships:
<https://dev-georegistry.geoprism.net/lpg/rdfs#GeoObject-exists> (GeoObject-exists), 
<https://dev-georegistry.geoprism.net/lpg/rdfs#GeoObject-uid> (GeoObject-uid), 
<http://www.w3.org/2000/01/rdf-schema#label> (label), 
<http://www.w3.org/1999/02/22-rdf-syntax-ns#type> (type), 
<https://dev-georegistry.geoprism.net/lpg/rdfs#GeoObject-code> (GeoObject-code), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#HasSchoolZone> (HasSchoolZone), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#ConnectedTo> (ConnectedTo), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#HasFloodRisk> (HasFloodRisk), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#RealProperty-realPropertyType> (RealProperty-realPropertyType), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#RealProperty-realPropertyUse> (RealProperty-realPropertyUse), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#WithinWatershed> (WithinWatershed), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#FlowsInto> (FlowsInto), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#FlowsThrough> (FlowsThrough), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#ChannelHasLevee> (ChannelHasLevee), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#Project-programCode> (Project-programCode), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#Project-programName> (Project-programName), 
<https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#HasFloodZone> (HasFloodZone)

The data model between types is the following:
(LeveeArea)-[ChannelHasLevee]-(ChannelReach),
(LeveedArea)-[HasFloodZone]-(LeveeArea),
(School)-[HasFloodRisk]-(LeveedArea),
(School)-[HasSchoolZone]-(SchoolZone)
"""


In [35]:
# Run the prompt-to-model chain with the graph schema and one
# natural-language question as its two template inputs.
response = chain.invoke(
    dict(
        schema=schema,
        prompt="What school zones are impacted by a channel reach with the code 'CEMVK_BR_01_FUL_26' flooding?",
    )
)

# Print the message text so the generated query is shown as plain text.
print(response.content)

```
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <https://dev-georegistry.geoprism.net/lpg/rdfs#>
PREFIX d24: <https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#>

SELECT ?schoolZone
WHERE {
    ?channel rdfs:GeoObject-code "CEMVK_BR_01_FUL_26" .
    ?channel rdf:type d24:ChannelReach .
    ?levee d24:ChannelHasLevee ?channel .
    ?area d24:HasFloodZone ?levee . 
    ?school d24:HasFloodRisk ?area .
    ?schoolZone d24:HasSchoolZone ?school .
}
```
