### Setup & experimentation

In [1]:
from franz.openrdf.connect import ag_connect
import pandas as pd
from pandas import DataFrame
from openai import OpenAI

In [2]:
# Open the connection used by every query in this notebook ("intellikgraph" is passed to ag_connect)
conn = ag_connect("intellikgraph")
# Quick sanity check that the connection works: report the statement count
print(f"Number of statements: {conn.size()}")

Number of statements: 2035


In [3]:
# Execute example query & store in Pandas DataFrame
# The query selects every :EncounterEvent that involves vessel :V014, together with
# the event's start time, end time and the zone it occurred in.
query = """
    SELECT ?encounter ?start ?end ?zone 
    WHERE {
    ?encounter a :EncounterEvent ;
        :involvesVessel :V014 ;
        :startTime ?start ;
        :endTime ?end ;
        :occurredIn ?zone 
    }
"""

# The result is converted to a DataFrame inside the `with` block that manages the query result
with conn.executeTupleQuery(query) as result:
    df = result.toPandas()
df.head()

Unnamed: 0,encounter,start,end,zone
0,<http://example.org/maritime#Encounter_V014_1>,2025-06-20 18:30:00+00:00,2025-06-20 20:30:00+00:00,<http://example.org/maritime#Zone_NO_EEZ>
1,<http://example.org/maritime#Encounter_V011_1>,2025-06-23 02:00:00+00:00,2025-06-23 04:00:00+00:00,<http://example.org/maritime#Zone_US_EEZ>


In [4]:
# Regex with one capture group: the run of word characters between the last '#'
# and a following '>' (e.g. "<http://example.org/maritime#Zone_NO_EEZ>" -> "Zone_NO_EEZ")
PATTERN = r".*#(\w+)>"

### Define methods for knowledge retrieval

In [5]:
def extract_events(ship: str, event_type: str) -> pd.Series:
    """
    Returns the ids of all events of a given type that involve a given ship

    Parameters:
        ship: local name of the vessel as used in the graph (e.g. "V014")
        event_type: local name of the event class (e.g. "EncounterEvent")

    Returns:
        A Series named "event" holding the part of each event IRI captured by
        PATTERN; an empty object-dtype Series named "event" if nothing matches.

    NOTE: both arguments are interpolated into the SPARQL text unescaped, so
    only pass trusted identifiers.
    """

    query = f"""
        SELECT ?event
        WHERE {{
            ?event a :{event_type} ;
                :involvesVessel :{ship} 
        }}
    """
    with conn.executeTupleQuery(query) as result:
        df = result.toPandas()

    if df.empty:
        # Explicit dtype and name keep the empty result consistent with the
        # non-empty one (and avoid pandas' version-dependent default dtype)
        return pd.Series([], dtype=object, name="event")

    return df["event"].str.extract(PATTERN, expand=False)

In [6]:
# Demo: encounter events that involve vessel V014
extract_events(ship="V014", event_type="EncounterEvent")

0    Encounter_V014_1
1    Encounter_V011_1
Name: event, dtype: object

In [7]:
def event_info(event: str) -> DataFrame:
    """
    Returns a single-row DataFrame with info about a given event
    Returned columns: start, end, zone, latitude, longitude, vessels
    ("vessels" holds an array with the ids of every vessel involved)

    Parameters:
        event: local name of the event as used in the graph (e.g. "Encounter_V014_1")

    Raises:
        KeyError: if the query returns no rows for the event

    NOTE: `event` is interpolated into the SPARQL text unescaped, so only pass
    trusted identifiers.
    """

    query = f"""
        SELECT ?start ?end ?vessel ?zone ?latitude ?longitude
        WHERE {{
            :{event}
                :startTime ?start ;
                :endTime ?end ;
                :involvesVessel ?vessel ;
                :occurredIn ?zone ;
                :latitude ?latitude ;
                :longitude ?longitude 
        }}
    """
    with conn.executeTupleQuery(query) as result:
        df = result.toPandas()

    if df.empty:
        # Without this guard the row lookup below fails with an opaque "KeyError: [0]"
        raise KeyError(f"No data found for event {event!r}")

    # Every ?vessel binding produces its own result row; collect all vessel ids
    vessels = df["vessel"].str.extract(PATTERN, expand = False)

    # Keep the event-level fields of the first row only; copy so that adding a
    # column does not write into a slice of the query result
    summary = df.loc[[0], ["start", "end", "zone", "latitude", "longitude"]].copy()
    summary["vessels"] = [vessels.values]

    return summary

In [8]:
# Demo: details of the encounter between V014 and V018
event_info(event="Encounter_V014_1")

Unnamed: 0,start,end,zone,latitude,longitude,vessels
0,2025-06-20 18:30:00+00:00,2025-06-20 20:30:00+00:00,<http://example.org/maritime#Zone_NO_EEZ>,64.0,6.9,"[V018, V014]"


In [9]:
def ship_info(ship: str) -> DataFrame:
    """
    Looks up the descriptive attributes of one ship in the graph
    Returned columns: name, type, call_sign, country, owner
    (country and owner come back as the raw values bound by the query)
    """

    sparql = f"""
        SELECT ?name ?type ?call_sign ?country ?owner
        WHERE {{
            :{ship}
                rdfs:label ?name ;
                :type ?type ;
                :callsign ?call_sign ;
                :flagState ?country ;
                :ownedBy ?owner
        }}
    """
    # Convert the result while the query result is still open
    with conn.executeTupleQuery(sparql) as rows:
        return rows.toPandas()

In [10]:
# Demo: attributes of vessel V014
ship_info(ship="V014")

Unnamed: 0,name,type,call_sign,country,owner
0,RV Arctic Dawn,research,LNAD,<http://example.org/maritime#Country_NO>,<http://example.org/maritime#PelagosResearch>


In [11]:
def country_info(country: str) -> DataFrame:
    """
    Looks up the label of one country in the graph
    Returned columns: name
    """

    sparql = f"""
        SELECT ?name
        WHERE {{
            :{country}
                rdfs:label ?name
        }}
    """

    # Convert the result while the query result is still open
    with conn.executeTupleQuery(sparql) as rows:
        return rows.toPandas()

In [12]:
# Demo: label of the country with id Country_NO
country_info(country="Country_NO")

Unnamed: 0,name
0,Norway


In [13]:
def owner_info(owner: str) -> DataFrame:
    """
    Looks up the label and country of incorporation of one vessel owner
    Returned columns: name, country
    One row is returned per query solution, so an owner can yield several rows
    NOTE(review): the query itself never references ships, so what the repeated
    rows correspond to should be confirmed against the graph data
    """

    sparql = f"""
        SELECT ?name ?country
        WHERE {{
            :{owner}
                rdfs:label ?name ;
                :countryOfIncorporation ?country
        }}
    """
    # Convert the result while the query result is still open
    with conn.executeTupleQuery(sparql) as rows:
        return rows.toPandas()

In [14]:
# Demo: rows returned for the owner PacificLines
owner_info(owner="PacificLines")

Unnamed: 0,name,country
0,PacificLines,<http://example.org/maritime#Country_MH>
1,PacificLines,<http://example.org/maritime#Country_MH>
2,PacificLines,<http://example.org/maritime#Country_MH>
3,PacificLines,<http://example.org/maritime#Country_MH>
4,PacificLines,<http://example.org/maritime#Country_RU>
5,PacificLines,<http://example.org/maritime#Country_RU>
6,PacificLines,<http://example.org/maritime#Country_RU>
7,PacificLines,<http://example.org/maritime#Country_RU>
8,PacificLines,<http://example.org/maritime#Country_CN>
9,PacificLines,<http://example.org/maritime#Country_CN>


In [15]:
def zone_info(zone: str) -> DataFrame:
    """
    Looks up the label and geometry node of one zone in the graph
    Returned columns: name, geometry
    """

    sparql = f"""
        PREFIX geo: <http://www.opengis.net/ont/geosparql#> 
        SELECT ?name ?geometry
        WHERE {{
            :{zone}
                rdfs:label ?name ;
                geo:hasGeometry ?geometry
        }}
    """
    # Convert the result while the query result is still open
    with conn.executeTupleQuery(sparql) as rows:
        return rows.toPandas()

In [16]:
# Demo: label and geometry of the zone with id Zone_US_EEZ
zone_info(zone="Zone_US_EEZ")

Unnamed: 0,name,geometry
0,United States EEZ,_:b99221FAFx590


Example 1: explain the event "Encounter_V014_1", an encounter between vessels V014 and V018 in the Norway EEZ.

In [17]:
# extract basic info
event = "Encounter_V014_1"
event_summary = event_info(event)
event_summary

Unnamed: 0,start,end,zone,latitude,longitude,vessels
0,2025-06-20 18:30:00+00:00,2025-06-20 20:30:00+00:00,<http://example.org/maritime#Zone_NO_EEZ>,64.0,6.9,"[V018, V014]"


In [18]:
# extract vessels involved
vessels = event_summary["vessels"].iloc[0]
for vessel in vessels:
    display(ship_info(vessel))

Unnamed: 0,name,type,call_sign,country,owner
0,MS Baltic Pearl,passenger,LNBP,<http://example.org/maritime#Country_NO>,<http://example.org/maritime#NordicFerries>


Unnamed: 0,name,type,call_sign,country,owner
0,RV Arctic Dawn,research,LNAD,<http://example.org/maritime#Country_NO>,<http://example.org/maritime#PelagosResearch>


In [19]:
# Event classes that are searched for each vessel
event_types = ["EncounterEvent", "LoiteringEvent", "FishingEvent", "PortVisitEvent", "AISGapEvent", "WeatherEvent"]
# A set, so an event shared by several vessels (e.g. the main encounter) is kept once
related_events = set()

# extract related events
for vessel in vessels:
    for event_type in event_types:
        vessel_events = extract_events(vessel, event_type)
        for vessel_event in vessel_events:
            related_events.add((vessel_event, event_type))

# Set iteration order can differ between kernel sessions, so sort for reproducible
# output; separate loop names keep the main event id stored in `event` intact
for related_event, _related_type in sorted(related_events):
    display(event_info(related_event))

Unnamed: 0,start,end,zone,latitude,longitude,vessels
0,2025-06-08 03:00:00+00:00,2025-06-08 06:00:00+00:00,<http://example.org/maritime#Zone_NO_EEZ>,63.4,9.1,[V014]


Unnamed: 0,start,end,zone,latitude,longitude,vessels
0,2025-06-20 18:30:00+00:00,2025-06-20 20:30:00+00:00,<http://example.org/maritime#Zone_NO_EEZ>,64.0,6.9,"[V018, V014]"


Unnamed: 0,start,end,zone,latitude,longitude,vessels
0,2025-06-18 11:45:00+00:00,2025-06-18 14:45:00+00:00,<http://example.org/maritime#Zone_AU_EEZ>,-25.4,115.6,[V018]


Unnamed: 0,start,end,zone,latitude,longitude,vessels
0,2025-06-23 02:00:00+00:00,2025-06-23 04:00:00+00:00,<http://example.org/maritime#Zone_US_EEZ>,36.1,-122.4,"[V014, V011]"


Unnamed: 0,start,end,zone,latitude,longitude,vessels
0,2025-06-21 07:00:00+00:00,2025-06-21 12:00:00+00:00,<http://example.org/maritime#Zone_NO_EEZ>,65.8,7.9,[V018]


### Compile knowledge into a JSON schema

In [20]:
# construct schema to represent a cluster of events & related entities
from pydantic import BaseModel

# A vessel taking part in an event; used as the element type of Event.actors.
# All fields are plain strings.
class Ship(BaseModel):
    id: str  # vessel identifier, e.g. "V014"
    name: str
    type: str
    country: str
    owner: str

# A single event together with the ships involved in it.
# Times and coordinates are declared as strings, not datetime/float values.
class Event(BaseModel):
    id: str
    type: str
    start_time: str 
    end_time: str
    zone: str 
    latitude: str
    longitude: str 
    actors: list[Ship]  # ships involved in the event

# One main event plus the list of events related to it.
class Cluster(BaseModel):
    main_event: Event
    related_events: list[Event]

In [45]:
# construct example cluster
events = []

for event, event_type in related_events:
    event_summary = event_info(event)
    zone_id = event_summary["zone"].str.extract(PATTERN, expand = False).iloc[0]
    actor_ids = event_summary["vessels"].iloc[0]
    actors = []

    for actor_id in actor_ids:
        ship_summary = ship_info(actor_id)
        country_id = ship_summary["country"].str.extract(PATTERN, expand = False).iloc[0]
        owner_id = ship_summary["owner"].str.extract(PATTERN, expand = False).iloc[0]
        actors.append(
            Ship(
                id = actor_id, 
                name = ship_summary["name"].iloc[0], 
                type = ship_summary["type"].iloc[0],
                country = country_info(country_id)["name"].iloc[0],
                owner = owner_info(owner_id)["name"].iloc[0]
            )
        )

    if event == "Encounter_V014_1":
        main_event = Event(
            id = event,
            type = event_type,
            start_time = event_summary["start"].astype(str).iloc[0],
            end_time = event_summary["end"].astype(str).iloc[0],
            zone = zone_info(zone_id)["name"].iloc[0],
            latitude = event_summary["latitude"].astype(str).iloc[0],
            longitude = event_summary["longitude"].astype(str).iloc[0],
            actors = actors
        )
    else:
        events.append(
            Event(
                id = event,
                type = event_type,
                start_time = event_summary["start"].astype(str).iloc[0],
                end_time = event_summary["end"].astype(str).iloc[0],
                zone = zone_info(zone_id)["name"].iloc[0],
                latitude = event_summary["latitude"].astype(str).iloc[0],
                longitude = event_summary["longitude"].astype(str).iloc[0],
                actors = actors
            )
        )
example_cluster = Cluster(main_event = main_event, related_events = events)
example_cluster

Cluster(main_event=Event(id='Encounter_V014_1', type='EncounterEvent', start_time='2025-06-20 18:30:00+00:00', end_time='2025-06-20 20:30:00+00:00', zone='Norway EEZ', latitude='64.0', longitude='6.9', actors=[Ship(id='V018', name='MS Baltic Pearl', type='passenger', country='Norway', owner='NordicFerries'), Ship(id='V014', name='RV Arctic Dawn', type='research', country='Norway', owner='PelagosResearch')]), related_events=[Event(id='Loiter_V014_1', type='LoiteringEvent', start_time='2025-06-08 03:00:00+00:00', end_time='2025-06-08 06:00:00+00:00', zone='Norway EEZ', latitude='63.4', longitude='9.1', actors=[Ship(id='V014', name='RV Arctic Dawn', type='research', country='Norway', owner='PelagosResearch')]), Event(id='Loiter3', type='LoiteringEvent', start_time='2025-06-18 11:45:00+00:00', end_time='2025-06-18 14:45:00+00:00', zone='Australia EEZ', latitude='-25.4', longitude='115.6', actors=[Ship(id='V018', name='MS Baltic Pearl', type='passenger', country='Norway', owner='NordicFerri

In [46]:
# Serialize the cluster to JSON; this value is later embedded in the user message sent to the model
example_cluster_json = example_cluster.model_dump_json()

### Prompt engineering

In [47]:
# No credentials are passed explicitly here
# NOTE(review): presumably the API key is read from the environment (OPENAI_API_KEY) — confirm
client = OpenAI()

In [None]:
# System message for the model: sets the analyst role, lists the four reporting
# steps, gives example anomalies and constrains the output format.
# (Stray keystrokes inside the first anomaly example were removed.)
SYSTEM_PROMPT = """
    You are a data scientist at a maritime research company. You are using a knowledge graph, curated from AIS data, to study the behavior of shipping vessels.

    Given data in JSON format that consists of information about a main event and related events/entities, your task is the following:
        1. Give a brief summary of this event (include information about type, actors, location, duration)
        2. Provide a descriptions of the actors involved
        3. Describe related events
        4. Compile the above to identify any potential anomalies in vessel behavior (if there are no anomalies & activity is normal, explicitly state that)

    Examples of anomalies:
        1. Encounter: e.g., Coast Guard and Fishing Vessel enter same EEZ and are co-located within 1 nautical mile for > 15 minutes.
        2. Unexplained Loitering: Vessel slows or zigzags in a region not flagged for weather, port, or declared hold pattern.
        3. Suspicious Course Deviation: A vessel’s current predicted path significantly diverges from its historical paths under similar contexts.

    Do not make any assumptions nor infer any information beyond the given data. Ensure your summaries and descriptions are in complete sentences. Each main point should have its own line.
    Make dates and times readable, and translate latitude and longitude into geographical descriptions.
"""

In [49]:
# Send the system instructions plus the serialized cluster as a single user message
res = client.beta.chat.completions.parse(
    model="gpt-5",
    messages=[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": "Data: " + str(example_cluster_json)},
    ],
)

In [50]:
# Print the text content of the first returned choice
print(res.choices[0].message.content)

Event summary:
- The main event is an EncounterEvent between MS Baltic Pearl (V018, passenger) and RV Arctic Dawn (V014, research) in the Norway EEZ at 64.0, 6.9 from 2025-06-20 18:30:00+00:00 to 2025-06-20 20:30:00+00:00 (duration: 2 hours).

Actor descriptions:
- V018 — MS Baltic Pearl: passenger vessel, country Norway, owner NordicFerries.
- V014 — RV Arctic Dawn: research vessel, country Norway, owner PelagosResearch.

Related events:
- Loiter_V014_1: LoiteringEvent involving RV Arctic Dawn (V014) in the Norway EEZ at 63.4, 9.1 from 2025-06-08 03:00:00+00:00 to 2025-06-08 06:00:00+00:00 (duration: 3 hours).
- Loiter3: LoiteringEvent involving MS Baltic Pearl (V018) in the Australia EEZ at -25.4, 115.6 from 2025-06-18 11:45:00+00:00 to 2025-06-18 14:45:00+00:00 (duration: 3 hours).
- Encounter_V011_1: EncounterEvent involving RV Arctic Dawn (V014) and MV Samurai Wind (V011, cargo, Japan, owner NipponCargo) in the United States EEZ at 36.1, -122.4 from 2025-06-23 02:00:00+00:00 to 20