In [8]:
import os
import glob
import duckdb
import pandas as pd
import folium
from folium.plugins import MarkerCluster
from IPython.display import display

# Column that holds the AIS timestamp; used for date filtering and for ordering.
_TIMESTAMP_COLUMN = "date_time_utc"


def _is_date_only_string(value):
    """Return True when *value* is a string shaped exactly like 'YYYY-MM-DD'."""
    return (
        isinstance(value, str)
        and len(value) == 10
        and value.isascii()
        and value.count("-") == 2
        and value[4] == "-"
        and value[7] == "-"
        and value.replace("-", "").isdigit()
    )


def _sql_string_literal(text):
    """Quote *text* as a SQL string literal, doubling any embedded single quotes."""
    return "'" + text.replace("'", "''") + "'"


def get_filtered_ais_data_spatial(base_path, conditions_dict, center_lat, center_lon, buffer_radius_m, max_rows=None):
    """
    Read every Parquet file below ``base_path`` (recursively) with DuckDB and
    return the rows that satisfy the given filters, ordered by ``date_time_utc``.

    Parameters:
      base_path: Root directory that is searched recursively for ``*.parquet``.
      conditions_dict: Mapping of column name -> required value, for example
        {"hour": 11, "mmsi": 257565700, "date_time_utc": "2024-12-01"}.
        Entries whose value is None or 0 are ignored. ``None`` instead of a
        mapping means "no column conditions".
        A date-only string ('YYYY-MM-DD') given for ``date_time_utc`` is
        compared against the date part of the column, so it selects the whole
        day rather than only rows stamped exactly at midnight.
      center_lat, center_lon, buffer_radius_m: When all three are given, only
        rows whose ``geometry`` lies within the buffer around the centre are
        returned. The radius in metres is converted to degrees with the
        approximation 1 degree ~ 111000 m.
        NOTE(review): assuming ``geometry`` holds lon/lat degrees, this
        approximation only holds north-south; the east-west reach shrinks with
        cos(latitude), so the filter is narrower than a true circle in metres.
      max_rows: Optional upper bound on the number of returned rows.

    Returns:
      A pandas DataFrame with the filtered AIS rows.

    Raises:
      IOError: If no Parquet file is found below ``base_path``.
    """
    # Recursive file pattern; DuckDB expands the same glob itself in read_parquet.
    parquet_pattern = os.path.join(base_path, "**", "*.parquet")

    matching_files = glob.glob(parquet_pattern, recursive=True)
    if not matching_files:
        raise IOError(f"Ingen filer funnet som matcher: {parquet_pattern}")
    print(f"Antall funne filer: {len(matching_files)}")

    query = f"SELECT * FROM read_parquet({_sql_string_literal(parquet_pattern)})"

    # Translate the column conditions into WHERE fragments.
    where_parts = []
    for col, val in (conditions_dict or {}).items():
        if val is not None and val != 0:
            if col == _TIMESTAMP_COLUMN and _is_date_only_string(val):
                # Plain equality would only match 00:00:00 on that day.
                where_parts.append(f"CAST({col} AS DATE) = {_sql_string_literal(val)}")
            elif isinstance(val, str):
                where_parts.append(f"{col} = {_sql_string_literal(val)}")
            else:
                where_parts.append(f"{col} = {val}")

    # Spatial buffer filter, only when centre and radius are all supplied.
    if center_lat is not None and center_lon is not None and buffer_radius_m is not None:
        radius_deg = buffer_radius_m / 111000.0
        buffer_filter = f"ST_Distance(geometry, ST_GeomFromText('POINT({center_lon} {center_lat})')) <= {radius_deg}"
        where_parts.append(buffer_filter)

    if where_parts:
        query += " WHERE " + " AND ".join(where_parts)

    query += f" ORDER BY {_TIMESTAMP_COLUMN}"

    if max_rows is not None:
        # int() rejects anything that is not a plain row count.
        query += f" LIMIT {int(max_rows)}"

    print("\nKjørende spørring:")
    print(query)

    con = duckdb.connect()
    try:
        con.execute("INSTALL spatial;")
        con.execute("LOAD spatial;")
        return con.execute(query).fetchdf()
    finally:
        # Release the connection even when the query fails.
        con.close()

def render_ais_map(df, center_lat, center_lon, buffer_radius_m):
    """
    Build an interactive Folium map for a set of AIS positions.

    The map contains:
      - a view centred on (center_lat, center_lon),
      - a circle marking the buffer area (buffer_radius_m, in metres),
      - a MarkerCluster layer with one marker per row that has both a
        'longitude' and a 'latitude' value.

    Parameters:
      df: pandas DataFrame with AIS data; the columns "longitude", "latitude",
          "mmsi" and "date_time_utc" are read when present.
      center_lat, center_lon: Centre coordinate (EPSG:4326).
      buffer_radius_m: Buffer radius in metres.

    Returns:
      The Folium map with the buffer circle and the clustered markers.
    """
    centre = (center_lat, center_lon)
    ais_map = folium.Map(location=list(centre), zoom_start=12, tiles="cartodb positron")

    # Semi-transparent circle showing the search buffer.
    buffer_style = dict(color='blue', fill=True, fill_opacity=0.1, popup="Bufferområde")
    folium.Circle(location=list(centre), radius=buffer_radius_m, **buffer_style).add_to(ais_map)

    cluster = MarkerCluster().add_to(ais_map)
    for _, record in df.iterrows():
        lon = record.get('longitude')
        lat = record.get('latitude')
        # Rows without a usable position cannot be placed on the map.
        if pd.isnull(lon) or pd.isnull(lat):
            continue

        popup_html = "<br>".join([
            f"MMSI: {record.get('mmsi', '')}",
            f"Date: {record.get('date_time_utc', '')}",
            f"Lon: {lon:.4f}, Lat: {lat:.4f}",
        ])
        pin = folium.Icon(color='red', icon='info-sign')
        folium.Marker(location=[lat, lon], popup=popup_html, icon=pin).add_to(cluster)

    return ais_map


In [30]:

base_path = "/Users/johannehaakenstad/Bachelor-Filer/Github-KartAI/Johanne/data/processed/."

# Filter columns and the value each one must equal (inspect and adjust as needed).
# NOTE: a date has to be included so that the right file is read.
filter_cols = ["date_time_utc", "ship_type"]
filter_vals = ["2025-01-21", 51]

if len(filter_cols) != len(filter_vals):
    raise ValueError("filter_cols og filter_vals må ha lik lengde!")

conditions = dict(zip(filter_cols, filter_vals))
print("\nBruker følgende filterbetingelser:")
for column, value in conditions.items():
    print(f"  {column} = {value}")

# Centre of the buffer (EPSG:4326) and the buffer radius in metres.
center_lat = 58.142359
center_lon = 8.025218
buffer_radius_m = 6000  # 6 km

# Upper bound on the number of rows to fetch.
max_rows = 100

# Fetch the AIS rows matching the column conditions and the spatial buffer.
df_filtered = get_filtered_ais_data_spatial(
    base_path, conditions, center_lat, center_lon, buffer_radius_m, max_rows
)

print("\nAntall rader hentet:", len(df_filtered))
display(df_filtered.head(10))

# Show the fetched rows on an interactive Folium map with marker clustering.
ais_map = render_ais_map(
    df=df_filtered,
    center_lat=center_lat,
    center_lon=center_lon,
    buffer_radius_m=buffer_radius_m,
)
display(ais_map)



Bruker følgende filterbetingelser:
  date_time_utc = 2025-01-21
  ship_type = 51
Antall funne filer: 25

Kjørende spørring:
SELECT * FROM read_parquet('/Users/johannehaakenstad/Bachelor-Filer/Github-KartAI/Johanne/data/processed/./**/*.parquet') WHERE CAST(date_time_utc AS DATE) = '2025-01-21' AND ship_type = 51 AND ST_Distance(geometry, ST_GeomFromText('POINT(8.025218 58.142359)')) <= 0.05405405405405406 ORDER BY date_time_utc LIMIT 100

Antall rader hentet: 100


Unnamed: 0,date_time_utc,mmsi,longitude,latitude,status,course_over_ground,speed_over_ground,rate_of_turn,maneuvre,imo,...,data_source,ais_class,hex_7,hex_14,year,month,day,geometry,hour,ship_type
0,2025-01-21 12:59:34,257959900,8.070825,58.113528,0,288.7,28.1,0.0,0.0,0,...,G,A,608155174191497215,639680371570173263,2025,1,21,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...",12,51
1,2025-01-21 12:59:36,257959900,8.070363,58.113613,0,288.6,28.1,0.0,0.0,0,...,G,A,608155174191497215,639680371570174247,2025,1,21,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...",12,51
2,2025-01-21 12:59:38,257959900,8.069903,58.113692,0,288.7,28.2,0.0,0.0,0,...,G,A,608155174191497215,639680371570121543,2025,1,21,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...",12,51
3,2025-01-21 12:59:40,257959900,8.069437,58.113775,0,288.8,28.2,0.0,0.0,0,...,G,A,608155174191497215,639680371569533239,2025,1,21,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...",12,51
4,2025-01-21 12:59:42,257959900,8.068973,58.11386,0,288.8,28.2,127.0,0.0,0,...,G,A,608155174191497215,639680371569550167,2025,1,21,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...",12,51
5,2025-01-21 12:59:44,257959900,8.06851,58.113948,0,289.2,28.2,127.0,0.0,0,...,G,A,608155174191497215,639680371569542967,2025,1,21,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...",12,51
6,2025-01-21 12:59:45,257959900,8.06828,58.113993,0,289.4,28.1,127.0,0.0,0,...,G,A,608155174191497215,639680371569541391,2025,1,21,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...",12,51
7,2025-01-21 12:59:46,257959900,8.068052,58.114035,0,289.4,28.0,0.0,0.0,0,...,G,A,608155174191497215,639680371569543511,2025,1,21,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...",12,51
8,2025-01-21 12:59:48,257959900,8.067593,58.114125,0,289.5,28.0,127.0,0.0,0,...,G,A,608155174191497215,639680371569671479,2025,1,21,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...",12,51
9,2025-01-21 12:59:50,257959900,8.067138,58.114215,0,290.1,27.9,127.0,0.0,0,...,G,A,608155174191497215,639680371569656159,2025,1,21,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...",12,51
