In [1]:
# Install the packages listed in requirements.txt.
# NOTE(review): the bare `pip ...` form only works through IPython's automagic;
# `%pip install -r requirements.txt` is the explicit form — consider switching.
pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [38]:
import os, glob
import duckdb
import pandas as pd
import folium
from folium.plugins import MarkerCluster
from IPython.display import display
import ipywidgets as widgets
from ipywidgets import interact, fixed

# Brukerhistorie 1, Kjør funksjonene nedenfor



def get_filtered_ais_data_spatial(
    base_path,
    conditions_dict,
    center_lat, center_lon,
    buffer_radius_m,
    max_rows=None
):
    """
    Read all Parquet files under ``base_path`` (recursively) with DuckDB and
    return the rows that match the given filters.

    Parameters:
      base_path: Directory searched recursively for files matching ``*.parquet*``.
      conditions_dict: Mapping of column name -> required value, for example
        {"hour": 11, "mmsi": 257565700, "date_time_utc": "2024-12-01"}.
        Entries whose value is None or 0 are ignored. The key "date_time_utc"
        (case-insensitive) is compared on the date part only. None is treated
        as an empty mapping.
      center_lat, center_lon: Centre of the buffer.
      buffer_radius_m: Buffer radius in metres. The buffer filter is applied
        only when centre and radius are all not None.
      max_rows: Optional maximum number of rows (SQL LIMIT); None means no limit.

    Returns:
      A pandas DataFrame with the filtered rows, ordered by date_time_utc.

    Raises:
      IOError: If no file matches the Parquet glob pattern.

    NOTE(review): the radius is converted with 1 degree ~ 111000 m and compared
    against ST_Distance on the stored coordinates. Assuming `geometry` holds
    lon/lat degrees (the centre point is built as "lon lat" — TODO confirm),
    this is only an approximation: away from the equator a degree of longitude
    is shorter than 111 km, so the selected area is not a true circle of
    buffer_radius_m metres.
    """

    def _sql_string(value):
        # Single-quoted SQL literal with embedded quotes doubled, so a value
        # containing "'" can neither break nor extend the statement.
        return "'" + str(value).replace("'", "''") + "'"

    def _sql_identifier(name):
        # Double-quoted SQL identifier with embedded double quotes doubled.
        return '"' + str(name).replace('"', '""') + '"'

    # 1) Glob pattern that matches every file whose name contains ".parquet".
    parquet_pattern = os.path.join(base_path, "**", "*.parquet*")

    # Fail early (before touching DuckDB) when there is nothing to read.
    matching_files = glob.glob(parquet_pattern, recursive=True)
    if not matching_files:
        raise IOError(f"Ingen filer funnet som matcher: {parquet_pattern}")
    print(f"Antall funne filer: {len(matching_files)}")

    # 2) Base SELECT; the path is escaped like any other string literal.
    query = f"SELECT * FROM read_parquet({_sql_string(parquet_pattern)})"

    # 3) Dynamic WHERE clauses from the caller's conditions.
    where = []
    for col, val in (conditions_dict or {}).items():
        if val is None or val == 0:
            continue
        if col.lower() == "date_time_utc":
            # Drop the time of day and compare the date part only.
            where.append(f"CAST(date_time_utc AS DATE) = {_sql_string(val)}")
        elif isinstance(val, (int, float)):
            where.append(f"{_sql_identifier(col)} = {val}")
        else:
            where.append(f"{_sql_identifier(col)} = {_sql_string(val)}")

    # 4) Buffer filter in degrees; float() guarantees only numbers reach the SQL.
    if center_lat is not None and center_lon is not None and buffer_radius_m is not None:
        radius_deg = float(buffer_radius_m) / 111000.0
        where.append(
            f"ST_Distance(geometry, ST_GeomFromText('POINT({float(center_lon)} {float(center_lat)})')) <= {radius_deg}"
        )

    if where:
        query += " WHERE " + " AND ".join(where)

    query += " ORDER BY date_time_utc"
    if max_rows is not None:
        query += f" LIMIT {int(max_rows)}"

    print("\nKjørende spørring:\n", query)

    # 5) Run the query; the connection is closed even if DuckDB raises.
    con = duckdb.connect()
    try:
        con.execute("INSTALL spatial;")
        con.execute("LOAD spatial;")
        return con.execute(query).fetchdf()
    finally:
        con.close()

def render_ais_map(df, center_lat, center_lon, buffer_radius_m):
    """
    Build an interactive Folium map showing the buffer and the AIS points.

    The map is centred on (center_lat, center_lon), gets a blue circle of
    radius buffer_radius_m (metres) around that centre, and a MarkerCluster
    layer with one red marker per row of `df` that has both a longitude and a
    latitude value.

    Parameters:
      df: pandas DataFrame; rows are read through the columns "longitude",
        "latitude", "mmsi" and "date_time_utc" ("mmsi" and "date_time_utc"
        fall back to an empty string when absent).
      center_lat, center_lon: Map centre and circle centre.
      buffer_radius_m: Circle radius in metres.

    Returns:
      The folium.Map with the circle and markers added.
    """
    centre = [center_lat, center_lon]
    ais_map = folium.Map(location=centre, zoom_start=12, tiles="cartodb positron")

    buffer_circle = folium.Circle(
        location=[center_lat, center_lon],
        radius=buffer_radius_m,
        color='blue',
        fill=True,
        fill_opacity=0.1,
        popup="Bufferområde"
    )
    buffer_circle.add_to(ais_map)

    cluster_layer = MarkerCluster()
    cluster_layer.add_to(ais_map)

    for _, record in df.iterrows():
        # Rows without a complete position cannot be placed on the map.
        if pd.isnull(record.get('longitude')) or pd.isnull(record.get('latitude')):
            continue

        popup_text = (
            f"MMSI: {record.get('mmsi', '')}<br>"
            f"Date: {record.get('date_time_utc', '')}<br>"
            f"Lon: {record['longitude']:.4f}, Lat: {record['latitude']:.4f}"
        )
        marker = folium.Marker(
            location=[record['latitude'], record['longitude']],
            popup=popup_text,
            icon=folium.Icon(color='red', icon='info-sign')
        )
        marker.add_to(cluster_layer)

    return ais_map


## Sett inn verdier og kjør funksjonen


# Root directory that is searched for the processed AIS Parquet files.
base_path = "./data/processed/."

# Inspect and edit these parameters: column names and the value required for
# each, in matching order.
# NB (from the original note): a date must be given so the right file is read.
filter_cols = ["date_time_utc"]
filter_vals = ["2025-01-21"]

if len(filter_cols) != len(filter_vals):
    raise ValueError("filter_cols og filter_vals må ha lik lengde!")

# Pair every column with its value to form the filter mapping.
conditions = dict(zip(filter_cols, filter_vals))
print("\nBruker følgende filterbetingelser:")
for column_name, wanted_value in conditions.items():
    print(f"  {column_name} = {wanted_value}")

# Centre point of the buffer (EPSG:4326) and buffer radius in metres.
center_lat = 58.142359
center_lon = 8.025218
buffer_radius_m = 6000  # 6 km

# Maximum number of rows to fetch.
max_rows = 10

# Fetch the filtered AIS data with the parameters above.
df_filtered = get_filtered_ais_data_spatial(
    base_path,
    conditions,
    center_lat=center_lat,
    center_lon=center_lon,
    buffer_radius_m=buffer_radius_m,
    max_rows=max_rows,
)

print("\nAntall rader hentet:", len(df_filtered))
display(df_filtered.head(10))

# Show the filtered rows on an interactive Folium map with marker clustering.
ais_map = render_ais_map(df_filtered, center_lat, center_lon, buffer_radius_m)
display(ais_map)


import os, glob, duckdb, pandas as pd
import folium
from folium.plugins import MarkerCluster
from IPython.display import display

def get_filtered_ais_data_spatial(
    base_path,
    conditions_dict,
    center_lat, center_lon,
    buffer_radius_m,
    start_time: str = None,
    end_time:   str = None,
    max_rows:   int = None
) -> pd.DataFrame:
    """
    Read the Parquet files under ``base_path`` with DuckDB and filter them.

    Filters applied (all combined with AND):
      - Equality conditions from conditions_dict (e.g. {"mmsi": 257565700});
        entries whose value is None or 0 are ignored, and None is treated as
        an empty mapping.
      - An optional time range start_time <= date_time_utc <= end_time. Either
        bound is skipped when it is None or empty. The bounds are expected to
        be timestamp strings such as 'YYYY-MM-DD HH:MM:SS'.
      - A buffer around (center_lat, center_lon) with radius buffer_radius_m
        metres, applied only when all three are not None.

    Parameters:
      base_path: Directory searched recursively for ``*.parquet*`` files.
      max_rows: Optional SQL LIMIT; None or 0 means no limit.

    Returns:
      A pandas DataFrame with the matching rows, ordered by date_time_utc.

    Raises:
      IOError: If no file matches the Parquet glob pattern.

    NOTE(review): the radius is converted with 1 degree ~ 111000 m and compared
    against ST_Distance on the stored coordinates. Assuming `geometry` holds
    lon/lat degrees (TODO confirm), the selected area is only approximately a
    circle of buffer_radius_m metres, because a degree of longitude is shorter
    than 111 km away from the equator.
    """

    def _sql_string(value):
        # Single-quoted SQL literal with embedded quotes doubled, so a value
        # containing "'" can neither break nor extend the statement.
        return "'" + str(value).replace("'", "''") + "'"

    def _sql_identifier(name):
        # Double-quoted SQL identifier with embedded double quotes doubled.
        return '"' + str(name).replace('"', '""') + '"'

    # 1) Find all .parquet* files recursively; fail before touching DuckDB
    #    when there is nothing to read.
    parquet_pattern = os.path.join(base_path, "**", "*.parquet*")
    files = glob.glob(parquet_pattern, recursive=True)
    if not files:
        raise IOError(f"Ingen filer funnet for mønster {parquet_pattern!r}")
    print(f"Fant {len(files)} parquet-filer")

    # 2) Base SELECT; the path is escaped like any other string literal.
    query = f"SELECT * FROM read_parquet({_sql_string(parquet_pattern)})"
    where = []

    # 3) Equality conditions from conditions_dict.
    for col, val in (conditions_dict or {}).items():
        if val is None or val == 0:
            continue
        if isinstance(val, (int, float)):
            where.append(f"{_sql_identifier(col)} = {val}")
        else:
            where.append(f"{_sql_identifier(col)} = {_sql_string(val)}")

    # 4) Optional time range.
    if start_time:
        where.append(f"date_time_utc >= TIMESTAMP {_sql_string(start_time)}")
    if end_time:
        where.append(f"date_time_utc <= TIMESTAMP {_sql_string(end_time)}")

    # 5) Buffer filter; float() guarantees only numbers reach the SQL text.
    if center_lat is not None and center_lon is not None and buffer_radius_m is not None:
        radius_deg = float(buffer_radius_m) / 111000.0
        where.append(
            f"ST_Distance(geometry, ST_GeomFromText('POINT({float(center_lon)} {float(center_lat)})')) <= {radius_deg}"
        )

    if where:
        query += " WHERE " + " AND ".join(where)

    query += " ORDER BY date_time_utc"
    if max_rows:
        query += f" LIMIT {int(max_rows)}"

    print("\nKjørende SQL:\n", query, "\n")

    # 6) Run the query; the connection is closed even if DuckDB raises.
    con = duckdb.connect()
    try:
        con.execute("INSTALL spatial;")
        con.execute("LOAD spatial;")
        return con.execute(query).fetchdf()
    finally:
        con.close()


def render_ais_map(df, center_lat, center_lon, buffer_radius_m):
    """
    Build a Folium map centred on (center_lat, center_lon) with a blue circle
    of radius buffer_radius_m (metres) and a MarkerCluster holding one red
    marker per row of `df` whose longitude and latitude are both non-null.

    `df` is read through the attributes longitude, latitude, mmsi and
    date_time_utc of each row. Returns the folium.Map.
    """
    ais_map = folium.Map(
        location=[center_lat, center_lon],
        zoom_start=12,
        tiles="cartodb positron",
    )

    buffer_circle = folium.Circle(
        (center_lat, center_lon),
        radius=buffer_radius_m,
        color='blue',
        fill=True,
        fill_opacity=0.1,
    )
    buffer_circle.add_to(ais_map)

    marker_layer = MarkerCluster()
    marker_layer.add_to(ais_map)

    for _, record in df.iterrows():
        # Skip rows that lack a complete position.
        if pd.isnull(record.longitude) or pd.isnull(record.latitude):
            continue
        popup_html = (f"MMSI: {record.mmsi}<br>"
                      f"Time: {record.date_time_utc}<br>"
                      f"Lon/Lat: {record.longitude:.4f}, {record.latitude:.4f}")
        marker = folium.Marker(
            location=[record.latitude, record.longitude],
            popup=popup_html,
            icon=folium.Icon(color='red', icon='info-sign'),
        )
        marker.add_to(marker_layer)

    return ais_map


# — Example usage (meant for a cell of its own) —
base_path = "./data/processed"
conds = {}  # fill in the column -> value pairs you want to filter on

# Buffer settings shared by both queries below.
buffer_kwargs = {
    "center_lat": 58.14,
    "center_lon": 8.02,
    "buffer_radius_m": 6000,
}

# Fetch at most 50 rows between 2025-01-21 13:00:00 and 2025-01-21 23:00:00.
df = get_filtered_ais_data_spatial(
    base_path,
    conds,
    start_time="2025-01-21 13:00:00",
    end_time="2025-01-21 23:00:00",
    max_rows=50,
    **buffer_kwargs,
)
print("Rader:", len(df))
display(df.head())

# To ignore the time range, simply leave out start_time/end_time.
# NOTE(review): no max_rows is passed here, so every row inside the buffer is
# loaded, and df2 is not used afterwards in the visible code.
df2 = get_filtered_ais_data_spatial(base_path, conds, **buffer_kwargs)


import os, glob
import duckdb
import pandas as pd
import folium
from folium.plugins import MarkerCluster
from IPython.display import display

def get_filtered_ais_data_spatial(
    base_path: str,
    conditions_dict: dict,
    center_lat: float,
    center_lon: float,
    buffer_radius_m: float,
    start_time: str = None,
    end_time:   str = None,
    max_rows:   int = None
) -> pd.DataFrame:
    """
    Read the Parquet files under ``base_path`` with DuckDB and filter them.

    Filters applied (all combined with AND):
      1) Equality conditions from conditions_dict (e.g. {"mmsi": 257565700});
         entries whose value is None or 0 are ignored, and None is treated as
         an empty mapping.
      2) Optional start_time / end_time bounds on date_time_utc (timestamp
         strings such as "YYYY-MM-DD HH:MM:SS"); a bound that is None or empty
         is skipped.
      3) A buffer around (center_lat, center_lon) with radius buffer_radius_m
         metres, applied only when all three are not None.

    Parameters:
      base_path: Directory searched recursively for ``*.parquet*`` files.
      max_rows: Optional SQL LIMIT; None or 0 means no limit.

    Returns:
      A pandas DataFrame with the matching rows, ordered by date_time_utc.

    Raises:
      IOError: If no file matches the Parquet glob pattern.

    NOTE(review): the radius is converted with 1 degree ~ 111000 m and compared
    against ST_Distance on the stored coordinates. Assuming `geometry` holds
    lon/lat degrees (TODO confirm), the selected area is only approximately a
    circle of buffer_radius_m metres, because a degree of longitude is shorter
    than 111 km away from the equator.
    """

    def _sql_string(value):
        # Single-quoted SQL literal with embedded quotes doubled, so a value
        # containing "'" can neither break nor extend the statement.
        return "'" + str(value).replace("'", "''") + "'"

    def _sql_identifier(name):
        # Double-quoted SQL identifier with embedded double quotes doubled.
        return '"' + str(name).replace('"', '""') + '"'

    # 1) Find all .parquet* files recursively; fail before touching DuckDB
    #    when there is nothing to read.
    parquet_pattern = os.path.join(base_path, "**", "*.parquet*")
    files = glob.glob(parquet_pattern, recursive=True)
    if not files:
        raise IOError(f"Ingen filer funnet for mønster {parquet_pattern!r}")
    print(f"Fant {len(files)} filer")

    # 2) Base SELECT; the path is escaped like any other string literal.
    query = f"SELECT * FROM read_parquet({_sql_string(parquet_pattern)})"
    where = []

    # 3) Equality conditions from conditions_dict.
    for col, val in (conditions_dict or {}).items():
        if val is None or val == 0:
            continue
        if isinstance(val, (int, float)):
            where.append(f"{_sql_identifier(col)} = {val}")
        else:
            where.append(f"{_sql_identifier(col)} = {_sql_string(val)}")

    # 4) Optional time span.
    if start_time:
        where.append(f"date_time_utc >= TIMESTAMP {_sql_string(start_time)}")
    if end_time:
        where.append(f"date_time_utc <= TIMESTAMP {_sql_string(end_time)}")

    # 5) Buffer filter; float() guarantees only numbers reach the SQL text.
    if center_lat is not None and center_lon is not None and buffer_radius_m is not None:
        radius_deg = float(buffer_radius_m) / 111000.0
        where.append(
            f"ST_Distance(geometry, ST_GeomFromText('POINT({float(center_lon)} {float(center_lat)})')) <= {radius_deg}"
        )

    # 6) Assemble WHERE, ordering and the optional limit.
    if where:
        query += " WHERE " + " AND ".join(where)
    query += " ORDER BY date_time_utc"
    if max_rows:
        query += f" LIMIT {int(max_rows)}"

    print("Kjørende SQL:\n", query, "\n")

    # 7) Run the query; the connection is closed even if DuckDB raises.
    con = duckdb.connect()
    try:
        con.execute("INSTALL spatial;")
        con.execute("LOAD spatial;")
        return con.execute(query).fetchdf()
    finally:
        con.close()

def render_ais_map(
    df: pd.DataFrame,
    center_lat: float,
    center_lon: float,
    buffer_radius_m: float
) -> folium.Map:
    """
    Draw a Folium map containing:
      - the centre (center_lat, center_lon) as the initial view,
      - a blue circle with radius buffer_radius_m (metres) around the centre,
      - a MarkerCluster with one red marker per row of `df` whose longitude
        and latitude are both non-null.

    Each row is read through its longitude, latitude, mmsi and date_time_utc
    attributes. Returns the folium.Map.
    """
    centre = [center_lat, center_lon]
    ais_map = folium.Map(location=centre, zoom_start=12, tiles="cartodb positron")

    buffer_circle = folium.Circle(
        location=[center_lat, center_lon],
        radius=buffer_radius_m,
        color='blue',
        fill=True,
        fill_opacity=0.1,
        popup="Bufferområde"
    )
    buffer_circle.add_to(ais_map)

    marker_layer = MarkerCluster()
    marker_layer.add_to(ais_map)

    for _, record in df.iterrows():
        # Skip rows that lack a complete position.
        if pd.isnull(record.longitude) or pd.isnull(record.latitude):
            continue
        popup_html = (
            f"MMSI: {record.mmsi}<br>"
            f"Time: {record.date_time_utc}<br>"
            f"Lon/Lat: {record.longitude:.4f}, {record.latitude:.4f}"
        )
        marker = folium.Marker(
            location=[record.latitude, record.longitude],
            popup=popup_html,
            icon=folium.Icon(color='red', icon='info-sign')
        )
        marker.add_to(marker_layer)

    return ais_map

# — Example usage in a Jupyter notebook (in a cell of its own) —

# 1) Path to the partitioned AIS files.
base_path = "./data/processed"

# 2) Filter criteria: column name -> required value.
conditions = {"ship_type": 51}

# 3) Time window of interest (or set these to None to skip the bound).
start_time = "2025-01-21 13:00:00"
end_time = "2025-01-21 14:00:00"

# 4) Buffer parameters.
center_lat = 58.142359
center_lon = 8.025218
buffer_radius_m = 6000  # 6 km

# 5) Fetch at most 10 rows.
df = get_filtered_ais_data_spatial(
    base_path,
    conditions,
    center_lat=center_lat,
    center_lon=center_lon,
    buffer_radius_m=buffer_radius_m,
    start_time=start_time,
    end_time=end_time,
    max_rows=10,
)
print("Rader hentet:", len(df))
display(df.head())

# 6) Draw the map.
m = render_ais_map(df, center_lat, center_lon, buffer_radius_m)
display(m)


In [41]:


def get_filtered_ais_data_spatial(
    base_path: str,
    ship_type: int,
    center_lat: float,
    center_lon: float,
    buffer_radius_m: float,
    start_time: str,
    end_time: str,
    max_rows: int
) -> pd.DataFrame:
    """
    Read the Parquet files under ``base_path`` with DuckDB and return the rows
    inside a buffer, optionally restricted by ship type and time range.

    The arguments typically come straight from free-text widgets, so every
    value is escaped or coerced to a number before it is placed in the SQL.

    Parameters:
      base_path: Directory searched recursively for ``*.parquet*`` files.
      ship_type: Required ship_type value; a falsy value (0/None) disables
        the filter.
      center_lat, center_lon: Centre of the buffer.
      buffer_radius_m: Buffer radius in metres (always applied).
      start_time, end_time: Bounds on date_time_utc as timestamp strings; an
        empty or None bound is skipped.
      max_rows: SQL LIMIT; a falsy value (0/None) means no limit.

    Returns:
      A pandas DataFrame with the matching rows, ordered by date_time_utc.

    Raises:
      IOError: If no file matches the Parquet glob pattern.
      ValueError / TypeError: If a numeric argument cannot be converted to a
        number.

    NOTE(review): the radius is converted with 1 degree ~ 111000 m and compared
    against ST_Distance on the stored coordinates. Assuming `geometry` holds
    lon/lat degrees (TODO confirm), the selected area is only approximately a
    circle of buffer_radius_m metres, because a degree of longitude is shorter
    than 111 km away from the equator.
    """

    def _sql_string(value):
        # Single-quoted SQL literal with embedded quotes doubled, so a value
        # containing "'" can neither break nor extend the statement.
        return "'" + str(value).replace("'", "''") + "'"

    # Fail before touching DuckDB when there is nothing to read.
    parquet_pattern = os.path.join(base_path, "**", "*.parquet*")
    files = glob.glob(parquet_pattern, recursive=True)
    if not files:
        raise IOError(f"Ingen filer funnet for mønster {parquet_pattern!r}")

    query = f"SELECT * FROM read_parquet({_sql_string(parquet_pattern)})"
    where = []

    # int() guarantees only a number is spliced into the statement.
    if ship_type:
        where.append(f"ship_type = {int(ship_type)}")

    if start_time:
        where.append(f"date_time_utc >= TIMESTAMP {_sql_string(start_time)}")
    if end_time:
        where.append(f"date_time_utc <= TIMESTAMP {_sql_string(end_time)}")

    # The buffer filter is unconditional in this version.
    radius_deg = float(buffer_radius_m) / 111000.0
    where.append(
        f"ST_Distance(geometry, ST_GeomFromText('POINT({float(center_lon)} {float(center_lat)})')) <= {radius_deg}"
    )

    query += " WHERE " + " AND ".join(where)
    query += " ORDER BY date_time_utc"
    if max_rows:
        query += f" LIMIT {int(max_rows)}"

    # Run the query; the connection is closed even if DuckDB raises.
    con = duckdb.connect()
    try:
        con.execute("INSTALL spatial;")
        con.execute("LOAD spatial;")
        return con.execute(query).fetchdf()
    finally:
        con.close()

def render_ais_map(
    df: pd.DataFrame,
    center_lat: float,
    center_lon: float,
    buffer_radius_m: float
) -> folium.Map:
    """
    Build a Folium map centred on (center_lat, center_lon) with a blue buffer
    circle of radius buffer_radius_m (metres) and a MarkerCluster holding one
    red marker per row of `df` whose longitude and latitude are both non-null.

    Each row is read through its longitude, latitude, mmsi and date_time_utc
    attributes. Returns the folium.Map.
    """
    centre = [center_lat, center_lon]
    ais_map = folium.Map(location=centre, zoom_start=12, tiles="cartodb positron")

    buffer_circle = folium.Circle(
        location=[center_lat, center_lon],
        radius=buffer_radius_m,
        color='blue',
        fill=True,
        fill_opacity=0.1,
        popup="Bufferområde"
    )
    buffer_circle.add_to(ais_map)

    marker_layer = MarkerCluster()
    marker_layer.add_to(ais_map)

    for _, record in df.iterrows():
        # Skip rows that lack a complete position.
        if pd.isnull(record.longitude) or pd.isnull(record.latitude):
            continue
        popup_html = (
            f"MMSI: {record.mmsi}<br>"
            f"Time: {record.date_time_utc}<br>"
            f"Lon/Lat: {record.longitude:.4f}, {record.latitude:.4f}"
        )
        marker = folium.Marker(
            location=[record.latitude, record.longitude],
            popup=popup_html,
            icon=folium.Icon(color='red', icon='info-sign')
        )
        marker.add_to(marker_layer)

    return ais_map

# GUI-komponenter med ipywidgets
def interactive_ais_query():
    """
    Show an ipywidgets form with one input per run_query argument and re-run
    run_query through `interact` with the current widget values.
    """
    # One widget per run_query parameter; the keys are the parameter names.
    controls = {
        "base_path": widgets.Text(value='./data/processed', description='Base Path:'),
        "ship_type": widgets.IntText(value=51, description='Ship Type:'),
        "center_lat": widgets.FloatText(value=58.142359, description='Center Lat:'),
        "center_lon": widgets.FloatText(value=8.025218, description='Center Lon:'),
        "buffer_radius_m": widgets.IntText(value=6000, description='Radius (m):'),
        "start_time": widgets.Text(value='2025-01-21 13:00:00', description='Start Time:'),
        "end_time": widgets.Text(value='2025-01-21 14:00:00', description='End Time:'),
        "max_rows": widgets.IntText(value=10, description='Max Rows:'),
    }
    interact(run_query, **controls)

  # Ferdig-knapp
# "Done" button widget (info style, 150px wide).
# NOTE(review): in the visible code this button is never displayed and has no
# on_click handler attached — confirm whether it is still needed.
done_button = widgets.Button(
    description="Done",
    button_style='info',
    layout=widgets.Layout(width='150px')
)


def run_query(base_path, ship_type, center_lat, center_lon, buffer_radius_m, start_time, end_time, max_rows):
    """
    Fetch the filtered AIS rows for the given parameters, print the row count,
    display the first rows of the table and display the rows on a map.

    All arguments are forwarded unchanged to get_filtered_ais_data_spatial;
    the centre and radius are also used to draw the map.
    """
    ais_rows = get_filtered_ais_data_spatial(
        base_path=base_path,
        ship_type=ship_type,
        center_lat=center_lat,
        center_lon=center_lon,
        buffer_radius_m=buffer_radius_m,
        start_time=start_time,
        end_time=end_time,
        max_rows=max_rows,
    )

    print("Rader hentet:", len(ais_rows))
    # Drop .head() to show the whole table instead of the first rows.
    display(ais_rows.head())

    display(render_ais_map(ais_rows, center_lat, center_lon, buffer_radius_m))

# Launch the widget-based query form; the query re-runs with the current widget values.
interactive_ais_query()

interactive(children=(Text(value='./data/processed', description='Base Path:'), IntText(value=51, description=…