In [0]:
import os
import shutil
import time
from typing import Optional, Tuple

import geopandas as gpd
import requests
from shapely.geometry import Point

In [0]:
# Unity Catalog location used by the rest of the notebook. The catalog name
# contains a hyphen, so it is wrapped in backticks for use inside SQL text.
catalog_dev = "`land_topografisk-gdb_dev`"
schema_dev = "ai2025"

# Select the catalog, make sure the schema exists, then select the schema.
# The order matters: the schema statements are resolved against the catalog
# chosen by the first statement.
for _statement in (
    f"USE CATALOG {catalog_dev}",
    f"CREATE SCHEMA IF NOT EXISTS {schema_dev}",
    f"USE SCHEMA {schema_dev}",
):
    spark.sql(_statement)

# Name of the table (inside the schema selected above) that later cells read.
table = "predicted_gold"

In [0]:
def get_fylke_and_kommune(
    east: float,
    north: float,
    *,
    timeout: float = 10,
    delay: float = 2,
) -> Tuple[Optional[str], Optional[str]]:
    """Look up the fylke and kommune numbers for a coordinate.

    Queries the Kartverket ``kommuneinfo`` point endpoint with the coordinate
    expressed in EPSG:25833 (the ``koordsys`` value sent in the request).

    Args:
        east: Easting of the point (sent as ``ost``).
        north: Northing of the point (sent as ``nord``).
        timeout: Seconds to wait for the HTTP request before giving up.
        delay: Seconds to pause after every request attempt. This throttles
            callers that invoke the function once per row.

    Returns:
        ``(fylkesnummer, kommunenummer)`` exactly as found in the JSON
        response (presumably zero-padded strings such as ``"0301"`` -- confirm
        against the API schema). Returns ``(None, None)`` when the request
        fails, the response status is not 200, or the body is not valid JSON.
        An individual element is ``None`` when its key is missing from the
        response.
    """
    url = f"https://api.kartverket.no/kommuneinfo/v1/punkt?nord={north}&ost={east}&koordsys=25833"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Timeouts / connection errors are treated like a non-200 answer so a
        # single flaky request does not abort a long per-row lookup loop.
        return None, None
    finally:
        # Pause after every attempt, successful or not, to keep the request
        # rate against the API low.
        time.sleep(delay)

    if response.status_code != 200:
        return None, None

    try:
        data = response.json()
    except ValueError:
        # A 200 response with an unparsable body is still a failed lookup.
        return None, None

    return data.get("fylkesnummer"), data.get("kommunenummer")

In [0]:
# Export the prediction table to a GeoPackage with one layer per category.
# The file is built on local disk first and moved to the volume at the end,
# so the destination never holds a half-written GeoPackage.
local_gpkg = "/tmp/predicted.gpkg"
final_gpkg_dir = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/geopackages/"
final_gpkg_path = os.path.join(final_gpkg_dir, "predicted.gpkg")

# Remove any leftover file from an earlier run so old layers cannot survive.
if os.path.exists(local_gpkg):
    os.remove(local_gpkg)

df = spark.read.table(table)
pdf = df.toPandas()

# Fail early with a clear message: an empty table would otherwise crash later
# in the tuple unpacking / file move with an unrelated-looking error.
if pdf.empty:
    raise ValueError(f"Table {table!r} returned no rows; nothing to export to {final_gpkg_path}")

# Build all point geometries in one vectorised call instead of row by row.
pdf['geometry'] = gpd.points_from_xy(pdf['centroid_x'], pdf['centroid_y'])

# One API lookup per row; each call pauses inside get_fylke_and_kommune, so
# this step dominates the runtime of the cell.
fylke_kommune = pdf.apply(lambda row: get_fylke_and_kommune(row['centroid_x'], row['centroid_y']), axis=1)
pdf['fylke'], pdf['kommune'] = zip(*fylke_kommune)

# The raw coordinates are now carried by the geometry column.
pdf = pdf.drop(columns=['centroid_x', 'centroid_y'])
gdf = gpd.GeoDataFrame(pdf, geometry='geometry', crs="EPSG:25833")

# Skip null categories: `gdf['category'] == NaN` never matches any row, so a
# null entry would only produce an empty "predicted_nan" layer (and could take
# the "w" slot of the first iteration). Rows with a null category are not
# exported, as before.
categories = gdf['category'].dropna().unique()
if len(categories) == 0:
    raise ValueError(f"Table {table!r} has no rows with a non-null category; nothing to export")

for i, cat in enumerate(categories):
    # First layer creates the file, subsequent layers are added to it.
    mode = "w" if i == 0 else "a"
    gdf[gdf['category'] == cat].to_file(local_gpkg, driver="GPKG", layer=f"predicted_{cat}", mode=mode)

# Make sure the destination directory exists before publishing the file.
os.makedirs(final_gpkg_dir, exist_ok=True)
shutil.move(local_gpkg, final_gpkg_path)