# Konvertering av N50 Vektor til GeoParquet

## 1. Last ned N50 Kartdata

![Geonorge N50](img/geonorge-n50.png)

Vektordata kan fritt lastes ned fra kartkatalog.geonorge.no. Du finner N50 [her](https://kartkatalog.geonorge.no/metadata/n50-kartdata/ea192681-d039-42ec-b1bc-f3ce04c189ac).

Til denne demoen brukes:
- `Hele landet`
- `UTM 33`
- `FGDB`

![Geonorge Last ned](img/geonorge-lastned.png)

Du får en zip-fil når du laster ned. Pakk den ut i `src/geoparquet`. Git er satt opp til å ignorere `.gdb`-mapper.

In [None]:
# import packages
import fiona
import geopandas as gpd
from pathlib import Path

import utils

## Data input

Starter med å lese ut alle lagene som finnes i `.gdb`-filen.

In [None]:
# List every layer contained in the .gdb file
gdb_name = Path("Basisdata_0000_Norge_25833_N50Kartdata_FGDB.gdb")
path = utils.get_workdir() / gdb_name
layers = fiona.listlayers(path)
print(layers)

## Parquet produksjon

For hvert lag produseres en Parquet-fil i `out/`-mappen.

In [None]:
# Create the out/ directory that the parquet files are written to
utils.create_dir("out")

# Write options shared by every layer; write_covering_bbox produces the
# per-row `bbox` column that the bounding-box queries later in the notebook use
parquet_options = {
    "compression": "snappy",
    "geometry_encoding": "WKB",
    "write_covering_bbox": True,
}

# Convert the layers one by one: read from the .gdb file, write as GeoParquet.
# Any error is printed and the loop moves on to the next layer.
for layer in layers:
    try:
        n50_df = gpd.read_file(path, layer=layer)
        n50_df.to_parquet(path=f"out/{layer}.snappy.parquet", **parquet_options)
    except Exception as err:
        print(f"Failed to load layer: {layer} with error: {err}")

## Setter fil og bbox

In [35]:
# Parquet file inspected by the cells below
# (alternative layer: "out/N50_Arealdekke_omrade.snappy.parquet")
file_path = "out/N50_Samferdsel_senterlinje.snappy.parquet"

# Query window in the coordinates of the data (downloaded as UTM 33)
xmin, ymin = 82100, 6461800
xmax, ymax = 96000, 6470500

## Initialiserer en DuckDB database i minne

Credit: https://github.com/Norkart/syntaks-2025/blob/main/parquet-create.ipynb

In [None]:
import duckdb

# Open an in-memory DuckDB database
con = duckdb.connect(database=":memory:")

# Install and load each required extension, in the same order as before:
# spatial first, then httpfs
for extension in ("spatial", "httpfs"):
    con.execute(f"INSTALL {extension};")
    con.execute(f"LOAD {extension};")



## Leser ut metadata

In [37]:
# Parquet metadata for the selected file (row groups, column types,
# page offsets, compressed/uncompressed sizes), shown as a DataFrame
metadata_query = f"""
SELECT *
FROM parquet_metadata('{file_path}')
"""
metadata_df = con.query(metadata_query).to_df()
metadata_df

Unnamed: 0,file_name,row_group_id,row_group_num_rows,row_group_num_columns,row_group_bytes,column_id,file_offset,num_values,path_in_schema,type,...,index_page_offset,dictionary_page_offset,data_page_offset,total_compressed_size,total_uncompressed_size,key_value_metadata,bloom_filter_offset,bloom_filter_length,min_is_exact,max_is_exact
0,out/N50_Samferdsel_senterlinje.snappy.parquet,0,1048576,22,309986742,0,0,1048576,objtype,BYTE_ARRAY,...,,4,40,95,91,{},,,,
1,out/N50_Samferdsel_senterlinje.snappy.parquet,0,1048576,22,309986742,1,0,1048576,datafangstdato,BYTE_ARRAY,...,,99,50966,1661061,1973720,{},,,,
2,out/N50_Samferdsel_senterlinje.snappy.parquet,0,1048576,22,309986742,2,0,1048576,oppdateringsdato,BYTE_ARRAY,...,,1661160,1674619,869326,1361871,{},,,,
3,out/N50_Samferdsel_senterlinje.snappy.parquet,0,1048576,22,309986742,3,0,1048576,sporantall,BYTE_ARRAY,...,,2530486,2530512,842,835,{},,,,
4,out/N50_Samferdsel_senterlinje.snappy.parquet,0,1048576,22,309986742,4,0,1048576,anleggstype,BYTE_ARRAY,...,,2531328,2531364,810,925,{},,,,
5,out/N50_Samferdsel_senterlinje.snappy.parquet,0,1048576,22,309986742,5,0,1048576,medium,BYTE_ARRAY,...,,2532138,2532174,35922,57357,{},,,,
6,out/N50_Samferdsel_senterlinje.snappy.parquet,0,1048576,22,309986742,6,0,1048576,malemetode,BYTE_ARRAY,...,,2568060,2568121,295708,347808,{},,,,
7,out/N50_Samferdsel_senterlinje.snappy.parquet,0,1048576,22,309986742,7,0,1048576,noyaktighet,INT32,...,,2863768,2863824,369556,462900,{},,,,
8,out/N50_Samferdsel_senterlinje.snappy.parquet,0,1048576,22,309986742,8,0,1048576,rutemerking,BYTE_ARRAY,...,,3233324,3233353,145736,160885,{},,,,
9,out/N50_Samferdsel_senterlinje.snappy.parquet,0,1048576,22,309986742,9,0,1048576,vedlikeholdsansvarlig,BYTE_ARRAY,...,,3379060,3379092,68604,85561,{},,,,


## Leser schema

In [38]:

# Parquet schema of the selected file: one row per schema element with its
# physical type, repetition type and logical type
schema_query = f"""
SELECT *
FROM parquet_schema('{file_path}')
"""
schema_df = con.query(schema_query).to_df()
schema_df

Unnamed: 0,file_name,name,type,type_length,repetition_type,num_children,converted_type,scale,precision,field_id,logical_type
0,out/N50_Samferdsel_senterlinje.snappy.parquet,schema,,,REQUIRED,19.0,,,,,
1,out/N50_Samferdsel_senterlinje.snappy.parquet,objtype,BYTE_ARRAY,,OPTIONAL,,UTF8,,,,StringType()
2,out/N50_Samferdsel_senterlinje.snappy.parquet,datafangstdato,BYTE_ARRAY,,OPTIONAL,,UTF8,,,,StringType()
3,out/N50_Samferdsel_senterlinje.snappy.parquet,oppdateringsdato,BYTE_ARRAY,,OPTIONAL,,UTF8,,,,StringType()
4,out/N50_Samferdsel_senterlinje.snappy.parquet,sporantall,BYTE_ARRAY,,OPTIONAL,,UTF8,,,,StringType()
5,out/N50_Samferdsel_senterlinje.snappy.parquet,anleggstype,BYTE_ARRAY,,OPTIONAL,,UTF8,,,,StringType()
6,out/N50_Samferdsel_senterlinje.snappy.parquet,medium,BYTE_ARRAY,,OPTIONAL,,UTF8,,,,StringType()
7,out/N50_Samferdsel_senterlinje.snappy.parquet,malemetode,BYTE_ARRAY,,OPTIONAL,,UTF8,,,,StringType()
8,out/N50_Samferdsel_senterlinje.snappy.parquet,noyaktighet,INT32,,OPTIONAL,,,,,,
9,out/N50_Samferdsel_senterlinje.snappy.parquet,rutemerking,BYTE_ARRAY,,OPTIONAL,,UTF8,,,,StringType()


## Leser vektordata

In [39]:

# Peek at the first ten rows of the file, including the geometry and bbox columns
sample_query = f"""
SELECT *
FROM read_parquet('{file_path}')
LIMIT 10
"""
sample_df = con.query(sample_query).to_df()
sample_df

Unnamed: 0,objtype,datafangstdato,oppdateringsdato,sporantall,anleggstype,medium,malemetode,noyaktighet,rutemerking,vedlikeholdsansvarlig,vegnummer,motorvegtype,typeveg,vegkategori,vegfase,banestatus,SHAPE_Length,geometry,bbox
0,Bane,1996/07/01 00:00:00+00,2025/01/05 00:00:00+00,F,F,U,fot,2000,,,,,,,,I,1943.957349,"[4, 4, 0, 0, 0, 0, 0, 0, 228, 133, 126, 72, 83...","{'xmin': 260631.5700000003, 'ymin': 6649641.9,..."
1,Bane,1973/01/01 00:00:00+00,2025/01/05 00:00:00+00,E,J,U,dig,1500,,,,,,,,I,242.078258,"[4, 4, 0, 0, 0, 0, 0, 0, 228, 50, 72, 72, 132,...","{'xmin': 205003.5700000003, 'ymin': 6700610.22..."
2,Bane,1985/01/01 00:00:00+00,2025/01/05 00:00:00+00,E,J,T,dig,1500,,,,,,,,I,874.861565,"[4, 4, 0, 0, 0, 0, 0, 0, 16, 28, 122, 72, 29, ...","{'xmin': 256112.25999999978, 'ymin': 6692622.9..."
3,Bane,1990/07/01 00:00:00+00,2025/01/05 00:00:00+00,E,J,T,fot,2000,,,,,,,,I,19.755126,"[4, 4, 0, 0, 0, 0, 0, 0, 129, 127, 167, 71, 14...","{'xmin': 85759.00999999978, 'ymin': 6467782.88..."
4,Bane,1993/07/01 00:00:00+00,2025/01/05 00:00:00+00,E,J,T,fot,2000,,,,,,,,I,5862.380461,"[4, 4, 0, 0, 0, 0, 0, 0, 168, 115, 164, 72, 89...","{'xmin': 336797.26999999955, 'ymin': 6693036.8..."
5,Bane,1995/07/01 00:00:00+00,2025/01/05 00:00:00+00,E,J,U,fot,2000,,,,,,,,I,396.076308,"[4, 4, 0, 0, 0, 0, 0, 0, 160, 7, 24, 73, 220, ...","{'xmin': 622714.0, 'ymin': 7591150.0, 'xmax': ..."
6,Bane,1999/11/03 00:00:00+00,2025/01/05 00:00:00+00,F,F,T,ukj,2500,,,,,,,,I,51.988818,"[4, 4, 0, 0, 0, 0, 0, 0, 176, 66, 130, 72, 189...","{'xmin': 266773.5099999998, 'ymin': 6642782.52..."
7,Bane,1979/01/01 00:00:00+00,2025/01/05 00:00:00+00,E,J,U,dig,1500,,,,,,,,I,290.464595,"[4, 4, 0, 0, 0, 0, 0, 0, 153, 32, 254, 72, 14,...","{'xmin': 520452.7999999998, 'ymin': 7442567.18..."
8,Bane,1988/07/01 00:00:00+00,2025/01/05 00:00:00+00,E,J,T,dig,1500,,,,,,,,I,73.112966,"[4, 4, 0, 0, 0, 0, 0, 0, 98, 51, 248, 198, 189...","{'xmin': -31769.69000000041, 'ymin': 6573022.6..."
9,Bane,1993/07/01 00:00:00+00,2025/01/05 00:00:00+00,E,J,T,fot,2000,,,,,,,,I,842.951868,"[4, 4, 0, 0, 0, 0, 0, 0, 206, 58, 68, 72, 161,...","{'xmin': 200939.21999999974, 'ymin': 6880720.8..."


## Leser vektordata i bounding box

In [40]:
# Select up to ten features whose bounding box overlaps the query window.
# Two boxes overlap when, on each axis, each one starts before the other ends,
# so all four fields of the `bbox` struct are compared. Testing only
# bbox.xmin/bbox.ymin against the window would drop features that start
# outside the window but extend into it.
query = f"""
SELECT *
FROM read_parquet('{file_path}')
WHERE bbox.xmin <= {xmax}
AND bbox.xmax >= {xmin}
AND bbox.ymin <= {ymax}
AND bbox.ymax >= {ymin}
LIMIT 10;
"""
result = con.query(query).to_df()
result

Unnamed: 0,objtype,datafangstdato,oppdateringsdato,sporantall,anleggstype,medium,malemetode,noyaktighet,rutemerking,vedlikeholdsansvarlig,vegnummer,motorvegtype,typeveg,vegkategori,vegfase,banestatus,SHAPE_Length,geometry,bbox
0,Bane,1990/07/01 00:00:00+00,2025/01/05 00:00:00+00,E,J,T,fot,2000,,,,,,,,I,19.755126,"[4, 4, 0, 0, 0, 0, 0, 0, 129, 127, 167, 71, 14...","{'xmin': 85759.00999999978, 'ymin': 6467782.88..."
1,Bane,1990/07/01 00:00:00+00,2025/01/05 00:00:00+00,E,J,T,fot,2000,,,,,,,,I,134.898729,"[4, 4, 0, 0, 0, 0, 0, 0, 88, 103, 167, 71, 176...","{'xmin': 85710.69000000041, 'ymin': 6467544.48..."
2,Bane,1990/07/01 00:00:00+00,2025/01/05 00:00:00+00,E,J,T,fot,2000,,,,,,,,I,785.27506,"[4, 4, 0, 0, 0, 0, 0, 0, 165, 15, 166, 71, 127...","{'xmin': 85023.29000000004, 'ymin': 6470079.56..."
3,Bane,1990/07/01 00:00:00+00,2025/01/05 00:00:00+00,E,J,L,fot,2000,,,,,,,,I,53.825283,"[4, 4, 0, 0, 0, 0, 0, 0, 64, 133, 167, 71, 173...","{'xmin': 85770.5, 'ymin': 6467798.949999999, '..."
4,Bane,1990/07/01 00:00:00+00,2025/01/05 00:00:00+00,E,J,U,fot,2000,,,,,,,,I,136.496584,"[4, 4, 0, 0, 0, 0, 0, 0, 128, 79, 165, 71, 204...","{'xmin': 84639.0, 'ymin': 6467558.07, 'xmax': ..."
5,Bane,1990/07/01 00:00:00+00,2025/01/05 00:00:00+00,E,J,U,fot,2000,,,,,,,,I,80.815509,"[4, 4, 0, 0, 0, 0, 0, 0, 142, 181, 164, 71, 23...","{'xmin': 84331.11000000034, 'ymin': 6467700.66..."
6,Bane,1990/07/01 00:00:00+00,2025/01/05 00:00:00+00,E,J,T,fot,2000,,,,,,,,I,186.820767,"[4, 4, 0, 0, 0, 0, 0, 0, 67, 232, 166, 71, 160...","{'xmin': 85456.53000000026, 'ymin': 6467664.09..."
7,Bane,1990/07/01 00:00:00+00,2025/01/05 00:00:00+00,F,J,T,fot,2000,,,,,,,,I,1310.565115,"[4, 4, 0, 0, 0, 0, 0, 0, 39, 16, 168, 71, 5, 8...","{'xmin': 86048.30999999959, 'ymin': 6466690.64..."
8,Bane,1990/07/01 00:00:00+00,2025/01/05 00:00:00+00,E,J,T,fot,2000,,,,,,,,I,266.032136,"[4, 4, 0, 0, 0, 0, 0, 0, 70, 218, 164, 71, 242...","{'xmin': 84404.54999999981, 'ymin': 6467577.09..."
9,Bane,1990/07/01 00:00:00+00,2025/01/05 00:00:00+00,E,J,T,fot,2000,,,,,,,,I,188.404937,"[4, 4, 0, 0, 0, 0, 0, 0, 67, 232, 166, 71, 61,...","{'xmin': 85456.53000000026, 'ymin': 6467614.84..."


## Viser data på interaktivt kart

In [36]:
# Load only the features inside the query window, then show them on an
# interactive map
query_window = (xmin, ymin, xmax, ymax)
kristiansand = gpd.read_parquet(path=file_path, bbox=query_window)
kristiansand.explore()