In [105]:
# Enable IPython's autoreload extension so that edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2 -- presumably "reload all modules before each cell executes";
# confirm against the IPython autoreload documentation.
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [106]:
import fiona
from shapely.geometry import shape
from rdflib import Graph, Literal, Namespace, URIRef, XSD, OWL, RDF
from rdflib.namespace import GEO
import rdflib
import os

In [107]:
# Namespaces used to mint IRIs in this notebook, created in one pass:
#   gadm - project vocabulary and administrative-unit identifiers
#   xsd  - XML Schema datatypes
#   sf   - OGC Simple Features geometry classes
gadm, xsd, sf = map(
    Namespace,
    (
        "http://example.com/ontologies/gadm#",
        "http://www.w3.org/2001/XMLSchema#",
        "http://www.opengis.net/ont/sf#",
    ),
)

In [113]:
# Create the graph that collects every triple generated in this notebook.
g = Graph()

# Bind readable prefixes for the namespaces used below. Bound prefixes are
# used when the graph is serialized, so an unbound namespace (previously
# `sf`) ends up with an auto-generated "nsN" prefix in the Turtle output.
g.bind("gadm", gadm)
g.bind("sf", sf)

### Converting GADM data into RDF

In [114]:
# Shapefiles to convert: one file per GADM administrative level (1-4).
path_to_shp_files = ["/mnt/data/raw/gadm41_DEU_shp/gadm41_DEU_1.shp",
                     "/mnt/data/raw/gadm41_DEU_shp/gadm41_DEU_2.shp",
                     "/mnt/data/raw/gadm41_DEU_shp/gadm41_DEU_3.shp",
                     "/mnt/data/raw/gadm41_DEU_shp/gadm41_DEU_4.shp"]

# Upper bound on the number of features converted per shapefile.
# None converts every feature, which the example queries further down need.
# Set it to a small integer (e.g. 1) for a quick smoke test; this replaces
# the unconditional `break` that used to stop after the first feature.
MAX_FEATURES_PER_FILE = None

# Predicate IRIs are identical for every feature, so build them once
# instead of once per loop iteration.
ent_hasName = gadm["hasName"]
ent_hasNationalLevel = gadm["hasNationalLevel"]
ent_hasType = gadm["hasType"]
ent_country = gadm["country"]
ent_hasUpperLevelUnit = gadm["hasUpperLevelUnit"]

for path_to_shp_file in path_to_shp_files:

    # The administrative level is the trailing "_<n>" of the file stem.
    # Parse the basename only, so dots or underscores in directory names
    # cannot change the result.
    file_stem = os.path.splitext(os.path.basename(path_to_shp_file))[0]
    adm_lvl = int(file_stem.rsplit("_", 1)[-1])

    # The context manager closes the dataset; no explicit close() is needed.
    with fiona.open(path_to_shp_file, 'r') as src:
        for feature_idx, feature in enumerate(src):
            if MAX_FEATURES_PER_FILE is not None and feature_idx >= MAX_FEATURES_PER_FILE:
                break

            properties = feature["properties"]

            # Convert the feature geometry to its WKT representation.
            geometry_wkt = shape(feature['geometry']).wkt

            # Extract the attributes of this administrative unit.
            gid = properties[f"GID_{adm_lvl}"]
            name = properties[f"NAME_{adm_lvl}"]
            type_ = properties[f"TYPE_{adm_lvl}"]
            country = properties["COUNTRY"]

            # Mint the IRIs of the unit and of its geometry from the GID.
            ent_adm_unit = gadm[gid]
            ent_adm_unit_geom = gadm[f"GEOM_{gid}"]

            # Describe the administrative unit.
            g.add((ent_adm_unit, RDF.type, gadm.AdministrativeUnit))
            g.add((ent_adm_unit, ent_hasName, Literal(name)))
            g.add((ent_adm_unit, ent_hasNationalLevel, Literal(adm_lvl)))
            g.add((ent_adm_unit, ent_hasType, Literal(type_)))
            g.add((ent_adm_unit, ent_country, Literal(country)))

            # Attach the geometry as a GeoSPARQL WKT literal.
            g.add((ent_adm_unit, GEO.hasGeometry, ent_adm_unit_geom))
            g.add((ent_adm_unit_geom, RDF.type, sf.Geometry))
            g.add((ent_adm_unit_geom, GEO.asWKT, Literal(geometry_wkt, datatype=GEO.wktLiteral)))

            # Units below level 1 are linked to their parent unit, whose GID
            # is stored in the GID_<level - 1> attribute.
            if adm_lvl > 1:
                gid_upper_lvl = properties[f"GID_{adm_lvl - 1}"]
                ent_upper_level_unit = gadm[gid_upper_lvl]
                g.add((ent_adm_unit, ent_hasUpperLevelUnit, ent_upper_level_unit))

In [115]:
# Write the graph to disk as Turtle. The format is passed explicitly because
# rdflib does not infer it from the ".ttl" extension and the default has
# differed between rdflib versions.
rdf_output_path = "/mnt/data/processed/RDF/GADM/gadm_germany_test.ttl"

# Create the target directory if needed, so a fresh environment does not
# fail at the very last step.
os.makedirs(os.path.dirname(rdf_output_path), exist_ok=True)

g.serialize(destination=rdf_output_path, format="turtle")

<Graph identifier=N5798abecb8cb4b6ab28ce38fe84c344a (<class 'rdflib.graph.Graph'>)>

### Example Queries

In [91]:
# Execute Query 1 (get first 10 triples in graph)
query1 = """
        SELECT ?s ?p ?o
        WHERE {
            ?s ?p ?o
        }
        LIMIT 10
       """

# WKT geometry literals can be many kilobytes long; cap the printed length
# of each term so the cell output stays readable.
MAX_TERM_CHARS = 120

results1 = g.query(query1)
print("Query 1 Results:")
for row in results1:
    printed_terms = []
    for var_name in ("s", "p", "o"):
        term_text = str(row[var_name])
        if len(term_text) > MAX_TERM_CHARS:
            term_text = term_text[:MAX_TERM_CHARS] + "..."
        printed_terms.append(term_text)
    print(" ".join(printed_terms))

Query 1 Results:
http://example.com/ontologies/gadm#DEU.2.69.17.1_1 http://www.opengis.net/ont/geosparql#hasGeometry http://example.com/ontologies/gadm#GEOM_DEU.2.69.17.1_1
http://example.com/ontologies/gadm#DEU.15.3.7.7_1 http://example.com/ontologies/gadm#hasUpperLevelUnit http://example.com/ontologies/gadm#DEU.15.3.7_1
http://example.com/ontologies/gadm#GEOM_DEU.9.22.10_1 http://www.opengis.net/ont/geosparql#asWKT POLYGON ((7.606202126000085 53.0582542410001, 7.6048474300000635 53.05770874000018, 7.598015784999973 53.05464172400008, 7.58857345500013 53.05043029700005, 7.5839357380000365 53.04828262400008, 7.580125332000023 53.04653930700016, 7.575692176000075 53.04449844400017, 7.573518751999984 53.043502807000095, 7.572997093000083 53.04326248100011, 7.5723018650000995 53.04294204800016, 7.571276664000095 53.04241180400015, 7.5694031720000226 53.04117202800006, 7.568666935000124 53.04056930500019, 7.567396640000084 53.04081726100014, 7.56641673900009 53.040966034000064, 7.565625190

In [104]:
# Execute Query 2 (get all names of admin units of level 2 and their upper level name)
# Stored under its own names so the Query 1 variables are not overwritten.
query2 = """
        SELECT ?admin_unit_name ?upper_admin_unit_name ?upper_admin_unit_level
        WHERE {
            ?admin_unit a gadm:AdministrativeUnit ;
                gadm:hasName ?admin_unit_name ;
                gadm:hasNationalLevel 2 .

            OPTIONAL {
                ?admin_unit gadm:hasUpperLevelUnit ?upper_admin_unit .
                ?upper_admin_unit a gadm:AdministrativeUnit ;
                    gadm:hasName ?upper_admin_unit_name ;
                    gadm:hasNationalLevel ?upper_admin_unit_level .
            }
        }
        LIMIT 10
       """

# Pass the gadm prefix explicitly, so the query does not rely on the prefix
# having been bound on the graph in another cell.
results2 = g.query(query2, initNs={"gadm": gadm})
print("Query 2 Results:")
for row in results2:
    # The upper-level columns come from the OPTIONAL block and are None
    # when a unit has no matching parent in the graph.
    print(f"{row['admin_unit_name']} {row['upper_admin_unit_name']} {row['upper_admin_unit_level']}")

Query 1 Results:
Alb-Donau-Kreis Baden-Württemberg 1
Baden-Baden Baden-Württemberg 1
Biberach Baden-Württemberg 1
Böblingen Baden-Württemberg 1
Bodensee Baden-Württemberg 1
Bodenseekreis Baden-Württemberg 1
Breisgau-Hochschwarzwald Baden-Württemberg 1
Calw Baden-Württemberg 1
Emmendingen Baden-Württemberg 1
Enzkreis Baden-Württemberg 1
