In [21]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [30]:
import fiona
from shapely.geometry import shape
from rdflib import Graph, Literal, Namespace, URIRef, XSD, OWL, RDF
from rdflib.namespace import GEO
import rdflib
import os

from minio import Minio
from minio.error import S3Error

In [23]:
# Define the namespaces
gadm = Namespace("http://example.com/ontologies/gadm#")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")
sf = Namespace("http://www.opengis.net/ont/sf#")

In [24]:
# Create a graph
g = Graph()
g.bind("gadm", gadm)

### Converting GADM data into RDF

In [25]:
path_to_shp_files = ["/mnt/data/raw/gadm41_DEU_shp/gadm41_DEU_1.shp", 
                    "/mnt/data/raw/gadm41_DEU_shp/gadm41_DEU_2.shp",
                    "/mnt/data/raw/gadm41_DEU_shp/gadm41_DEU_3.shp",
                    "/mnt/data/raw/gadm41_DEU_shp/gadm41_DEU_4.shp"]

for path_to_shp_file in path_to_shp_files:

    adm_lvl = int(path_to_shp_file.split(".")[0].split("_")[-1])
    
    # Open the Shapefile using fiona
    with fiona.open(path_to_shp_file, 'r') as src:
        # Iterate through features
        for feature in src:
            # Print the geometry as WKT
            geometry = shape(feature['geometry'])
            geometry_wkt = geometry.wkt
            
            # extracting information
            gid = feature["properties"][f"GID_{adm_lvl}"].replace(".", "_")
            gid_upper_lvl = feature["properties"][f"GID_{adm_lvl - 1}"].replace(".", "_")
            name = feature["properties"][f"NAME_{adm_lvl}"]
            type_ = feature["properties"][f"TYPE_{adm_lvl}"]
            country = feature["properties"]["COUNTRY"]
            
            # creating entities
            ent_adm_unit = URIRef(gadm[gid])
            ent_adm_unit_geom = URIRef(gadm[f"GEOM_{gid}"])
            ent_hasName = URIRef(gadm["hasName"])
            ent_hasNationalLevel = URIRef(gadm["hasNationalLevel"])
            ent_hasType = URIRef(gadm["hasType"])
            ent_country = URIRef(gadm["country"])
            ent_hasUpperLevelUnit = URIRef(gadm["hasUpperLevelUnit"].replace(".", "_"))
    
            # creating rdf triples
            g.add((ent_adm_unit, RDF.type, gadm.AdministrativeUnit))
            g.add((ent_adm_unit, ent_hasName, Literal(name)))
            g.add((ent_adm_unit, ent_hasNationalLevel, Literal(adm_lvl)))
            g.add((ent_adm_unit, ent_hasType, Literal(type_)))
            g.add((ent_adm_unit, ent_country, Literal(country)))
            g.add((ent_adm_unit, GEO.hasGeometry, ent_adm_unit_geom))
            g.add((ent_adm_unit_geom, RDF.type, sf.Geometry))
            g.add((ent_adm_unit_geom, GEO.asWKT, Literal(geometry_wkt, datatype=GEO.wktLiteral)))
            
            if adm_lvl > 1:
                ent_upper_level_unit = URIRef(gadm[gid_upper_lvl])
                g.add((ent_adm_unit, ent_hasUpperLevelUnit, ent_upper_level_unit))
                    
    # Close the fiona dataset
    src.close()

In [26]:
g.serialize(destination="/mnt/data/processed/RDF/GADM/gadm_germany.ttl")

<Graph identifier=Nf44f5584f37041328f5df7e9e1e9d209 (<class 'rdflib.graph.Graph'>)>

In [39]:
# Upload ttl file to contabo storage
# Set your MinIO server information
CONTABO_ENDPOINT = 'eu2.contabostorage.com'
CONTABO_ACCESS_KEY = '6199ac7eec0ebb44c1252bf8f785285d'
CONTABO_SECRET_KEY = '109130641c157b9386e33059ef080778'

# Set the name of the bucket
BUCKET_NAME = 'aqqa'

# Set the path to the local file you want to upload
FILE_PATH = "/mnt/data/processed/RDF/GADM/gadm_germany.ttl"

def upload_to_minio(local_file, bucket, s3_file):
    minio_client = Minio(
        endpoint=CONTABO_ENDPOINT,
        access_key=CONTABO_ACCESS_KEY,
        secret_key=CONTABO_SECRET_KEY,
        secure=True  # Change to True if using HTTPS
    )

    try:
        minio_client.fput_object(bucket, s3_file, local_file)
        print("Upload Successful")
        return True
    except S3Error as e:
        print("Error:", e)
        return False

uploaded = upload_to_minio(FILE_PATH, BUCKET_NAME, 'gadm_germany.ttl')
if uploaded:
    print("File uploaded successfully.")
else:
    print("File upload failed.")

Upload Successful
File uploaded successfully.


### Example Queries

In [19]:
# Execute Query 1 (get first 10 triples in graph)
query1 = """    
        SELECT ?s ?p ?o
        WHERE {
            ?s ?p ?o
        }
        LIMIT 20
       """

results1 = g.query(query1)
print("Query 1 Results:")
for row in results1:
    print(f"{row['s']} {row['p']} {row['o']}")

Query 1 Results:
http://example.com/ontologies/gadm#DEU_10_44_1_1_1 http://www.opengis.net/ont/geosparql#hasGeometry http://example.com/ontologies/gadm#GEOM_DEU_10_44_1_1_1
http://example.com/ontologies/gadm#DEU_10_4_2_1 http://example.com/ontologies/gadm#hasNationalLevel 3
http://example.com/ontologies/gadm#GEOM_DEU_11_31_2_1_1 http://www.opengis.net/ont/geosparql#asWKT POLYGON ((6.665374279000105 49.617942810000045, 6.667879582000012 49.620586394999975, 6.6753764160001765 49.623416902000145, 6.678871632000039 49.62149810700009, 6.678237915000068 49.62023544300007, 6.680403233000163 49.61795806900011, 6.684755803000087 49.617797851000034, 6.688429832000054 49.61571502600009, 6.689437867000038 49.616287231000115, 6.693688394000105 49.612712860000045, 6.694113254000058 49.61224746600004, 6.694188117000124 49.61217880300018, 6.694880962000184 49.6116600040001, 6.695554256000094 49.61121749800009, 6.6964111340001296 49.610900877999995, 6.696667193999986 49.610820770000146, 6.6971616740000

In [20]:
# Execute Query 2 (get all names of admin units of level 2 and their upper level name)
query1 = """    
        SELECT ?admin_unit_name ?upper_admin_unit_name ?upper_admin_unit_level
        WHERE {
            ?admin_unit a gadm:AdministrativeUnit ;
                gadm:hasName ?admin_unit_name ;
                gadm:hasNationalLevel 2 .
            
            OPTIONAL {
                ?admin_unit gadm:hasUpperLevelUnit ?upper_admin_unit .
                ?upper_admin_unit a gadm:AdministrativeUnit ;
                    gadm:hasName ?upper_admin_unit_name ;
                    gadm:hasNationalLevel ?upper_admin_unit_level .
            }
        }
        LIMIT 10
       """

results1 = g.query(query1)
print("Query 1 Results:")
for row in results1:
    print(f"{row['admin_unit_name']} {row['upper_admin_unit_name']} {row['upper_admin_unit_level']}")

Query 1 Results:
Alb-Donau-Kreis Baden-Württemberg 1
Alb-Donau-Kreis Baden-Württemberg 1
Baden-Baden None None
Biberach None None
Böblingen None None
Bodensee None None
Bodenseekreis None None
Breisgau-Hochschwarzwald None None
Calw None None
Emmendingen None None
