In [13]:
import pandas as pd
import numpy as np
import uuid
import re
from datetime import datetime
from rdflib import Graph, Literal, Namespace, URIRef, BNode, RDF, RDFS
from rdflib.namespace import XSD, DCTERMS, PROV, SKOS
from functions import *

In [14]:
# Name of the commune; not referenced by the cells visible in this notebook excerpt.
# NOTE(review): presumably the commune whose landmarks are being converted — confirm.
COMMUNE = 'Gentilly'
# Directory prefix used when the Turtle file is written at the end of the notebook.
# The value already ends with a path separator.
OUTPUT_PATH = "/home/STual/KG-cadastre/data/rdf/"

In [15]:
# CSV file holding one landmark per row (the input of the RDF conversion below).
PATH = "/home/STual/KG-cadastre/data/gentilly/LANDMARKS.csv"
# Same directory as OUTPUT_PATH, written without the trailing slash.
# NOTE(review): not used by the cells visible here — confirm whether it is still needed.
OUTPUT_FOLDER_PATH = "/home/STual/KG-cadastre/data/rdf"
# header=0: the first line of the CSV provides the column names.
landmarks = pd.read_csv(PATH,header=0)

In [16]:
# Build an RDF graph describing every row of the `landmarks` DataFrame, print it
# as Turtle and write it to `landmarks.ttl` under OUTPUT_PATH.
g = Graph()

# Base URI under which each landmark resource is minted (base + the row's uuid)
baseuri = Namespace("http://data.ign.fr/id/landmark/")
g.bind("landmark", baseuri)

# Vocabulary namespaces
cad = Namespace("http://data.ign.fr/def/cadastre#")
cad_ltype = Namespace("http://data.ign.fr/def/cadastre/landmarkType/")
add = Namespace("http://rdf.geohistoricaldata.org/def/address#")
ltype = Namespace("http://rdf.geohistoricaldata.org/def/address/landmarkType/")
lrtype = Namespace("http://rdf.geohistoricaldata.org/def/address/landmarkRelationType/")
atype = Namespace("http://rdf.geohistoricaldata.org/def/address/attributeType/")
geofla = Namespace("http://data.ign.fr/def/geofla#")
pwikidata = Namespace("http://www.wikidata.org/wiki/Property:")
# NOTE(review): this name would hide a `time` module if one is brought into the
# kernel (e.g. through `from functions import *`) — confirm it is not needed.
time = Namespace("http://www.w3.org/2006/time#")

# Register the prefixes used by the Turtle serializer
for prefix, namespace in (
    ("cad", cad),
    ("add", add),
    ("ltype", ltype),
    ("lrtype", lrtype),
    ("atype", atype),
    ("cad_ltype", cad_ltype),
    ("geofla", geofla),
    ("pwikidata", pwikidata),
    ("time", time),
):
    g.bind(prefix, namespace)

# Landmark types whose URI lives in the cadastre type namespace (`cad_ltype`);
# every other type is looked up in the address type namespace (`ltype`).
CADASTRE_LANDMARK_TYPES = ('Section', 'Plot', 'Departement', 'Arrondissement', 'Canton')


def _add_time_instant(graph, interval, bound, stamp):
    """Attach a year-precision Gregorian time instant to a time interval.

    Parameters
    ----------
    graph : Graph
        Graph receiving the triples.
    interval : BNode
        Node of the time interval the instant belongs to.
    bound : URIRef
        Predicate linking the interval to the instant
        (`add.hasBeginning` or `add.hasEnd`).
    stamp : datetime
        Value stored as the instant's `add:timeStamp`.
    """
    instant = BNode()
    graph.add((interval, bound, instant))
    graph.add((instant, RDF.type, add.TimeInstant))
    graph.add((instant, add.timeCalendar, time.Gregorian))
    graph.add((instant, add.timePrecision, time.Year))
    # NOTE(review): `stamp` is a naive datetime, so the literal carries no
    # timezone although xsd:dateTimeStamp requires one — confirm whether
    # consumers expect an explicit offset before changing the lexical form.
    graph.add((instant, add.timeStamp, Literal(stamp, datatype=XSD.dateTimeStamp)))


# One add:Landmark resource per row of the DataFrame
for index, row in landmarks.iterrows():
    subject_uri = URIRef(baseuri + row['uuid'])
    g.add((subject_uri, RDF.type, add.Landmark))

    # add:isLandmarkType — pick the namespace that defines this landmark type
    landmark_type = row['add:LandmarkType']
    type_namespace = cad_ltype if landmark_type in CADASTRE_LANDMARK_TYPES else ltype
    g.add((subject_uri, add.isLandmarkType, URIRef(type_namespace + f"{landmark_type}")))

    # rdfs:label
    if not pd.isnull(row['rdfs:label']):
        g.add((subject_uri, RDFS.label, Literal(row['rdfs:label'], datatype=XSD.string)))

    # dcterms:identifier
    if not pd.isnull(row['dcterms:identifier']):
        g.add((subject_uri, DCTERMS.identifier, Literal(row['dcterms:identifier'], datatype=XSD.string)))

    # INSEE number. int() strips the float formatting of the CSV value; it also
    # drops any leading zero and raises on a non-numeric code.
    # NOTE(review): the column is named 'geofla:numInsee' and the `geofla`
    # namespace is otherwise unused, yet the predicate emitted here is
    # add:numInsee — confirm which predicate is intended.
    if not pd.isnull(row['geofla:numInsee']):
        g.add((subject_uri, add.numInsee, Literal(int(row['geofla:numInsee']), datatype=XSD.string)))

    # Wikidata property P8422, same int-to-string conversion as above
    if not pd.isnull(row['wikidata:P8422']):
        g.add((subject_uri, pwikidata.P8422, Literal(int(row['wikidata:P8422']), datatype=XSD.string)))

    # Relation to the parent landmark, expressed through an address segment
    if not pd.isnull(row['Parent']):
        g.add((subject_uri, add.targets, subject_uri))
        firstStep = BNode()
        g.add((subject_uri, add.firstStep, firstStep))
        g.add((firstStep, RDF.type, add.AddressSegment))
        if pd.isnull(row['Relation']):
            # No relation given in the CSV: fall back to the Undefined type
            relation_type = lrtype.Undefined
        else:
            relation_type = URIRef(lrtype + f"{row['Relation']}")
        g.add((firstStep, add.isLandmarkRelationType, relation_type))
        g.add((firstStep, add.locatum, subject_uri))
        # find_uuid comes from the local `functions` module.
        # NOTE(review): assumed to return the uuid string of the parent row —
        # a non-string result would make the concatenation below fail.
        parentuuid = find_uuid(landmarks, row['Parent'])
        g.add((firstStep, add.relatum, URIRef(baseuri + parentuuid)))

    # Validity interval, emitted as soon as one of its two bounds is known
    has_beginning = not pd.isnull(row['hasBeginning'])
    has_end = not pd.isnull(row['hasEnd'])
    if has_beginning or has_end:
        hastimeinterval = BNode()
        g.add((subject_uri, add.hasTime, hastimeinterval))
        g.add((hastimeinterval, RDF.type, add.TimeInterval))
        if has_beginning:
            # A beginning year is parsed with '%Y' only (1 January, midnight)
            beginning = datetime.strptime(str(int(row['hasBeginning'])), '%Y')
            _add_time_instant(g, hastimeinterval, add.hasBeginning, beginning)
        if has_end:
            # An end year is stamped on 31 December of that year
            end = datetime.strptime(str(int(row['hasEnd'])) + '-12-31', '%Y-%m-%d')
            _add_time_instant(g, hastimeinterval, add.hasEnd, end)

print(g.serialize(format='turtle'))

# Write the graph to a file; OUTPUT_PATH may end with '/', so strip it to avoid
# producing a doubled separator in the destination path.
g.serialize(destination=f"{OUTPUT_PATH.rstrip('/')}/landmarks.ttl", format='turtle')

@prefix add: <http://rdf.geohistoricaldata.org/def/address#> .
@prefix cad_ltype: <http://data.ign.fr/def/cadastre/landmarkType/> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix geofla: <http://data.ign.fr/def/geofla#> .
@prefix landmark: <http://data.ign.fr/id/landmark/> .
@prefix lrtype: <http://rdf.geohistoricaldata.org/def/address/landmarkRelationType/> .
@prefix ltype: <http://rdf.geohistoricaldata.org/def/address/landmarkType/> .
@prefix pwikidata: <http://www.wikidata.org/wiki/Property:> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix time: <http://www.w3.org/2006/time#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

landmark:2f90aba5-c26e-4efb-a5f2-ac1ee069b5e6 a add:Landmark ;
    rdfs:label "Val-de-Marne"^^xsd:string ;
    geofla:numInsee "94"^^xsd:string ;
    add:hasTime [ a add:TimeInterval ;
            add:hasBeginning [ a add:TimeInstant ;
                    add:timeCalendar time:Gregorian ;
                    add:timePrecision time:Y

<Graph identifier=N8f051f7d83494b64b8f17bd5fa98e066 (<class 'rdflib.graph.Graph'>)>