In [1]:
import pandas as pd
from datetime import datetime
from rdflib import Graph, Literal, Namespace, RDF, URIRef, BNode
from rdflib.namespace import XSD, DCTERMS, PROV, SKOS, RDFS, FOAF

In [2]:
ROOT = "/workspaces/ontologie-peuplement/"  #/home/STual/KG-cadastre/
PATH = ROOT + "data/ACTIVITIES.csv"
OUTPUT_PATH = ROOT + "data/rdf/"
COMMUNE = "Gentilly"

activities = pd.read_csv(PATH,header=0)
display(activities)

Unnamed: 0,ID,rdf:type,rdfs:label,rdfs:comment,prov:wasAssociatedWith,foaf:firstName,foaf:familyName,prov:startedAtTime,prov:endedAtTime,prov:used
0,0001,prov:Activity,Classification manuelle des pages de registres,,cf6c4e2f-bcdd-440c-81ac-379cdad50a05,,,,,
1,0002,prov:Activity,Transcription manuelle des registres,,cf6c4e2f-bcdd-440c-81ac-379cdad50a05,,,2024-03-04,2024-03-29,
2,0003,prov:Activity,Recallage de plans parcellaires géoréférencés,,cf6c4e2f-bcdd-440c-81ac-379cdad50a05,,,2024-04-15,2024-04-15,FRAD094_3P_001041_georef
3,0004,prov:Activity,Géoréférencement des plans du cadastre napoléo...,,026db102-3789-43cf-b357-d24115b2e9d1,,,,2015-03-17,FRAD094_3P_001041
4,0005,prov:Activity,Géoréférencement des plans du cadastre napoléo...,,7a7f65af-7bf4-4c75-8200-1633d952e647,,,,2023-05-16,CN80
5,0006,prov:Activity,Vectorisation manuelle des plans parcellaires,,cf6c4e2f-bcdd-440c-81ac-379cdad50a05,,,2024-06-19,2024-06-19,FRAD094_3P_001041_georef_2;digit_CN80_georef
6,cf6c4e2f-bcdd-440c-81ac-379cdad50a05,prov:Agent;foaf:Person,Solenn Tual,,,Solenn,Tual,,,
7,14ac6f3c-b44d-49fc-af54-dc41e41323f8,prov:SoftwareAgent,DAN,Document Attention Network (proposé par Denis ...,,,,,,
8,14ffecf3-f064-4ac1-82fc-c4ae543381c7,prov:SoftwareAgent,Classifieur,Modèle de deep learning de détection et classi...,,,,,,
9,026db102-3789-43cf-b357-d24115b2e9d1,prov:Agent,Service départemental d'Archéologie du Val-de-...,,,,,,,


In [3]:
vocabs = {
    "prov": PROV,
    "skos": SKOS,
    "rdfs": RDFS,
    "dcterms": DCTERMS,
    "foaf":FOAF
}

classesdict = {
    "prov:Agent":PROV.Agent,
    "prov:SoftwareAgent":PROV.SoftwareAgent,
    "foaf:Person":FOAF.Person,
}

In [4]:
from namespaces import *

g = Graph()
g.bind("activity", activity)
g.bind("source", srcuri)

for index, row in activities.iterrows():
    
    if row["rdf:type"] == "prov:Activity":
        activity_ = URIRef(activity + row["ID"])
        g.add((activity_, RDF.type, PROV.Activity))
        g.add((activity_, RDFS.label, Literal(row["rdfs:label"], datatype=XSD.string)))
        agent = BNode()
        g.add((activity_, PROV.wasAssociatedWith, agent))
        for index2, row2 in activities.iterrows():
            if 'Agent' in row2["rdf:type"] and row2["ID"] == row["prov:wasAssociatedWith"]:
                types = row2["rdf:type"].split(';')
                for t in types:
                    vocab, classe = t.split(':')
                    g.add((agent, RDF.type, classesdict[t]))
                    g.add((agent, RDFS.label, Literal(row2["rdfs:label"], datatype=XSD.string)))
                    if not pd.isnull(row2["foaf:firstName"]):
                        g.add((agent, FOAF.firstName, Literal(row2["foaf:firstName"], datatype=XSD.string)))
                    if not pd.isnull(row2["foaf:familyName"]):
                        g.add((agent, FOAF.familyName, Literal(row2["foaf:familyName"], datatype=XSD.string)))
                    if not pd.isnull(row2["rdfs:comment"]):
                        g.add((agent, RDFS.comment, Literal(row2["rdfs:comment"], datatype=XSD.string)))
        
        if not pd.isnull(row["prov:startedAtTime"]):
            g.add((activity_, PROV.startedAtTime, Literal(datetime.strptime(row["prov:startedAtTime"], '%Y-%m-%d'), datatype=XSD.dateTimeStamp)))
        if not pd.isnull(row["prov:endedAtTime"]):
            g.add((activity_, PROV.endedAtTime, Literal(datetime.strptime(row["prov:endedAtTime"], '%Y-%m-%d'), datatype=XSD.dateTimeStamp)))
        if not pd.isnull(row["prov:used"]):
            if ';' in row["prov:used"]:
                used = row["prov:used"].split(';')
                for u in used:
                    used_ = URIRef(srcuri + u)
                    g.add((activity_, PROV.used, used_))
            else:
                g.add((activity_, PROV.used, URIRef(srcuri + row["prov:used"])))

In [5]:
print(g.serialize(format='turtle'))
g.serialize(destination=f"{OUTPUT_PATH}/activities.ttl", format='turtle')

@prefix activity: <http://rdf.geohistoricaldata.org/id/codes/cadastre/activity/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix source: <http://rdf.geohistoricaldata.org/id/source/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

activity:0001 a prov:Activity ;
    rdfs:label "Classification manuelle des pages de registres"^^xsd:string ;
    prov:wasAssociatedWith [ a prov:Agent,
                foaf:Person ;
            rdfs:label "Solenn Tual"^^xsd:string ;
            foaf:familyName "Tual"^^xsd:string ;
            foaf:firstName "Solenn"^^xsd:string ] .

activity:0002 a prov:Activity ;
    rdfs:label "Transcription manuelle des registres"^^xsd:string ;
    prov:endedAtTime "2024-03-29T00:00:00"^^xsd:dateTimeStamp ;
    prov:startedAtTime "2024-03-04T00:00:00"^^xsd:dateTimeStamp ;
    prov:wasAssociatedWith [ a prov:Agent,
                foaf:Person ;
            rdfs

<Graph identifier=Nca24c603cdfc4460a908970a0c3b1f67 (<class 'rdflib.graph.Graph'>)>