# Création des resources RDF des mentions de parcelles

In [1]:
import pandas as pd
import numpy as np
import uuid
import re
from rdflib import Graph, Literal, Namespace, RDF, URIRef, BNode
from rdflib.namespace import XSD, DCTERMS, PROV, SKOS, RDFS
from functions import *

## 1. Lecture des données

### Matrices

In [2]:
COMMUNE = 'Gentilly'
matrices_metada = {
    "MAT_1813": {
        "PLAN": "1811",
        "MATRICE_ID": "MAT_B_NB_1813",
        "MATRICE_START": "1813",
        "MATRICE_END": "1835"
    },
    "MAT_1836": {
        "PLAN": "1811",
        "MATRICE_ID": "MAT_NB_1836",
        "MATRICE_START": "1836",
        "MATRICE_END": "1847"
    },
    "MAT_1848": {
        "PLAN": "1845",
        "MATRICE_ID": "MAT_NB_1848",
        "MATRICE_START": "1848",
        "MATRICE_END": "1860"
    }
}


In [3]:
ROOT = "/workspaces/ontologie-peuplement/"  #/home/STual/KG-cadastre/

PATH = ROOT + "data/gentilly/MAT_1813.csv"
mat1813 = pd.read_csv(PATH,header=0)
PATH = ROOT + "data/gentilly/MAT_1836.csv"
mat1836 = pd.read_csv(PATH,header=0)
PATH = ROOT + "data/gentilly/MAT_1848.csv"
mat1848 = pd.read_csv(PATH,header=0)

OUTPUT_FOLDER_PATH = ROOT + "data/rdf"

In [30]:
mat1813['registre'] = 'MAT_1813'
mat1836['registre'] = 'MAT_1836'
mat1848['registre'] = 'MAT_1848'

matrices = pd.concat([mat1813, mat1836, mat1848])
matrices = matrices.reset_index(drop=True)
print(matrices.columns)

Index(['ID', 'UUID', 'Type_CF', 'Num_Folio', 'Alt_Num_CF', 'Groupe CF',
       'Ordre de lecture', 'Voie', 'Num_Voie', 'Image', 'Section_clean',
       'Parcelle_clean', 'Lieu-dit_transcript', 'Lieu-dit_clean',
       'Lieu-dit_treated', 'Lieu-dit_type', 'Propriétaires_transcript',
       'Nature_transcript', 'Nature_clean', 'Nature_treated', 'Date entrée',
       'Date entrée_treated', 'Date sortie', 'Date sortie_treated', 'Tiré de',
       'Tiré de_treated', 'Porté à', 'Porté à_treated', 'Ligne barrée ?',
       'CF rayé ?', 'Spécification', 'Commentaire', 'Cote liée', 'registre'],
      dtype='object')


In [31]:
display(matrices[["Lieu-dit_treated"]])

Unnamed: 0,Lieu-dit_treated
0,
1,
2,
3,
4,
...,...
437,Barrière d'Italie;4
438,Barrière d'Italie;4
439,Barrière d'Italie;2
440,Barrière d'Italie;2


### Propriétaires

In [751]:
import json 
import pandas as pd
#open three json files
with open("/home/STual/KG-cadastre/data/gentilly/structured_owners_ok.json") as f:
    data_owner_ok = json.load(f)
with open("/home/STual/KG-cadastre/data/gentilly/structured_owners_nok1.json") as f:
    data_owner_nok1 = json.load(f)
with open("/home/STual/KG-cadastre/data/gentilly/structured_owners_nok2.json") as f:
    data_owner_nok2 = json.load(f)

#concatenate the three json files
data_owners = data_owner_ok + data_owner_nok1 + data_owner_nok2

#read as df
owners_df = pd.DataFrame(data_owners)

In [752]:
oregistre = []
otype_folio = []
ofolio = []
o_groupe_cf = []
o_transcription = []

for row in owners_df.iterrows():
    cell_info = row[1]['cell']
    oregistre.append(cell_info['registre'])
    otype_folio.append(cell_info['type_folio'])
    ofolio.append(cell_info['folio'])
    o_groupe_cf.append(cell_info['groupe_cf'])
    o_transcription.append(cell_info['transcription'])

owners_df['registre'] = oregistre
owners_df['type_folio'] = otype_folio
owners_df['folio'] = ofolio
owners_df['groupe_cf'] = o_groupe_cf
owners_df['transcription'] = o_transcription


In [753]:
owners_df

Unnamed: 0,cell,owners,changes,registre,type_folio,folio,groupe_cf,transcription
0,"{'registre': 'MAT_1813', 'type_folio': 'Bâti',...","[{'owner-id': 1, 'owner-lastname': 'Louves', '...",[],MAT_1813,Bâti,114,1,Louves
1,"{'registre': 'MAT_1813', 'type_folio': 'Bâti',...","[{'owner-id': 1, 'owner-lastname': 'Hardon', '...",[],MAT_1813,Bâti,82,1,"Hardon, Bourgeois ~~nourisseur~~→à Paris"
2,"{'registre': 'MAT_1813', 'type_folio': 'Bâti',...",,,MAT_1813,Bâti,64,1,Faipot François→m↑d↓ de vin b↑re↓ de fontaineb...
3,"{'registre': 'MAT_1813', 'type_folio': 'Non bâ...","[{'owner-id': 1, 'owner-lastname': 'Besson', '...","[{'change-order': 1, 'owner-before': 1, 'owner...",MAT_1813,Non bâti,23bis,1,~~Besson Jard↑e↓→Fleuriste 1832→Lecoq Jean b↑t...
4,"{'registre': 'MAT_1813', 'type_folio': 'Non bâ...",,,MAT_1813,Non bâti,46bis,1,Bizouard
...,...,...,...,...,...,...,...,...
184,"{'registre': 'MAT_1836', 'type_folio': 'Bâti',...","[{'owner-id': 1, 'owner-lastname': 'Marie Adam...",[],MAT_1836,Bâti,442,6,marie adam→Piétri
185,"{'registre': 'MAT_1836', 'type_folio': 'Bâti',...","[{'owner-id': 1, 'owner-lastname': 'Marie Adam...",[],MAT_1836,Bâti,442,6,marie adam→Piétri
186,"{'registre': 'MAT_1836', 'type_folio': 'Non Bâ...","[{'owner-id': 1, 'owner-lastname': 'Gérard', '...",[],MAT_1836,Non Bâti,708,1,Gérard→route de paris 11
187,"{'registre': 'MAT_1848', 'type_folio': 'Non Bâ...","[{'owner-id': 1, 'owner-lastname': 'Biermann',...",[],MAT_1848,Non Bâti,836,1,"Biermann, Corroyeur à Paris"


### 1.1. Création des pages
- rdf:type rico:Instanciation : instance numérisée d'une page de registre
- rdf:type rico:Record => concept de la page de registre, fait le lien avec le registre (concept, RecordSet)

In [5]:
#select distinct values in th colum Image
images = matrices[['registre','Image']].drop_duplicates()
images

Unnamed: 0,registre,Image
0,MAT_1813,FRAD094_3P_000255_01_0586
1,MAT_1813,FRAD094_3P_000255_01_0588
2,MAT_1813,FRAD094_3P_000255_01_0579
4,MAT_1813,FRAD094_3P_000255_01_0571
5,MAT_1813,FRAD094_3P_000255_01_0015
...,...,...
425,MAT_1848,FRAD094_3P_000264_01_0218
427,MAT_1848,FRAD094_3P_000264_01_0219
429,MAT_1848,FRAD094_3P_000264_01_0220
432,MAT_1848,FRAD094_3P_000264_01_0221


In [6]:
# Create a new RDF graph
g = Graph()

baseuri = Namespace("http://data.ign.fr/id/source/")
mlclasse = Namespace("http://data.ign.fr/id/codes/cadastre/mlClasse/")
g.bind('source', baseuri)
g.bind('mlclasse', mlclasse)

# Define the namespaces
cad = Namespace("http://data.ign.fr/def/cadastre#")
add = Namespace("http://rdf.geohistoricaldata.org/def/address#")
rico = Namespace("https://www.ica.org/standards/RiC/ontology#")
fpo = Namespace("https://github.com/johnBradley501/FPO/raw/master/fpo.owl#")
time = Namespace("http://www.w3.org/2006/time#")

g.bind('cad', cad)
g.bind('add', add)
g.bind('rico', rico)
g.bind('fpo', fpo)
g.bind('time',time)

for index, row in images.iterrows():
    img = row['Image']
    MATRICE_ID = matrices_metada[row['registre']]["MATRICE_ID"]
    json = parse_record_id(img)
    subject_uri = URIRef(baseuri + f"{img}")
    g.add((subject_uri, RDF.type, rico.Instanciation))
    g.add((subject_uri, rico.identifier, Literal(img)))
    mlClasseNode = BNode()
    g.add((subject_uri, cad.hasClasse, mlClasseNode))
    g.add((mlClasseNode, cad.hasClasseValue, URIRef(mlclasse + f"MATMainTable")))
    g.add((mlClasseNode, PROV.wasGeneratedBy, URIRef(f"http://data.ign.fr/id/codes/cadastre/activity/0001")))
    folder_end = img.rfind('_')
    g.add((subject_uri,rico.isOrWasDigitalInstanciationOf,URIRef(baseuri + f"{json['departement']}_{COMMUNE}_{img}_page")))

    subject_uri_record = URIRef(baseuri + f"{json['departement']}_{COMMUNE}_{row['Image']}_page")
    g.add((subject_uri_record, RDF.type, rico.Record))
    g.add((subject_uri_record, rico.isOrWasIncludedIn, URIRef(baseuri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}")))

print(g.serialize(format='turtle'))
#write g into a .ttl file
g.serialize(destination=f"{OUTPUT_FOLDER_PATH}/{COMMUNE}_sources_pages.ttl", format='turtle')

@prefix cad: <http://data.ign.fr/def/cadastre#> .
@prefix mlclasse: <http://data.ign.fr/id/codes/cadastre/mlClasse/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rico: <https://www.ica.org/standards/RiC/ontology#> .
@prefix source: <http://data.ign.fr/id/source/> .

source:FRAD094_3P_000255_01_0015 a rico:Instanciation ;
    cad:hasClasse [ cad:hasClasseValue mlclasse:MATMainTable ;
            prov:wasGeneratedBy <http://data.ign.fr/id/codes/cadastre/activity/0001> ] ;
    rico:identifier "FRAD094_3P_000255_01_0015" ;
    rico:isOrWasDigitalInstanciationOf source:94_Gentilly_FRAD094_3P_000255_01_0015_page .

source:FRAD094_3P_000255_01_0032 a rico:Instanciation ;
    cad:hasClasse [ cad:hasClasseValue mlclasse:MATMainTable ;
            prov:wasGeneratedBy <http://data.ign.fr/id/codes/cadastre/activity/0001> ] ;
    rico:identifier "FRAD094_3P_000255_01_0032" ;
    rico:isOrWasDigitalInstanciationOf source:94_Gentilly_FRAD094_3P_000255_01_0032_page .

source:FRAD094_3P_00025

<Graph identifier=Nfe3a30bf7f114b5f8d37322433a40eda (<class 'rdflib.graph.Graph'>)>

### 1.2 Folios
- Pré-traitement des colonnes *Num_Folio*, *Tiré de* et *Porté à*
- Création des objets "Folios" à partir de la colonne *Num_Folio* et des colonnes *Tiré de* et *Porté à* (manquants)
- Création des objets spéciaux mentionnés dans les colonnes destinées aux folios (reste, construction nouvelle, ruine etc)

#### Pré-traitement

In [7]:
from functions import parse_record_id, cleanNumFolio

#Clean columns Num_Folio, Tire_de, Porte_a
clean_folio, clean_tire_de, clean_porte_a = [], [], []
symbols = [",", "→", "."," ",";","&"]

for index, row in matrices.iterrows():
    clean_folio.append(cleanNumFolio(row["Num_Folio"],symbols))
    clean_tire_de.append(cleanNumFolio(row["Tiré de_treated"],symbols))
    clean_porte_a.append(cleanNumFolio(row["Porté à_treated"],symbols))

# Create new columns containing the cleaned values
matrices['Num_Folio_clean'] = clean_folio
matrices['Tire_de_clean'] = clean_tire_de
matrices['Porte_a_clean'] = clean_porte_a

matrices['Num_Folio_clean'] = matrices['Num_Folio_clean'].astype(str)

['236↑4↓', '361', '258', '166', '235↑2↓', '138', '357', '440']
['248', 'additionconstructionsv']
['249', '249']
['288', '', '433']
['443', '443']
['450', '443', '453', '968']
['449', '968']
['968', '442']
['442', '449', '439', '450', '']
['836', '403']
['837', '403']
['403', '513↑25↓']
['837', 'additionconstructionsv']
['844', 'additionconstructionsv']
['844', 'additionconstructionsv']
['846↑2↓', 'additionconstructionsv']
['847↑2↓', 'additionconstructionsv']
['847↑2↓', 'additionconstructionsv']


In [8]:
print(clean_porte_a)

['EMPTY', '82', 'EMPTY', 'EMPTY', 'EMPTY', '156', '156', '192bis', '280bis', 'EMPTY', 'EMPTY', '34ter', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', '124bis', 'EMPTY', 'EMPTY', '192bis', '247ter', 'voiepubliquesv', 'EMPTY', 'EMPTY', '46bis', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', '211bis', '107↑2↓', 'EMPTY', 'doubleemploisv', 'EMPTY', 'EMPTY', '269↑2↓', '138', '236ter', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', '361', 'EMPTY', 'EMPTY', 'EMPTY', '236↑4↓;361;258;166;235↑2↓;138;357;440', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', '249', 'EMPTY', 'EMPTY', '248', 'EMPTY', '249;249', 'EMPTY', 'EMPTY', '249↑16↓', '249↑16↓', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'demolitionsv', 'EMPTY', 'EMPTY', 'demolitionsv', '288;433', 'EMPTY', 'EMPTY', 'EMPTY', 'augmentationsv', 'EMPTY', 'EMPTY', 'EMPTY

In [9]:
#Using matrices, create new df named folios containing all lines of matrices where register = MAT_1836 and MAT_1848. For register=MAT_1813, remove the lines where type_CF = "Bâti"
folios = matrices[(matrices['registre'] == 'MAT_1836') | (matrices['registre'] == 'MAT_1848') | ((matrices['registre'] == 'MAT_1813') & (matrices['Type_CF'] != 'Bâti'))]
folios.reset_index(drop=True)

Unnamed: 0,ID,UUID,Type_CF,Num_Folio,Alt_Num_CF,Groupe CF,Ordre de lecture,Voie,Num_Voie,Image,...,Porté à_treated,Ligne barrée ?,CF rayé ?,Spécification,Commentaire,Cote liée,registre,Num_Folio_clean,Tire_de_clean,Porte_a_clean
0,6,877701e8-ab0f-4dc6-90d9-731849696678,Non bâti,11,,1,1,,,FRAD094_3P_000255_01_0015,...,156,Non,Oui,,,,MAT_1813,11,EMPTY,156
1,7,018a368c-1a98-4cff-b374-91bffb682937,Non bâti,11,,1,2,,,FRAD094_3P_000255_01_0015,...,156,Non,Oui,,,,MAT_1813,11,EMPTY,156
2,8,1abd1a94-ad72-4d45-b007-f87f6faf8577,Non bâti,11,,1,3,,,FRAD094_3P_000255_01_0015,...,192bis,Oui,Oui,,,,MAT_1813,11,EMPTY,192bis
3,9,4bf1bd10-fea2-4945-b20d-f3440c571df1,Non bâti,23bis,,1,1,,,FRAD094_3P_000255_01_0032,...,280bis,Oui,Oui,,,,MAT_1813,23bis,constructionnouvellesv,280bis
4,10,4b6e66ca-1318-4822-a729-7bd2a3607a44,Non bâti,34ter,,1,1,,,FRAD094_3P_000255_01_0044,...,,Non,Oui,,,,MAT_1813,34ter,46ter,EMPTY
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
432,219,e0ac6603-2873-4ce4-8eff-855207af9abe,Non Bâti,1221,,1,6,,,FRAD094_3P_000264_01_0221,...,,Non,Non,,,,MAT_1848,1221,EMPTY,EMPTY
433,220,8bb35f06-61a6-4ef0-8e33-613cab5c70a1,Non Bâti,1221,,1,7,,,FRAD094_3P_000264_01_0221,...,,Non,Non,,,,MAT_1848,1221,EMPTY,EMPTY
434,221,3779a0f9-d748-4725-bab6-27ae5c56bad1,Non Bâti,1222,,1,1,,,FRAD094_3P_000264_01_0222,...,,Non,Non,,,,MAT_1848,1222,EMPTY,EMPTY
435,222,86f61693-f811-4f84-8f35-0c64b3b523ce,Non Bâti,1222,,1,2,,,FRAD094_3P_000264_01_0222,...,,Non,Non,,,,MAT_1848,1222,EMPTY,EMPTY


In [10]:
#Create a new dataframe from sources with columns "Num_Folio" and "Image" containing only distinct rows
folios_pages = folios[["Num_Folio_clean","Alt_Num_CF","Image","registre"]].drop_duplicates(subset=["Num_Folio_clean","Alt_Num_CF","Image","registre"]).reset_index(drop=True)
display(folios_pages)

Unnamed: 0,Num_Folio_clean,Alt_Num_CF,Image,registre
0,11,,FRAD094_3P_000255_01_0015,MAT_1813
1,23bis,,FRAD094_3P_000255_01_0032,MAT_1813
2,34ter,,FRAD094_3P_000255_01_0044,MAT_1813
3,46bis,,FRAD094_3P_000255_01_0057,MAT_1813
4,107bis,,FRAD094_3P_000255_01_0125,MAT_1813
...,...,...,...,...
148,1218,,FRAD094_3P_000264_01_0218,MAT_1848
149,1219,,FRAD094_3P_000264_01_0219,MAT_1848
150,1220,,FRAD094_3P_000264_01_0220,MAT_1848
151,1221,,FRAD094_3P_000264_01_0221,MAT_1848


In [15]:
from rdflib import Graph, Literal, Namespace, RDF, URIRef, BNode
from rdflib.namespace import XSD, DCTERMS
import uuid

# Create a new RDF graph
g = Graph()

baseuri = Namespace("http://data.ign.fr/id/source/")
srctypeuri = Namespace("http://data.ign.fr/id/codes/cadastre/sourceType/")
g.bind('source', baseuri)
g.bind('srctype', srctypeuri)
# Define the namespaces
cad = Namespace("http://data.ign.fr/def/cadastre#")
add = Namespace("http://rdf.geohistoricaldata.org/def/address#")
rico = Namespace("https://www.ica.org/standards/RiC/ontology#")
fpo = Namespace("https://github.com/johnBradley501/FPO/raw/master/fpo.owl#")
time = Namespace("http://www.w3.org/2006/time#")

g.bind('cad', cad)
g.bind('add', add)
g.bind('rico', rico)
g.bind('fpo', fpo)
g.bind('time',time)

# Iterate over each row in the DataFrame
for index, row in folios_pages.iterrows():
    json = parse_record_id(row['Image'])
    MATRICE_ID = matrices_metada[row['registre']]["MATRICE_ID"]

    subject_uri = URIRef(baseuri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}_{str(row['Num_Folio_clean'])}")
    g.add((subject_uri, RDF.type, rico.RecordPart))
    g.add((subject_uri, cad.isSourceType, URIRef(srctypeuri.Folio)))
    g.add((subject_uri, cad.hasNumFolio, Literal(row["Num_Folio_clean"],datatype=XSD.string)))
    g.add((subject_uri, rico.isOrWasConstituentOf,URIRef(baseuri + f"{json['departement']}_{COMMUNE}_{row['Image']}_page")))

    if not pd.isna(row['Alt_Num_CF']):
        g.add((subject_uri, cad.hasAlternativeNumFolio, Literal(int(row["Alt_Num_CF"]),datatype=XSD.string)))

print(g.serialize(format='turtle'))

@prefix cad: <http://data.ign.fr/def/cadastre#> .
@prefix rico: <https://www.ica.org/standards/RiC/ontology#> .
@prefix source: <http://data.ign.fr/id/source/> .
@prefix srctype: <http://data.ign.fr/id/codes/cadastre/sourceType/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

source:94_Gentilly_MAT_B_NB_1813_107bis a rico:RecordPart ;
    cad:hasNumFolio "107bis"^^xsd:string ;
    cad:isSourceType srctype:Folio ;
    rico:isOrWasConstituentOf source:94_Gentilly_FRAD094_3P_000255_01_0125_page .

source:94_Gentilly_MAT_B_NB_1813_11 a rico:RecordPart ;
    cad:hasNumFolio "11"^^xsd:string ;
    cad:isSourceType srctype:Folio ;
    rico:isOrWasConstituentOf source:94_Gentilly_FRAD094_3P_000255_01_0015_page .

source:94_Gentilly_MAT_B_NB_1813_122bis a rico:RecordPart ;
    cad:hasNumFolio "122bis"^^xsd:string ;
    cad:isSourceType srctype:Folio ;
    rico:isOrWasConstituentOf source:94_Gentilly_FRAD094_3P_000255_01_0146_page .

source:94_Gentilly_MAT_B_NB_1813_124bis a rico:RecordPa

#### Création des folios issus de "Tiré de" et "Porté à" qui ne sont pas dans la colonne 'Num_Folios'

In [16]:
# Iterate over each row in the DataFrame
for index, row in folios.iterrows():
    if row['Tire_de_clean'] != 'EMPTY':
        ls = row['Tire_de_clean'].split(";")
        for l in ls:
            if any(num.isdigit() for num in l) and 'omission' not in l:
                MATRICE_ID = matrices_metada[row['registre']]["MATRICE_ID"]

                subject_uri = URIRef(baseuri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}_{str(l)}")
                g.add((subject_uri, RDF.type, rico.RecordPart))
                g.add((subject_uri, cad.isSourceType, URIRef(srctypeuri.Folio)))
                g.add((subject_uri, cad.hasNumFolio, Literal(l,datatype=XSD.string)))

In [17]:
# Iterate over each row in the DataFrame
for index, row in folios.iterrows():
    if row['Porte_a_clean'] != 'EMPTY':
        ls = row['Porte_a_clean'].split(";")
        for l in ls:
            #test if str has digit
            if any(num.isdigit() for num in l) and 'omission' not in l:
                MATRICE_ID = matrices_metada[row['registre']]["MATRICE_ID"]

                subject_uri = URIRef(baseuri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}_{str(l)}")
                g.add((subject_uri, RDF.type, rico.RecordPart))
                g.add((subject_uri, cad.isSourceType, URIRef(srctypeuri.Folio)))
                g.add((subject_uri, cad.hasNumFolio, Literal(l,datatype=XSD.string)))

In [18]:
g.serialize(destination=f"{OUTPUT_FOLDER_PATH}/{COMMUNE}_sources_folios.ttl", format='turtle')

<Graph identifier=Nb99c8760dced4444a3cae891d49f662c (<class 'rdflib.graph.Graph'>)>

### 1.3 Dates
Une date dans le cadastre correspond généralement une année.

Une cellule contient une date valide si :
* uniquement des chiffres
* 4 chiffres
* valeur située entre l'initialisation et la clôture de la matrice

### 1.4 Adresses

In [169]:
# Select the column as a new DataFrame
addresses = matrices[['registre','Lieu-dit_treated','Lieu-dit_type']].copy().drop_duplicates().reset_index(drop=True)
addresses

Unnamed: 0,registre,Lieu-dit_treated,Lieu-dit_type
0,MAT_1813,,
1,MAT_1813,Les Girantiers,District
2,MAT_1813,Rue Thiers,Thoroughfare
3,MAT_1813,Les Girantins,District
4,MAT_1813,Rue Thiers;10,Address
...,...,...,...
130,MAT_1848,Barrière d'Italie;8↑B↓,Address
131,MAT_1848,Barrière d'Italie;8↑A↓,Address
132,MAT_1848,Barrière d'Italie;6,Address
133,MAT_1848,Barrière d'Italie;4,Address


In [170]:
multipart_addresses_street = []
multipart_addresses_street_number = []
multipart_addresses_street_type = []
multipart_addresses_street_number_type = []


for index, row in addresses.iterrows():
    tag = str(row["Lieu-dit_treated"])
    if ';' in tag:
        add = tag.split(";")
        add_street_or_district = add[0]
        add_num_or_part = add[1]

        multipart_addresses_street.append(add_street_or_district)
        multipart_addresses_street_number.append(add_num_or_part)

        #test if digit
        if any(num.isdigit() for num in add_num_or_part):
            multipart_addresses_street_type.append('Thoroughfare')
            multipart_addresses_street_number_type.append('StreetNumber')
        else:
            multipart_addresses_street_type.append('District')
            multipart_addresses_street_number_type.append('Undefined')

    else:
        multipart_addresses_street.append('')
        multipart_addresses_street_number.append('')
        multipart_addresses_street_type.append('')
        multipart_addresses_street_number_type.append('')

addresses['part_street_district'] = multipart_addresses_street
addresses['part_street_number'] = multipart_addresses_street_number
addresses['part_street_district_type'] = multipart_addresses_street_type
addresses['part_street_number_type'] = multipart_addresses_street_number_type

#assign a distinct uuid for each group of rows with same values in Lieu-dit_treated and registre
addresses['address_uuid'] = [uuid.uuid4() for _ in range(len(addresses))]

addresses

Unnamed: 0,registre,Lieu-dit_treated,Lieu-dit_type,part_street_district,part_street_number,part_street_district_type,part_street_number_type,address_uuid
0,MAT_1813,,,,,,,5e646a3e-1f6f-416d-b11f-3a653b4fbc6c
1,MAT_1813,Les Girantiers,District,,,,,8a7fbafa-d1a0-4cb8-afe3-2a32282e2b2c
2,MAT_1813,Rue Thiers,Thoroughfare,,,,,c52565bf-9559-48a4-a36e-0f0050ca3d93
3,MAT_1813,Les Girantins,District,,,,,2f1ad073-443f-4ae9-a2bd-93919c73aebf
4,MAT_1813,Rue Thiers;10,Address,Rue Thiers,10,Thoroughfare,StreetNumber,ee3ae674-3fcc-4e88-9ee6-b8c468578e7c
...,...,...,...,...,...,...,...,...
130,MAT_1848,Barrière d'Italie;8↑B↓,Address,Barrière d'Italie,8↑B↓,Thoroughfare,StreetNumber,d57bcc51-9a54-4a98-8738-9cd74b3c6107
131,MAT_1848,Barrière d'Italie;8↑A↓,Address,Barrière d'Italie,8↑A↓,Thoroughfare,StreetNumber,098fa37a-d374-40c2-baac-d41fad6cd4ff
132,MAT_1848,Barrière d'Italie;6,Address,Barrière d'Italie,6,Thoroughfare,StreetNumber,6776e84e-9b64-42ca-be18-5169f989dc78
133,MAT_1848,Barrière d'Italie;4,Address,Barrière d'Italie,4,Thoroughfare,StreetNumber,c32e40f8-6d02-48b3-b37b-c9654f6e39b3


In [171]:
import pandas as pd
import uuid

# Create a new column 'part_street_district_uuid'
addresses['part_street_district_uuid'] = None

# Create a dictionary to store the uuid for each unique part_street_district
uuid_dict = {}

# Iterate over the DataFrame
for i, row in addresses.iterrows():
    if row['part_street_district'] != '':
        # Check if the part_street_district value is equal to one of the Lieu-dit_treated values
        if row['part_street_district'] in addresses['Lieu-dit_treated'].values:
            # If yes, set the part_street_district_uuid to the uuid of the retrieved Lieu-dit_treated
            addresses.loc[i, 'part_street_district_uuid'] = addresses.loc[addresses['Lieu-dit_treated'] == row['part_street_district'], 'address_uuid'].values[0]
        else:
            # If no, check if the part_street_district value has other occurrences in the part_street_district column
            if row['part_street_district'] in addresses['part_street_district'].values:
                # If yes, check if the part_street_district value is already in the uuid_dict
                if row['part_street_district'] in uuid_dict:
                    # If yes, assign the same uuid
                    addresses.loc[i, 'part_street_district_uuid'] = uuid_dict[row['part_street_district']]
                else:
                    # If no, create a new uuid and add it to the uuid_dict
                    new_uuid = uuid.uuid4()
                    uuid_dict[row['part_street_district']] = new_uuid
                    addresses.loc[i, 'part_street_district_uuid'] = new_uuid
            else:
                # If no, create a new uuid
                addresses.loc[i, 'part_street_district_uuid'] = uuid.uuid4()

In [172]:
addresses

Unnamed: 0,registre,Lieu-dit_treated,Lieu-dit_type,part_street_district,part_street_number,part_street_district_type,part_street_number_type,address_uuid,part_street_district_uuid
0,MAT_1813,,,,,,,5e646a3e-1f6f-416d-b11f-3a653b4fbc6c,
1,MAT_1813,Les Girantiers,District,,,,,8a7fbafa-d1a0-4cb8-afe3-2a32282e2b2c,
2,MAT_1813,Rue Thiers,Thoroughfare,,,,,c52565bf-9559-48a4-a36e-0f0050ca3d93,
3,MAT_1813,Les Girantins,District,,,,,2f1ad073-443f-4ae9-a2bd-93919c73aebf,
4,MAT_1813,Rue Thiers;10,Address,Rue Thiers,10,Thoroughfare,StreetNumber,ee3ae674-3fcc-4e88-9ee6-b8c468578e7c,c52565bf-9559-48a4-a36e-0f0050ca3d93
...,...,...,...,...,...,...,...,...,...
130,MAT_1848,Barrière d'Italie;8↑B↓,Address,Barrière d'Italie,8↑B↓,Thoroughfare,StreetNumber,d57bcc51-9a54-4a98-8738-9cd74b3c6107,900688a4-04e4-4ada-813d-8ea3359f9dd3
131,MAT_1848,Barrière d'Italie;8↑A↓,Address,Barrière d'Italie,8↑A↓,Thoroughfare,StreetNumber,098fa37a-d374-40c2-baac-d41fad6cd4ff,900688a4-04e4-4ada-813d-8ea3359f9dd3
132,MAT_1848,Barrière d'Italie;6,Address,Barrière d'Italie,6,Thoroughfare,StreetNumber,6776e84e-9b64-42ca-be18-5169f989dc78,900688a4-04e4-4ada-813d-8ea3359f9dd3
133,MAT_1848,Barrière d'Italie;4,Address,Barrière d'Italie,4,Thoroughfare,StreetNumber,c32e40f8-6d02-48b3-b37b-c9654f6e39b3,900688a4-04e4-4ada-813d-8ea3359f9dd3


In [173]:
# Create a new RDF graph
g = Graph()

baseuri = Namespace("http://data.ign.fr/id/landmark/")
srcuri = Namespace("http://data.ign.fr/id/source/")
cad_ltype = Namespace("http://data.ign.fr/def/cadastre/landmarkType/")
g.bind('landmark', baseuri)
g.bind('source', srcuri)
g.bind('cad_ltype', cad_ltype)

# Define the namespaces
cad = Namespace("http://data.ign.fr/def/cadastre#")
add = Namespace("http://rdf.geohistoricaldata.org/def/address#")
atype = Namespace("http://rdf.geohistoricaldata.org/def/address/attributeType/")
ltype = Namespace("http://rdf.geohistoricaldata.org/def/address/landmarkType/")
lrtype = Namespace("http://rdf.geohistoricaldata.org/def/address/landmarkRelationType/")
rico = Namespace("https://www.ica.org/standards/RiC/ontology#")
fpo = Namespace("https://github.com/johnBradley501/FPO/raw/master/fpo.owl#")
time = Namespace("http://www.w3.org/2006/time#")
g.bind('cad', cad)
g.bind('add', add)
g.bind('rico', rico)
g.bind('fpo', fpo)
g.bind('time',time)
g.bind("ltype", ltype)
g.bind("lrtype", lrtype)
g.bind("atype", atype)

for index, row in addresses.iterrows():
    if row['Lieu-dit_treated'] != np.nan:
        subject_uri = URIRef(baseuri + str(row['address_uuid']))
        g.add((subject_uri, RDF.type, add.Landmark))

        if pd.notnull(addresses.loc[index, 'Lieu-dit_type']):
            if ';' in row['Lieu-dit_treated']:
                name = row['Lieu-dit_treated'].split(";")

                g.add((subject_uri, add.isLandmarkType, URIRef(ltype + row['part_street_number_type'])))

                if any(num.isdigit() for num in name[1]):
                    g.add((subject_uri, RDFS.label, Literal(name[1] + ' (' + name[0] + ', ' + COMMUNE + ')', datatype=XSD.string)))
                    relationode = BNode()
                    g.add((URIRef(relationode.n3()), add.isLandmarkRelationType, lrtype.Along))
                else:
                    g.add((subject_uri, RDFS.label, Literal(name[0] + ' (' + name[1] + ', ' + COMMUNE + ')', datatype=XSD.string)))
                    relationode = BNode()
                    g.add((URIRef(relationode.n3()), add.isLandmarkRelationType, lrtype.Undefined))

                g.add((URIRef(relationode.n3()), RDF.type, add.LandmarkRelation))
                g.add((URIRef(relationode.n3()), add.locatum, subject_uri))
                g.add((URIRef(relationode.n3()), add.relatum, URIRef(baseuri + str(row['part_street_district_uuid']))))

                #Street or district relation with section
                sectionNode = BNode()
                g.add((URIRef(sectionNode.n3()), RDF.type, add.LandmarkRelation))
                g.add((URIRef(sectionNode.n3()), add.isLandmarkRelationType, lrtype.Within))
                g.add((URIRef(sectionNode.n3()), add.locatum, URIRef(baseuri + str(row['part_street_district_uuid']))))
                if row['registre'] != 'MAT_1848':
                    g.add((URIRef(sectionNode.n3()), add.relatum, URIRef(baseuri + 'da6a5c2c-e86d-43bb-8950-7169bd0df60a'))) #Section D Cadastre 1848
                else:
                    g.add((URIRef(sectionNode.n3()), add.relatum, URIRef(baseuri + '87d7c2f6-306b-45a1-a833-5e17821c3102'))) #Section B Cadastre 1811

            else:
                g.add((subject_uri, add.isLandmarkType, URIRef(ltype + row['Lieu-dit_type'])))
                g.add((subject_uri, RDFS.label, Literal(row['Lieu-dit_treated'] + ', ' + COMMUNE, datatype=XSD.string)))

                relationode = BNode()
                g.add((URIRef(relationode.n3()), add.isLandmarkRelationType, lrtype.Within))
                g.add((URIRef(relationode.n3()), add.locatum, subject_uri))

                sectionNode = BNode()
                g.add((URIRef(sectionNode.n3()), RDF.type, add.LandmarkRelation))
                g.add((URIRef(sectionNode.n3()), add.isLandmarkRelationType, lrtype.Within))
                g.add((URIRef(sectionNode.n3()), add.locatum, subject_uri))
                if row['registre'] != 'MAT_1848':
                    g.add((URIRef(sectionNode.n3()), add.relatum, URIRef(baseuri + 'da6a5c2c-e86d-43bb-8950-7169bd0df60a'))) #Section D Cadastre 1848
                else:
                    g.add((URIRef(sectionNode.n3()), add.relatum, URIRef(baseuri + '87d7c2f6-306b-45a1-a833-5e17821c3102'))) #Section B Cadastre 1811

        g.add((subject_uri, cad.sourcedFrom, URIRef(srcuri + f'94_{COMMUNE}_{row["registre"]}')))


print(g.serialize(format='turtle'))
g.serialize(destination=f"{OUTPUT_FOLDER_PATH}/{COMMUNE}_landmarks_lieu_dit.ttl", format='turtle')

@prefix add: <http://rdf.geohistoricaldata.org/def/address#> .
@prefix cad: <http://data.ign.fr/def/cadastre#> .
@prefix landmark: <http://data.ign.fr/id/landmark/> .
@prefix lrtype: <http://rdf.geohistoricaldata.org/def/address/landmarkRelationType/> .
@prefix ltype: <http://rdf.geohistoricaldata.org/def/address/landmarkType/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix source: <http://data.ign.fr/id/source/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<_:N001f3c9f321d40619ec31256f8ec4048> a add:LandmarkRelation ;
    add:isLandmarkRelationType lrtype:Along ;
    add:locatum landmark:684193c6-b9d1-4505-8c74-099b4a8de943 ;
    add:relatum landmark:3153cb63-4f5c-4e7f-815d-7e9f083fcb96 .

<_:N037bc1114a05412292067b6547c43059> add:isLandmarkRelationType lrtype:Within ;
    add:locatum landmark:900688a4-04e4-4ada-813d-8ea3359f9dd3 .

<_:N04f7225182504102b5252a57034b725d> a add:LandmarkRelation ;
    add:isLandmarkRelationType lrtype:Within ;
    add:locatum 

<Graph identifier=N7a8c90e70d8e4714bcb4d654c4987a15 (<class 'rdflib.graph.Graph'>)>

In [174]:
#Join Address/District UUID to the main matrices table
subaddresses = addresses[['address_uuid','registre','Lieu-dit_treated']].copy()
#join subaddresses['uuid'] to matrices on ['registre','Lieu-dit_treated'] rename the column to 'address_uuid'
matrices = pd.merge(matrices, subaddresses, how='left', left_on=['registre','Lieu-dit_treated'], right_on=['registre','Lieu-dit_treated'])
matrices

Unnamed: 0,ID,UUID,Type_CF,Num_Folio,Alt_Num_CF,Groupe CF,Ordre de lecture,Voie,Num_Voie,Image,...,Tiré de_treated,Porté à,Porté à_treated,Ligne barrée ?,CF rayé ?,Spécification,Commentaire,Cote liée,registre,address_uuid
0,1,b2b478a0-7b8f-4715-84c4-bcd5112e41eb,Bâti,108,,1,1,,,FRAD094_3P_000255_01_0586,...,,,,Non,Non,Matrice des propriétés bâties ne sont plus mis...,,,MAT_1813,5e646a3e-1f6f-416d-b11f-3a653b4fbc6c
1,2,6af95196-31c4-49cc-914a-ed618b0c6646,Bâti,114,,1,1,,,FRAD094_3P_000255_01_0588,...,,82,82,Oui,Oui,Matrice des propriétés bâties ne sont plus mis...,,,MAT_1813,5e646a3e-1f6f-416d-b11f-3a653b4fbc6c
2,3,afb83e4f-2351-42ca-af63-14ccd79c5621,Bâti,82,,1,1,,,FRAD094_3P_000255_01_0579,...,114,,,Non,Non,Matrice des propriétés bâties ne sont plus mis...,,,MAT_1813,5e646a3e-1f6f-416d-b11f-3a653b4fbc6c
3,4,432f72f1-ba8a-453f-90ec-79b2b89e8592,Bâti,82,,1,2,,,FRAD094_3P_000255_01_0579,...,,,,Non,Non,Matrice des propriétés bâties ne sont plus mis...,,,MAT_1813,5e646a3e-1f6f-416d-b11f-3a653b4fbc6c
4,5,2d3be415-19f5-4684-996e-5fb3ccc434bf,Bâti,64,,1,1,,,FRAD094_3P_000255_01_0571,...,,,,Non,Non,Matrice des propriétés bâties ne sont plus mis...,,,MAT_1813,5e646a3e-1f6f-416d-b11f-3a653b4fbc6c
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
437,219,e0ac6603-2873-4ce4-8eff-855207af9abe,Non Bâti,1221,,1,6,,,FRAD094_3P_000264_01_0221,...,,,,Non,Non,,,,MAT_1848,c32e40f8-6d02-48b3-b37b-c9654f6e39b3
438,220,8bb35f06-61a6-4ef0-8e33-613cab5c70a1,Non Bâti,1221,,1,7,,,FRAD094_3P_000264_01_0221,...,,,,Non,Non,,,,MAT_1848,c32e40f8-6d02-48b3-b37b-c9654f6e39b3
439,221,3779a0f9-d748-4725-bab6-27ae5c56bad1,Non Bâti,1222,,1,1,,,FRAD094_3P_000264_01_0222,...,,,,Non,Non,,,,MAT_1848,d09000f9-6cb0-450b-9d2b-b86c6cfd51c6
440,222,86f61693-f811-4f84-8f35-0c64b3b523ce,Non Bâti,1222,,1,2,,,FRAD094_3P_000264_01_0222,...,,,,Non,Non,,,,MAT_1848,d09000f9-6cb0-450b-9d2b-b86c6cfd51c6


### 1.5 Propriétaires

In [768]:
owners_df.columns
owners_df['groupe_cf'] = owners_df['groupe_cf'].astype(str)
matrices['Groupe CF'] = matrices['Groupe CF'].astype(str)

In [769]:
matrices = matrices.merge(owners_df, how='left', right_on=["registre","type_folio","folio",'groupe_cf'], left_on=["registre","Type_CF","Num_Folio","Groupe CF"])
matrices = matrices.drop(["type_folio","folio",'groupe_cf',"transcription","cell"], axis=1).reset_index(drop=True)
matrices['CF_uuid'] = [uuid.uuid4() for _ in range(len(matrices))]
matrices['CF_uuid'] = matrices.groupby(['registre', 'Num_Folio_clean','Type_CF','Groupe CF'])['CF_uuid'].transform(lambda x: uuid.uuid4())
matrices

Unnamed: 0,ID,UUID,Type_CF,Num_Folio,Alt_Num_CF,Groupe CF,Ordre de lecture,Voie,Num_Voie,Image,...,Cote liée,registre,Num_Folio_clean,Tire_de_clean,Porte_a_clean,Lieu-dit_treated,Lieu-dit_uuid,owners,changes,CF_uuid
0,1,b14e16a7-54a9-472b-9e15-d71d1cde7b39,Bâti,108,,1,1,,,FRAD094_3P_000255_01_0586,...,,MAT_1813,108,EMPTY,EMPTY,,2b541f04-1541-4770-bef0-da174f89cccd,"[{'owner-id': 1, 'owner-lastname': 'Legendre',...",[],92c2883e-f5a6-44c5-baf8-2213360fc76e
1,2,1d704267-987d-4a0a-89bd-a96620da39e5,Bâti,114,,1,1,,,FRAD094_3P_000255_01_0588,...,,MAT_1813,114,EMPTY,82,,2b541f04-1541-4770-bef0-da174f89cccd,"[{'owner-id': 1, 'owner-lastname': 'Louves', '...",[],247c086f-4780-4cc9-b1d5-9a043dd095f4
2,3,622eb82c-1332-4be8-a0fc-805ff7eb3421,Bâti,82,,1,1,,,FRAD094_3P_000255_01_0579,...,,MAT_1813,82,114,EMPTY,,2b541f04-1541-4770-bef0-da174f89cccd,"[{'owner-id': 1, 'owner-lastname': 'Hardon', '...",[],fbb82503-ba83-49eb-8c1e-0b4649042cf0
3,4,03456c67-aff8-49ed-9291-d4c27aa28373,Bâti,82,,1,2,,,FRAD094_3P_000255_01_0579,...,,MAT_1813,82,EMPTY,EMPTY,,2b541f04-1541-4770-bef0-da174f89cccd,"[{'owner-id': 1, 'owner-lastname': 'Hardon', '...",[],fbb82503-ba83-49eb-8c1e-0b4649042cf0
4,5,2ecd32f7-5c98-4a74-b94e-ccd28464bfe0,Bâti,64,,1,1,,,FRAD094_3P_000255_01_0571,...,,MAT_1813,64,EMPTY,EMPTY,,2b541f04-1541-4770-bef0-da174f89cccd,,,1444f2d6-a309-418d-bddc-2cecc18b0599
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
430,209,6a3147f9-1fa1-46be-adc9-ba4eb0832b55,Non Bâti,1219,,1,1,,,FRAD094_3P_000264_01_0219,...,,MAT_1848,1219,EMPTY,EMPTY,bre ditie mon blanche 8a,c52ad1a4-8b22-450c-919e-3c140f78288c,"[{'owner-id': 1, 'owner-lastname': 'Delon', 'i...",[],f0c06272-77de-42f4-bfda-52799de54b67
431,210,8dfa4b95-7ef2-41aa-8d0b-59bc7f00ac78,Non Bâti,1219,,1,2,,,FRAD094_3P_000264_01_0219,...,,MAT_1848,1219,EMPTY,EMPTY,bre ditie mon blanche 8a,c52ad1a4-8b22-450c-919e-3c140f78288c,"[{'owner-id': 1, 'owner-lastname': 'Delon', 'i...",[],f0c06272-77de-42f4-bfda-52799de54b67
432,211,f6323014-f334-44ae-8135-5af77ffd4296,Non Bâti,1220,,1,1,,,FRAD094_3P_000264_01_0220,...,,MAT_1848,1220,EMPTY,EMPTY,bre ditie mon blanche 6,9e918682-d76e-46d4-8917-3d9f4cc82b6d,"[{'owner-id': 1, 'owner-lastname': 'Dufresne',...",[],24527afd-27b9-4ca6-bf65-6b3d63ce183a
433,212,94ffd0e2-2f07-40f0-9606-c54785cb161d,Non Bâti,1220,,1,2,,,FRAD094_3P_000264_01_0220,...,,MAT_1848,1220,EMPTY,EMPTY,bre ditie mon blanche 6,9e918682-d76e-46d4-8917-3d9f4cc82b6d,"[{'owner-id': 1, 'owner-lastname': 'Dufresne',...",[],24527afd-27b9-4ca6-bf65-6b3d63ce183a


* Créer les comptes fonciers
* Associer à chaque compte foncier ses propriétaires (ordonnés dans le temps)
* Associer à chaque compte foncier le landmark (état) qu'il mentionne

In [772]:
# Create a new RDF graph
g = Graph()

baseuri = Namespace("http://data.ign.fr/id/landmark/")
srcuri = Namespace("http://data.ign.fr/id/source/")
cad_ltype = Namespace("http://data.ign.fr/def/cadastre/landmarkType/")
cad_atype = Namespace("http://data.ign.fr/def/cadastre/attributeType/")
srctype = Namespace("http://data.ign.fr/id/codes/cadastre/sourceType/")
mlclasse= Namespace("http://data.ign.fr/id/codes/cadastre/mlClasse/")

g.bind('landmark', baseuri)
g.bind('source', srcuri)
g.bind('cad_ltype', cad_ltype)
g.bind('srctype', srctype)
g.bind('mlclasse', mlclasse)

for index, row in matrices.iterrows():
    json = parse_record_id(row["Image"])
    lineuuid_ = MATRICE_ID + '_' + str(row['ID']) #str(uuid.uuid4())

    subject_uri = URIRef(srcuri + f"{row['CF_uuid']}")
    g.add((subject_uri, RDF.type, rico.RecordPart))
    g.add((subject_uri, cad.isSourceType, URIRef(srctype.CompteFoncier)))
    g.add((subject_uri, rico.hasOrHadConstituent, Literal(srcuri + f"{row['UUID']}")))
    g.add((subject_uri, rico.hasOrWasConstituentOf, URIRef(baseuri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}_{str(row['Num_Folio_clean'])}")))
    g.add((subject_uri, rico.hasOrHadConstituent, Literal(srcuri + f"{row['CF_uuid']}_mutations")))

    articlemutationuri = URIRef(srcuri + f"{row['CF_uuid']}_mutations")
    g.add((articlemutationuri, RDF.type, rico.RecordPart))
    g.add((articlemutationuri, cad.isSourceType, URIRef(srctype.ArticleDeMutation)))

    ownersattribute = BNode()
    g.add((ownersattribute, RDF.type, add.Attribute))
    g.add((subject_uri, cad.hasCadastreAttribute, ownersattribute))
    g.add((ownersattribute, add.hasAttributeType, URIRef(cad_atype.PlotTaxpayer)))
    print(row['owners'])


    articleclassementuri = URIRef(srcuri + f"{row['UUID']}")
    g.add((articleclassementuri, RDF.type, rico.RecordPart))
    g.add((articleclassementuri, cad.isSourceType, URIRef(srctype.ArticleDeClassement)))

[{'owner-id': 1, 'owner-lastname': 'Legendre', 'owner-address': 'H↑re↓ de →Fontainebleau', 'is-stripped': 'no'}]
[{'owner-id': 1, 'owner-lastname': 'Louves', 'is-stripped': 'no'}]
[{'owner-id': 1, 'owner-lastname': 'Hardon', 'owner-job': 'Bourgeois', 'owner-activity': 'nourisseur', 'owner-address': 'à Paris', 'is-stripped': 'no'}]
[{'owner-id': 1, 'owner-lastname': 'Hardon', 'owner-job': 'Bourgeois', 'owner-activity': 'nourisseur', 'owner-address': 'à Paris', 'is-stripped': 'no'}]
nan
[{'owner-id': 1, 'owner-lastname': 'Bacot', 'owner-firstname': 'David', 'owner-activity': 'couverturier', 'owner-address': 'à Paris', 'is-stripped': 'no'}]
[{'owner-id': 1, 'owner-lastname': 'Bacot', 'owner-firstname': 'David', 'owner-activity': 'couverturier', 'owner-address': 'à Paris', 'is-stripped': 'no'}]
[{'owner-id': 1, 'owner-lastname': 'Bacot', 'owner-firstname': 'David', 'owner-activity': 'couverturier', 'owner-address': 'à Paris', 'is-stripped': 'no'}]
[{'owner-id': 1, 'owner-lastname': 'Besson

### 1.X Création des états de parcelles

In [177]:
matrices

Unnamed: 0,ID,UUID,Type_CF,Num_Folio,Alt_Num_CF,Groupe CF,Ordre de lecture,Voie,Num_Voie,Image,...,Tiré de_treated,Porté à,Porté à_treated,Ligne barrée ?,CF rayé ?,Spécification,Commentaire,Cote liée,registre,address_uuid
0,1,b2b478a0-7b8f-4715-84c4-bcd5112e41eb,Bâti,108,,1,1,,,FRAD094_3P_000255_01_0586,...,,,,Non,Non,Matrice des propriétés bâties ne sont plus mis...,,,MAT_1813,5e646a3e-1f6f-416d-b11f-3a653b4fbc6c
1,2,6af95196-31c4-49cc-914a-ed618b0c6646,Bâti,114,,1,1,,,FRAD094_3P_000255_01_0588,...,,82,82,Oui,Oui,Matrice des propriétés bâties ne sont plus mis...,,,MAT_1813,5e646a3e-1f6f-416d-b11f-3a653b4fbc6c
2,3,afb83e4f-2351-42ca-af63-14ccd79c5621,Bâti,82,,1,1,,,FRAD094_3P_000255_01_0579,...,114,,,Non,Non,Matrice des propriétés bâties ne sont plus mis...,,,MAT_1813,5e646a3e-1f6f-416d-b11f-3a653b4fbc6c
3,4,432f72f1-ba8a-453f-90ec-79b2b89e8592,Bâti,82,,1,2,,,FRAD094_3P_000255_01_0579,...,,,,Non,Non,Matrice des propriétés bâties ne sont plus mis...,,,MAT_1813,5e646a3e-1f6f-416d-b11f-3a653b4fbc6c
4,5,2d3be415-19f5-4684-996e-5fb3ccc434bf,Bâti,64,,1,1,,,FRAD094_3P_000255_01_0571,...,,,,Non,Non,Matrice des propriétés bâties ne sont plus mis...,,,MAT_1813,5e646a3e-1f6f-416d-b11f-3a653b4fbc6c
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
437,219,e0ac6603-2873-4ce4-8eff-855207af9abe,Non Bâti,1221,,1,6,,,FRAD094_3P_000264_01_0221,...,,,,Non,Non,,,,MAT_1848,c32e40f8-6d02-48b3-b37b-c9654f6e39b3
438,220,8bb35f06-61a6-4ef0-8e33-613cab5c70a1,Non Bâti,1221,,1,7,,,FRAD094_3P_000264_01_0221,...,,,,Non,Non,,,,MAT_1848,c32e40f8-6d02-48b3-b37b-c9654f6e39b3
439,221,3779a0f9-d748-4725-bab6-27ae5c56bad1,Non Bâti,1222,,1,1,,,FRAD094_3P_000264_01_0222,...,,,,Non,Non,,,,MAT_1848,d09000f9-6cb0-450b-9d2b-b86c6cfd51c6
440,222,86f61693-f811-4f84-8f35-0c64b3b523ce,Non Bâti,1222,,1,2,,,FRAD094_3P_000264_01_0222,...,,,,Non,Non,,,,MAT_1848,d09000f9-6cb0-450b-9d2b-b86c6cfd51c6


In [176]:
matrices.columns

Index(['ID', 'UUID', 'Type_CF', 'Num_Folio', 'Alt_Num_CF', 'Groupe CF',
       'Ordre de lecture', 'Voie', 'Num_Voie', 'Image', 'Section_clean',
       'Parcelle_clean', 'Lieu-dit_transcript', 'Lieu-dit_clean',
       'Lieu-dit_treated', 'Lieu-dit_type', 'Propriétaires_transcript',
       'Nature_transcript', 'Nature_clean', 'Nature_treated', 'Date entrée',
       'Date entrée_treated', 'Date sortie', 'Date sortie_treated', 'Tiré de',
       'Tiré de_treated', 'Porté à', 'Porté à_treated', 'Ligne barrée ?',
       'CF rayé ?', 'Spécification', 'Commentaire', 'Cote liée', 'registre',
       'address_uuid'],
      dtype='object')

In [194]:
# Create a new RDF graph
g = Graph()

baseuri = Namespace("http://data.ign.fr/id/landmark/")
srcuri = Namespace("http://data.ign.fr/id/source/")
owneruri = Namespace("http://data.ign.fr/id/owner/")
cad_ltype = Namespace("http://data.ign.fr/def/cadastre/landmarkType/")
cad_atype = Namespace("http://data.ign.fr/def/cadastre/attributeType/")
lrtype = Namespace("http://rdf.geohistoricaldata.org/def/address#")
g.bind('landmark', baseuri)
g.bind('owner', owneruri)
g.bind('source', srcuri)
g.bind('cad_ltype', cad_ltype)
g.bind('cad_atype', cad_atype)

# Define the namespaces
cad = Namespace("http://data.ign.fr/def/cadastre#")
add = Namespace("http://rdf.geohistoricaldata.org/def/address#")
rico = Namespace("https://www.ica.org/standards/RiC/ontology#")
fpo = Namespace("https://github.com/johnBradley501/FPO/raw/master/fpo.owl#")
time = Namespace("http://www.w3.org/2006/time#")

g.bind('cad', cad)
g.bind('add', add)
g.bind('rico', rico)
g.bind('fpo', fpo)
g.bind('time',time)

# Iterate over each row in the DataFrame
for index, row in matrices.iterrows():

    json = parse_record_id(row["Image"])
    MATRICE_ID = matrices_metada[row['registre']]["MATRICE_ID"]
    PLAN = matrices_metada[row['registre']]["PLAN"]
    lineuuid_ = MATRICE_ID + '_' + str(row['ID']) #str(uuid.uuid4())

    subject_uri = URIRef(baseuri + f"{row['UUID']}")
    g.add((subject_uri, RDF.type, add.Landmark))
    g.add((subject_uri, add.isLandmarkType, cad_ltype.Plot))
    #g.add((subject_uri, DCTERMS.identifier, Literal(row['Section_clean'] + '-' + row['Parcelle_clean'], datatype=XSD.string)))

    #Folios
    g.add((subject_uri, cad.hasNumFolio, URIRef(foliouri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}_{str(row['Num_Folio'])}")))
    tire_de = str(row['Tiré de_treated']).split(',')
    for f in tire_de:
        if any(char.isdigit() for char in str(f)):
            g.add((subject_uri, cad.takenFrom, URIRef(foliouri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}_{str(f)}")))
        elif f != 'nan':
            g.add((subject_uri, cad.takenFrom, Literal(str(f))))
    porte_a = str(row['Porté à_treated']).split(',')
    for f in porte_a:
        if any(char.isdigit() for char in str(f)):
            g.add((subject_uri, cad.passedTo, URIRef(foliouri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}_{str(f)}")))
        elif f != 'nan':
            g.add((subject_uri, cad.passedTo, Literal(str(f))))

    #Source
    rowSource = BNode()
    g.add((subject_uri, fpo.sourcedFrom, rowSource))
    g.add((rowSource, RDF.type, fpo.SourceCitation))
    g.add((rowSource, fpo.fromSource, URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}")))
    g.add((rowSource, rico.isComponentOfTransitive, URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{row['Image']}")))
    g.add((rowSource, cad.hasExtractionID, Literal(lineuuid_)))
    g.add((rowSource, PROV.wasGeneratedBy, URIRef(f"http://data.ign.fr/id/codes/cadastre/activity/0002")))
    g.add((URIRef(f"http://data.ign.fr/id/codes/cadastre/activity/0002"), PROV.used, URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{row['Image']}")))
    g.add((rowSource, rico.isOrWasDigitalInstanciation,URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{row['Image']}_{lineuuid_}_area")))
    g.add((rowSource, cad.lineOrderInArea, Literal(row['Ordre de lecture'], datatype=XSD.integer)))

    #Create recordpart
    recordparturi = URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{row['Image']}_{lineuuid_}_area")
    g.add((recordparturi, RDF.type, rico.RecordPart))
    g.add((recordparturi, rico.isOrWasIncludedIn, URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{row['Image']}_page")))
    
    #Address
    if row['Lieu-dit_treated'] != 'nan':
        plotaddress = BNode()
        g.add((subject_uri, add.hasAttribute, plotaddress))
        g.add((plotaddress, add.isAttributeType, cad_atype.PlotAddress))
        plotaddressversion = BNode()
        g.add((plotaddress, add.hasAttributeVersion, plotaddressversion))
        g.add((plotaddressversion, RDF.type, add.LandmarkRelation))
        g.add((plotaddressversion, add.isLandmarkRelationType, lrtype.Undefined))
        g.add((plotaddressversion, add.locatum, subject_uri))
        g.add((plotaddressversion, add.relatum, URIRef(baseuri + str(row['address_uuid']))))

    #Owner

    #Nature
    if not pd.isnull(row['Nature_treated']):
        nature = BNode()
        g.add((subject_uri, add.hasNature, nature))
        g.add((nature, RDF.type, add.Nature))
        g.add((nature, RDFS.label, Literal(row['Nature_treated'], datatype=XSD.string)))
    
    #Time
    if not pd.isnull(row['Date entrée']) or not pd.isnull(['Date sortie']):
        hastime = BNode()
        g.add((subject_uri, add.hasTime, hastime))
        g.add((hastime, RDF.type, add.TimeInterval))
        
        if not pd.isna(row['Date entrée']) and not pd.isnull(row['Date entrée']):
            hasbeginning = BNode()
            g.add((hastime, add.hasBeginning, hasbeginning))
            g.add((hasbeginning,RDF.type, add.TimeInstant))
            g.add((hasbeginning, add.timeCalendar, time.Gregorian))
            g.add((hasbeginning, add.timePrecision, time.Year))
            g.add((hasbeginning, add.timeStamp, Literal(row['Date entrée'], datatype=XSD.date)))
        #else:
            #g.add((hastime, add.hasBeginning, Literal(MATRICE_START, datatype=XSD.date)))#date d'ouverture de la matrice
        if not pd.isnull(row['Date sortie']) and row['Date sortie'] != 'nan':
            hasend = BNode()
            g.add((hastime, add.hasBeginning, hasend))
            g.add((hasend,RDF.type, add.TimeInstant))
            g.add((hasend, add.timeCalendar, time.Gregorian))
            g.add((hasend, add.timePrecision, time.Year))
            g.add((hasend, add.timeStamp, Literal(row['Date entrée'], datatype=XSD.date)))
print(g.serialize(format='turtle'))

Failed to convert Literal lexical form to value. Datatype=http://www.w3.org/2001/XMLSchema#date, Converter=<function parse_date at 0x79cff59e2950>
Traceback (most recent call last):
  File "/home/codespace/.python/current/lib/python3.10/site-packages/rdflib/term.py", line 2119, in _castLexicalToPython
    return conv_func(lexical)  # type: ignore[arg-type]
  File "/home/codespace/.python/current/lib/python3.10/site-packages/isodate/isodates.py", line 203, in parse_date
    raise ISO8601Error('Unrecognised ISO 8601 date format: %r' % datestring)
isodate.isoerror.ISO8601Error: Unrecognised ISO 8601 date format: 'id'
Failed to convert Literal lexical form to value. Datatype=http://www.w3.org/2001/XMLSchema#date, Converter=<function parse_date at 0x79cff59e2950>
Traceback (most recent call last):
  File "/home/codespace/.python/current/lib/python3.10/site-packages/rdflib/term.py", line 2119, in _castLexicalToPython
    return conv_func(lexical)  # type: ignore[arg-type]
  File "/home/codes

http://data.ign.fr/id/folio/94_Gentilly_MAT_NB_1836_450 443 453→968 does not look like a valid URI, trying to serialize this will break.


Exception: "http://data.ign.fr/id/folio/94_Gentilly_MAT_NB_1836_OmissionSV{{1830}}" does not look like a valid URI, I cannot serialize this as N3/Turtle. Perhaps you wanted to urlencode it?