# Création des resources RDF des mentions de parcelles

In [664]:
import pandas as pd
import numpy as np
import uuid
import re
from rdflib import Graph, Literal, Namespace, RDF, URIRef, BNode
from rdflib.namespace import XSD, DCTERMS, PROV, SKOS, RDFS
from functions import *

## 1. Matrice de 1813

In [665]:
COMMUNE = 'Gentilly'
matrices_metada = {
    "MAT_1813": {
        "PLAN": "1811",
        "MATRICE_ID": "MAT_B_NB_1813",
        "MATRICE_START": "1813",
        "MATRICE_END": "1835"
    },
    "MAT_1836": {
        "PLAN": "1811",
        "MATRICE_ID": "MAT_NB_1836",
        "MATRICE_START": "1836",
        "MATRICE_END": "1847"
    },
    "MAT_1848": {
        "PLAN": "1845",
        "MATRICE_ID": "MAT_NB_1848",
        "MATRICE_START": "1848",
        "MATRICE_END": "1860"
    }
}


In [666]:
OUTPUT_FOLDER_PATH = "/home/STual/KG-cadastre/data/rdf"

PATH = "/home/STual/KG-cadastre/data/gentilly/MAT_1813.csv"
mat1813 = pd.read_csv(PATH,header=0)
PATH = "/home/STual/KG-cadastre/data/gentilly/MAT_1836.csv"
mat1836 = pd.read_csv(PATH,header=0)
PATH = "/home/STual/KG-cadastre/data/gentilly/MAT_1848.csv"
mat1848 = pd.read_csv(PATH,header=0)

In [667]:
mat1813['registre'] = 'MAT_1813'
mat1836['registre'] = 'MAT_1836'
mat1848['registre'] = 'MAT_1848'

matrices = pd.concat([mat1813, mat1836, mat1848])
matrices = matrices.reset_index(drop=True)
print(matrices.columns)

Index(['ID', 'Type_CF', 'Num_Folio', 'Alt_Num_CF', 'Groupe CF',
       'Ordre de lecture', 'Voie', 'Num_Voie', 'Image', 'Section_clean',
       'Parcelle_clean', 'Lieu-dit_transcript', 'Lieu-dit_clean',
       'Propriétaires_transcript', 'Nature_transcript', 'Nature_clean',
       'Nature_treated', 'Date entrée', 'Date entrée_treated', 'Date sortie',
       'Date sortie_treated', 'Tiré de', 'Tiré de_treated', 'Porté à',
       'Porté à_treated', 'Ligne barrée ?', 'CF rayé ?', 'Spécification',
       'Commentaire', 'Cote liée', 'registre'],
      dtype='object')


### 1.1. Création des pages
- rdf:type rico:Instanciation : instance numérisée d'une page de registre
- rdf:type rico:Record => concept de la page de registre, fait le lien avec le registre (concept, RecordSet)

In [668]:
#select distinct values in th colum Image
images = matrices[['registre','Image']].drop_duplicates()
images

Unnamed: 0,registre,Image
0,MAT_1813,FRAD094_3P_000255_01_0586
1,MAT_1813,FRAD094_3P_000255_01_0588
2,MAT_1813,FRAD094_3P_000255_01_0579
4,MAT_1813,FRAD094_3P_000255_01_0571
5,MAT_1813,FRAD094_3P_000255_01_0015
...,...,...
415,MAT_1848,FRAD094_3P_000264_01_0216
418,MAT_1848,FRAD094_3P_000264_01_0217
425,MAT_1848,FRAD094_3P_000264_01_0218
427,MAT_1848,FRAD094_3P_000264_01_0219


In [669]:
# Create a new RDF graph
g = Graph()

baseuri = Namespace("http://data.ign.fr/id/source/")
mlclasse = Namespace("http://data.ign.fr/id/codes/cadastre/mlClasse/")
g.bind('source', baseuri)
g.bind('mlclasse', mlclasse)

# Define the namespaces
cad = Namespace("http://data.ign.fr/def/cadastre#")
add = Namespace("http://rdf.geohistoricaldata.org/def/address#")
rico = Namespace("https://www.ica.org/standards/RiC/ontology#")
fpo = Namespace("https://github.com/johnBradley501/FPO/raw/master/fpo.owl#")
time = Namespace("http://www.w3.org/2006/time#")

g.bind('cad', cad)
g.bind('add', add)
g.bind('rico', rico)
g.bind('fpo', fpo)
g.bind('time',time)

for index, row in images.iterrows():
    img = row['Image']
    MATRICE_ID = matrices_metada[row['registre']]["MATRICE_ID"]
    json = parse_record_id(img)
    subject_uri = URIRef(baseuri + f"{img}")
    g.add((subject_uri, RDF.type, rico.Instanciation))
    g.add((subject_uri, rico.identifier, Literal(img)))
    mlClasseNode = BNode()
    g.add((subject_uri, cad.hasClasse, mlClasseNode))
    g.add((mlClasseNode, cad.hasClasseValue, URIRef(mlclasse + f"MATMainTable")))
    g.add((mlClasseNode, PROV.wasGeneratedBy, URIRef(f"http://data.ign.fr/id/codes/cadastre/activity/0001")))
    folder_end = img.rfind('_')
    g.add((subject_uri,rico.isOrWasDigitalInstanciationOf,URIRef(baseuri + f"{json['departement']}_{COMMUNE}_{img}_page")))

    subject_uri_record = URIRef(baseuri + f"{json['departement']}_{COMMUNE}_{row['Image']}_page")
    g.add((subject_uri_record, RDF.type, rico.Record))
    g.add((subject_uri_record, rico.isOrWasIncludedIn, URIRef(baseuri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}")))

print(g.serialize(format='turtle'))
#write g into a .ttl file
g.serialize(destination=f"{OUTPUT_FOLDER_PATH}/{COMMUNE}_SOURCES_Images.ttl", format='turtle')

@prefix cad: <http://data.ign.fr/def/cadastre#> .
@prefix mlclasse: <http://data.ign.fr/id/codes/cadastre/mlClasse/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rico: <https://www.ica.org/standards/RiC/ontology#> .
@prefix source: <http://data.ign.fr/id/source/> .

source:FRAD094_3P_000255_01_0015 a rico:Instanciation ;
    cad:hasClasse [ cad:hasClasseValue mlclasse:MATMainTable ;
            prov:wasGeneratedBy <http://data.ign.fr/id/codes/cadastre/activity/0001> ] ;
    rico:identifier "FRAD094_3P_000255_01_0015" ;
    rico:isOrWasDigitalInstanciationOf source:94_Gentilly_FRAD094_3P_000255_01_0015_page .

source:FRAD094_3P_000255_01_0032 a rico:Instanciation ;
    cad:hasClasse [ cad:hasClasseValue mlclasse:MATMainTable ;
            prov:wasGeneratedBy <http://data.ign.fr/id/codes/cadastre/activity/0001> ] ;
    rico:identifier "FRAD094_3P_000255_01_0032" ;
    rico:isOrWasDigitalInstanciationOf source:94_Gentilly_FRAD094_3P_000255_01_0032_page .

source:FRAD094_3P_00025

<Graph identifier=N45c0becbf3ae412aab5ee23536b08f3d (<class 'rdflib.graph.Graph'>)>

### 1.2 Folios
- Pré-traitement des colonnes *Num_Folio*, *Tiré de* et *Porté à*
- Création des objets "Folios" à partir de la colonne *Num_Folio* et des colonnes *Tiré de* et *Porté à* (manquants)
- Création des objets spéciaux mentionnés dans les colonnes destinées aux folios (reste, construction nouvelle, ruine etc)

#### Pré-traitement

In [670]:
from functions import parse_record_id, cleanNumFolio

#Clean columns Num_Folio, Tire_de, Porte_a
clean_folio, clean_tire_de, clean_porte_a = [], [], []
symbols = [",", "→", "."," ",";","&"]

for index, row in matrices.iterrows():
    clean_folio.append(cleanNumFolio(row["Num_Folio"],symbols))
    clean_tire_de.append(cleanNumFolio(row["Tiré de_treated"],symbols))
    clean_porte_a.append(cleanNumFolio(row["Porté à_treated"],symbols))

# Create new columns containing the cleaned values
matrices['Num_Folio_clean'] = clean_folio
matrices['Tire_de_clean'] = clean_tire_de
matrices['Porte_a_clean'] = clean_porte_a

['236↑4↓', '361', '258', '166', '235↑2↓', '138', '357', '440']
['248', 'additionconstructionsv']
['249', '249']
['288', '', '433']
['443', '443']
['450', '443', '453', '968']
['449', '968']
['968', '442']
['442', '449', '439', '450', '']
['836', '403']


In [671]:
print(clean_porte_a)

['EMPTY', '82', 'EMPTY', 'EMPTY', 'EMPTY', '156', '156', '192bis', '280bis', 'EMPTY', 'EMPTY', '34ter', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', '124bis', 'EMPTY', 'EMPTY', '192bis', '247ter', 'voiepubliquesv', 'EMPTY', 'EMPTY', '46bis', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', '211bis', '107↑2↓', 'EMPTY', 'doubleemploisv', 'EMPTY', 'EMPTY', '269↑2↓', '138', '236ter', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', '361', 'EMPTY', 'EMPTY', 'EMPTY', '236↑4↓;361;258;166;235↑2↓;138;357;440', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', '249', 'EMPTY', 'EMPTY', '248', 'EMPTY', '249;249', 'EMPTY', 'EMPTY', '249↑16↓', '249↑16↓', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'demolitionsv', 'EMPTY', 'EMPTY', 'demolitionsv', '288;433', 'EMPTY', 'EMPTY', 'EMPTY', 'augmentationsv', 'EMPTY', 'EMPTY', 'EMPTY

In [672]:
#Using matrices, create new df named folios containing all lines of matrices where register = MAT_1836 and MAT_1848. For register=MAT_1813, remove the lines where type_CF = "Bâti"
folios = matrices[(matrices['registre'] == 'MAT_1836') | (matrices['registre'] == 'MAT_1848') | ((matrices['registre'] == 'MAT_1813') & (matrices['Type_CF'] != 'Bâti'))]
folios.reset_index(drop=True)

Unnamed: 0,ID,Type_CF,Num_Folio,Alt_Num_CF,Groupe CF,Ordre de lecture,Voie,Num_Voie,Image,Section_clean,...,Porté à_treated,Ligne barrée ?,CF rayé ?,Spécification,Commentaire,Cote liée,registre,Num_Folio_clean,Tire_de_clean,Porte_a_clean
0,6,Non bâti,11,,1.0,1.0,,,FRAD094_3P_000255_01_0015,B,...,156,Non,Oui,,,,MAT_1813,11,EMPTY,156
1,7,Non bâti,11,,1.0,2.0,,,FRAD094_3P_000255_01_0015,B,...,156,Non,Oui,,,,MAT_1813,11,EMPTY,156
2,8,Non bâti,11,,1.0,3.0,,,FRAD094_3P_000255_01_0015,B,...,192bis,Oui,Oui,,,,MAT_1813,11,EMPTY,192bis
3,9,Non bâti,23bis,,1.0,1.0,,,FRAD094_3P_000255_01_0032,B,...,280bis,Oui,Oui,,,,MAT_1813,23bis,constructionnouvellesv,280bis
4,10,Non bâti,34ter,,1.0,1.0,,,FRAD094_3P_000255_01_0044,B,...,,Non,Oui,,,,MAT_1813,34ter,46ter,EMPTY
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
422,209,Non Bâti,1219,,1.0,1.0,,,FRAD094_3P_000264_01_0219,D,...,,Non,Non,,,,MAT_1848,1219,EMPTY,EMPTY
423,210,Non Bâti,1219,,1.0,2.0,,,FRAD094_3P_000264_01_0219,D,...,,Non,Non,,,,MAT_1848,1219,EMPTY,EMPTY
424,211,Non Bâti,1220,,1.0,1.0,,,FRAD094_3P_000264_01_0220,D,...,,Non,Non,,,,MAT_1848,1220,EMPTY,EMPTY
425,212,Non Bâti,1220,,1.0,2.0,,,FRAD094_3P_000264_01_0220,D,...,,Non,Non,,,,MAT_1848,1220,EMPTY,EMPTY


In [673]:
#Create a new dataframe from sources with columns "Num_Folio" and "Image" containing only distinct rows
folios_pages = folios[["Num_Folio_clean","Alt_Num_CF","Image","registre"]].drop_duplicates(subset=["Num_Folio_clean","Alt_Num_CF","Image","registre"]).reset_index(drop=True)
display(folios_pages)

Unnamed: 0,Num_Folio_clean,Alt_Num_CF,Image,registre
0,11,,FRAD094_3P_000255_01_0015,MAT_1813
1,23bis,,FRAD094_3P_000255_01_0032,MAT_1813
2,34ter,,FRAD094_3P_000255_01_0044,MAT_1813
3,46bis,,FRAD094_3P_000255_01_0057,MAT_1813
4,107bis,,FRAD094_3P_000255_01_0125,MAT_1813
...,...,...,...,...
146,1216,,FRAD094_3P_000264_01_0216,MAT_1848
147,1217,,FRAD094_3P_000264_01_0217,MAT_1848
148,1218,,FRAD094_3P_000264_01_0218,MAT_1848
149,1219,,FRAD094_3P_000264_01_0219,MAT_1848


In [674]:
from rdflib import Graph, Literal, Namespace, RDF, URIRef, BNode
from rdflib.namespace import XSD, DCTERMS
import uuid

# Create a new RDF graph
g = Graph()

baseuri = Namespace("http://data.ign.fr/id/source/")
srctypeuri = Namespace("http://data.ign.fr/id/codes/cadastre/sourceType/")
g.bind('source', baseuri)
g.bind('srctype', srctypeuri)
# Define the namespaces
cad = Namespace("http://data.ign.fr/def/cadastre#")
add = Namespace("http://rdf.geohistoricaldata.org/def/address#")
rico = Namespace("https://www.ica.org/standards/RiC/ontology#")
fpo = Namespace("https://github.com/johnBradley501/FPO/raw/master/fpo.owl#")
time = Namespace("http://www.w3.org/2006/time#")

g.bind('cad', cad)
g.bind('add', add)
g.bind('rico', rico)
g.bind('fpo', fpo)
g.bind('time',time)

# Iterate over each row in the DataFrame
for index, row in folios_pages.iterrows():
    json = parse_record_id(row['Image'])
    MATRICE_ID = matrices_metada[row['registre']]["MATRICE_ID"]

    subject_uri = URIRef(baseuri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}_{str(row['Num_Folio_clean'])}")
    g.add((subject_uri, RDF.type, rico.RecordPart))
    g.add((subject_uri, cad.hasSourceType, URIRef(srctypeuri.Folio)))
    g.add((subject_uri, cad.hasNumFolio, Literal(row["Num_Folio_clean"],datatype=XSD.string)))
    g.add((subject_uri, rico.isOrWasIncludedIn,URIRef(baseuri + f"{json['departement']}_{COMMUNE}_{row['Image']}_page")))

    if not pd.isna(row['Alt_Num_CF']):
        g.add((subject_uri, cad.hasDeprecatedNumFolio, Literal(row["Alt_Num_CF"],datatype=XSD.string)))

print(g.serialize(format='turtle'))

@prefix cad: <http://data.ign.fr/def/cadastre#> .
@prefix rico: <https://www.ica.org/standards/RiC/ontology#> .
@prefix source: <http://data.ign.fr/id/source/> .
@prefix srctype: <http://data.ign.fr/id/codes/cadastre/sourceType/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

source:94_Gentilly_MAT_B_NB_1813_107bis a rico:RecordPart ;
    cad:hasNumFolio "107bis"^^xsd:string ;
    cad:hasSourceType srctype:Folio ;
    rico:isOrWasIncludedIn source:94_Gentilly_FRAD094_3P_000255_01_0125_page .

source:94_Gentilly_MAT_B_NB_1813_11 a rico:RecordPart ;
    cad:hasNumFolio "11"^^xsd:string ;
    cad:hasSourceType srctype:Folio ;
    rico:isOrWasIncludedIn source:94_Gentilly_FRAD094_3P_000255_01_0015_page .

source:94_Gentilly_MAT_B_NB_1813_122bis a rico:RecordPart ;
    cad:hasNumFolio "122bis"^^xsd:string ;
    cad:hasSourceType srctype:Folio ;
    rico:isOrWasIncludedIn source:94_Gentilly_FRAD094_3P_000255_01_0146_page .

source:94_Gentilly_MAT_B_NB_1813_124bis a rico:RecordPart ;
 

#### Création des folios issus de "Tiré de" et "Porté à" qui ne sont pas dans la colonne 'Num_Folios'

### 1.3 Dates
Une date dans le cadastre correspond généralement une année.

Une cellule contient une date valide si :
* uniquement des chiffres
* 4 chiffres
* valeur située entre l'initialisation et la clôture de la matrice

### 1.4 Adresses
* Création des objets adresses à partir de la colonne *Lieu-dit* (dédoublonnage simple et création des objets)
* URIS : uuid

In [675]:
# Select the column as a new DataFrame
addresses = matrices[['Lieu-dit_clean']].copy()

# Create pre-treated columns
matrices['Lieu-dit_treated'] = matrices['Lieu-dit_clean']
matrices['Lieu-dit_treated'] = matrices['Lieu-dit_treated'].str.lower()
matrices['Lieu-dit_treated'] = matrices['Lieu-dit_treated'].str.replace(r'[^\w\s~]', '', regex=True)
matrices['Lieu-dit_uuid'] = matrices['Lieu-dit_treated'].map(dict(zip(matrices['Lieu-dit_treated'].unique(), [uuid.uuid4() for _ in range(len(matrices['Lieu-dit_treated'].unique()))])))

In [676]:
addresses = matrices[['registre','Lieu-dit_treated']].drop_duplicates().values.tolist()
addresses

[['MAT_1813', nan],
 ['MAT_1813', 'les girantiers'],
 ['MAT_1813', 'rue thiers'],
 ['MAT_1813', 'les girantins'],
 ['MAT_1813', 'rue thiers n10'],
 ['MAT_1813', 'route de paris'],
 ['MAT_1813', 'bre fontainebleau'],
 ['MAT_1813', 'lauteis'],
 ['MAT_1813', 'girantins'],
 ['MAT_1836', 'bd extérieur'],
 ['MAT_1836', 'b ditalie 115'],
 ['MAT_1836', 'boulevard ditalie 11'],
 ['MAT_1836', 'boulevard ditalie 11bis'],
 ['MAT_1836', 'les girantins'],
 ['MAT_1836', 'boulevard ditalie 12'],
 ['MAT_1836', 'boulevard ditalie 13'],
 ['MAT_1836', 'boulevard ditalie 7'],
 ['MAT_1836', 'boulevard ditalie 5'],
 ['MAT_1836', 'boulevard extérieur'],
 ['MAT_1836', 'butte aux cailles'],
 ['MAT_1836', 'boul ditalie'],
 ['MAT_1836', 'les girantins ptie'],
 ['MAT_1836', 'barrière fontainebleau'],
 ['MAT_1836', 'barrière fontainebleau n10'],
 ['MAT_1836', 'barrière fontainebleau 10'],
 ['MAT_1836', 'rue tiers'],
 ['MAT_1836', 'rue tiers 1'],
 ['MAT_1836', 'rue thiers 3'],
 ['MAT_1836', 'rue tiers 5'],
 ['MAT_18

In [677]:
# Create a list with all unique values of this new column
unique_addresses = matrices[['registre','Lieu-dit_uuid', 'Lieu-dit_treated']].drop_duplicates().values.tolist()
unique_addresses

[['MAT_1813', UUID('ac670867-e0ef-4e80-bc04-667c329ffb05'), nan],
 ['MAT_1813', UUID('a25e0e7d-80b9-4dab-8688-e0023f046458'), 'les girantiers'],
 ['MAT_1813', UUID('605e924b-10ce-4397-bd7c-8507546f7ffe'), 'rue thiers'],
 ['MAT_1813', UUID('dd7936ed-46b1-438f-a1eb-6aecc75b3ec3'), 'les girantins'],
 ['MAT_1813', UUID('a8466c07-2ffc-4a26-96f3-2659dbbf4bae'), 'rue thiers n10'],
 ['MAT_1813', UUID('6228c5f1-b514-4580-bffb-93f566872dfc'), 'route de paris'],
 ['MAT_1813',
  UUID('9560d00a-7b72-4bdd-8904-c8a9f2050ed8'),
  'bre fontainebleau'],
 ['MAT_1813', UUID('f8d85437-e688-4a2f-91da-76d49ee20173'), 'lauteis'],
 ['MAT_1813', UUID('6fe9c3e9-82c4-4156-8c25-5d4a7f97d020'), 'girantins'],
 ['MAT_1836', UUID('77c2c6bf-a11c-4b92-948b-b702f4d0cf03'), 'bd extérieur'],
 ['MAT_1836', UUID('e816ef8c-6a40-45ec-abdd-203dd67d0da4'), 'b ditalie 115'],
 ['MAT_1836',
  UUID('8690ee0c-ec50-46a1-8f2c-db15862687c0'),
  'boulevard ditalie 11'],
 ['MAT_1836',
  UUID('caeaf096-d327-4c93-8ba0-dfcf7148bfcd'),
  'bou

A ce stade, il faudrait classer les landmarks en fonction de leur type (lieu-dit, rue, etc.) et les ajouter à la base de données. Pour cela, il faudrait créer une nouvelle colonne dans le dataframe contenant les landmarks, avant de les ajouter au graph.


In [682]:
# Create a new RDF graph
g = Graph()

baseuri = Namespace("http://data.ign.fr/id/landmark/")
srcuri = Namespace("http://data.ign.fr/id/source/")
cad_ltype = Namespace("http://data.ign.fr/def/cadastre/landmarkType/")
g.bind('landmark', baseuri)
g.bind('source', srcuri)
g.bind('cad_ltype', cad_ltype)

# Define the namespaces
cad = Namespace("http://data.ign.fr/def/cadastre#")
add = Namespace("http://rdf.geohistoricaldata.org/def/address#")
atype = Namespace("http://rdf.geohistoricaldata.org/def/address/attributeType/")
ltype = Namespace("http://rdf.geohistoricaldata.org/def/address/landmarkType/")
lrtype = Namespace("http://rdf.geohistoricaldata.org/def/address/landmarkRelationType/")
rico = Namespace("https://www.ica.org/standards/RiC/ontology#")
fpo = Namespace("https://github.com/johnBradley501/FPO/raw/master/fpo.owl#")
time = Namespace("http://www.w3.org/2006/time#")
g.bind('cad', cad)
g.bind('add', add)
g.bind('rico', rico)
g.bind('fpo', fpo)
g.bind('time',time)
g.bind("ltype", ltype)
g.bind("lrtype", lrtype)
g.bind("atype", atype)

for elem in unique_addresses:
    if elem[2] != 'nan':
        subject_uri = URIRef(baseuri + str(elem[1]))
        g.add((subject_uri, RDF.type, add.Landmark))
        g.add((subject_uri, add.isLandmarkType, ltype.Undefined))
        g.add((subject_uri, RDFS.label, Literal(elem[2], datatype=XSD.string)))

        lr = BNode()
        g.add((URIRef(lr.n3()), RDF.type, add.LandmarkRelation))
        g.add((URIRef(lr.n3()), add.isLandmarkRelationType, lrtype.Undefined))
        g.add((URIRef(lr.n3()), add.locatum, subject_uri))
        g.add((URIRef(lr.n3()), add.relatum, URIRef('http://data.ign.fr/id/landmark/bea20dea-cce0-4f08-8b5a-517423483e54')))

print(g.serialize(format='turtle'))

@prefix add: <http://rdf.geohistoricaldata.org/def/address#> .
@prefix landmark: <http://data.ign.fr/id/landmark/> .
@prefix lrtype: <http://rdf.geohistoricaldata.org/def/address/landmarkRelationType/> .
@prefix ltype: <http://rdf.geohistoricaldata.org/def/address/landmarkType/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<_:N00389e266829458fbd3a41d941a98f95> a add:LandmarkRelation ;
    add:isLandmarkRelationType lrtype:Undefined ;
    add:locatum landmark:f44635f9-1cfe-49b7-b8f1-fa1dfa673705 ;
    add:relatum landmark:bea20dea-cce0-4f08-8b5a-517423483e54 .

<_:N006e5d189aa34c8ba3c1fccb10f4ed9e> a add:LandmarkRelation ;
    add:isLandmarkRelationType lrtype:Undefined ;
    add:locatum landmark:b3f9f684-b3a0-430d-8bf3-e489ed9b1ce4 ;
    add:relatum landmark:bea20dea-cce0-4f08-8b5a-517423483e54 .

<_:N01056955b8284ebc85794d2be3a3fee6> a add:LandmarkRelation ;
    add:isLandmarkRelationType lrtype:Undefined ;
    add:locat

### 1.5 Propriétaires

### 1.6 Nature
* Associer chaque transcription avec un ou plusieurs types de skos = fait à la main pour l'instant

### 1.X Création des états de parcelles

In [None]:
# Create a new RDF graph
g = Graph()

baseuri = Namespace("http://data.ign.fr/id/landmark/")
srcuri = Namespace("http://data.ign.fr/id/source/")
owneruri = Namespace("http://data.ign.fr/id/owner/")
foliouri = Namespace("http://data.ign.fr/id/folio/")
cad_ltype = Namespace("http://data.ign.fr/def/cadastre/landmarkType/")
g.bind('landmark', baseuri)
g.bind('owner', owneruri)
g.bind('source', srcuri)
g.bind('cad_ltype', cad_ltype)
g.bind('folio', foliouri)

# Define the namespaces
cad = Namespace("http://data.ign.fr/def/cadastre#")
add = Namespace("http://rdf.geohistoricaldata.org/def/address#")
rico = Namespace("https://www.ica.org/standards/RiC/ontology#")
fpo = Namespace("https://github.com/johnBradley501/FPO/raw/master/fpo.owl#")
time = Namespace("http://www.w3.org/2006/time#")

g.bind('cad', cad)
g.bind('add', add)
g.bind('rico', rico)
g.bind('fpo', fpo)
g.bind('time',time)

# Iterate over each row in the DataFrame
for index, row in matrices.iterrows():

    json = parse_record_id(row["Image"])
    lineuuid_ = MATRICE_ID + '_' + str(row['ID']) #str(uuid.uuid4())

    subject_uri = URIRef(baseuri + f"{json['departement']}_{COMMUNE}_{PLAN}_{row['Section_clean']}_{row['Parcelle_clean']}_{lineuuid_}")
    g.add((subject_uri, RDF.type, add.Landmark))
    g.add((subject_uri, add.isLandmarkType, cad_ltype.Plot))
    g.add((subject_uri, DCTERMS.identifier, Literal(row['Section_clean'] + '-' + row['Parcelle_clean'], datatype=XSD.string)))
    #Folios
    g.add((subject_uri, cad.hasNumFolio, URIRef(foliouri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}_{str(row['Num_Folio_clean'])}")))
    tire_de = row['Tire_de_clean'] = row['Tire_de_clean'].split(';')
    for f in tire_de:
        if any(char.isdigit() for char in str(f)):
            g.add((subject_uri, cad.takenFrom, URIRef(foliouri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}_{str(f)}")))
        elif f != 'nan':
            g.add((subject_uri, cad.takenFrom, Literal(str(f))))
    porte_a = row['Porte_a_clean'] = row['Porte_a_clean'].split(';')
    for f in porte_a:
        if any(char.isdigit() for char in str(f)):
            g.add((subject_uri, cad.passedTo, URIRef(foliouri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}_{str(f)}")))
        elif f != 'nan':
            g.add((subject_uri, cad.passedTo, Literal(str(f))))
    #Source
    rowSource = BNode()
    g.add((subject_uri, fpo.sourcedFrom, rowSource))
    g.add((rowSource, RDF.type, fpo.SourceCitation))
    g.add((rowSource, fpo.fromSource, URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}")))
    g.add((rowSource, rico.isComponentOfTransitive, URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{row['Image']}")))
    g.add((rowSource, cad.hasExtractionID, Literal(lineuuid_)))
    g.add((rowSource, PROV.wasGeneratedBy, URIRef(f"http://data.ign.fr/id/codes/cadastre/activity/0002")))
    g.add((URIRef(f"http://data.ign.fr/id/codes/cadastre/activity/0002"), PROV.used, URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{row['Image']}")))
    g.add((rowSource, rico.isOrWasDigitalInstanciation,URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{row['Image']}_{lineuuid_}_area")))
    g.add((rowSource, cad.lineOrderInArea, Literal(row['Ordre de lecture'], datatype=XSD.integer)))

    #Create recordpart
    recordparturi = URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{row['Image']}_{lineuuid_}_area")
    g.add((recordparturi, RDF.type, rico.RecordPart))
    g.add((recordparturi, rico.isOrWasIncludedIn, URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{row['Image']}_page")))
    
    #Address
    if row['Lieu-dit_treated'] != 'nan':
        g.add((subject_uri, add.targets, subject_uri))
        firstStep = BNode()
        g.add((subject_uri, add.firstStep, firstStep))
        g.add((firstStep, RDF.type, add.AddressSegment))
        g.add((firstStep, add.isLandmarkRelationType, lrtype.Undefined))
        g.add((firstStep, add.locatum, subject_uri))
        g.add((firstStep, add.relatum, URIRef(baseuri + str(row['Lieu-dit_uuid']))))

    #Owner

    #Nature
    
    #Time
    if not pd.isnull(row['Date entrée']) or not pd.isnull(['Date sortie']):
        hastime = BNode()
        g.add((subject_uri, add.hasTime, hastime))
        g.add((hastime, RDF.type, add.TimeInterval))
        

        if not pd.isna(row['Date entrée']) and not pd.isnull(row['Date entrée']):
            hasbeginning = BNode()
            g.add((hastime, add.hasBeginning, hasbeginning))
            g.add((hasbeginning,RDF.type, add.TimeInstant))
            g.add((hasbeginning, add.timeCalendar, time.Gregorian))
            g.add((hasbeginning, add.timePrecision, time.Year))
            g.add((hasbeginning, add.timeStamp, Literal(row['Date entrée'], datatype=XSD.date)))
        #else:
            #g.add((hastime, add.hasBeginning, Literal(MATRICE_START, datatype=XSD.date)))#date d'ouverture de la matrice
        if not pd.isnull(row['Date sortie']) and row['Date sortie'] != 'nan':
            hasend = BNode()
            g.add((hastime, add.hasBeginning, hasend))
            g.add((hasend,RDF.type, add.TimeInstant))
            g.add((hasend, add.timeCalendar, time.Gregorian))
            g.add((hasend, add.timePrecision, time.Year))
            g.add((hasend, add.timeStamp, Literal(row['Date entrée'], datatype=XSD.date)))
print(g.serialize(format='turtle'))

NameError: name 'PLAN' is not defined