# Création des resources RDF des mentions de parcelles

In [118]:
import json 
import pandas as pd
import numpy as np
import uuid
import re
from rdflib import Graph, Literal, Namespace, RDF, URIRef, BNode
from rdflib.namespace import XSD, DCTERMS, PROV, SKOS, RDFS
from functions import *
from namespaces import *

## 1. Lecture des données

### Articles de classement

In [119]:
COMMUNE = 'Gentilly'
matrices_metada = {
    "MAT_1813": {
        "PLAN": "1811",
        "MATRICE_ID": "MAT_B_NB_1813",
        "MATRICE_START": "1813",
        "MATRICE_END": "1835"
    },
    "MAT_1836": {
        "PLAN": "1811",
        "MATRICE_ID": "MAT_NB_1836",
        "MATRICE_START": "1836",
        "MATRICE_END": "1847"
    },
    "MAT_1848": {
        "PLAN": "1845",
        "MATRICE_ID": "MAT_NB_1848",
        "MATRICE_START": "1848",
        "MATRICE_END": "1860"
    }
}


In [120]:
ROOT = "/workspaces/ontologie-peuplement/"  #/home/STual/KG-cadastre/

PATH = ROOT + "data/gentilly/MAT_1813.csv"
mat1813 = pd.read_csv(PATH,header=0)
PATH = ROOT + "data/gentilly/MAT_1836.csv"
mat1836 = pd.read_csv(PATH,header=0)
PATH = ROOT + "data/gentilly/MAT_1848.csv"
mat1848 = pd.read_csv(PATH,header=0)

OUTPUT_FOLDER_PATH = ROOT + "data/rdf"

In [121]:
mat1813['registre'] = 'MAT_1813'
mat1836['registre'] = 'MAT_1836'
mat1848['registre'] = 'MAT_1848'

matrices = pd.concat([mat1813, mat1836, mat1848])
matrices = matrices.reset_index(drop=True)
print(matrices.columns)

Index(['ID', 'UUID', 'Type_CF', 'Num_Folio', 'Alt_Num_CF', 'Groupe_CF',
       'Ordre_de_lecture', 'Voie', 'Num_Voie', 'Image', 'Section_clean',
       'Parcelle_clean', 'Lieu-dit_transcript', 'Lieu-dit_clean',
       'Lieu-dit_treated', 'Lieu-dit_type', 'Propriétaires_transcript',
       'Nature_transcript', 'Nature_clean', 'Nature_treated', 'Date entrée',
       'Date entrée_treated', 'Date sortie', 'Date sortie_treated', 'Tiré de',
       'Tiré de_treated', 'Porté à', 'Porté à_treated', 'Ligne barrée ?',
       'CF rayé ?', 'Spécification', 'Commentaire', 'Cote liée', 'registre'],
      dtype='object')


In [122]:
len(matrices)

442

### Propriétaires

In [123]:
#open three json files
with open(ROOT + "data/gentilly/output_structured_owners.json") as f:
    data_owners = json.load(f)

#read as df
owners_df = pd.DataFrame(data_owners)

In [124]:
oregistre = []
otype_folio = []
ofolio = []
o_groupe_cf = []
o_transcription = []

for row in owners_df.iterrows():
    cell_info = row[1]['cell']
    oregistre.append(cell_info['registre'])
    otype_folio.append(cell_info['type_folio'])
    ofolio.append(cell_info['folio'])
    o_groupe_cf.append(cell_info['groupe_cf'])
    o_transcription.append(cell_info['transcription'])

owners_df['registre'] = oregistre
owners_df['type_folio'] = otype_folio
owners_df['folio'] = ofolio
owners_df['groupe_cf'] = o_groupe_cf
owners_df['transcription'] = o_transcription


In [125]:
owners_df

Unnamed: 0,cell,owners,changes,registre,type_folio,folio,groupe_cf,transcription
0,"{'registre': 'MAT_1813', 'type_folio': 'Bâti',...","[{'owner-id': 1, 'owner-lastname': 'Legendre',...",[],MAT_1813,Bâti,108,1,Legendre H↑re↓ de →Fontainebleau
1,"{'registre': 'MAT_1813', 'type_folio': 'Bâti',...","[{'owner-id': 1, 'owner-lastname': 'Louves', '...",[],MAT_1813,Bâti,114,1,Louves
2,"{'registre': 'MAT_1813', 'type_folio': 'Bâti',...","[{'owner-id': 1, 'owner-lastname': 'Hardon', '...",[],MAT_1813,Bâti,82,1,"Hardon, Bourgeois ~~nourisseur~~→à Paris"
3,"{'registre': 'MAT_1813', 'type_folio': 'Bâti',...","[{'owner-id': 1, 'owner-lastname': 'Faipot', '...",[],MAT_1813,Bâti,64,1,Faipot François→m↑d↓ de vin b↑re↓ de fontaineb...
4,"{'registre': 'MAT_1813', 'type_folio': 'Non Bâ...","[{'owner-id': 1, 'owner-lastname': 'Bacot', 'o...",[],MAT_1813,Non Bâti,11,1,Bacot→(david) couverturier→à Paris
...,...,...,...,...,...,...,...,...
185,"{'registre': 'MAT_1848', 'type_folio': 'Non Bâ...","[{'owner-id': 1, 'owner-lastname': 'Delon', 'i...","[{'change-order': 1, 'owner-before': 1, 'owner...",MAT_1848,Non Bâti,1218,1,~~Delon~~ 1858 Sevestre Clément boulanger Bar↑...
186,"{'registre': 'MAT_1848', 'type_folio': 'Non Bâ...","[{'owner-id': 1, 'owner-lastname': 'Delon', 'i...",[],MAT_1848,Non Bâti,1219,1,Delon
187,"{'registre': 'MAT_1848', 'type_folio': 'Non Bâ...","[{'owner-id': 1, 'owner-lastname': 'Dufresne',...",[],MAT_1848,Non Bâti,1220,1,Dufresne
188,"{'registre': 'MAT_1848', 'type_folio': 'Non Bâ...","[{'owner-id': 1, 'owner-lastname': 'Nicolle', ...",[],MAT_1848,Non Bâti,1221,1,Nicolle J↑n↓ f↑ois↓ Paul


### 1.1. Création des pages
- rdf:type rico:Instanciation : instance numérisée d'une page de registre
- rdf:type rico:Record => concept de la page de registre, fait le lien avec le registre (concept, RecordSet)

In [126]:
#select distinct values in th colum Image
images = matrices[['registre','Image']].drop_duplicates()
images

Unnamed: 0,registre,Image
0,MAT_1813,FRAD094_3P_000255_01_0586
1,MAT_1813,FRAD094_3P_000255_01_0588
2,MAT_1813,FRAD094_3P_000255_01_0579
4,MAT_1813,FRAD094_3P_000255_01_0571
5,MAT_1813,FRAD094_3P_000255_01_0015
...,...,...
425,MAT_1848,FRAD094_3P_000264_01_0218
427,MAT_1848,FRAD094_3P_000264_01_0219
429,MAT_1848,FRAD094_3P_000264_01_0220
432,MAT_1848,FRAD094_3P_000264_01_0221


In [127]:
# Create a new RDF graph
g = Graph()

g.bind('cad', cad)
g.bind('add', add)
g.bind('source', srcuri)
g.bind('mlclasse', mlclasse)
g.bind('activity', cad_act)

g.bind('rico', rico)
g.bind('fpo', fpo)
g.bind('time',time)

for index, row in images.iterrows():
    img = row['Image']
    MATRICE_ID = matrices_metada[row['registre']]["MATRICE_ID"]
    json = parse_record_id(img)
    subject_uri = URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{img}")
    g.add((subject_uri, RDF.type, rico.Instanciation))
    g.add((subject_uri, rico.identifier, Literal(img)))
    mlClasseNode = BNode()
    g.add((subject_uri, cad.hasClasse, mlClasseNode))
    g.add((mlClasseNode, cad.hasClasseValue, URIRef(mlclasse + f"MATMainTable")))
    g.add((mlClasseNode, PROV.wasGeneratedBy, URIRef(cad_act + f"0001")))
    folder_end = img.rfind('_')
    g.add((subject_uri,rico.isOrWasDigitalInstanciationOf,URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{img}_page")))

    subject_uri_record = URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{row['Image']}_page")
    g.add((subject_uri_record, RDF.type, rico.Record))
    g.add((subject_uri_record, rico.isOrWasIncludedIn, URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}")))

print(g.serialize(format='turtle'))
#write g into a .ttl file
g.serialize(destination=f"{OUTPUT_FOLDER_PATH}/{COMMUNE}_sources_pages.ttl", format='turtle')

@prefix activity: <http://data.ign.fr/id/codes/cadastre/activity/> .
@prefix cad: <http://data.ign.fr/def/cadastre#> .
@prefix mlclasse: <http://data.ign.fr/id/codes/cadastre/mlClasse/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rico: <https://www.ica.org/standards/RiC/ontology#> .
@prefix source: <http://data.ign.fr/id/source/> .

source:94_Gentilly_FRAD094_3P_000255_01_0015 a rico:Instanciation ;
    cad:hasClasse [ cad:hasClasseValue mlclasse:MATMainTable ;
            prov:wasGeneratedBy activity:0001 ] ;
    rico:identifier "FRAD094_3P_000255_01_0015" ;
    rico:isOrWasDigitalInstanciationOf source:94_Gentilly_FRAD094_3P_000255_01_0015_page .

source:94_Gentilly_FRAD094_3P_000255_01_0032 a rico:Instanciation ;
    cad:hasClasse [ cad:hasClasseValue mlclasse:MATMainTable ;
            prov:wasGeneratedBy activity:0001 ] ;
    rico:identifier "FRAD094_3P_000255_01_0032" ;
    rico:isOrWasDigitalInstanciationOf source:94_Gentilly_FRAD094_3P_000255_01_0032_page .

source:9

<Graph identifier=Nb27200f193f8491cb88e7703be053107 (<class 'rdflib.graph.Graph'>)>

### 1.2 Folios
- Pré-traitement des colonnes *Num_Folio*, *Tiré de* et *Porté à*
- Création des objets "Folios" à partir de la colonne *Num_Folio* et des colonnes *Tiré de* et *Porté à* (manquants)
- Création des objets spéciaux mentionnés dans les colonnes destinées aux folios (reste, construction nouvelle, ruine etc)

#### Pré-traitement

In [128]:
from functions import parse_record_id, cleanNumFolio

#Clean columns Num_Folio, Tire_de, Porte_a
clean_folio, clean_tire_de, clean_porte_a = [], [], []
symbols = [",", "→", "."," ",";","&"]

for index, row in matrices.iterrows():
    clean_folio.append(cleanNumFolio(row["Num_Folio"],symbols))
    clean_tire_de.append(cleanNumFolio(row["Tiré de_treated"],symbols))
    clean_porte_a.append(cleanNumFolio(row["Porté à_treated"],symbols))

# Create new columns containing the cleaned values
matrices['Num_Folio_clean'] = clean_folio
matrices['Tire_de_clean'] = clean_tire_de
matrices['Porte_a_clean'] = clean_porte_a

matrices['Num_Folio_clean'] = matrices['Num_Folio_clean'].astype(str)

['236↑4↓', '361', '258', '166', '235↑2↓', '138', '357', '440']
['248', 'additionconstructionsv']
['249', '249']
['288', '', '433']
['443', '443']
['450', '443', '453', '968']
['449', '968']
['968', '442']
['442', '449', '439', '450', '']
['836', '403']
['837', '403']
['403', '513↑25↓']
['837', 'additionconstructionsv']
['844', 'additionconstructionsv']
['844', 'additionconstructionsv']
['846↑2↓', 'additionconstructionsv']
['847↑2↓', 'additionconstructionsv']
['847↑2↓', 'additionconstructionsv']


In [129]:
print(clean_porte_a)

['EMPTY', '82', 'EMPTY', 'EMPTY', 'EMPTY', '156', '156', '192bis', '280bis', 'EMPTY', 'EMPTY', '34ter', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', '124bis', 'EMPTY', 'EMPTY', '192bis', '247ter', 'voiepubliquesv', 'EMPTY', 'EMPTY', '46bis', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', '211bis', '107↑2↓', 'EMPTY', 'doubleemploisv', 'EMPTY', 'EMPTY', '269↑2↓', '138', '236ter', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', '361', 'EMPTY', 'EMPTY', 'EMPTY', '236↑4↓;361;258;166;235↑2↓;138;357;440', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', '249', 'EMPTY', 'EMPTY', '248', 'EMPTY', '249;249', 'EMPTY', 'EMPTY', '249↑16↓', '249↑16↓', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'demolitionsv', 'EMPTY', 'EMPTY', 'demolitionsv', '288;433', 'EMPTY', 'EMPTY', 'EMPTY', 'augmentationsv', 'EMPTY', 'EMPTY', 'EMPTY

In [130]:
#Using matrices, create new df named folios containing all lines of matrices where register = MAT_1836 and MAT_1848. For register=MAT_1813, remove the lines where type_CF = "Bâti"
folios = matrices[(matrices['registre'] == 'MAT_1836') | (matrices['registre'] == 'MAT_1848') | ((matrices['registre'] == 'MAT_1813') & (matrices['Type_CF'] != 'Bâti'))]
folios.reset_index(drop=True)

Unnamed: 0,ID,UUID,Type_CF,Num_Folio,Alt_Num_CF,Groupe_CF,Ordre_de_lecture,Voie,Num_Voie,Image,...,Porté à_treated,Ligne barrée ?,CF rayé ?,Spécification,Commentaire,Cote liée,registre,Num_Folio_clean,Tire_de_clean,Porte_a_clean
0,6,877701e8-ab0f-4dc6-90d9-731849696678,Non Bâti,11,,1,1,,,FRAD094_3P_000255_01_0015,...,156,Non,Oui,,,,MAT_1813,11,EMPTY,156
1,7,018a368c-1a98-4cff-b374-91bffb682937,Non Bâti,11,,1,2,,,FRAD094_3P_000255_01_0015,...,156,Non,Oui,,,,MAT_1813,11,EMPTY,156
2,8,1abd1a94-ad72-4d45-b007-f87f6faf8577,Non Bâti,11,,1,3,,,FRAD094_3P_000255_01_0015,...,192bis,Oui,Oui,,,,MAT_1813,11,EMPTY,192bis
3,9,4bf1bd10-fea2-4945-b20d-f3440c571df1,Non Bâti,23bis,,1,1,,,FRAD094_3P_000255_01_0032,...,280bis,Oui,Oui,,,,MAT_1813,23bis,constructionnouvellesv,280bis
4,10,4b6e66ca-1318-4822-a729-7bd2a3607a44,Non Bâti,34ter,,1,1,,,FRAD094_3P_000255_01_0044,...,,Non,Oui,,,,MAT_1813,34ter,46ter,EMPTY
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
432,219,e0ac6603-2873-4ce4-8eff-855207af9abe,Non Bâti,1221,,1,6,,,FRAD094_3P_000264_01_0221,...,,Non,Non,,,,MAT_1848,1221,EMPTY,EMPTY
433,220,8bb35f06-61a6-4ef0-8e33-613cab5c70a1,Non Bâti,1221,,1,7,,,FRAD094_3P_000264_01_0221,...,,Non,Non,,,,MAT_1848,1221,EMPTY,EMPTY
434,221,3779a0f9-d748-4725-bab6-27ae5c56bad1,Non Bâti,1222,,1,1,,,FRAD094_3P_000264_01_0222,...,,Non,Non,,,,MAT_1848,1222,EMPTY,EMPTY
435,222,86f61693-f811-4f84-8f35-0c64b3b523ce,Non Bâti,1222,,1,2,,,FRAD094_3P_000264_01_0222,...,,Non,Non,,,,MAT_1848,1222,EMPTY,EMPTY


In [131]:
#Create a new dataframe from sources with columns "Num_Folio" and "Image" containing only distinct rows
folios_pages = folios[["Num_Folio","Num_Folio_clean","Alt_Num_CF","Image","registre"]].drop_duplicates(subset=["Num_Folio","Num_Folio_clean","Alt_Num_CF","Image","registre"]).reset_index(drop=True)
display(folios_pages)

Unnamed: 0,Num_Folio,Num_Folio_clean,Alt_Num_CF,Image,registre
0,11,11,,FRAD094_3P_000255_01_0015,MAT_1813
1,23bis,23bis,,FRAD094_3P_000255_01_0032,MAT_1813
2,34ter,34ter,,FRAD094_3P_000255_01_0044,MAT_1813
3,46bis,46bis,,FRAD094_3P_000255_01_0057,MAT_1813
4,107bis,107bis,,FRAD094_3P_000255_01_0125,MAT_1813
...,...,...,...,...,...
148,1218,1218,,FRAD094_3P_000264_01_0218,MAT_1848
149,1219,1219,,FRAD094_3P_000264_01_0219,MAT_1848
150,1220,1220,,FRAD094_3P_000264_01_0220,MAT_1848
151,1221,1221,,FRAD094_3P_000264_01_0221,MAT_1848


In [132]:
# Create a new RDF graph
g = Graph()

g.bind('source', srcuri)
g.bind('srctype', srctype)
g.bind('cad', cad)
g.bind('add', add)
g.bind('rico', rico)
g.bind('fpo', fpo)
g.bind('time',time)

# Iterate over each row in the DataFrame
for index, row in folios_pages.iterrows():
    json = parse_record_id(row['Image'])
    MATRICE_ID = matrices_metada[row['registre']]["MATRICE_ID"]

    subject_uri = URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}_{str(row['Num_Folio'])}")
    g.add((subject_uri, RDF.type, rico.RecordPart))
    g.add((subject_uri, cad.isSourceType, URIRef(srctype.Folio)))
    g.add((subject_uri, cad.hasNumFolio, Literal(row["Num_Folio"],datatype=XSD.string)))
    g.add((subject_uri, rico.isOrWasConstituentOf,URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{row['Image']}_page")))

    if not pd.isna(row['Alt_Num_CF']):
        g.add((subject_uri, cad.hasAlternativeNumFolio, Literal(int(row["Alt_Num_CF"]),datatype=XSD.string)))

print(g.serialize(format='turtle'))

@prefix cad: <http://data.ign.fr/def/cadastre#> .
@prefix rico: <https://www.ica.org/standards/RiC/ontology#> .
@prefix source: <http://data.ign.fr/id/source/> .
@prefix srctype: <http://data.ign.fr/id/codes/cadastre/sourceType/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

source:94_Gentilly_MAT_B_NB_1813_107bis a rico:RecordPart ;
    cad:hasNumFolio "107bis"^^xsd:string ;
    cad:isSourceType srctype:Folio ;
    rico:isOrWasConstituentOf source:94_Gentilly_FRAD094_3P_000255_01_0125_page .

source:94_Gentilly_MAT_B_NB_1813_11 a rico:RecordPart ;
    cad:hasNumFolio "11"^^xsd:string ;
    cad:isSourceType srctype:Folio ;
    rico:isOrWasConstituentOf source:94_Gentilly_FRAD094_3P_000255_01_0015_page .

source:94_Gentilly_MAT_B_NB_1813_122bis a rico:RecordPart ;
    cad:hasNumFolio "122bis"^^xsd:string ;
    cad:isSourceType srctype:Folio ;
    rico:isOrWasConstituentOf source:94_Gentilly_FRAD094_3P_000255_01_0146_page .

source:94_Gentilly_MAT_B_NB_1813_124bis a rico:RecordPa

#### Création des folios issus de "Tiré de" et "Porté à" qui ne sont pas dans la colonne 'Num_Folios'

In [133]:
# Iterate over each row in the DataFrame
for index, row in folios.iterrows():
    if row['Tire_de_clean'] != 'EMPTY':
        ls = row['Tire_de_clean'].split(";")
        for l in ls:
            if any(num.isdigit() for num in l) and 'omission' not in l:
                MATRICE_ID = matrices_metada[row['registre']]["MATRICE_ID"]

                subject_uri = URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}_{str(l)}")
                g.add((subject_uri, RDF.type, rico.RecordPart))
                g.add((subject_uri, cad.isSourceType, URIRef(srctype.Folio)))
                g.add((subject_uri, cad.hasNumFolio, Literal(l,datatype=XSD.string)))

In [134]:
# Iterate over each row in the DataFrame
for index, row in folios.iterrows():
    if row['Porte_a_clean'] != 'EMPTY':
        ls = row['Porte_a_clean'].split(";")
        for l in ls:
            #test if str has digit
            if any(num.isdigit() for num in l) and 'omission' not in l:
                MATRICE_ID = matrices_metada[row['registre']]["MATRICE_ID"]

                subject_uri = URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}_{str(l)}")
                g.add((subject_uri, RDF.type, rico.RecordPart))
                g.add((subject_uri, cad.isSourceType, URIRef(srctype.Folio)))
                g.add((subject_uri, cad.hasNumFolio, Literal(l,datatype=XSD.string)))

In [135]:
g.serialize(destination=f"{OUTPUT_FOLDER_PATH}/{COMMUNE}_sources_folios.ttl", format='turtle')

<Graph identifier=Nc90387244d184c59854a937c35388780 (<class 'rdflib.graph.Graph'>)>

### 1.3 Adresses

In [136]:
# Select the column as a new DataFrame
addresses = matrices[['registre','Lieu-dit_treated','Lieu-dit_type']].copy().drop_duplicates().reset_index(drop=True)
addresses

Unnamed: 0,registre,Lieu-dit_treated,Lieu-dit_type
0,MAT_1813,,
1,MAT_1813,Les Girantiers,District
2,MAT_1813,Rue Thiers,Thoroughfare
3,MAT_1813,Les Girantins,District
4,MAT_1813,Rue Thiers;10,Address
...,...,...,...
129,MAT_1848,Barrière d'Italie;8↑B↓,Address
130,MAT_1848,Barrière d'Italie;8↑A↓,Address
131,MAT_1848,Barrière d'Italie;6,Address
132,MAT_1848,Barrière d'Italie;4,Address


In [137]:
multipart_addresses_street = []
multipart_addresses_street_number = []
multipart_addresses_street_type = []
multipart_addresses_street_number_type = []


for index, row in addresses.iterrows():
    tag = str(row["Lieu-dit_treated"])
    if ';' in tag:
        add = tag.split(";")
        add_street_or_district = add[0]
        add_num_or_part = add[1]

        multipart_addresses_street.append(add_street_or_district)
        multipart_addresses_street_number.append(add_num_or_part)

        #test if digit
        if any(num.isdigit() for num in add_num_or_part):
            multipart_addresses_street_type.append('Thoroughfare')
            multipart_addresses_street_number_type.append('StreetNumber')
        else:
            multipart_addresses_street_type.append('District')
            multipart_addresses_street_number_type.append('Undefined')

    else:
        multipart_addresses_street.append('')
        multipart_addresses_street_number.append('')
        multipart_addresses_street_type.append('')
        multipart_addresses_street_number_type.append('')

addresses['part_street_district'] = multipart_addresses_street
addresses['part_street_number'] = multipart_addresses_street_number
addresses['part_street_district_type'] = multipart_addresses_street_type
addresses['part_street_number_type'] = multipart_addresses_street_number_type

#assign a distinct uuid for each group of rows with same values in Lieu-dit_treated and registre
addresses['address_uuid'] = [uuid.uuid4() for _ in range(len(addresses))]

addresses

Unnamed: 0,registre,Lieu-dit_treated,Lieu-dit_type,part_street_district,part_street_number,part_street_district_type,part_street_number_type,address_uuid
0,MAT_1813,,,,,,,8d5138e5-256e-4cca-8fbc-fd280c25fd16
1,MAT_1813,Les Girantiers,District,,,,,cd7b4663-6e3f-4bb5-a4df-19d421bb164e
2,MAT_1813,Rue Thiers,Thoroughfare,,,,,5111c723-79a3-413c-a362-f3e47cf7de76
3,MAT_1813,Les Girantins,District,,,,,65e78467-afda-44e7-927a-ea7be5553274
4,MAT_1813,Rue Thiers;10,Address,Rue Thiers,10,Thoroughfare,StreetNumber,cc6df861-1b42-4669-8584-3ea4b4afa528
...,...,...,...,...,...,...,...,...
129,MAT_1848,Barrière d'Italie;8↑B↓,Address,Barrière d'Italie,8↑B↓,Thoroughfare,StreetNumber,36f8fae6-eaad-47b1-8218-231189c6e2f5
130,MAT_1848,Barrière d'Italie;8↑A↓,Address,Barrière d'Italie,8↑A↓,Thoroughfare,StreetNumber,a507bf09-ba31-43f1-b28a-f4d41f813cc1
131,MAT_1848,Barrière d'Italie;6,Address,Barrière d'Italie,6,Thoroughfare,StreetNumber,a5788afa-d2ec-4626-a27b-f550abf2070e
132,MAT_1848,Barrière d'Italie;4,Address,Barrière d'Italie,4,Thoroughfare,StreetNumber,d6f2ba17-e816-483c-9a0e-5bc3496da4f6


In [138]:
import pandas as pd
import uuid

# Create a new column 'part_street_district_uuid'
addresses['part_street_district_uuid'] = None

# Create a dictionary to store the uuid for each unique part_street_district
uuid_dict = {}

# Iterate over the DataFrame
for i, row in addresses.iterrows():
    if row['part_street_district'] != '':
        # Check if the part_street_district value is equal to one of the Lieu-dit_treated values
        if row['part_street_district'] in addresses['Lieu-dit_treated'].values:
            # If yes, set the part_street_district_uuid to the uuid of the retrieved Lieu-dit_treated
            addresses.loc[i, 'part_street_district_uuid'] = addresses.loc[addresses['Lieu-dit_treated'] == row['part_street_district'], 'address_uuid'].values[0]
        else:
            # If no, check if the part_street_district value has other occurrences in the part_street_district column
            if row['part_street_district'] in addresses['part_street_district'].values:
                # If yes, check if the part_street_district value is already in the uuid_dict
                if row['part_street_district'] in uuid_dict:
                    # If yes, assign the same uuid
                    addresses.loc[i, 'part_street_district_uuid'] = uuid_dict[row['part_street_district']]
                else:
                    # If no, create a new uuid and add it to the uuid_dict
                    new_uuid = uuid.uuid4()
                    uuid_dict[row['part_street_district']] = new_uuid
                    addresses.loc[i, 'part_street_district_uuid'] = new_uuid
            else:
                # If no, create a new uuid
                addresses.loc[i, 'part_street_district_uuid'] = uuid.uuid4()

In [139]:
addresses

Unnamed: 0,registre,Lieu-dit_treated,Lieu-dit_type,part_street_district,part_street_number,part_street_district_type,part_street_number_type,address_uuid,part_street_district_uuid
0,MAT_1813,,,,,,,8d5138e5-256e-4cca-8fbc-fd280c25fd16,
1,MAT_1813,Les Girantiers,District,,,,,cd7b4663-6e3f-4bb5-a4df-19d421bb164e,
2,MAT_1813,Rue Thiers,Thoroughfare,,,,,5111c723-79a3-413c-a362-f3e47cf7de76,
3,MAT_1813,Les Girantins,District,,,,,65e78467-afda-44e7-927a-ea7be5553274,
4,MAT_1813,Rue Thiers;10,Address,Rue Thiers,10,Thoroughfare,StreetNumber,cc6df861-1b42-4669-8584-3ea4b4afa528,5111c723-79a3-413c-a362-f3e47cf7de76
...,...,...,...,...,...,...,...,...,...
129,MAT_1848,Barrière d'Italie;8↑B↓,Address,Barrière d'Italie,8↑B↓,Thoroughfare,StreetNumber,36f8fae6-eaad-47b1-8218-231189c6e2f5,33e02922-b2c4-4325-8ddc-5d1686c54845
130,MAT_1848,Barrière d'Italie;8↑A↓,Address,Barrière d'Italie,8↑A↓,Thoroughfare,StreetNumber,a507bf09-ba31-43f1-b28a-f4d41f813cc1,33e02922-b2c4-4325-8ddc-5d1686c54845
131,MAT_1848,Barrière d'Italie;6,Address,Barrière d'Italie,6,Thoroughfare,StreetNumber,a5788afa-d2ec-4626-a27b-f550abf2070e,33e02922-b2c4-4325-8ddc-5d1686c54845
132,MAT_1848,Barrière d'Italie;4,Address,Barrière d'Italie,4,Thoroughfare,StreetNumber,d6f2ba17-e816-483c-9a0e-5bc3496da4f6,33e02922-b2c4-4325-8ddc-5d1686c54845


In [140]:
from namespaces import *

# Create a new RDF graph
g = Graph()

landmarkuri = Namespace("http://data.ign.fr/id/landmark/")
g.bind('landmark', landmarkuri)
g.bind('source', srcuri)

g.bind('cad', cad)
g.bind('add', add)
g.bind('rico', rico)
g.bind('fpo', fpo)
g.bind('time',time)

g.bind("ltype", ltype)
g.bind("lrtype", lrtype)
g.bind("atype", atype)
g.bind('cad_ltype', cad_ltype)

for index, row in addresses.iterrows():
    add_uri = URIRef(landmarkuri + f"{row['address_uuid']}")
    g.add((add_uri, RDF.type, add.Landmark))
    if pd.notnull(addresses.loc[index, 'Lieu-dit_type']):
        if ';' in row['Lieu-dit_treated']:
            name = row['Lieu-dit_treated'].split(";")

            g.add((add_uri, add.isLandmarkType, URIRef(ltype + row['part_street_number_type'])))

            if any(num.isdigit() for num in name[1]):
                g.add((add_uri, SKOS.prefLabel, Literal(name[1] + ' (' + name[0] + ', ' + COMMUNE + ')', datatype=XSD.string)))
                g.add((add_uri, SKOS.altLabel, Literal(name[1] + ' ' + name[0], datatype=XSD.string)))
                g.add((add_uri,SKOS.hiddenLabel,Literal(name[1],datatype=XSD.string))) #Street number from transcription
                relationode = BNode()
                g.add((URIRef(relationode.n3()), add.isLandmarkRelationType, lrtype.Along))
            else:
                g.add((add_uri, SKOS.prefLabel, Literal(name[0] + ' (' + name[1] + ', ' + COMMUNE + ')', datatype=XSD.string)))
                g.add((add_uri, SKOS.altLabel, Literal(name[0], datatype=XSD.string)))
                relationode = BNode()
                g.add((URIRef(relationode.n3()), add.isLandmarkRelationType, lrtype.Undefined))

            g.add((URIRef(relationode.n3()), RDF.type, add.LandmarkRelation))
            g.add((URIRef(relationode.n3()), add.locatum, add_uri))
            g.add((URIRef(relationode.n3()), add.relatum, URIRef(landmarkuri + str(row['part_street_district_uuid']))))

            #Street or district relation with section
            sectionNode = BNode()
            g.add((URIRef(sectionNode.n3()), RDF.type, add.LandmarkRelation))
            g.add((URIRef(sectionNode.n3()), add.isLandmarkRelationType, lrtype.Within))
            g.add((URIRef(sectionNode.n3()), add.locatum, URIRef(landmarkuri + str(row['part_street_district_uuid']))))
            if row['registre'] != 'MAT_1848':
                g.add((URIRef(sectionNode.n3()), add.relatum, URIRef(landmarkuri + 'da6a5c2c-e86d-43bb-8950-7169bd0df60a'))) #Section D Cadastre 1848
            else:
                g.add((URIRef(sectionNode.n3()), add.relatum, URIRef(landmarkuri + '87d7c2f6-306b-45a1-a833-5e17821c3102'))) #Section B Cadastre 1811

        else:
            g.add((add_uri, add.isLandmarkType, URIRef(ltype + f"{row['Lieu-dit_type']}")))
            g.add((add_uri, SKOS.prefLabel, Literal(row['Lieu-dit_treated'] + ', ' + COMMUNE, datatype=XSD.string)))
            g.add((add_uri, SKOS.altLabel, Literal(row['Lieu-dit_treated'], datatype=XSD.string)))

            relationode = BNode()
            g.add((URIRef(relationode.n3()), add.isLandmarkRelationType, lrtype.Within))
            g.add((URIRef(relationode.n3()), add.locatum, add_uri))

            sectionNode = BNode()
            g.add((URIRef(sectionNode.n3()), RDF.type, add.LandmarkRelation))
            g.add((URIRef(sectionNode.n3()), add.isLandmarkRelationType, lrtype.Within))
            g.add((URIRef(sectionNode.n3()), add.locatum, add_uri))
            if row['registre'] != 'MAT_1848':
                g.add((URIRef(sectionNode.n3()), add.relatum, URIRef(landmarkuri + 'da6a5c2c-e86d-43bb-8950-7169bd0df60a'))) #Section D Cadastre 1848
            else:
                g.add((URIRef(sectionNode.n3()), add.relatum, URIRef(landmarkuri + '87d7c2f6-306b-45a1-a833-5e17821c3102'))) #Section B Cadastre 1811

        g.add((add_uri, cad.sourcedFrom, URIRef(srcuri + f'94_{COMMUNE}_{row["registre"]}')))

print(g.serialize(format='turtle'))
g.serialize(destination=f"{OUTPUT_FOLDER_PATH}/{COMMUNE}_landmarks_lieu_dit.ttl", format='turtle')

@prefix add: <http://rdf.geohistoricaldata.org/def/address#> .
@prefix cad: <http://data.ign.fr/def/cadastre#> .
@prefix landmark: <http://data.ign.fr/id/landmark/> .
@prefix lrtype: <http://rdf.geohistoricaldata.org/id/codes/address/landmarkRelationType/> .
@prefix ltype: <http://rdf.geohistoricaldata.org/id/codes/address/landmarkType/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix source: <http://data.ign.fr/id/source/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<_:N01c2618def3046e3867f5ab2d29a935e> a add:LandmarkRelation ;
    add:isLandmarkRelationType lrtype:Along ;
    add:locatum landmark:e0e319ca-5bf1-4e43-a9d7-f609a8600d44 ;
    add:relatum landmark:33e02922-b2c4-4325-8ddc-5d1686c54845 .

<_:N01fb55758c6448d597b6f1597b247144> a add:LandmarkRelation ;
    add:isLandmarkRelationType lrtype:Within ;
    add:locatum landmark:f92c5b83-2693-434c-a9ef-cd51f8f77713 ;
    add:relatum landmark:da6a5c2c-e86d-43bb-8950-7169bd0df60a .

<_:N02badc6f32d74b16ae24

<Graph identifier=N82493dc9e32640639ab419b66face9bd (<class 'rdflib.graph.Graph'>)>

### 1.5 Propriétaires

In [141]:
from pandasql import sqldf
import pandas as pd

In [142]:
matrices_ = matrices[['registre','Num_Folio','Type_CF','Groupe_CF','Image','UUID','Ordre_de_lecture']].copy()

In [143]:
#add column with rows index
owners_df['row_index'] = owners_df.index
owners_df2 = owners_df[['registre','type_folio','folio','groupe_cf','row_index']]
owners_df2.dtypes

registre      object
type_folio    object
folio         object
groupe_cf     object
row_index      int64
dtype: object

In [144]:
query = '''SELECT M.registre AS m_registre, M.Type_CF AS m_type_folio, M.Num_Folio AS m_num_folio, M.Groupe_CF AS m_groupe_cf, M.Image AS m_image, M.UUID AS m_uuid, m.Ordre_de_lecture AS m_row_num_in_cf, O.row_index AS o_row_index
        FROM matrices_ AS M
        LEFT JOIN owners_df2 AS O
        ON (M.Num_Folio = O.folio AND M.registre = O.registre AND M.Groupe_CF = O.groupe_cf AND M.Type_CF = O.type_folio)
        '''

testDF = sqldf(query)
testDF.to_csv(ROOT + 'test.csv',index=False)
print(sqldf(query))

    m_registre m_type_folio m_num_folio  m_groupe_cf  \
0     MAT_1813         Bâti         108            1   
1     MAT_1813         Bâti         114            1   
2     MAT_1813         Bâti          82            1   
3     MAT_1813         Bâti          82            1   
4     MAT_1813         Bâti          64            1   
..         ...          ...         ...          ...   
440   MAT_1848     Non Bâti        1221            1   
441   MAT_1848     Non Bâti        1221            1   
442   MAT_1848     Non Bâti        1222            1   
443   MAT_1848     Non Bâti        1222            1   
444   MAT_1848     Non Bâti        1222            1   

                       m_image                                m_uuid  \
0    FRAD094_3P_000255_01_0586  b2b478a0-7b8f-4715-84c4-bcd5112e41eb   
1    FRAD094_3P_000255_01_0588  6af95196-31c4-49cc-914a-ed618b0c6646   
2    FRAD094_3P_000255_01_0579  afb83e4f-2351-42ca-af63-14ccd79c5621   
3    FRAD094_3P_000255_01_0579  432f72f

In [145]:
owners_matrices = pd.merge(owners_df, testDF, how='left', left_on=['row_index'], right_on=['o_row_index'])
#add a uuid to each distinct group of line with same values in registre, type_folio, folio and groupe_cf
owners_matrices.to_csv(ROOT + 'owners_matrices.csv',index=False)
owners_matrices

Unnamed: 0,cell,owners,changes,registre,type_folio,folio,groupe_cf,transcription,row_index,m_registre,m_type_folio,m_num_folio,m_groupe_cf,m_image,m_uuid,m_row_num_in_cf,o_row_index
0,"{'registre': 'MAT_1813', 'type_folio': 'Bâti',...","[{'owner-id': 1, 'owner-lastname': 'Legendre',...",[],MAT_1813,Bâti,108,1,Legendre H↑re↓ de →Fontainebleau,0,MAT_1813,Bâti,108,1,FRAD094_3P_000255_01_0586,b2b478a0-7b8f-4715-84c4-bcd5112e41eb,1,0
1,"{'registre': 'MAT_1813', 'type_folio': 'Bâti',...","[{'owner-id': 1, 'owner-lastname': 'Louves', '...",[],MAT_1813,Bâti,114,1,Louves,1,MAT_1813,Bâti,114,1,FRAD094_3P_000255_01_0588,6af95196-31c4-49cc-914a-ed618b0c6646,1,1
2,"{'registre': 'MAT_1813', 'type_folio': 'Bâti',...","[{'owner-id': 1, 'owner-lastname': 'Hardon', '...",[],MAT_1813,Bâti,82,1,"Hardon, Bourgeois ~~nourisseur~~→à Paris",2,MAT_1813,Bâti,82,1,FRAD094_3P_000255_01_0579,afb83e4f-2351-42ca-af63-14ccd79c5621,1,2
3,"{'registre': 'MAT_1813', 'type_folio': 'Bâti',...","[{'owner-id': 1, 'owner-lastname': 'Hardon', '...",[],MAT_1813,Bâti,82,1,"Hardon, Bourgeois ~~nourisseur~~→à Paris",2,MAT_1813,Bâti,82,1,FRAD094_3P_000255_01_0579,432f72f1-ba8a-453f-90ec-79b2b89e8592,2,2
4,"{'registre': 'MAT_1813', 'type_folio': 'Bâti',...","[{'owner-id': 1, 'owner-lastname': 'Faipot', '...",[],MAT_1813,Bâti,64,1,Faipot François→m↑d↓ de vin b↑re↓ de fontaineb...,3,MAT_1813,Bâti,64,1,FRAD094_3P_000255_01_0571,2d3be415-19f5-4684-996e-5fb3ccc434bf,1,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
440,"{'registre': 'MAT_1848', 'type_folio': 'Non Bâ...","[{'owner-id': 1, 'owner-lastname': 'Nicolle', ...",[],MAT_1848,Non Bâti,1221,1,Nicolle J↑n↓ f↑ois↓ Paul,188,MAT_1848,Non Bâti,1221,1,FRAD094_3P_000264_01_0221,e0ac6603-2873-4ce4-8eff-855207af9abe,6,188
441,"{'registre': 'MAT_1848', 'type_folio': 'Non Bâ...","[{'owner-id': 1, 'owner-lastname': 'Nicolle', ...",[],MAT_1848,Non Bâti,1221,1,Nicolle J↑n↓ f↑ois↓ Paul,188,MAT_1848,Non Bâti,1221,1,FRAD094_3P_000264_01_0221,8bb35f06-61a6-4ef0-8e33-613cab5c70a1,7,188
442,"{'registre': 'MAT_1848', 'type_folio': 'Non Bâ...","[{'owner-id': 1, 'owner-lastname': 'Lacroix', ...",[],MAT_1848,Non Bâti,1222,1,Lacroix J↑n↓ Louis,189,MAT_1848,Non Bâti,1222,1,FRAD094_3P_000264_01_0222,3779a0f9-d748-4725-bab6-27ae5c56bad1,1,189
443,"{'registre': 'MAT_1848', 'type_folio': 'Non Bâ...","[{'owner-id': 1, 'owner-lastname': 'Lacroix', ...",[],MAT_1848,Non Bâti,1222,1,Lacroix J↑n↓ Louis,189,MAT_1848,Non Bâti,1222,1,FRAD094_3P_000264_01_0222,86f61693-f811-4f84-8f35-0c64b3b523ce,2,189


In [146]:
#I want to create a dict with o_row_index as key and uuids as values
cf_uuid_dict = {}
sees = []
for index, row in owners_matrices.iterrows():
    if row['o_row_index'] not in sees:
        cf_uuid_dict[row['o_row_index']] = uuid.uuid4()
        sees.append(cf_uuid_dict[row['o_row_index']])
owners_matrices

Unnamed: 0,cell,owners,changes,registre,type_folio,folio,groupe_cf,transcription,row_index,m_registre,m_type_folio,m_num_folio,m_groupe_cf,m_image,m_uuid,m_row_num_in_cf,o_row_index
0,"{'registre': 'MAT_1813', 'type_folio': 'Bâti',...","[{'owner-id': 1, 'owner-lastname': 'Legendre',...",[],MAT_1813,Bâti,108,1,Legendre H↑re↓ de →Fontainebleau,0,MAT_1813,Bâti,108,1,FRAD094_3P_000255_01_0586,b2b478a0-7b8f-4715-84c4-bcd5112e41eb,1,0
1,"{'registre': 'MAT_1813', 'type_folio': 'Bâti',...","[{'owner-id': 1, 'owner-lastname': 'Louves', '...",[],MAT_1813,Bâti,114,1,Louves,1,MAT_1813,Bâti,114,1,FRAD094_3P_000255_01_0588,6af95196-31c4-49cc-914a-ed618b0c6646,1,1
2,"{'registre': 'MAT_1813', 'type_folio': 'Bâti',...","[{'owner-id': 1, 'owner-lastname': 'Hardon', '...",[],MAT_1813,Bâti,82,1,"Hardon, Bourgeois ~~nourisseur~~→à Paris",2,MAT_1813,Bâti,82,1,FRAD094_3P_000255_01_0579,afb83e4f-2351-42ca-af63-14ccd79c5621,1,2
3,"{'registre': 'MAT_1813', 'type_folio': 'Bâti',...","[{'owner-id': 1, 'owner-lastname': 'Hardon', '...",[],MAT_1813,Bâti,82,1,"Hardon, Bourgeois ~~nourisseur~~→à Paris",2,MAT_1813,Bâti,82,1,FRAD094_3P_000255_01_0579,432f72f1-ba8a-453f-90ec-79b2b89e8592,2,2
4,"{'registre': 'MAT_1813', 'type_folio': 'Bâti',...","[{'owner-id': 1, 'owner-lastname': 'Faipot', '...",[],MAT_1813,Bâti,64,1,Faipot François→m↑d↓ de vin b↑re↓ de fontaineb...,3,MAT_1813,Bâti,64,1,FRAD094_3P_000255_01_0571,2d3be415-19f5-4684-996e-5fb3ccc434bf,1,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
440,"{'registre': 'MAT_1848', 'type_folio': 'Non Bâ...","[{'owner-id': 1, 'owner-lastname': 'Nicolle', ...",[],MAT_1848,Non Bâti,1221,1,Nicolle J↑n↓ f↑ois↓ Paul,188,MAT_1848,Non Bâti,1221,1,FRAD094_3P_000264_01_0221,e0ac6603-2873-4ce4-8eff-855207af9abe,6,188
441,"{'registre': 'MAT_1848', 'type_folio': 'Non Bâ...","[{'owner-id': 1, 'owner-lastname': 'Nicolle', ...",[],MAT_1848,Non Bâti,1221,1,Nicolle J↑n↓ f↑ois↓ Paul,188,MAT_1848,Non Bâti,1221,1,FRAD094_3P_000264_01_0221,8bb35f06-61a6-4ef0-8e33-613cab5c70a1,7,188
442,"{'registre': 'MAT_1848', 'type_folio': 'Non Bâ...","[{'owner-id': 1, 'owner-lastname': 'Lacroix', ...",[],MAT_1848,Non Bâti,1222,1,Lacroix J↑n↓ Louis,189,MAT_1848,Non Bâti,1222,1,FRAD094_3P_000264_01_0222,3779a0f9-d748-4725-bab6-27ae5c56bad1,1,189
443,"{'registre': 'MAT_1848', 'type_folio': 'Non Bâ...","[{'owner-id': 1, 'owner-lastname': 'Lacroix', ...",[],MAT_1848,Non Bâti,1222,1,Lacroix J↑n↓ Louis,189,MAT_1848,Non Bâti,1222,1,FRAD094_3P_000264_01_0222,86f61693-f811-4f84-8f35-0c64b3b523ce,2,189


* Créer les comptes fonciers
* Associer à chaque compte foncier ses propriétaires (ordonnés dans le temps)
* Associer à chaque compte foncier le landmark (état) qu'il mentionne

* ```o_row_index``` : id de l'article de mutation (et du compte foncier => 1..1)
donc
    * ArticleDeMutation : uuid_mutation
    * CompteFoncier : uuid
    * Owner : uuid_taxpayer = owner_id

* ```m_uuid``` : id de l'article de classement
donc
    * ArticleDeClassement : uuid

In [147]:
from namespaces import *

# Create a new RDF graph
g = Graph()

g.bind('landmark', landmarkuri)
g.bind('source', srcuri)
g.bind('taxpayer', owneruri)
g.bind('event', eventuri)

g.bind('cad_ltype', cad_ltype)
g.bind('cad_atype', cad_atype)
g.bind('cad_etype', cad_etype)
g.bind('ctype', ctype)
g.bind('srctype', srctype)
g.bind('mlclasse', mlclasse)
g.bind('rico', rico)
g.bind('add', add)
g.bind('cad', cad)

created_cf = []

for index, row in owners_matrices.iterrows():
    #Infos de l'image
    json = parse_record_id(row["m_image"])
    #UUID de l'article de classement (=uuid de la ligne)
    lineuuid_ = str(row['m_uuid'])

    #UUID du compte foncier
    cfuuid_ = str(cf_uuid_dict[row['o_row_index']])
    subject_uri = URIRef(srcuri + f"{cfuuid_}")

    #URI de l'article de classement
    articleclassementuri = URIRef(srcuri + f"{row['m_uuid']}_classement")
    g.add((articleclassementuri, RDF.type, rico.RecordPart))
    g.add((articleclassementuri, cad.isSourceType, URIRef(srctype.ArticleDeClassement)))
    g.add((articleclassementuri, DCTERMS.identifier, Literal(row['m_row_num_in_cf'],datatype=XSD.integer))) #Numéro d'ordre de l'article de classement dans le compte foncier

    #CF contient ArticleDeClassement
    g.add((subject_uri, rico.hasOrHadConstituent, articleclassementuri)) 

    if cfuuid_ not in created_cf:
        created_cf.append(cfuuid_)
        #URI du compte foncier
        g.add((subject_uri, RDF.type, rico.RecordPart))
        g.add((subject_uri, cad.isSourceType, URIRef(srctype.CompteFoncier)))
        g.add((subject_uri, rico.hasOrHadConstituent, URIRef(srcuri + f"{cfuuid_}_mutation"))) #CF contient ArticleDeMutation
        g.add((subject_uri, rico.isOrWasConstituentOf, URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}_{str(row['m_num_folio'])}"))) #CF est contenu dans Folio
        g.add((subject_uri, DCTERMS.identifier, Literal(row['groupe_cf'],datatype=XSD.integer))) #Numéro d'ordre du compte foncier dans un folio

        #URI de l'article de mutation
        articlemutationuri = URIRef(srcuri + f"{cfuuid_}_mutation")
        g.add((articlemutationuri, RDF.type, rico.RecordPart))
        g.add((articlemutationuri, cad.isSourceType, URIRef(srctype.ArticleDeMutation)))

        ## Taxpayers contenu dans ArticleDeMutation
        ownersattribute = BNode()
        g.add((ownersattribute, RDF.type, add.Attribute))
        g.add((articlemutationuri, cad.hasCadastreAttribute, ownersattribute))
        g.add((ownersattribute, add.isAttributeType, URIRef(cad_atype.PlotTaxpayer)))

        owners_and_versions = {}
        for owner in row['owners']:
            ownersattributeversion = BNode()
            g.add((ownersattributeversion,RDF.type,add.AttributeVersion))
            g.add((ownersattribute, add.hasAttributeVersion, ownersattributeversion))
            owneruriinstance = URIRef(owneruri + f"{cfuuid_}_taxpayer_{owner['owner-id']}")
            g.add((owneruriinstance, RDF.type, cad.Taxpayer))
            g.add((ownersattributeversion, cad.hasTaxpayer, owneruriinstance))
            olabel = ''
            #test if json has a certain key
            if 'owner-lastname' in owner:
                g.add((owneruriinstance, cad.taxpayerLabel, Literal(owner['owner-lastname'],datatype=XSD.string)))
                olabel += owner['owner-lastname']
            if 'owner-firstname' in owner:
                g.add((owneruriinstance, cad.taxpayerFirstName, Literal(owner['owner-firstname'],datatype=XSD.string)))
                olabel += ' ' + owner['owner-firstname']
            if 'owner-status' in owner:
                g.add((owneruriinstance, cad.taxpayerStatus, Literal(owner['owner-status'],datatype=XSD.string)))
                olabel += ' (' + owner['owner-status'] + ')'
            if 'owner-activity' in owner:
                g.add((owneruriinstance, cad.taxpayerActivity, Literal(owner['owner-activity'],datatype=XSD.string)))
            if 'owner-address' in owner:
                g.add((owneruriinstance, cad.taxpayerAddress, Literal(owner['owner-address'],datatype=XSD.string)))
            #Create owner label
            g.add((owneruriinstance, RDFS.label, Literal(olabel,datatype=XSD.string)))
            g.add((owneruriinstance,cad.fromSource,articlemutationuri))
            owners_and_versions[owner['owner-id']] = ownersattributeversion

        if len(row) > 0:
            for change in row['changes']:
                changenode = BNode()
                g.add((URIRef(changenode.n3()), RDF.type, add.Change))
                g.add((URIRef(changenode.n3()), add.appliedTo, ownersattribute))
                g.add((URIRef(changenode.n3()), add.outdates, URIRef(owners_and_versions[change['owner-before']])))
                g.add((URIRef(changenode.n3()), add.makesEffective, URIRef(owners_and_versions[change['owner-after']])))
                g.add((URIRef(changenode.n3()),add.isChangeType,ctype.AttributeVersionTransition))
                event_uuid = uuid.uuid4()
                event_uri = URIRef(eventuri + f"{event_uuid}")
                g.add((URIRef(changenode.n3()), add.dependsOn, event_uri))
                if 'date' in change:
                    g.add((event_uri, RDF.type, add.Event))
                    time_ = BNode()
                    g.add((event_uri, add.hasTime, time_))
                    g.add((event_uri, cad.isEventType, cad_etype.TaxpayerMutation))
                    g.add((time_, RDF.type, add.TimeInstant))
                    g.add((time_, add.timeCalendar, time.Gregorian))
                    g.add((time_, add.Precision, time.Year))
                    g.add((time_, add.timeStamp, Literal(change['date'],datatype=XSD.dateTimeStamp)))
        #Create initial change
        initchangenode = BNode()
        g.add((URIRef(initchangenode.n3()), RDF.type, add.Change))
        g.add((URIRef(initchangenode.n3()), add.appliedTo, ownersattribute))
        g.add((URIRef(initchangenode.n3()), add.makesEffective, URIRef(owners_and_versions[1])))
        g.add((URIRef(initchangenode.n3()), add.isChangeType, ctype.AttributeVersionAppearance))
        event_uuid = uuid.uuid4()
        event_uri = URIRef(eventuri + f"{event_uuid}")
        g.add((URIRef(initchangenode.n3()), add.dependsOn, event_uri))

        #Create final change
        finalchangenode = BNode()
        g.add((URIRef(finalchangenode.n3()), RDF.type, add.Change))
        g.add((URIRef(finalchangenode.n3()), add.appliedTo, ownersattribute))
        g.add((URIRef(finalchangenode.n3()), add.outdates, URIRef(owners_and_versions[len(owners_and_versions)])))
        g.add((URIRef(finalchangenode.n3()), add.isChangeType, ctype.AttributeVersionDisappearance))
        event_uuid = uuid.uuid4()
        event_uri = URIRef(eventuri + f"{event_uuid}")
        g.add((URIRef(finalchangenode.n3()), add.dependsOn, event_uri))

print(g.serialize(format='turtle'))
g.serialize(destination=f"{OUTPUT_FOLDER_PATH}/{COMMUNE}_owners_cf_clas_mut.ttl", format='turtle')

@prefix add: <http://rdf.geohistoricaldata.org/def/address#> .
@prefix cad: <http://data.ign.fr/def/cadastre#> .
@prefix cad_atype: <http://data.ign.fr/id/codes/cadastre/attributeType/> .
@prefix cad_etype: <http://data.ign.fr/id/codes/cadastre/eventType/> .
@prefix ctype: <http://rdf.geohistoricaldata.org/id/codes/address/changeType/> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix event: <http://data.ign.fr/id/event/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix rico: <https://www.ica.org/standards/RiC/ontology#> .
@prefix source: <http://data.ign.fr/id/source/> .
@prefix srctype: <http://data.ign.fr/id/codes/cadastre/sourceType/> .
@prefix taxpayer: <http://data.ign.fr/id/taxpayer/> .
@prefix time: <http://www.w3.org/2006/time#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<_:N002e8f397cba4175afcb469328c1cf46> a add:Change ;
    add:appliedTo _:N044997449775451aa10c7eb8743a9638 ;
    add:dependsOn event:901d1a0c-3578-40fd-8608-4498868dacbb ;
  

<Graph identifier=Nf73d953128f24336844c0b2353216f06 (<class 'rdflib.graph.Graph'>)>

### 1.X Création des états de parcelles

In [148]:
matrices

Unnamed: 0,ID,UUID,Type_CF,Num_Folio,Alt_Num_CF,Groupe_CF,Ordre_de_lecture,Voie,Num_Voie,Image,...,Porté à_treated,Ligne barrée ?,CF rayé ?,Spécification,Commentaire,Cote liée,registre,Num_Folio_clean,Tire_de_clean,Porte_a_clean
0,1,b2b478a0-7b8f-4715-84c4-bcd5112e41eb,Bâti,108,,1,1,,,FRAD094_3P_000255_01_0586,...,,Non,Non,Matrice des propriétés bâties ne sont plus mis...,,,MAT_1813,108,EMPTY,EMPTY
1,2,6af95196-31c4-49cc-914a-ed618b0c6646,Bâti,114,,1,1,,,FRAD094_3P_000255_01_0588,...,82,Oui,Oui,Matrice des propriétés bâties ne sont plus mis...,,,MAT_1813,114,EMPTY,82
2,3,afb83e4f-2351-42ca-af63-14ccd79c5621,Bâti,82,,1,1,,,FRAD094_3P_000255_01_0579,...,,Non,Non,Matrice des propriétés bâties ne sont plus mis...,,,MAT_1813,82,114,EMPTY
3,4,432f72f1-ba8a-453f-90ec-79b2b89e8592,Bâti,82,,1,2,,,FRAD094_3P_000255_01_0579,...,,Non,Non,Matrice des propriétés bâties ne sont plus mis...,,,MAT_1813,82,EMPTY,EMPTY
4,5,2d3be415-19f5-4684-996e-5fb3ccc434bf,Bâti,64,,1,1,,,FRAD094_3P_000255_01_0571,...,,Non,Non,Matrice des propriétés bâties ne sont plus mis...,,,MAT_1813,64,EMPTY,EMPTY
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
437,219,e0ac6603-2873-4ce4-8eff-855207af9abe,Non Bâti,1221,,1,6,,,FRAD094_3P_000264_01_0221,...,,Non,Non,,,,MAT_1848,1221,EMPTY,EMPTY
438,220,8bb35f06-61a6-4ef0-8e33-613cab5c70a1,Non Bâti,1221,,1,7,,,FRAD094_3P_000264_01_0221,...,,Non,Non,,,,MAT_1848,1221,EMPTY,EMPTY
439,221,3779a0f9-d748-4725-bab6-27ae5c56bad1,Non Bâti,1222,,1,1,,,FRAD094_3P_000264_01_0222,...,,Non,Non,,,,MAT_1848,1222,EMPTY,EMPTY
440,222,86f61693-f811-4f84-8f35-0c64b3b523ce,Non Bâti,1222,,1,2,,,FRAD094_3P_000264_01_0222,...,,Non,Non,,,,MAT_1848,1222,EMPTY,EMPTY


In [149]:
matrices.columns

Index(['ID', 'UUID', 'Type_CF', 'Num_Folio', 'Alt_Num_CF', 'Groupe_CF',
       'Ordre_de_lecture', 'Voie', 'Num_Voie', 'Image', 'Section_clean',
       'Parcelle_clean', 'Lieu-dit_transcript', 'Lieu-dit_clean',
       'Lieu-dit_treated', 'Lieu-dit_type', 'Propriétaires_transcript',
       'Nature_transcript', 'Nature_clean', 'Nature_treated', 'Date entrée',
       'Date entrée_treated', 'Date sortie', 'Date sortie_treated', 'Tiré de',
       'Tiré de_treated', 'Porté à', 'Porté à_treated', 'Ligne barrée ?',
       'CF rayé ?', 'Spécification', 'Commentaire', 'Cote liée', 'registre',
       'Num_Folio_clean', 'Tire_de_clean', 'Porte_a_clean'],
      dtype='object')

In [150]:
# Create a new RDF graph
g = Graph()

baseuri = Namespace("http://data.ign.fr/id/landmark/")
srcuri = Namespace("http://data.ign.fr/id/source/")
owneruri = Namespace("http://data.ign.fr/id/owner/")

cad_ltype = Namespace("http://data.ign.fr/id/codes/cadastre/landmarkType/")
cad_atype = Namespace("http://data.ign.fr/id/codes/cadastre/attributeType/")
lrtype = Namespace("http://rdf.geohistoricaldata.org/id/codes/address/landmarkRelationType/")

g.bind('landmark', baseuri)
g.bind('owner', owneruri)
g.bind('source', srcuri)
g.bind('cad_ltype', cad_ltype)
g.bind('cad_atype', cad_atype)

# Define the namespaces
cad = Namespace("http://data.ign.fr/def/cadastre#")
add = Namespace("http://rdf.geohistoricaldata.org/def/address#")
rico = Namespace("https://www.ica.org/standards/RiC/ontology#")
fpo = Namespace("https://github.com/johnBradley501/FPO/raw/master/fpo.owl#")
time = Namespace("http://www.w3.org/2006/time#")

g.bind('cad', cad)
g.bind('add', add)
g.bind('rico', rico)
g.bind('fpo', fpo)
g.bind('time',time)

# Iterate over each row in the DataFrame
for index, row in matrices.iterrows():

    json = parse_record_id(row["Image"])
    MATRICE_ID = matrices_metada[row['registre']]["MATRICE_ID"]
    PLAN = matrices_metada[row['registre']]["PLAN"]
    lineuuid_ = MATRICE_ID + '_' + str(row['ID']) #str(uuid.uuid4())

    subject_uri = URIRef(baseuri + f"{row['UUID']}")
    g.add((subject_uri, RDF.type, add.Landmark))
    g.add((subject_uri, add.isLandmarkType, cad_ltype.Plot))
    #g.add((subject_uri, DCTERMS.identifier, Literal(row['Section_clean'] + '-' + row['Parcelle_clean'], datatype=XSD.string)))

    #Folios
    g.add((subject_uri, cad.hasNumFolio, URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}_{str(row['Num_Folio'])}")))
    tire_de = str(row['Tiré de_treated']).split(',')
    for f in tire_de:
        if any(char.isdigit() for char in str(f)):
            g.add((subject_uri, cad.takenFrom, URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}_{str(f)}")))
        elif f != 'nan':
            g.add((subject_uri, cad.takenFrom, Literal(str(f))))
    porte_a = str(row['Porté à_treated']).split(',')
    for f in porte_a:
        if any(char.isdigit() for char in str(f)):
            g.add((subject_uri, cad.passedTo, URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}_{str(f)}")))
        elif f != 'nan':
            g.add((subject_uri, cad.passedTo, Literal(str(f))))

    #Source
    rowSource = BNode()
    g.add((subject_uri, fpo.sourcedFrom, rowSource))
    g.add((rowSource, RDF.type, fpo.SourceCitation))
    g.add((rowSource, fpo.fromSource, URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{MATRICE_ID}")))
    g.add((rowSource, rico.isComponentOfTransitive, URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{row['Image']}")))
    g.add((rowSource, cad.hasExtractionID, Literal(lineuuid_)))
    g.add((rowSource, PROV.wasGeneratedBy, URIRef(f"http://data.ign.fr/id/codes/cadastre/activity/0002")))
    g.add((URIRef(f"http://data.ign.fr/id/codes/cadastre/activity/0002"), PROV.used, URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{row['Image']}")))
    g.add((rowSource, rico.isOrWasDigitalInstanciation,URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{row['Image']}_{lineuuid_}_area")))
    g.add((rowSource, cad.lineOrderInArea, Literal(row['Ordre de lecture'], datatype=XSD.integer)))

    #Create recordpart
    recordparturi = URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{row['Image']}_{lineuuid_}_area")
    g.add((recordparturi, RDF.type, rico.RecordPart))
    g.add((recordparturi, rico.isOrWasIncludedIn, URIRef(srcuri + f"{json['departement']}_{COMMUNE}_{row['Image']}_page")))
    
    #Address
    if row['Lieu-dit_treated'] != 'nan':
        plotaddress = BNode()
        g.add((subject_uri, add.hasAttribute, plotaddress))
        g.add((plotaddress, add.isAttributeType, cad_atype.PlotAddress))
        plotaddressversion = BNode()
        g.add((plotaddress, add.hasAttributeVersion, plotaddressversion))
        g.add((plotaddressversion, RDF.type, add.LandmarkRelation))
        g.add((plotaddressversion, add.isLandmarkRelationType, lrtype.Undefined))
        g.add((plotaddressversion, add.locatum, subject_uri))
        g.add((plotaddressversion, add.relatum, URIRef(baseuri + str(row['address_uuid']))))

    #Owner

    #Nature
    if not pd.isnull(row['Nature_treated']):
        nature = BNode()
        g.add((subject_uri, add.hasNature, nature))
        g.add((nature, RDF.type, add.Nature))
        g.add((nature, RDFS.label, Literal(row['Nature_treated'], datatype=XSD.string)))
    
    #Time
    if not pd.isnull(row['Date entrée']) or not pd.isnull(['Date sortie']):
        hastime = BNode()
        g.add((subject_uri, add.hasTime, hastime))
        g.add((hastime, RDF.type, add.TimeInterval))
        
        if not pd.isna(row['Date entrée']) and not pd.isnull(row['Date entrée']):
            hasbeginning = BNode()
            g.add((hastime, add.hasBeginning, hasbeginning))
            g.add((hasbeginning,RDF.type, add.TimeInstant))
            g.add((hasbeginning, add.timeCalendar, time.Gregorian))
            g.add((hasbeginning, add.timePrecision, time.Year))
            g.add((hasbeginning, add.timeStamp, Literal(row['Date entrée'], datatype=XSD.date)))
        #else:
            #g.add((hastime, add.hasBeginning, Literal(MATRICE_START, datatype=XSD.date)))#date d'ouverture de la matrice
        if not pd.isnull(row['Date sortie']) and row['Date sortie'] != 'nan':
            hasend = BNode()
            g.add((hastime, add.hasBeginning, hasend))
            g.add((hasend,RDF.type, add.TimeInstant))
            g.add((hasend, add.timeCalendar, time.Gregorian))
            g.add((hasend, add.timePrecision, time.Year))
            g.add((hasend, add.timeStamp, Literal(row['Date entrée'], datatype=XSD.date)))
print(g.serialize(format='turtle'))

KeyError: 'Ordre de lecture'