In [1]:
!pip install rdflib

from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import RDF, XSD

import csv
import re
from urllib.parse import quote

Collecting rdflib
  Downloading rdflib-7.1.1-py3-none-any.whl.metadata (11 kB)
Collecting isodate<1.0.0,>=0.7.2 (from rdflib)
  Downloading isodate-0.7.2-py3-none-any.whl.metadata (11 kB)
Downloading rdflib-7.1.1-py3-none-any.whl (562 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m562.4/562.4 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading isodate-0.7.2-py3-none-any.whl (22 kB)
Installing collected packages: isodate, rdflib
Successfully installed isodate-0.7.2 rdflib-7.1.1


In [20]:
path_csv = '/content/300356-0-monumentos-ciudad-madrid.csv'
path_ttl = '/content/monuments.ttl'

# Crea grafo a partir del fichero con la estructura de la ontología
g = Graph()
g.parse(path_ttl, format='turtle')

print(f"Tripletas cargadas: {len(g)}")

# Espacio de nombres
NS = Namespace("http://www.semanticweb.org/vic/ontologies/2024/10/monuments/")

# Mapeo de subtipos de monumentos
subtype_mapping = {
    "Escultura conceptual o abstracta": NS.AbstractSculpture,
    "Grupo Escultórico": NS.SculpturalGroup,
    "Elemento conmemorativo, Lápida": NS.MemorialElement,
    "Estatua": NS.Statue,
    "Puente, construcción civil": NS.CivilConstruction,
    "Fuente, Estanque, Lámina de agua": NS.Fountain,
    "Elemento de ornamentación": NS.Ornamentation,
    "Edificación singular": NS.UniqueBuilding,
    "Puerta, Arco triunfal": NS.Gate
}

# Carga de distritos
def load_districts(csv_reader):
    districts = set()
    for row in csv_reader:
        district_name = row['DISTRITO'].strip() if row['DISTRITO'] else None
        if district_name and district_name not in districts:
            district_uri = URIRef(NS[f"District_{district_name.replace(' ', '_')}"])
            g.add((district_uri, RDF.type, NS.District))
            g.add((district_uri, NS.name, Literal(district_name, datatype=XSD.string)))
            g.add((city_uri, NS.hasDistrict, district_uri))
            g.add((district_uri, NS.isDistrictOf, city_uri))
            districts.add(district_name)

# Carga de barrios
def load_neighborhoods(csv_reader):
    neighborhoods = set()
    for row in csv_reader:
        district_name = row['DISTRITO'].strip() if row['DISTRITO'] else None
        neighborhood_name = row['BARRIO'].strip() if row['BARRIO'] else None

        if neighborhood_name and district_name:
            district_uri = URIRef(NS[f"District_{district_name.replace(' ', '_')}"])
            neighborhood_normalized = neighborhood_name.replace(' ', '_')
            if neighborhood_name not in neighborhoods:
                neighborhood_uri = URIRef(NS[f"Neighborhood_{neighborhood_normalized}"])
                g.add((neighborhood_uri, RDF.type, NS.Neighborhood))
                g.add((neighborhood_uri, NS.name, Literal(neighborhood_name, datatype=XSD.string)))
                g.add((district_uri, NS.hasNeighborhood, neighborhood_uri))
                g.add((neighborhood_uri, NS.isNeighborhoodOf, district_uri))
                neighborhoods.add(neighborhood_name)

# Carga de monumentos
def load_monuments(csv_reader):
    authors_set = set()
    for row in csv_reader:
        # Determina clase de monumento
        monument_class = subtype_mapping.get(row['SUBTIPO'], NS.Monument)
        monument_uri = URIRef(NS[f"{monument_class.split('/')[-1]}_{row['PK']}"])
        g.add((monument_uri, RDF.type, monument_class))

        # Añade propiedades
        g.add((monument_uri, NS.PK, Literal(row['PK'], datatype=XSD.string)))
        g.add((monument_uri, NS.name, Literal(row['NOMBRE'], datatype=XSD.string)))
        g.add((monument_uri, NS.description, Literal(row['DESCRIPCION'], datatype=XSD.string)))
        g.add((monument_uri, NS.date, Literal(row['FECHA'], datatype=XSD.string)))
        if row['CODIGO-POSTAL']:
          g.add((monument_uri, NS.postalCode, Literal(row['CODIGO-POSTAL'], datatype=XSD.string)))

        address_parts = [
            row['CLASE-VIAL'].strip() if row['CLASE-VIAL'] else "",
            row['NOMBRE-VIA'].strip() if row['NOMBRE-VIA'] else "",
            row['NUM'].strip() if row['NUM'] else ""
        ]
        address = " ".join(part for part in address_parts if part).strip()
        if address != "":
            g.add((monument_uri, NS.address, Literal(address, datatype=XSD.string)))

        # Relación con la ciudad
        g.add((monument_uri, NS.isLocatedInCity, city_uri))
        g.add((city_uri, NS.cityHasMonument, monument_uri))

        # Relación con barrios y distritos
        district_name = row['DISTRITO'].strip() if row['DISTRITO'] else None
        neighborhood_name = row['BARRIO'].strip() if row['BARRIO'] else None

        if district_name:
            district_uri = URIRef(NS[f"District_{district_name.replace(' ', '_')}"])
            g.add((monument_uri, NS.isLocatedInDistrict, district_uri))
            g.add((district_uri, NS.districtHasMonument, monument_uri))

        if neighborhood_name:
            neighborhood_uri = URIRef(NS[f"Neighborhood_{neighborhood_name.replace(' ', '_')}"])
            g.add((monument_uri, NS.isLocatedInNeighborhood, neighborhood_uri))
            g.add((neighborhood_uri, NS.neighborhoodHasMonument, monument_uri))

        # Carga de autores y su relación con el monumento
        if row['AUTORES']:
          authors = row['AUTORES'].split(" - ")
          for author in authors:
            author_clean = author.strip().replace(' ', '_')
            if author_clean not in authors_set:
              author_uri = URIRef(NS[f"Author_{author_clean}"])
              g.add((author_uri, RDF.type, NS.Author))
              g.add((author_uri, NS.name, Literal(author.strip(), datatype=XSD.string)))
              authors_set.add(author_clean)

            g.add((author_uri, NS.isAuthorOf, monument_uri))
            g.add((monument_uri, NS.hasAuthor, author_uri))

        # Carga de geometría y su relación con el monumento
        geometry_uri = URIRef(NS[f"Geometry_{row['PK']}"])
        g.add((geometry_uri, RDF.type, NS.Geometry))
        g.add((geometry_uri, NS.geo_asWKT, Literal(f"POINT({row['LONGITUD']} {row['LATITUD']})", datatype=XSD.string)))
        g.add((monument_uri, NS.hasGeometry, geometry_uri))

    print(f"Direcciones: {addresses}")

# Crea ciudad y división administrativa
city_uri = URIRef(NS["City_Madrid"])
admin_div_uri = URIRef(NS["AdministrativeDivision_Madrid"])
g.add((city_uri, RDF.type, NS.City))
g.add((city_uri, NS.name, Literal("Madrid", datatype=XSD.string)))
g.add((admin_div_uri, RDF.type, NS.AdministrativeDivision))
g.add((admin_div_uri, NS.name, Literal("Madrid", datatype=XSD.string)))
g.add((admin_div_uri, NS.hasCity, city_uri))
g.add((city_uri, NS.isCityOf, admin_div_uri))

# Ejecutar las funciones
with open(path_csv, 'r', encoding='latin-1') as csvfile:
    reader = csv.DictReader(csvfile, delimiter=';')
    load_districts(reader)

with open(path_csv, 'r', encoding='latin-1') as csvfile:
    reader = csv.DictReader(csvfile, delimiter=';')
    load_neighborhoods(reader)

with open(path_csv, 'r', encoding='latin-1') as csvfile:
    reader = csv.DictReader(csvfile, delimiter=';')
    load_monuments(reader)

# Guardar el resultado
print(f"Tripletas después de la importación: {len(g)}")
g.serialize(destination="/content/populated_monuments.ttl", format="turtle")


Tripletas cargadas: 177
Direcciones: 0
Tripletas después de la importación: 31647


<Graph identifier=Na688d71d170042348766e3748b3f7f19 (<class 'rdflib.graph.Graph'>)>