In [1]:
import pandas as pd
from rdflib import Graph, Literal, RDF, Namespace, BNode
from rdflib.namespace import FOAF, XSD

# Load CSV data
df = pd.read_csv('../Data/Datos_CLOFBO-Plata.tsv', sep='\t', encoding='ISO-8859-1')

# Define Namespaces
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
DC = Namespace("https://www.dublincore.org/specifications/dublin-core/dcmi-terms/")
DWC = Namespace("http://rs.tdwg.org/dwc/terms/")

# Create an RDF graph
g = Graph()

# Bind namespaces to the graph
g.bind("foaf", FOAF)
g.bind("dc", DC)
g.bind("dwc", DWC)

# Add data from CSV to RDF graph
for index, row in df.iterrows():
    agent = BNode()
    g.add((agent, RDF.type, FOAF.Agent))
    if pd.notna(row['dc:type']):
        g.add((agent, DC.type, Literal(row['dc:type'], datatype=XSD.string)))
    if pd.notna(row['dc:modified']):
        g.add((agent, DC.modified, Literal(row['dc:modified'], datatype=XSD.string)))
    if pd.notna(row['dc:bibliographicCitation']):
        g.add((agent, DC.bibliographicCitation, Literal(row['dc:bibliographicCitation'], datatype=XSD.string)))
    if pd.notna(row['dwc:basisOfRecord']):
        g.add((agent, DWC.basisOfRecord, Literal(row['dwc:basisOfRecord'], datatype=XSD.string)))
    if pd.notna(row['dwc:recordNumber']):
        g.add((agent, DWC.recordNumber, Literal(row['dwc:recordNumber'], datatype=XSD.string)))
    if pd.notna(row['dwc:recordedBy']):
        g.add((agent, DWC.recordedBy, Literal(row['dwc:recordedBy'], datatype=XSD.string)))
    if pd.notna(row['dwc:individualCount']):
        g.add((agent, DWC.individualCount, Literal(row['dwc:individualCount'], datatype=XSD.string)))
    if pd.notna(row['dwc:sex']):
        g.add((agent, DWC.sex, Literal(row['dwc:sex'], datatype=XSD.string)))
    if pd.notna(row['dwc:occurrenceStatus']):
        g.add((agent, DWC.occurrenceStatus, Literal(row['dwc:occurrenceStatus'], datatype=XSD.string)))
    if pd.notna(row['dwc:eventDate']):
        g.add((agent, DWC.eventDate, Literal(row['dwc:eventDate'], datatype=XSD.string)))
    if pd.notna(row['dwc:verbatimEventDate']):
        g.add((agent, DWC.verbatimEventDate, Literal(row['dwc:verbatimEventDate'], datatype=XSD.string)))
    if pd.notna(row['dwc:waterBody']):
        g.add((agent, DWC.waterBody, Literal(row['dwc:waterBody'], datatype=XSD.string)))
    if pd.notna(row['dwc:country']):
        g.add((agent, DWC.country, Literal(row['dwc:country'], datatype=XSD.string)))
    if pd.notna(row['dwc:countryCode']):
        g.add((agent, DWC.countryCode, Literal(row['dwc:countryCode'], datatype=XSD.string)))
    if pd.notna(row['dwc:county']):
        g.add((agent, DWC.county, Literal(row['dwc:county'], datatype=XSD.string)))
    if pd.notna(row['dwc:municipality']):
        g.add((agent, DWC.municipality, Literal(row['dwc:municipality'], datatype=XSD.string)))
    if pd.notna(row['dwc:locality']):
        g.add((agent, DWC.locality, Literal(row['dwc:locality'], datatype=XSD.string)))
    if pd.notna(row['dwc:verbatimLocality']):
        g.add((agent, DWC.verbatimLocality, Literal(row['dwc:verbatimLocality'], datatype=XSD.string)))
    if pd.notna(row['dwc:verbatimElevation']):
        g.add((agent, DWC.verbatimElevation, Literal(row['dwc:verbatimElevation'], datatype=XSD.string)))
    if pd.notna(row['dwc:verbatimCoordinateSystem']):
        g.add((agent, DWC.verbatimCoordinateSystem, Literal(row['dwc:verbatimCoordinateSystem'], datatype=XSD.string)))
    if pd.notna(row['dwc:verbatimCoordinates']):
        g.add((agent, DWC.verbatimCoordinates, Literal(row['dwc:verbatimCoordinates'], datatype=XSD.string)))
    if pd.notna(row['dwc:verbatimLatitude']):
        g.add((agent, DWC.verbatimLatitude, Literal(row['dwc:verbatimLatitude'], datatype=XSD.string)))
    if pd.notna(row['dwc:verbatimLongitude']):
        g.add((agent, DWC.verbatimLongitude, Literal(row['dwc:verbatimLongitude'], datatype=XSD.string)))
    if pd.notna(row['dwc:family']):
        g.add((agent, DWC.family, Literal(row['dwc:family'], datatype=XSD.string)))
    if pd.notna(row['dwc:scienficName']):
        g.add((agent, DWC.scienficName, Literal(row['dwc:scienficName'], datatype=XSD.string)))
    if pd.notna(row['dwc:originalNameUsage']):
        g.add((agent, DWC.originalNameUsage, Literal(row['dwc:originalNameUsage'], datatype=XSD.string)))
    if pd.notna(row['dwc:verbatimIdentification']):
        g.add((agent, DWC.verbatimIdentification, Literal(row['dwc:verbatimIdentification'], datatype=XSD.string)))
    if pd.notna(row['dwc:identifiedBy']):
        g.add((agent, DWC.identifiedBy, Literal(row['dwc:identifiedBy'], datatype=XSD.string)))
    if pd.notna(row['dwc:typeStatus']):
        g.add((agent, DWC.typeStatus, Literal(row['dwc:typeStatus'], datatype=XSD.string)))
    if pd.notna(row['dwc:MeasurementOrFact']):
        g.add((agent, DWC.MeasurementOrFact, Literal(row['dwc:MeasurementOrFact'], datatype=XSD.string)))
    if pd.notna(row['dwc:measurementValue']):
        g.add((agent, DWC.measurementValue, Literal(row['dwc:measurementValue'], datatype=XSD.string)))
    if pd.notna(row['dwc:measurementUnit']):
        g.add((agent, DWC.measurementUnit, Literal(row['dwc:measurementUnit'], datatype=XSD.string)))

# Prepare JSON-LD with explicit context
context = {
    "foaf": "http://xmlns.com/foaf/0.1/",
    "dwc": "http://rs.tdwg.org/dwc/terms/",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "dc": "https://www.dublincore.org/specifications/dublin-core/dcmi-terms/"
}    
    
# Serialize graph to JSON-LD
jsonld_data = g.serialize(format='json-ld', indent=4, context=context)

# Write JSON-LD to file
with open("../data/data.jsonld", "w", encoding='ISO-8859-1') as f:
    f.write(jsonld_data)

# Open JSON-LD with the right encoding
with open("../data/data.jsonld", "r", encoding='ISO-8859-1') as f:
    jsonld_data = f.read()

# Load JSON-LD file into a new graph
g_jsonld = Graph()
g_jsonld.parse(data=jsonld_data, format="json-ld")

# SPARQL Query with the correct prefixes
sparql_query = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    PREFIX dwc: <http://rs.tdwg.org/dwc/terms/>
    PREFIX dc: <https://www.dublincore.org/specifications/dublin-core/dcmi-terms/>

    SELECT ?verbatimIdentification ?scientificName ?verbatimLocality ?family WHERE {
        ?agent a foaf:Agent ;
               dwc:verbatimIdentification ?verbatimIdentification ;
               dwc:scientificName ?scientificName ;
               dwc:verbatimLocality ?verbatimLocality ;
               dwc:family ?family .
    }
"""

# Execute SPARQL query
results = g_jsonld.query(sparql_query)

# Print results
for row in results:
    print(f"{row['scientificName']}")


KeyError: 'dwc:family'