# In [9]:
import pandas as pd
from rdflib import Graph, Literal, RDF, Namespace, BNode
from rdflib.namespace import FOAF, XSD

# Read the tab-separated source table; the file is decoded as ISO-8859-1.
df = pd.read_csv(
    '../Data/Datos_CLOFBO-Plata.tsv',
    sep='\t',
    encoding='ISO-8859-1',
)

# Vocabularies used for the predicates and types added to the graph.
# NOTE(review): the DCMI Metadata Terms namespace is normally
# http://purl.org/dc/terms/ ; the URL below looks like the specification
# page instead. It is kept as-is because the JSON-LD context and the SPARQL
# prefixes in this file repeat the same value -- confirm before changing.
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
DC = Namespace("https://www.dublincore.org/specifications/dublin-core/dcmi-terms/")
DWC = Namespace("http://rs.tdwg.org/dwc/terms/")

# Empty graph that the rows of ``df`` are loaded into.
g = Graph()

# Register the prefixes on the graph.
for _prefix, _namespace in (("foaf", FOAF), ("dc", DC), ("dwc", DWC)):
    g.bind(_prefix, _namespace)

# Add data from CSV to RDF graph.
#
# COLUMN_PREDICATES maps each column read from ``df`` to the predicate its
# value is stored under. Keeping the mapping in one table replaces one
# hand-written ``if`` per column, so a column cannot silently drift from
# its predicate.
COLUMN_PREDICATES = {
    'dc_type': DC.type,
    'dc_modified': DC.modified,
    'dc_bibliographicCitation': DC.bibliographicCitation,
    'dwc_basisOfRecord': DWC.basisOfRecord,
    'dwc_recordNumber': DWC.recordNumber,
    'dwc_recordedBy': DWC.recordedBy,
    'dwc_individualCount': DWC.individualCount,
    'dwc_sex': DWC.sex,
    'dwc_occurrenceStatus': DWC.occurrenceStatus,
    'dwc_eventDate': DWC.eventDate,
    'dwc_verbatimEventDate': DWC.verbatimEventDate,
    'dwc_waterBody': DWC.waterBody,
    'dwc_country': DWC.country,
    'dwc_countryCode': DWC.countryCode,
    'dwc_county': DWC.county,
    'dwc_municipality': DWC.municipality,
    'dwc_locality': DWC.locality,
    'dwc_verbatimLocality': DWC.verbatimLocality,
    'dwc_verbatimElevation': DWC.verbatimElevation,
    'dwc_verbatimCoordinateSystem': DWC.verbatimCoordinateSystem,
    'dwc_verbatimCoordinates': DWC.verbatimCoordinates,
    'dwc_verbatimLatitude': DWC.verbatimLatitude,
    'dwc_verbatimLongitude': DWC.verbatimLongitude,
    'dwc_family': DWC.family,
    # The column keeps the spelling this script has always read it under,
    # but the triple uses ``scientificName``: that is the predicate the
    # SPARQL query later in this file selects on, so the previous
    # ``scienficName`` predicate could never be matched by it.
    'dwc_scienficName': DWC.scientificName,
    'dwc_originalNameUsage': DWC.originalNameUsage,
    'dwc_verbatimIdentification': DWC.verbatimIdentification,
    'dwc_identifiedBy': DWC.identifiedBy,
    'dwc_typeStatus': DWC.typeStatus,
    'dwc_MeasurementOrFact': DWC.MeasurementOrFact,
    'dwc_measurementValue': DWC.measurementValue,
    'dwc_measurementUnit': DWC.measurementUnit,
}

for _, row in df.iterrows():
    # One blank node per input row, typed as foaf:Agent.
    agent = BNode()
    g.add((agent, RDF.type, FOAF.Agent))
    for column, predicate in COLUMN_PREDICATES.items():
        # A column missing from ``df`` raises KeyError here, so a header
        # mismatch is reported instead of silently dropping the field.
        value = row[column]
        # Missing cells (NaN/None) produce no triple at all.
        if pd.notna(value):
            g.add((agent, predicate, Literal(value, datatype=XSD.string)))

# Prepare JSON-LD with explicit context: prefix -> namespace IRI, handed to
# the serializer below.
context = {
    "foaf": "http://xmlns.com/foaf/0.1/",
    "dwc": "http://rs.tdwg.org/dwc/terms/",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "dc": "https://www.dublincore.org/specifications/dublin-core/dcmi-terms/",
}

# Serialize graph to JSON-LD
jsonld_data = g.serialize(format='json-ld', indent=4, context=context)

# The same file is written and then read back, so name the path once.
jsonld_path = "../Data/data.jsonld"

# Write JSON-LD to file.
# NOTE(review): JSON is conventionally stored as UTF-8; ISO-8859-1 is kept
# here so the bytes written stay what existing readers of this file expect.
# Confirm whether anything depends on it before switching.
with open(jsonld_path, "w", encoding='ISO-8859-1') as f:
    f.write(jsonld_data)

# Read the file back with the same encoding it was written with.
with open(jsonld_path, "r", encoding='ISO-8859-1') as f:
    jsonld_data = f.read()

# Load the JSON-LD text into a new graph.
g_jsonld = Graph()
g_jsonld.parse(data=jsonld_data, format="json-ld")

# SPARQL query over the reloaded graph. It selects, for every node typed
# foaf:Agent that has all four properties, the verbatim identification,
# scientific name, verbatim locality and family.
# ``foaf:Agent`` must be written with a colon: ``foaf_Agent`` is not a
# prefixed name and makes the query fail to parse.
sparql_query = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    PREFIX dwc: <http://rs.tdwg.org/dwc/terms/>
    PREFIX dc: <https://www.dublincore.org/specifications/dublin-core/dcmi-terms/>

    SELECT ?verbatimIdentification ?scientificName ?verbatimLocality ?family WHERE {
        ?agent a foaf:Agent ;
               dwc:verbatimIdentification ?verbatimIdentification ;
               dwc:scientificName ?scientificName ;
               dwc:verbatimLocality ?verbatimLocality ;
               dwc:family ?family .
    }
"""

# Execute SPARQL query
results = g_jsonld.query(sparql_query)

# Print the scientific name bound in each result row.
for row in results:
    print(row['scientificName'])
