# In [23]:
import pandas as pd
from rdflib import Graph, Literal, RDF, Namespace, BNode
from rdflib.namespace import FOAF, XSD

# Read the tab-separated source table into a DataFrame; the file is decoded
# as ISO-8859-1.
df = pd.read_csv(
    '../Data/Datos_CLOFBO-Plata.tsv',
    encoding='ISO-8859-1',
    sep='\t',
)

# Vocabulary namespaces used to build class and predicate IRIs.
# Note: this assignment rebinds the FOAF name imported from rdflib.namespace.
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
DC = Namespace("https://www.dublincore.org/specifications/dublin-core/dcmi-terms/")
DWC = Namespace("http://rs.tdwg.org/dwc/terms/")

# Empty graph that collects the triples generated from the table.
g = Graph()

# Register the prefixes on the graph so serializers can abbreviate IRIs.
for prefix, namespace in (("foaf", FOAF), ("dc", DC), ("dwc", DWC)):
    g.bind(prefix, namespace)

# Add data from CSV to RDF graph.
#
# Every table row becomes a blank node typed foaf:Agent. Each non-missing
# cell of the columns listed below is attached to that node as an xsd:string
# literal, using a predicate whose local name mirrors the column name.

# Columns whose predicates live in the DC namespace.
_DC_COLUMNS = (
    'dc_type',
    'dc_modified',
    'dc_bibliographicCitation',
)

# Columns whose predicates live in the DWC namespace.
_DWC_COLUMNS = (
    'dwc_basisOfRecord',
    'dwc_recordNumber',
    'dwc_recordedBy',
    'dwc_individualCount',
    'dwc_sex',
    'dwc_occurrenceStatus',
    'dwc_eventDate',
    'dwc_verbatimEventDate',
    'dwc_waterBody',
    'dwc_country',
    'dwc_countryCode',
    'dwc_county',
    'dwc_municipality',
    'dwc_locality',
    'dwc_verbatimLocality',
    'dwc_verbatimElevation',
    'dwc_verbatimCoordinateSystem',
    'dwc_verbatimCoordinates',
    'dwc_verbatimLatitude',
    'dwc_verbatimLongitude',
    'dwc_family',
    'dwc_scienficName',
    'dwc_originalNameUsage',
    'dwc_verbatimIdentification',
    'dwc_identifiedBy',
    'dwc_typeStatus',
    'dwc_MeasurementOrFact',
    'dwc_measurementValue',
    'dwc_measurementUnit',
)

# The scientific-name column is looked up under the key 'dwc_scienficName'
# (sic); that key is left untouched so the lookup keeps matching the table.
# Its predicate, however, is emitted with the correct spelling, otherwise a
# query for dwc:dwc_scientificName can never match any triple.
_PREDICATE_NAME_FIXES = {'dwc_scienficName': 'dwc_scientificName'}

# (column key, predicate IRI) pairs, resolved once instead of once per row.
# getattr() on a namespace is the same attribute access as DWC.<name>.
# NOTE(review): the local names keep the 'dc_'/'dwc_' column prefix, so these
# IRIs differ from the published term IRIs (e.g. .../dwc/terms/basisOfRecord)
# - confirm whether that is intended.
_COLUMN_PREDICATES = [
    (column, getattr(namespace, _PREDICATE_NAME_FIXES.get(column, column)))
    for namespace, columns in ((DC, _DC_COLUMNS), (DWC, _DWC_COLUMNS))
    for column in columns
]

for _, row in df.iterrows():
    # One anonymous node per table row.
    agent = BNode()
    g.add((agent, RDF.type, FOAF.Agent))
    for column, predicate in _COLUMN_PREDICATES:
        value = row[column]
        # Skip missing cells so no triple is emitted for NaN/None values.
        if pd.notna(value):
            g.add((agent, predicate, Literal(value, datatype=XSD.string)))

# Prefix-to-IRI mapping handed to the JSON-LD serializer as the explicit
# context of the output document.
context = {
    "foaf": "http://xmlns.com/foaf/0.1/",
    "dwc": "http://rs.tdwg.org/dwc/terms/",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "dc": "https://www.dublincore.org/specifications/dublin-core/dcmi-terms/"
}

# Serialize graph to JSON-LD
jsonld_data = g.serialize(format='json-ld', indent=4, context=context)

# Write JSON-LD to file.
# JSON documents are exchanged as UTF-8 (RFC 8259); writing ISO-8859-1 would
# produce a file that standard JSON-LD consumers cannot decode as soon as the
# data contains a non-ASCII character.
with open("../Data/data.jsonld", "w", encoding='utf-8') as f:
    f.write(jsonld_data)

# Read the file back with the same encoding it was written in.
with open("../Data/data.jsonld", "r", encoding='utf-8') as f:
    jsonld_data = f.read()

# Load JSON-LD file into a new graph
g_jsonld = Graph()
g_jsonld.parse(data=jsonld_data, format="json-ld")

# SPARQL query over the graph reloaded from JSON-LD.
#
# The identification and scientific-name patterns are mandatory: a row is
# returned only for agents that carry both predicates with exactly these
# IRIs. Locality and family are bound through OPTIONAL patterns so that the
# selected ?dwc_verbatimLocality / ?dwc_family variables actually receive
# values when present, without dropping agents that lack them.
sparql_query = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    PREFIX dwc: <http://rs.tdwg.org/dwc/terms/>
    PREFIX dc: <https://www.dublincore.org/specifications/dublin-core/dcmi-terms/>

    SELECT ?dwc_verbatimIdentification ?dwc_scientificName ?dwc_verbatimLocality ?dwc_family WHERE {
        ?agent a foaf:Agent ;
               dwc:dwc_verbatimIdentification ?dwc_verbatimIdentification ;
               dwc:dwc_scientificName ?dwc_scientificName .
        OPTIONAL { ?agent dwc:dwc_verbatimLocality ?dwc_verbatimLocality }
        OPTIONAL { ?agent dwc:dwc_family ?dwc_family }
    }
"""

# Execute SPARQL query
results = g_jsonld.query(sparql_query)

# Dump every result row in full.
for result_row in results:
    print(result_row)

# Print only the scientific name of each result row.
for result_row in results:
    print(result_row['dwc_scientificName'])
