In [3]:
import pandas as pd
from rdflib import Graph, Literal, RDF, Namespace, BNode
from rdflib.namespace import FOAF, XSD

# Load CSV data
df = pd.read_csv('../Data/Datos_CLOFBO-Plata.tsv', sep='\t', encoding='ISO-8859-1')

# Define Namespaces
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
DC = Namespace("https://www.dublincore.org/specifications/dublin-core/dcmi-terms/")
DWC = Namespace("http://rs.tdwg.org/dwc/terms/")

# Create an RDF graph
g = Graph()

# Bind namespaces to the graph
g.bind("foaf", FOAF)
g.bind("dc", DC)
g.bind("dwc", DWC)

# Add data from CSV to RDF graph
for index, row in df.iterrows():
    agent = BNode()
    g.add((agent, RDF.type, FOAF.Agent))
    if pd.notna(row['dc_type']):
        g.add((agent, DC.dc_type, Literal(row['dc_type'], datatype=XSD.string)))
    if pd.notna(row['dc_modified']):
        g.add((agent, DC.dc_modified, Literal(row['dc_modified'], datatype=XSD.string)))
    if pd.notna(row['dc_bibliographicCitation']):
        g.add((agent, DC.dc_bibliographicCitation, Literal(row['dc_bibliographicCitation'], datatype=XSD.string)))
    if pd.notna(row['dwc_basisOfRecord']):
        g.add((agent, DWC.dwc_basisOfRecord, Literal(row['dwc_basisOfRecord'], datatype=XSD.string)))
    if pd.notna(row['dwc_catalogNumber']):
        g.add((agent, DWC.dwc_catalogNumber, Literal(row['dwc_catalogNumber'], datatype=XSD.string)))
    if pd.notna(row['dwc_recordNumber']):
        g.add((agent, DWC.dwc_recordNumber, Literal(row['dwc_recordNumber'], datatype=XSD.string)))
    if pd.notna(row['dwc_recordedBy']):
        g.add((agent, DWC.dwc_recordedBy, Literal(row['dwc_recordedBy'], datatype=XSD.string)))
    if pd.notna(row['dwc_individualCount']):
        g.add((agent, DWC.dwc_individualCount, Literal(row['dwc_individualCount'], datatype=XSD.string)))
    if pd.notna(row['dwc_sex']):
        g.add((agent, DWC.dwc_sex, Literal(row['dwc_sex'], datatype=XSD.string)))
    if pd.notna(row['dwc_occurrenceStatus']):
        g.add((agent, DWC.dwc_occurrenceStatus, Literal(row['dwc_occurrenceStatus'], datatype=XSD.string)))
    if pd.notna(row['dwc_eventDate']):
        g.add((agent, DWC.dwc_eventDate, Literal(row['dwc_eventDate'], datatype=XSD.string)))
    if pd.notna(row['dwc_verbatimEventDate']):
        g.add((agent, DWC.dwc_verbatimEventDate, Literal(row['dwc_verbatimEventDate'], datatype=XSD.string)))
    if pd.notna(row['dwc_waterBody']):
        g.add((agent, DWC.dwc_waterBody, Literal(row['dwc_waterBody'], datatype=XSD.string)))
    if pd.notna(row['dwc_country']):
        g.add((agent, DWC.dwc_country, Literal(row['dwc_country'], datatype=XSD.string)))
    if pd.notna(row['dwc_countryCode']):
        g.add((agent, DWC.dwc_countryCode, Literal(row['dwc_countryCode'], datatype=XSD.string)))
    if pd.notna(row['dwc_county']):
        g.add((agent, DWC.dwc_county, Literal(row['dwc_county'], datatype=XSD.string)))
    if pd.notna(row['dwc_municipality']):
        g.add((agent, DWC.dwc_municipality, Literal(row['dwc_municipality'], datatype=XSD.string)))
    if pd.notna(row['dwc_locality']):
        g.add((agent, DWC.dwc_locality, Literal(row['dwc_locality'], datatype=XSD.string)))
    if pd.notna(row['dwc_verbatimLocality']):
        g.add((agent, DWC.dwc_verbatimLocality, Literal(row['dwc_verbatimLocality'], datatype=XSD.string)))
    if pd.notna(row['dwc_verbatimElevation']):
        g.add((agent, DWC.dwc_verbatimElevation, Literal(row['dwc_verbatimElevation'], datatype=XSD.string)))
    if pd.notna(row['dwc_verbatimCoordinateSystem']):
        g.add((agent, DWC.dwc_verbatimCoordinateSystem, Literal(row['dwc_verbatimCoordinateSystem'], datatype=XSD.string)))
    if pd.notna(row['dwc_verbatimCoordinates']):
        g.add((agent, DWC.dwc_verbatimCoordinates, Literal(row['dwc_verbatimCoordinates'], datatype=XSD.string)))
    if pd.notna(row['dwc_verbatimLatitude']):
        g.add((agent, DWC.dwc_verbatimLatitude, Literal(row['dwc_verbatimLatitude'], datatype=XSD.string)))
    if pd.notna(row['dwc_verbatimLongitude']):
        g.add((agent, DWC.dwc_verbatimLongitude, Literal(row['dwc_verbatimLongitude'], datatype=XSD.string)))
    if pd.notna(row['dwc_family']):
        g.add((agent, DWC.dwc_family, Literal(row['dwc_family'], datatype=XSD.string)))
    if pd.notna(row['dwc_scientificName']):
        g.add((agent, DWC.dwc_scientificName, Literal(row['dwc_scientificName'], datatype=XSD.string)))
    if pd.notna(row['dwc_originalNameUsage']):
        g.add((agent, DWC.dwc_originalNameUsage, Literal(row['dwc_originalNameUsage'], datatype=XSD.string)))
    if pd.notna(row['dwc_verbatimIdentification']):
        g.add((agent, DWC.dwc_verbatimIdentification, Literal(row['dwc_verbatimIdentification'], datatype=XSD.string)))
    if pd.notna(row['dwc_identifiedBy']):
        g.add((agent, DWC.dwc_identifiedBy, Literal(row['dwc_identifiedBy'], datatype=XSD.string)))
    if pd.notna(row['dwc_typeStatus']):
        g.add((agent, DWC.dwc_typeStatus, Literal(row['dwc_typeStatus'], datatype=XSD.string)))
    if pd.notna(row['dwc_MeasurementOrFact']):
        g.add((agent, DWC.dwc_MeasurementOrFact, Literal(row['dwc_MeasurementOrFact'], datatype=XSD.string)))
    if pd.notna(row['dwc_measurementValue']):
        g.add((agent, DWC.dwc_measurementValue, Literal(row['dwc_measurementValue'], datatype=XSD.string)))
    if pd.notna(row['dwc_measurementUnit']):
        g.add((agent, DWC.dwc_measurementUnit, Literal(row['dwc_measurementUnit'], datatype=XSD.string)))

# Prepare JSON-LD with explicit context
context = {
    "foaf": "http://xmlns.com/foaf/0.1/",
    "dwc": "http://rs.tdwg.org/dwc/terms/",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "dc": "https://www.dublincore.org/specifications/dublin-core/dcmi-terms/"
}    
    
# Serialize graph to JSON-LD
jsonld_data = g.serialize(format='json-ld', indent=4, context=context)

# Write JSON-LD to file
with open("../Data/data.jsonld", "w", encoding='ISO-8859-1') as f:
    f.write(jsonld_data)

# Open JSON-LD with the right encoding
with open("../Data/data.jsonld", "r", encoding='ISO-8859-1') as f:
    jsonld_data = f.read()

# Load JSON-LD file into a new graph
g_jsonld = Graph()
g_jsonld.parse(data=jsonld_data, format="json-ld")

# SPARQL Query with the correct prefixes
sparql_query = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    PREFIX dwc: <http://rs.tdwg.org/dwc/terms/>
    PREFIX dc: <https://www.dublincore.org/specifications/dublin-core/dcmi-terms/>

    SELECT ?dwc_scientificName ?dwc_family ?dwc_verbatimIdentification ?dc_type ?dwc_catalogNumber ?dwc_occurrenceStatus WHERE {
        ?agent a foaf:Agent ;
               dwc:dwc_scientificName ?dwc_scientificName ;
               dwc:dwc_family ?dwc_family ;
               dc:dc_type ?dc_type ; 
               dwc:dwc_occurrenceStatus ?dwc_occurrenceStatus .
               
    OPTIONAL { ?agent a foaf:Agent ;
                dwc:dwc_catalogNumber ?dwc_catalogNumber ;
                dwc:dwc_verbatimIdentification ?dwc_verbatimIdentification . }
    }
"""

# Execute SPARQL query
results = g_jsonld.query(sparql_query)

# Print results
for row in results:
    print(f"{row['dwc_scientificName']}")


Bario sanctaefilomenae (Steindachner 1907)
Psalidodon rutilus (Jenyns 1842)
Piaractus brachypomus (Cuvier 1818)
Pseudobunocephalus iheringii (Boulenger 1891)
Cichlasoma portalegrense (Hensel 1870)
Serrasalmus Lacepède 1803
Acrobrycon tarijae Fowler 1940
Astyanax bimaculatus (Linnaeus 1758)
Bunocephalus Kner 1955
Gymnogeophagus balzanii (Pearson 1891)
Bryconamericus exodon Eigenmann 1907
Curimata Bosc 1817
Cichlidae
Metynnis otuquensis Ahl 1924
Pimelodella gracilis (Valenciennes 1835)
Farlowella knerii (Steindachner 1882)
Bryconamericus Eigenmann 1907
Anadoras weddellii (Castelnau 1855)
Saxatilia lepidota (Heckel 1840)
Trachelyopterus striatulus (Steindachner 1877)
Rhinodoras dorbignyi (Kner 1855)
Aphyocharax nattereri (Steindachner 1882)
Pseudoplatystoma fasciatum (Linnaeus 1766)
Rineloricaria catamarcensis (Berg 1895)
Pimelodella mucosa Eigenmann & Ward 1907
Spatuloricaria evansii (Boulenger 1892)
Entomocorus benjamini Eigenmann 1917
Poptella paraguayensis (Eigenmann 1907)
Ctenobrycon

In [4]:
unique_scientific_names = {str(row['dwc_scientificName']) for row in results}
for species_name in unique_scientific_names:
    positive_list = [
        row for row in results 
        if "present" in str(row['dwc_occurrenceStatus'])
    ]
positive_scientific_names = {str(row['dwc_scientificName']) for row in positive_list}
    
for species_name in unique_scientific_names:
    negative_list = [
        row for row in results 
        if "absent" in str(row['dwc_occurrenceStatus'])
    ]
negative_scientific_names = {str(row['dwc_scientificName']) for row in negative_list}

# Collect unique scientific names in a set
unique_family_names = {str(row['dwc_family']) for row in results}

# Sort the unique names alphabetically
sorted_family_names = sorted(unique_family_names)

# Print each scientific name
for name in sorted_family_names:
    print(name)
    filtered_results = [
        row for row in results 
        if name in str(row['dwc_family'])
    ]
    unique_scientific_names = {str(row['dwc_scientificName']) for row in filtered_results}
    sorted_scientific_names = sorted(unique_scientific_names - negative_scientific_names)
    for species_name in sorted_scientific_names:
        filtered_results_2 = [
            row for row in filtered_results 
            if species_name in str(row['dwc_scientificName'])
        ]
        #print(filtered_results_2)
        type_set = {str(row['dc_type']) for row in filtered_results_2}
        string = "Text"
        type_x = ""
        if string in type_set:
            type_x = " listing"
        string = "PhysicalObject"
        type_y = ""
        if string in type_set:
            type_y = " observation"
        string = "StillImage"
        type_z = ""
        if string in type_set:
            type_z = " illustration"
        catalogNumber_set = {str(row['dwc_catalogNumber']) for row in filtered_results_2}
        #print(catalogNumber_set)
        catalog = ""
        string = "None"
        if catalogNumber_set != {string}:
            catalog = " specimen"
        else:
            catalog = ""
        print("\t" + species_name + ":" + type_x + type_y + type_z + catalog)
        '''
        unique_verbatim_names = {str(row['dwc_verbatimIdentification']) for row in filtered_results_2}
        sorted_verbatim_names = sorted(unique_verbatim_names)
        for verbatim_name in sorted_verbatim_names:
            print("\t" + "\t" + verbatim_name)
        '''

Acestrorhamphidae
	Astyanacinus moorii (Boulenger 1892): listing observation
	Astyanax Baird & Girard 1854: listing observation
	Astyanax abramis (Jenyns 1842): listing observation
	Astyanax lacustris (Lütken 1875): observation specimen
	Astyanax lineatus (Perugia 1891): listing observation
	Ctenobrycon kennedyi (Eigenmann 1903): listing observation specimen
	Gymnocorymbus ternetzi (Boulenger 1895): listing observation
	Hemigrammus Gill 1858: listing
	Hemigrammus lunatus Durbin 1918: listing observation
	Hemigrammus ulreyi (Boulenger 1895): listing observation specimen
	Hyphessobrycon Durbin 1908: listing
	Megalamphodus eques (Steindachner 1882): listing observation
	Moenkhausia Eigenmann 1903: listing
	Moenkhausia dichroura (Kner 1858): listing observation illustration specimen
	Oligosarcus bolivianus (Fowler 1940): listing observation
	Poptella paraguayensis (Eigenmann 1907): listing observation illustration
	Psalidodon alleni (Eigenmann & McAtee 1907): listing observation specimen
	

	Bryconops Kner 1858: observation illustration
	Bryconops melanurus (Bloch 1974): listing
	Piabucus melanostoma Holmberg 1891: listing observation
Lebiasinidae
	Lebiasinidae: listing observation
	Pyrrhulina australis Eigenmann & Kennedy 1903: listing observation
Lepidosirenidae
	Lepidosiren paradoxa Fitzinger 1837: listing
Loricariidae
	Ancistrus Kner 1854: listing observation
	Ancistrus bufonius (Valenciennes 1840): listing
	Ancistrus cirrhosus (Valenciennes 1836): listing
	Farlowella knerii (Steindachner 1882): listing
	Farlowella nattereri Steindachner 1910: listing
	Glyptoperichthys Weber 1991: listing
	Hypoptopoma Günther 1868: listing observation
	Hypoptopoma inexspectacum (Holmberg 1893): listing observation
	Hypostomus Lacepède 1803: listing
	Hypostomus borellii (Boulenger 1897): listing observation illustration
	Hypostomus cochliodon Kner 1854: listing observation
	Hypostomus punctatus Valenciennes 1840: listing
	Hypostomus robinii Valenciennes 1840: observation
	Loricaria Lin

In [5]:
unique_scientific_names = {str(row['dwc_scientificName']) for row in results}
for species_name in unique_scientific_names:
    positive_list = [
        row for row in results 
        if "present" in str(row['dwc_occurrenceStatus'])
    ]
positive_scientific_names = {str(row['dwc_scientificName']) for row in positive_list}
    
for species_name in unique_scientific_names:
    negative_list = [
        row for row in results 
        if "absent" in str(row['dwc_occurrenceStatus'])
    ]
negative_scientific_names = {str(row['dwc_scientificName']) for row in negative_list}

# Collect unique scientific names in a set
unique_family_names = {str(row['dwc_family']) for row in results}

# Sort the unique names alphabetically
sorted_family_names = sorted(unique_family_names)

# Print each scientific name
for name in sorted_family_names:
    filtered_results = [
        row for row in results 
        if name in str(row['dwc_family'])
    ]
    unique_scientific_names = {str(row['dwc_scientificName']) for row in filtered_results}
    sorted_scientific_names = sorted(unique_scientific_names.intersection(negative_scientific_names))
    if not sorted_scientific_names:
        pass
    else:
        print(name)
        for species_name in sorted_scientific_names:
            filtered_results_2 = [
                row for row in filtered_results 
                if species_name in str(row['dwc_scientificName'])
            ]
            #print(filtered_results_2)
            type_set = {str(row['dc_type']) for row in filtered_results_2}
            string = "Text"
            type_x = ""
            if string in type_set:
                type_x = " listing"
            string = "PhysicalObject"
            type_y = ""
            if string in type_set:
                type_y = " observation"
            string = "StillImage"
            type_z = ""
            if string in type_set:
                type_z = " illustration"
            catalogNumber_set = {str(row['dwc_catalogNumber']) for row in filtered_results_2}
            #print(catalogNumber_set)
            catalog = ""
            string = "None"
            if catalogNumber_set != {string}:
                catalog = " specimen"
            else:
                catalog = ""
            print("\t" + species_name + ":" + type_x + type_y + type_z + catalog)
            '''
            unique_verbatim_names = {str(row['dwc_verbatimIdentification']) for row in filtered_results_2}
            sorted_verbatim_names = sorted(unique_verbatim_names)
            for verbatim_name in sorted_verbatim_names:
                print("\t" + "\t" + verbatim_name)
            '''

Acestrorhamphidae
	Astyanax bimaculatus (Linnaeus 1758): listing observation
	Bario sanctaefilomenae (Steindachner 1907): listing observation illustration specimen
	Moenkhausia intermedia Eigenmann 1908: listing
	Psalidodon fasciatus (Cuvier 1819): listing
	Psalidodon rutilus (Jenyns 1842): listing observation
Acestrorhynchidae
	Acestrorhynchus altus Menezes 1969: listing
Anostomidae
	Leporinus fasciatus (Bloch, 1794): listing
	Megaleporinus trifasciatus (Steindachner 1876): listing
Auchenipteridae
	Auchenipterus nuchalis (Spix & Agassiz 1829): listing observation
	Trachycorystes Bleeker 1858: listing
Characidae
	Charax gibbosus (Linnaeus 1758): listing
	Galeocharax gulo (Cope, 1870): listing
	Serrapinnus piaba (Lütken 1875): listing observation specimen
Cichlidae
	Acaronia Myers 1940: listing
	Apistogramma taeniata (Günther 1862): listing
	Cichlasoma boliviense Kullander 1983: listing observation
	Cichlasoma portalegrense (Hensel 1870): listing observation
	Crenicichla lacustris (Cast

In [6]:
print(negative_scientific_names)

{'Acaronia Myers 1940', 'Galeocharax gulo (Cope, 1870)', 'Trachycorystes Bleeker 1858', 'Acestrorhynchus altus Menezes 1969', 'Crenicichla lacustris (Castelnau 1855)', 'Psalidodon rutilus (Jenyns 1842)', 'Myloplus rubripinnis Müller & Troschel 1844', 'Geophagus Heckel 1840', 'Cichlasoma boliviense Kullander 1983', 'Prochilodus nigricans Agassiz 1829', 'Auchenipterus nuchalis (Spix & Agassiz 1829)', 'Moenkhausia intermedia Eigenmann 1908', 'Steindachnerina binotata (Pearson 1924)', 'Cichlasoma portalegrense (Hensel 1870)', 'Astyanax bimaculatus (Linnaeus 1758)', 'Megaleporinus trifasciatus (Steindachner 1876)', 'Charax gibbosus (Linnaeus 1758)', 'Psalidodon fasciatus (Cuvier 1819)', 'Pyrrhulina brevis Steindachner 1876', 'Apistogramma taeniata (Günther 1862)', 'Parodon suborbitalis Valenciennes 1850', 'Serrapinnus piaba (Lütken 1875)', 'Steindachnerina dobula (Günther 1868)', 'Bario sanctaefilomenae (Steindachner 1907)', 'Leporinus fasciatus (Bloch, 1794)'}


In [143]:
print(positive_scientific_names.intersection_update({'Oligosarcus schindleri'}))

None


In [144]:
print(positive_scientific_names)

set()


In [153]:
unique_scientific_names = {str(row['dwc_scientificName']) for row in results}
for species_name in unique_scientific_names:
    positive_list = [
        row for row in results 
        if "present" in str(row['dwc_occurrenceStatus'])
    ]
positive_scientific_names = {str(row['dwc_scientificName']) for row in positive_list}
    
for species_name in unique_scientific_names:
    negative_list = [
        row for row in results 
        if "absent" in str(row['dwc_occurrenceStatus'])
    ]
negative_scientific_names = {str(row['dwc_scientificName']) for row in negative_list}

# Collect unique scientific names in a set
unique_family_names = {str(row['dwc_family']) for row in results}

# Sort the unique names alphabetically
sorted_family_names = sorted(unique_family_names)

# Print each scientific name
for name in sorted_family_names:
    print(name)
    filtered_results = [
        row for row in results 
        if name in str(row['dwc_family'])
    ]
    unique_scientific_names = {str(row['dwc_scientificName']) for row in filtered_results}
    sorted_scientific_names = sorted(unique_scientific_names.intersection(negative_scientific_names))
    print(sorted_scientific_names)

Acestrorhamphidae
['Astyanax bimaculatus (Linnaeus 1758)', 'Bario sanctaefilomenae (Steindachner 1907)', 'Moenkhausia intermedia Eigenmann 1908', 'Psalidodon fasciatus (Cuvier 1819)', 'Psalidodon rutilus (Jenyns 1842)']
Acestrorhynchidae
['Acestrorhynchus altus Menezes 1969']
Achiridae
[]
Anablepidae
[]
Anostomidae
['Leporinus fasciatus (Bloch, 1794)', 'Megaleporinus trifasciatus (Steindachner 1876)']
Aspredinidae
[]
Auchenipteridae
[]
Belonidae
[]
Bryconidae
[]
Callichthyidae
[]
Cetopsidae
[]
Characidae
['Charax gibbosus (Linnaeus 1758)']
Cichlidae
[]
Crenuchidae
[]
Curimatidae
[]
Cynodontidae
[]
Doradidae
[]
Engraulidae
[]
Erythrinidae
[]
Gasteropelecidae
[]
Gymnotidae
[]
Hemiodontidae
[]
Heptapteridae
[]
Hypopomidae
[]
Iguanodectidae
[]
Lebiasinidae
[]
Lepidosirenidae
[]
Loricariidae
[]
Parodontidae
[]
Pimelodidae
[]
Poeciliidae
[]
Potamotrygonidae
[]
Pristigasteridae
[]
Prochilodontidae
[]
Pseudopimelodidae
[]
Rhamphichthyidae
[]
Rivulidae
[]
Sciaenidae
[]
Serrasalmidae
[]
Sternopy