In [1]:
# Imports and notebook-wide configuration for the validation walkthrough.
import pandas as pd
import numpy as np
# Short alias for numpy's NaN; not referenced by any cell visible in this notebook.
NaN = np.nan
# The validation function this notebook demonstrates.
from pyobistools.validation.check_scientificname_and_ids import check_scientificname_and_ids
import plotly.express as px
# Remove the column-width limit so long cell values (e.g. LSIDs, occurrence IDs) are displayed in full.
pd.set_option('max_colwidth', None)
from ckanapi import RemoteCKAN
import nest_asyncio
# NOTE(review): presumably needed because the validation function runs asyncio
# code while Jupyter's own event loop is already running -- TODO confirm.
nest_asyncio.apply()
import requests

### Info about this notebook series

This series of notebooks is meant to serve as an educational tool for learning how to use the PyObis (`pyobistools`) biodiversity data validation package:
+ https://github.com/cioos-siooc/pyobistools

Command to install PyObis (currently not hosted on PyPI):
+ pip install git+https://github.com/cioos-siooc/pyobistools@main#egg=pyobistools

Darwin Core documentation: 
+ https://dwc.tdwg.org/

Required fields for each Darwin Core file type: 
+ https://ioos.github.io/bio_mobilization_workshop/01-introduction/index.html
+ https://ioos.github.io/bio_mobilization_workshop/04-create-schema/index.html

### Notebook to test the PyObis function 'check_scientificname_and_ids'

##### Function 'check_scientificname_and_ids' description
This function evaluates the validity of scientific names, scientific name IDs and taxon ranks against the WoRMS and ITIS databases.

##### Function 'check_scientificname_and_ids' arguments
+ data: DataFrame containing the data to evaluate
+ value: Type of analysis to run for each row in the dataset 
    + 'names': Checks the validity of scientific names against WoRMS and, optionally, ITIS
    + 'names_ids': Same as 'names', plus checks scientific name IDs
    + 'names_taxons_ids': Same as 'names_ids', plus checks taxon ranks
+ itis_usage (default = False): If True, names for which WoRMS does not return a positive answer are also checked against ITIS


Load two Darwin Core (DwC) files, one used as an occurrence core and one as an occurrence extension:

In [2]:
# Source CSVs, kept as named constants so the dataset locations are easy to spot and swap.
OCCURRENCE_CORE_URL = 'https://catalogue.preprod.ogsl.ca/data/ncc/ca-cioos_b52bb8be-d225-442c-94ab-6f107a64b8a6/saint_john_river_saltmarshes_species-occurrence_2021.csv'
OCCURRENCE_EXTENSION_URL = 'https://catalogue.preprod.ogsl.ca/data/ismer/ca-cioos_0edfc94f-06d0-4823-84ef-df91e6c11fe6/ismer_occurrence_endofaune_cote-nord_2009.csv'

# Download both files into DataFrames (requires network access).
occurrence_core = pd.read_csv(OCCURRENCE_CORE_URL)
occurrence_extension = pd.read_csv(OCCURRENCE_EXTENSION_URL)

# Preview the first three rows of the extension file.
occurrence_extension.head(n=3)

Unnamed: 0,eventID,occurrenceID,scientificName,aphiaID,scientificNameID,taxonRank,occurrenceStatus,organismQuantityType,organismQuantity,basisofRecord
0,0003_Infauna_CoteNord_CamilleRobineau_BE10C1,0003_Infauna_CoteNord_CamilleRobineau_BE10C1_1,Mesodesma arctatum,156805.0,urn:lsid:marinespecies.org:taxname:156805,species,present,Individual count,2,HumanObservation
1,0003_Infauna_CoteNord_CamilleRobineau_BE10C1,0003_Infauna_CoteNord_CamilleRobineau_BE10C1_2,Nephtys ciliata,130356.0,urn:lsid:marinespecies.org:taxname:130356,species,present,Individual count,1,HumanObservation
2,0003_Infauna_CoteNord_CamilleRobineau_BE10C2,0003_Infauna_CoteNord_CamilleRobineau_BE10C2_3,Leucon nasicoides,148682.0,urn:lsid:marinespecies.org:taxname:148682,species,present,Individual count,2,HumanObservation


Try the check_scientificname_and_ids function:

In [3]:
# 'names' mode on the occurrence core: only the scientific names are validated.
# The call prints one progress line per name looked up; the returned frame is the summary.
core_names_check = check_scientificname_and_ids(occurrence_core, 'names')
core_names_check.head()

3 : 200: Worms Cichorieae 
2 : 200: Worms Doellingeria umbellata 
5 : 200: Worms Plantago 
4 : 200: Worms Senecio viscosus 
0 : 204: Worms  smartweeds 
1 : 200: Worms Sonchus arvensis 
13 : 204: Worms Mentha canadensis 
8 : 204: Worms Sporobolus michauxianus 
12 : 204: Worms Juncus gerardi 
6 : 204: Worms Scutellaria lateriflora 
7 : 204: Worms Stellaria graminea 
10 : 200: Worms Sagittaria latifolia 
11 : 204: Worms Linaria vulgaris 
17 : 204: Worms Betula papyrifera 
21 : 204: Worms Athyrium angustum 
26 : 200: Worms Xanthium strumarium 
24 : 204: Worms Persicaria maculosa 
18 : 204: Worms Chelone glabra 
16 : 200: Worms Glyceria 
30 : 200: Worms Tanacetum vulgare 
29 : 204: Worms Viburnum opulus 
35 : 204: Worms Prunella vulgaris 
20 : 200: Worms Jacobaea vulgaris 
34 : 204: Worms Thalictrum pubescens 
15 : 200: Worms Vicia cracca 
19 : 200: Worms Centaurea nigra 
22 : 200: Worms Bryophyta 
31 : 204: Worms Pastinaca sativa 
25 : 204: Worms Euphorbia maculata 
9 : 200: Worms Cichoriu

Unnamed: 0_level_0,Dataset Values,Validation,Database values,Database values,Database values,Database values,Database values,Database values,Database values
Unnamed: 0_level_1,scientificname,Exact_Match,TaxonID,Status,Unacceptreason,Taxon_Rank,Valid_TaxonID,Valid_Name,LSID
0,Phalaris arundinacea,Non/no,,,,,,,
109,Thuja occidentalis,Non/no,,,,,,,
67,Rhagonycha fulva,Non/no,,,,,,,
66,Epalpus signifer,Non/no,,,,,,,
110,Cornus sericea,Non/no,,,,,,,


In [4]:
# Same 'names' validation, this time on the occurrence extension file.
extension_names_check = check_scientificname_and_ids(occurrence_extension, 'names')
extension_names_check.head()

2 : 200: Worms Microspio 
3 : 200: Worms Idotea balthica 
17 : 200: Worms Mya arenaria 
13 : 200: Worms Psammonyx nobilis 
7 : 200: Worms Littorina obtusata 
14 : 200: Worms Nematoda 
4 : 200: Worms Leucon nasicoides 
10 : 200: Worms Oligochaeta 
12 : 200: Worms Eteone longa 
16 : 200: Worms Mesodesma arctatum 
1 : 200: Worms Mytilus edulis 
8 : 200: Worms Gammarus oceanicus 
18 : 200: Worms Macoma balthica 
15 : 200: Worms Hydrobia minuta 
9 : 200: Worms Oediceros borealis 
6 : 200: Worms Nephtys ciliata 
11 : 200: Worms Littorina saxatilis 
5 : 200: Worms Nereis diversicolor 


Unnamed: 0_level_0,Dataset Values,Validation,Database values,Database values,Database values,Database values,Database values,Database values,Database values
Unnamed: 0_level_1,scientificname,Exact_Match,TaxonID,Status,Unacceptreason,Taxon_Rank,Valid_TaxonID,Valid_Name,LSID
2,Leucon nasicoides,Non/no,148682,alternate representation,,Species,110619,Leucon (Leucon) nasicoides,urn:lsid:marinespecies.org:taxname:148682
4,Psammonyx nobilis,Non/no,158140,unaccepted,superseded recombination,Species,1255501,Wecomedon nobilis,urn:lsid:marinespecies.org:taxname:158140
8,Nereis diversicolor,Non/no,340537,deleted,AphiaID resurrection,,152302,Hediste diversicolor,urn:lsid:marinespecies.org:taxname:340537
11,Hydrobia minuta,Non/no,152020,unaccepted,preoccupied name,Species,574096,Ecrobia truncata,urn:lsid:marinespecies.org:taxname:152020
0,Mesodesma arctatum,Oui/Yes,156805,accepted,,Species,156805,Mesodesma arctatum,urn:lsid:marinespecies.org:taxname:156805


In [5]:
# 'names_ids' mode returns two frames:
#   table1 - one row per scientific name (match status plus database values)
#   table2 - one row per occurrence (name and scientificNameID validation)
names_ids_result = check_scientificname_and_ids(occurrence_core, 'names_ids')
table1, table2 = names_ids_result

# Preview the per-name summary.
table1.head(n=3)

0 : 204: Worms  smartweeds 
2 : 200: Worms Doellingeria umbellata 
11 : 204: Worms Linaria vulgaris 
13 : 204: Worms Mentha canadensis 
6 : 204: Worms Scutellaria lateriflora 
15 : 200: Worms Vicia cracca 
10 : 200: Worms Sagittaria latifolia 
3 : 200: Worms Cichorieae 
21 : 204: Worms Athyrium angustum 
9 : 200: Worms Cichorium intybus 
12 : 204: Worms Juncus gerardi 
27 : 200: Worms Tanacetum 
25 : 204: Worms Euphorbia maculata 
1 : 200: Worms Sonchus arvensis 
23 : 200: Worms Symphyotrichum 
18 : 204: Worms Chelone glabra 
17 : 204: Worms Betula papyrifera 
7 : 204: Worms Stellaria graminea 
32 : 200: Worms Onoclea sensibilis 
20 : 200: Worms Jacobaea vulgaris 
33 : 200: Worms Cirsium 
5 : 200: Worms Plantago 
16 : 200: Worms Glyceria 
30 : 200: Worms Tanacetum vulgare 
31 : 204: Worms Pastinaca sativa 
4 : 200: Worms Senecio viscosus 
8 : 204: Worms Sporobolus michauxianus 
14 : 204: Worms Thuja occidentalis 
24 : 204: Worms Persicaria maculosa 
28 : 200: Worms Tussilago farfara 
2

Unnamed: 0_level_0,Dataset Values,Validation,Database values,Database values,Database values,Database values,Database values,Database values,Database values
Unnamed: 0_level_1,scientificname,Exact_Match,TaxonID,Status,Unacceptreason,Taxon_Rank,Valid_TaxonID,Valid_Name,LSID
0,Phalaris arundinacea,Non/no,,,,,,,
109,Thuja occidentalis,Non/no,,,,,,,
67,Rhagonycha fulva,Non/no,,,,,,,


In [9]:
# Preview the per-occurrence 'names_ids' results (head() shows 5 rows by default).
table2.head()

Unnamed: 0_level_0,Ref. ID,Validation,Validation,Dataset Values,Dataset Values,Database values,Database values
Unnamed: 0_level_1,OccurrenceID,ScientificName_Validation,scientificNameID_Validation,ScientificName,ScientificNameID,Valid_Name,LSID
0,NCC-ST-JOHN-RIVER-SALTMARSHES-2021-001,Non/no,Non/No,Phalaris arundinacea,urn:lsid:itis.gov:itis_tsn:41335,,
1,NCC-ST-JOHN-RIVER-SALTMARSHES-2021-002,Non/no,Non/No,Sporobolus michauxianus,,,
2,NCC-ST-JOHN-RIVER-SALTMARSHES-2021-003,Non/no,Non/No,Schoenoplectus pungens,urn:lsid:itis.gov:itis_tsn:508146,,
3,NCC-ST-JOHN-RIVER-SALTMARSHES-2021-004,Non/no,Non/No,Oxybasis rubra,,,
5,NCC-ST-JOHN-RIVER-SALTMARSHES-2021-006,Non/no,Non/No,Linaria vulgaris,urn:lsid:itis.gov:itis_tsn:33216,,


In [10]:
# 'names_taxons_ids' mode also returns two frames:
#   table3 - one row per scientific name (match status plus database values)
#   table4 - one row per occurrence (name, taxon rank and scientificNameID validation)
names_taxons_ids_result = check_scientificname_and_ids(occurrence_core, 'names_taxons_ids')
table3, table4 = names_taxons_ids_result

# Preview the per-name summary.
table3.head(n=3)

5 : 200: Worms Plantago 
13 : 204: Worms Mentha canadensis 
0 : 204: Worms  smartweeds 
3 : 200: Worms Cichorieae 
21 : 204: Worms Athyrium angustum 
18 : 204: Worms Chelone glabra 
17 : 204: Worms Betula papyrifera 
16 : 200: Worms Glyceria 
15 : 200: Worms Vicia cracca 
25 : 204: Worms Euphorbia maculata 
24 : 204: Worms Persicaria maculosa 
10 : 200: Worms Sagittaria latifolia 
1 : 200: Worms Sonchus arvensis 
12 : 204: Worms Juncus gerardi 
11 : 204: Worms Linaria vulgaris 
14 : 204: Worms Thuja occidentalis 
4 : 200: Worms Senecio viscosus 
7 : 204: Worms Stellaria graminea 
19 : 200: Worms Centaurea nigra 
9 : 200: Worms Cichorium intybus 
30 : 200: Worms Tanacetum vulgare 
2 : 200: Worms Doellingeria umbellata 
34 : 204: Worms Thalictrum pubescens 
8 : 204: Worms Sporobolus michauxianus 
53 : 200: Worms Poaceae 
55 : 204: Worms Hylotelephium telephium 
35 : 204: Worms Prunella vulgaris 
46 : 200: Worms Zizania 
44 : 200: Worms Typha latifolia 
50 : 204: Worms Lysimachia maritima

Unnamed: 0_level_0,Dataset Values,Validation,Database values,Database values,Database values,Database values,Database values,Database values,Database values
Unnamed: 0_level_1,ScientificName,Exact_Match,TaxonID,Status,Unacceptreason,Taxon_Rank,Valid_TaxonID,Valid_Name,LSID
0,Phalaris arundinacea,Non/no,,,,,,,
109,Thuja occidentalis,Non/no,,,,,,,
67,Rhagonycha fulva,Non/no,,,,,,,


In [11]:
# Preview the first three per-occurrence rows of the 'names_taxons_ids' results.
table4.head(n=3)

Unnamed: 0_level_0,Ref. ID,Validation,Validation,Validation,Dataset Values,Dataset Values,Dataset Values,Database values,Database values,Database values
Unnamed: 0_level_1,OccurrenceID,ScientificName_Validation,TaxonRank_Validation,scientificNameID_Validation,ScientificName,TaxonRank,ScientificNameID,Valid_Name,Taxon_Rank,LSID
0,NCC-ST-JOHN-RIVER-SALTMARSHES-2021-001,Non/no,Non/No,Non/No,Phalaris arundinacea,species,urn:lsid:itis.gov:itis_tsn:41335,,,
1,NCC-ST-JOHN-RIVER-SALTMARSHES-2021-002,Non/no,Non/No,Non/No,Sporobolus michauxianus,species,,,,
2,NCC-ST-JOHN-RIVER-SALTMARSHES-2021-003,Non/no,Non/No,Non/No,Schoenoplectus pungens,species,urn:lsid:itis.gov:itis_tsn:508146,,,


Try the check_scientificname_and_ids function with `itis_usage=True` (names without a positive WoRMS answer are also checked against ITIS):

In [11]:
# 'names' validation on the occurrence core with the ITIS option switched on.
core_names_itis_check = check_scientificname_and_ids(occurrence_core, 'names', itis_usage=True)
core_names_itis_check.head()

12 : 204: Worms Juncus gerardi 
5 : 200: Worms Plantago 
11 : 204: Worms Linaria vulgaris 
8 : 204: Worms Sporobolus michauxianus 
20 : 200: Worms Jacobaea vulgaris 
1 : 200: Worms Sonchus arvensis 
9 : 200: Worms Cichorium intybus 
2 : 200: Worms Doellingeria umbellata 
0 : 204: Worms  smartweeds 
3 : 200: Worms Cichorieae 
14 : 204: Worms Thuja occidentalis 
4 : 200: Worms Senecio viscosus 
13 : 204: Worms Mentha canadensis 
6 : 204: Worms Scutellaria lateriflora 
15 : 200: Worms Vicia cracca 
7 : 204: Worms Stellaria graminea 
18 : 204: Worms Chelone glabra 
25 : 204: Worms Euphorbia maculata 
16 : 200: Worms Glyceria 
22 : 200: Worms Bryophyta 
31 : 204: Worms Pastinaca sativa 
10 : 200: Worms Sagittaria latifolia 
21 : 204: Worms Athyrium angustum 
29 : 204: Worms Viburnum opulus 
30 : 200: Worms Tanacetum vulgare 
28 : 200: Worms Tussilago farfara 
23 : 200: Worms Symphyotrichum 
34 : 204: Worms Thalictrum pubescens 
49 : 204: Worms Epalpus signifer 
59 : 204: Worms Argentina ans

Unnamed: 0_level_0,Dataset Values,Validation,Database values,Database values,Database values,Database values,Database values,Database values,Database values
Unnamed: 0_level_1,scientificname,Exact_Match,TaxonID,Status,Unacceptreason,Taxon_Rank,Valid_TaxonID,Valid_Name,LSID
74,Spergularia marina,Non/no,418675.0,unaccepted,,Species,395048.0,Spergularia salina,urn:lsid:marinespecies.org:taxname:418675
71,Stellaria graminea,Non/no,,,,,,,
87,daisies,Non/no,,,,,,,
96,smartweeds,Non/no,,,,,,,
116,Sorbus,Non/no,25123.0,,,,25123.0,X Amelasorbus,urn:lsid:itis.gov:itis_tsn:25123
