diff --git a/pytaxize/col/col.py b/pytaxize/col/col.py index c0a6e65..74ab496 100644 --- a/pytaxize/col/col.py +++ b/pytaxize/col/col.py @@ -14,6 +14,7 @@ warnings.warn("Pandas library not installed, dataframes disabled") pd = None + def children(name=None, id=None, format=None, start=None, checklist=None): """ Search Catalogue of Life for for direct children of a particular taxon. diff --git a/pytaxize/gbif/parse.py b/pytaxize/gbif/parse.py index 9c91c9d..c0e0838 100644 --- a/pytaxize/gbif/parse.py +++ b/pytaxize/gbif/parse.py @@ -7,6 +7,7 @@ warnings.warn("Pandas library not installed, dataframes disabled") pd = None + def parse(name, as_dataframe=False): """ Parse taxon names using the GBIF name parser. diff --git a/pytaxize/gn/gnr.py b/pytaxize/gn/gnr.py index a867f50..551d9fd 100644 --- a/pytaxize/gn/gnr.py +++ b/pytaxize/gn/gnr.py @@ -5,9 +5,11 @@ from pytaxize.refactor import Refactor import os + class NoResultException(Exception): pass + def gnr_datasources(): """ Get data sources for the Global Names Resolver. diff --git a/pytaxize/ids/ids.py b/pytaxize/ids/ids.py index c05dfa1..2d421c7 100644 --- a/pytaxize/ids/ids.py +++ b/pytaxize/ids/ids.py @@ -4,6 +4,7 @@ from pytaxize.ncbi import ncbi from ..gbif.gbif_utils import * + class NoResultException(Exception): pass @@ -63,7 +64,10 @@ def ncbi(self, verbose=True): rank_taken = z["Rank"] result = _make_id(id[0], fname, z["Rank"], "ncbi") if len(id) > 1: - result = [ _make_id(w["TaxId"], w["ScientificName"], w["Rank"], "ncbi") for w in res[fname] ] + result = [ + _make_id(w["TaxId"], w["ScientificName"], w["Rank"], "ncbi") + for w in res[fname] + ] out.append(result) # out = _flatten(out) if isinstance(out, dict): @@ -71,6 +75,7 @@ def ncbi(self, verbose=True): if isinstance(out[0], list): out = out[0] return out + # def col(self, verbose=True): # """ # Get Catalogue of Life taxonomic identifiers @@ -134,6 +139,7 @@ def ncbi(self, verbose=True): # return out + def _make_id(id, name, rank, type): if id is None: uri = None @@ -146,6 +152,7 @@ def _make_id(id, name, rank, type): "uri": uri, } + def _converter(x): if x.__class__.__name__ == "str": return [x] @@ -156,6 +163,7 @@ def _converter(x): def _flatten(x): return [item for sublist in x for item in sublist] + id_uris = { "col": { "species": "http://www.catalogueoflife.org/col/details/species/id/%s", @@ -167,6 +175,7 @@ def _flatten(x): }, } + def _make_id_uri(rank, which, x): if rank is not None: if rank.lower() == "species": diff --git a/pytaxize/itis/itis.py b/pytaxize/itis/itis.py index ae9019b..75d449d 100644 --- a/pytaxize/itis/itis.py +++ b/pytaxize/itis/itis.py @@ -76,6 +76,7 @@ def getcommentdetailfromtsn(tsn, as_dataframe=False, **kwargs): [z.pop("class") for z in out["comments"]] return _df(out["comments"], as_dataframe) + def getcommonnamesfromtsn(tsn, as_dataframe=False, **kwargs): """ Get common names from tsn @@ -95,6 +96,7 @@ def getcommonnamesfromtsn(tsn, as_dataframe=False, **kwargs): [z.pop("class") for z in out["commonNames"]] return _df(out["commonNames"], as_dataframe) + def getcoremetadatafromtsn(tsn, as_dataframe=False, **kwargs): """ Get core metadata from tsn @@ -113,6 +115,7 @@ def getcoremetadatafromtsn(tsn, as_dataframe=False, **kwargs): out.pop("class") return _df([out], as_dataframe) + def getcoveragefromtsn(tsn, as_dataframe=False, **kwargs): """ Get coverge from tsn @@ -150,6 +153,7 @@ def getcredibilityratingfromtsn(tsn, as_dataframe=False, **kwargs): out.pop("class") return _df(out, as_dataframe) + def getcredibilityratings(**kwargs): """ Get possible credibility ratings @@ -168,6 +172,7 @@ def getcredibilityratings(**kwargs): out.pop("class") return out["credibilityValues"] + def getcurrencyfromtsn(tsn, as_dataframe=False, **kwargs): """ Get currency from tsn @@ -204,6 +209,7 @@ def getdatedatafromtsn(tsn, as_dataframe=False, **kwargs): out.pop("class") return _df(out, as_dataframe) + def getexpertsfromtsn(tsn, as_dataframe=False, **kwargs): """ Get expert information for the TSN. @@ -219,6 +225,7 @@ def getexpertsfromtsn(tsn, as_dataframe=False, **kwargs): out.pop("class") return _df(out["experts"], as_dataframe) + def gettaxonomicranknamefromtsn(tsn, as_dataframe=False, **kwargs): """ Returns the kingdom and rank information for the TSN. @@ -236,6 +243,7 @@ def gettaxonomicranknamefromtsn(tsn, as_dataframe=False, **kwargs): tt.pop("class") return _df(tt, as_dataframe) + def getfullhierarchyfromtsn(tsn, as_dataframe=False, **kwargs): """ Get full hierarchy from ts @@ -257,6 +265,7 @@ def getfullhierarchyfromtsn(tsn, as_dataframe=False, **kwargs): [z.pop("class") for z in hier] return _df(hier, as_dataframe) + # def _fullrecord(verb, args, **kwargs): # out = Refactor(itis_base + verb, payload=args, request="get").json(**kwargs) # toget = [ @@ -290,10 +299,12 @@ def getfullhierarchyfromtsn(tsn, as_dataframe=False, **kwargs): # return [parsedat(x) for x in toget] + def _fullrecord(verb, args, **kwargs): out = Refactor(itis_base + verb, payload=args, request="get").json(**kwargs) return out + def getfullrecordfromlsid(lsid, **kwargs): """ Returns the full ITIS record for the TSN in the LSID, found by comparing the @@ -346,6 +357,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): [z.pop("class") for z in out["geoDivisions"]] return _df(out["geoDivisions"], as_dataframe) + # def getgeographicvalues(**kwargs): # """ # Get all possible geographic values @@ -353,7 +365,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # :param **kwargs: Curl options passed on to `requests.get` # Usage:: - + # from pytaxize import itis # itis.getgeographicvalues() # """ @@ -370,7 +382,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Get global species completeness from tsn # Usage:: - + # from pytaxize import itis # itis.getglobalspeciescompletenessfromtsn(180541) # """ @@ -390,7 +402,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # :param tsn: TSN for a taxonomic group (numeric) # Usage:: - + # from pytaxize import itis # itis.gethierarchydownfromtsn(tsn = 161030) # """ @@ -409,7 +421,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # :param tsn: TSN for a taxonomic group (numeric) # Usage:: - + # from pytaxize import itis # itis.gethierarchyupfromtsn(tsn = 36485) # itis.gethierarchyupfromtsn(tsn = 37906) @@ -427,7 +439,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Get itis term # Usage:: - + # from pytaxize import itis # itis._itisterms("buya") # """ @@ -468,7 +480,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Get itis terms from common names # Usage:: - + # from pytaxize import itis # itis.getitistermsfromcommonname("buya") # """ @@ -480,7 +492,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Get itis terms # Usage:: - + # from pytaxize import itis # # fails # itis.getitisterms("bear") @@ -493,7 +505,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Get itis terms from scientific names # Usage:: - + # from pytaxize import itis # itis.getitistermsfromscientificname("ursidae") # itis.getitistermsfromscientificname("Ursus") @@ -516,7 +528,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # rank, whilc this function only gets immediate names downstream. # Usage:: - + # from pytaxize import itis # # Get full hierarchy # itis.hierarchy(tsn=180543) @@ -549,7 +561,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Get jurisdictional origin from tsn # Usage:: - + # from pytaxize import itis # itis.getjurisdictionaloriginfromtsn(180543) # """ @@ -568,7 +580,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Get jurisdiction origin values # Usage:: - + # from pytaxize import itis # itis.getjurisdictionoriginvalues() # """ @@ -585,7 +597,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Get possible jurisdiction values # Usage:: - + # from pytaxize import itis # itis.getjurisdictionvalues() # """ @@ -601,7 +613,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Get kingdom names from tsn # Usage:: - + # from pytaxize import itis # itis.getkingdomnamefromtsn(202385) # """ @@ -619,7 +631,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Get all possible kingdom names # Usage:: - + # from pytaxize import itis # itis.getkingdomnames() # """ @@ -636,7 +648,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Provides the date the ITIS database was last updated. # Usage:: - + # from pytaxize import itis # itis.getlastchangedate() # """ @@ -655,7 +667,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Gets the unique LSID for the TSN, or an empty result if there is no match. # Usage:: - + # from pytaxize import itis # # valid TSN # itis.getlsidfromtsn(155166) @@ -679,7 +691,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Returns a list of the other sources used for the TSN. # Usage:: - + # from pytaxize import itis # itis.getothersourcesfromtsn(182662) # """ @@ -703,7 +715,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Returns the parent TSN for the entered TSN. # Usage:: - + # from pytaxize import itis # itis.getparenttsnfromtsn(202385) # """ @@ -719,7 +731,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Returns a list of the pulications used for the TSN. # Usage:: - + # from pytaxize import itis # itis.getpublicationsfromtsn(70340) # """ @@ -752,7 +764,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # their kingdom and rank ID values. # Usage:: - + # from pytaxize import itis # itis.getranknames() # """ @@ -768,7 +780,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # there is no match or the TSN is invalid. # Usage:: - + # from pytaxize import itis # itis.getrecordfromlsid("urn:lsid:itis.gov:itis_tsn:180543") # """ @@ -797,7 +809,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Returns the review year for the TSN. # Usage:: - + # from pytaxize import itis # itis.getreviewyearfromtsn(180541) # """ @@ -814,7 +826,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # (names and indicators) of the scientific name. # Usage:: - + # from pytaxize import itis # itis.getscientificnamefromtsn(531894) # """ @@ -862,7 +874,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Returns the author information for the TSN. # Usage:: - + # from pytaxize import itis # itis.gettaxonauthorshipfromtsn(183671) # """ @@ -878,7 +890,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Returns the usage information for the TSN. # Usage:: - + # from pytaxize import itis # itis.gettaxonomicusagefromtsn(526852) # """ @@ -894,7 +906,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Get tsn by vernacular language not the international language code (character) # Usage:: - + # from pytaxize import itis # itis.gettsnbyvernacularlanguage("french") # """ @@ -912,7 +924,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Gets the TSN corresponding to the LSID, or an empty result if there is no match. # Usage:: - + # from pytaxize import itis # itis.gettsnfromlsid(lsid="urn:lsid:itis.gov:itis_tsn:28726") # itis.gettsnfromlsid("urn:lsid:itis.gov:itis_tsn:0") @@ -933,7 +945,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Returns the unacceptability reason, if any, for the TSN. # Usage:: - + # from pytaxize import itis # itis.getunacceptabilityreasonfromtsn(183671) # """ @@ -951,7 +963,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Provides a list of the unique languages used in the vernacular table. # Usage:: - + # from pytaxize import itis # itis.getvernacularlanguages() # """ @@ -967,7 +979,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Search for tsn by common name # Usage:: - + # from pytaxize import itis # itis.searchbycommonname(x="american bullfrog") # itis.searchbycommonname("ferret-badger") @@ -986,7 +998,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Search for tsn by common name beginning with # Usage:: - + # from pytaxize import itis # itis.searchbycommonnamebeginswith("inch") # """ @@ -1005,7 +1017,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Search for tsn by common name ending with # Usage:: - + # from pytaxize import itis # itis.searchbycommonnameendswith("snake") # """ @@ -1023,7 +1035,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # `itis.searchbycommonnamebeginswith` and `itis.searchbycommonnameendswith` # Usage:: - + # from pytaxize import itis # itis.searchcommon("inch") # itis.searchcommon("inch", which = "end") @@ -1039,7 +1051,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Search by scientific name # Usage:: - + # from pytaxize import itis # itis.searchbyscientificname(x="Tardigrada") # itis.searchbyscientificname("Quercus douglasii") @@ -1056,7 +1068,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Search for any match # Usage:: - + # from pytaxize import itis # itis.searchforanymatch(x=202385) # itis.searchforanymatch(x="dolphin") @@ -1081,7 +1093,7 @@ def getgeographicdivisionsfromtsn(tsn, as_dataframe=False, **kwargs): # Search for any matched page for descending (logical) # Usage:: - + # from pytaxize import itis # itis.searchforanymatchpaged(x=202385, pagesize=100, pagenum=1, ascend=False) # itis.searchforanymatchpaged("Zy", pagesize=100, pagenum=1, ascend=False) @@ -1229,6 +1241,7 @@ def _tolower(y): def gettag(y): return y.tag.split("}")[1] + def _df(x, as_dataframe=False): if as_dataframe and pd: if isinstance(x, dict): @@ -1238,6 +1251,7 @@ def _df(x, as_dataframe=False): else: return x + if __name__ == "__main__": import doctest diff --git a/pytaxize/ncbi/ncbi.py b/pytaxize/ncbi/ncbi.py index 92dec15..1b4c327 100644 --- a/pytaxize/ncbi/ncbi.py +++ b/pytaxize/ncbi/ncbi.py @@ -80,11 +80,13 @@ def func(name): temp.append(func(sci_com[i])) return lists2dict(temp, sci_com) -def _entrez(path = "esearch", args = {}): + +def _entrez(path="esearch", args={}): url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/%s.fcgi" % path - tt = Refactor(url, args, request = 'get').xml() + tt = Refactor(url, args, request="get").xml() return tt + if __name__ == "__main__": import doctest diff --git a/pytaxize/sci2comm.py b/pytaxize/sci2comm.py index dd7de92..0a9be12 100644 --- a/pytaxize/sci2comm.py +++ b/pytaxize/sci2comm.py @@ -4,8 +4,9 @@ from pytaxize.refactor import Refactor from pytaxize.ids import Ids -def sci2comm(sci=None, id=None, db='ncbi', **kwargs): - """ + +def sci2comm(sci=None, id=None, db="ncbi", **kwargs): + """ Get common names from scientific names. :param: sci (str) One or more scientific names or partial names. @@ -27,23 +28,24 @@ def sci2comm(sci=None, id=None, db='ncbi', **kwargs): pytaxize.sci2comm('Pomatomus saltatrix') pytaxize.sci2comm('Loxodonta africana') """ - x = Ids(sci) - out = x.ncbi() - if len(out) > 1: - res = [_ncbi_common_names(w["id"], **kwargs) for w in out] - else: - res = _ncbi_common_names(out[0]["id"], **kwargs) - if isinstance(sci, str): - sci = [sci] - return dict(zip(sci, res)) + x = Ids(sci) + out = x.ncbi() + if len(out) > 1: + res = [_ncbi_common_names(w["id"], **kwargs) for w in out] + else: + res = _ncbi_common_names(out[0]["id"], **kwargs) + if isinstance(sci, str): + sci = [sci] + return dict(zip(sci, res)) + def _ncbi_common_names(x, **kwargs): - key = os.environ.get("ENTREZ_KEY") - if key is None: - raise Exception("ENTREZ_KEY is not defined") - - query = {"db": "taxonomy", "ID": x, "api_key": key} - url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi" - res = Refactor(url, query, "get").xml(**kwargs) - z = res.xpath("//TaxaSet/Taxon/OtherNames/GenbankCommonName") - return [w.text for w in z] + key = os.environ.get("ENTREZ_KEY") + if key is None: + raise Exception("ENTREZ_KEY is not defined") + + query = {"db": "taxonomy", "ID": x, "api_key": key} + url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi" + res = Refactor(url, query, "get").xml(**kwargs) + z = res.xpath("//TaxaSet/Taxon/OtherNames/GenbankCommonName") + return [w.text for w in z] diff --git a/pytaxize/tax.py b/pytaxize/tax.py index fa64f0d..8906bd1 100644 --- a/pytaxize/tax.py +++ b/pytaxize/tax.py @@ -14,9 +14,11 @@ warnings.warn("Pandas library not installed, dataframes disabled") pd = None + class NoResultException(Exception): pass + def names_list(rank="genus", size=10, as_dataframe=False): """ Get a random vector of species names. @@ -47,19 +49,21 @@ def names_list(rank="genus", size=10, as_dataframe=False): else: return "rank must be one of species, genus, family, or order" + def names_list_helper(size, path, as_dataframe=False): pnpath = resource_filename(__name__, path) if as_dataframe: - dat = pd.read_csv(pnpath) - return dat["names"][:size].tolist() + dat = pd.read_csv(pnpath) + return dat["names"][:size].tolist() else: - with open(pnpath, newline='') as f: - reader = csv.reader(f) - next(reader) - dat = [] - for row in reader: - dat.append(row) - return [w[0] for w in dat][:size] + with open(pnpath, newline="") as f: + reader = csv.reader(f) + next(reader) + dat = [] + for row in reader: + dat.append(row) + return [w[0] for w in dat][:size] + def vascan_search(q, format="json", raw=False): """ @@ -120,7 +124,7 @@ def scrapenames( detect_language=None, all_data_sources=None, data_source_ids=None, - as_dataframe=False + as_dataframe=False, ): """ Resolve names using Global Names Recognition and Discovery. @@ -195,7 +199,7 @@ def scrapenames( meta = res meta.pop("names") if as_dataframe: - data = _df(data, True) + data = _df(data, True) return {"meta": meta, "data": data} # if out["status"] != 303: # sys.exit("Woops, something went wrong") diff --git a/pytaxize/taxo.py b/pytaxize/taxo.py index 9a15653..4af94d6 100644 --- a/pytaxize/taxo.py +++ b/pytaxize/taxo.py @@ -10,6 +10,7 @@ warnings.warn("Pandas library not installed, dataframes disabled") pd = None + class NoResultException(Exception): pass @@ -46,6 +47,7 @@ def taxo_datasources(as_dataframe=False): else: return out + def taxo_resolve(query, source=None, code=None, http="get"): """ Uses Taxosaurus to resolve scientific names diff --git a/pytaxize/utils.py b/pytaxize/utils.py index f6b37c7..8b2f7b2 100644 --- a/pytaxize/utils.py +++ b/pytaxize/utils.py @@ -1,5 +1,6 @@ from functools import reduce + def assert_range_numeric(x, start, stop): if x is None: return