This notebook iterates over the GAP species habitat map and range map items in ScienceBase to fix a number of issues and to add new information to each item, derived from metadata the items already contain.

In [1]:
import pysb
import time
from IPython.display import display

In [2]:
# ScienceBase item IDs of the two parent collections. Later cells list the
# children of each one through a "parentId=<id>" search.
_habitatMapRoot = "527d0a83e4b0850ea0518326"  # parent of the habitat map items
_rangeMapRoot = "5951527de4b062508e3b1e79"  # parent of the range map items

In [3]:
# Open a ScienceBase session and authenticate it. The resulting `sb` object is
# the session used by every later cell for searches and item updates.
sb = pysb.SbSession()
username = input("Username: ")
# loginc() asks for the password interactively (see the masked prompt in the
# cell output), so no password is written into the notebook source.
# NOTE(review): the saved cell output still records the username that was
# typed - clear outputs before sharing the notebook.
sb.loginc(str(username))

Username: sbristol@usgs.gov
········


<pysb.SbSession.SbSession at 0x107c1edd8>

This block builds data structures into memory for use in this code by looping over search results from ScienceBase. This is much more efficient than working through item by item with individual ScienceBase requests.

In [11]:
def _identifierKey(item, identifierType):
    """Return the "key" of the first identifier of the given type on an item.

    Returns None when the item has no identifiers or none of that type, so the
    caller can decide how to handle incomplete items.
    """
    for identifier in item.get("identifiers") or []:
        if identifier.get("type") == identifierType:
            return identifier.get("key")
    return None


def _loadChildItems(parentId, fields, extraIndexFields=()):
    """Page through every child item of parentId and index them by GAP species code.

    parentId         -- ScienceBase ID of the parent collection item.
    fields           -- comma-separated item fields to request from the search.
    extraIndexFields -- item fields copied into each index entry in addition to
                        "id" and "title" (None when the item lacks the field).

    Returns (items, gapCodeIndex): the list of item dicts as returned by the
    search (minus "link"/"relatedItems"), and a dict mapping each
    GAP_SpeciesCode to {"id", "title", ...extra fields}.
    """
    items = []
    gapCodeIndex = {}
    searchResults = sb.find_items("parentId="+parentId+"&fields="+fields+"&max=100")
    # sb.next() is expected to return None once there are no more pages.
    while searchResults is not None:
        for item in searchResults.get("items", []):
            # Drop bulky properties that are never used by the update cells.
            item.pop("link",None)
            item.pop("relatedItems",None)

            gapCode = _identifierKey(item, "GAP_SpeciesCode")
            if gapCode is None:
                # Every downstream step is keyed on the GAP species code, so an
                # item without one cannot be processed; report it rather than
                # aborting the whole load.
                print (item["id"], "skipped: no GAP_SpeciesCode identifier")
                continue

            items.append(item)
            indexEntry = {"id":item["id"],"title":item["title"]}
            for fieldName in extraIndexFields:
                indexEntry[fieldName] = item.get(fieldName)
            gapCodeIndex[gapCode] = indexEntry
        searchResults = sb.next(searchResults)
    return items, gapCodeIndex


# Range maps: full item list, IDs already updated in this session, and a
# GAP code -> {"id", "title"} lookup.
rangeMapsDone = []
rangeMaps, rangeMapGapToID = _loadChildItems(_rangeMapRoot, "title,identifiers,webLinks")

# Habitat maps: same structures; the lookup additionally carries "dates".
habitatMapsDone = []
habitatMaps, habitatMapGapToID = _loadChildItems(_habitatMapRoot, "title,identifiers,webLinks,dates", extraIndexFields=("dates",))


This code block makes changes to habitat map items, including the following:
* Updates the citation string to use the proper DOI link
* Adds a web link to the associated range map

In [None]:
# Update every habitat map item that has not been handled yet in this session.
# Each update sets the citation string (with an https://doi.org/ link) and a
# web link pointing at the range map that shares the item's GAP species code.
# Failures are reported per item and the loop moves on, mirroring the range
# map cell; failed items stay out of habitatMapsDone so a re-run retries them.
count = len(habitatMapsDone)
for habitatMap in habitatMaps:
    if habitatMap["id"] in habitatMapsDone:
        continue
    try:
        # First identifier of each type wins, matching a first-match lookup.
        identifierKeys = {}
        for identifier in habitatMap["identifiers"]:
            identifierKeys.setdefault(identifier["type"], identifier["key"])

        # Strip only the leading "doi:"-style prefix; a DOI suffix may itself
        # contain ":" and must not be truncated.
        _thisDOI = identifierKeys["doi"].split(":", 1)[-1]
        _thisGapCode = identifierKeys["GAP_SpeciesCode"]
        relatedRangeMap = rangeMapGapToID[_thisGapCode]

        updateItem = {}
        updateItem["id"] = habitatMap["id"]
        updateItem["citation"] = "U.S. Geological Survey - Gap Analysis Project, 2017, "+habitatMap["title"]+", https://doi.org/"+_thisDOI+"."

        linkToRangeMap = {}
        linkToRangeMap["type"] = "webLink"
        linkToRangeMap["typeLabel"] = "Web Link"
        linkToRangeMap["title"] = relatedRangeMap["title"]
        linkToRangeMap["uri"] = "https://www.sciencebase.gov/catalog/item/"+relatedRangeMap["id"]
        # NOTE(review): this sends a one-element webLinks list; presumably
        # ScienceBase replaces the item's existing webLinks with it - confirm
        # that dropping any other links on the habitat item is intended.
        updateItem["webLinks"] = [linkToRangeMap]

        sb.update_item(updateItem)
        habitatMapsDone.append(habitatMap["id"])
        count = count + 1
        print (updateItem["id"], count)
        # Brief pause between requests to avoid hammering the service.
        time.sleep(0.15)
    except Exception as e:
        # Missing identifier, no matching range map, or a failed request:
        # report the item and continue with the rest of the batch.
        print (habitatMap["id"], e)


This code block makes changes to range map items, including the following:
* Fixes the title to match the convention used for the habitat maps (common name with scientific name in parentheses)
* Adds a citation string with the proper DOI link
* Adds a web link to the associated habitat map, keeping the link to the DOI for the HUC data

In [None]:
# Update every range map item that has not been handled yet in this session.
# Each update sets the corrected title, a citation string (with an
# https://doi.org/ link) and two web links: the matching habitat map and the
# DOI of the HUC source data. Failures are reported per item and the loop moves
# on; failed items stay out of rangeMapsDone so a re-run retries them.
count = len(rangeMapsDone)
for rangeMap in rangeMaps:
    if rangeMap["id"] in rangeMapsDone:
        continue
    try:
        # First identifier of each type wins, matching a first-match lookup.
        identifierKeys = {}
        for identifier in rangeMap["identifiers"]:
            identifierKeys.setdefault(identifier["type"], identifier["key"])

        _thisScientificName = identifierKeys["ScientificName"]
        _thisCommonName = identifierKeys["CommonName"]
        # Strip only the leading "doi:"-style prefix; a DOI suffix may itself
        # contain ":" and must not be truncated.
        _thisDOI = identifierKeys["doi"].split(":", 1)[-1]
        _thisGapCode = identifierKeys["GAP_SpeciesCode"]
        relatedHabitatMap = habitatMapGapToID[_thisGapCode]

        correctedTitle = _thisCommonName+" ("+_thisScientificName+") Range Map"

        updateItem = {}
        updateItem["id"] = rangeMap["id"]
        updateItem["title"] = correctedTitle
        # Cite the corrected title, not the title the item had when it was
        # loaded; otherwise the citation disagrees with the item until the
        # notebook is re-run against the already-updated items.
        updateItem["citation"] = "U.S. Geological Survey - Gap Analysis Project, 2017, "+correctedTitle+", https://doi.org/"+_thisDOI+"."

        linkToHabitatMap = {}
        linkToHabitatMap["type"] = "webLink"
        linkToHabitatMap["typeLabel"] = "Web Link"
        linkToHabitatMap["title"] = relatedHabitatMap["title"]
        linkToHabitatMap["uri"] = "https://www.sciencebase.gov/catalog/item/"+relatedHabitatMap["id"]

        # The HUC source-data link is rebuilt here so it is still present on
        # the item after the webLinks list is sent with the update.
        hucDataLink = {"type": "webLink","typeLabel": "Web Link","uri": "https://doi.org/10.5066/F7DZ0754","title": "Source data for strHUC12RNG in species range"}
        updateItem["webLinks"] = [linkToHabitatMap, hucDataLink]

        sb.update_item(updateItem)
        rangeMapsDone.append(rangeMap["id"])
        count = count + 1
        print (updateItem["id"], count)
        # Brief pause between requests to avoid hammering the service.
        time.sleep(0.15)
    except Exception as e:
        # Missing identifier, no matching habitat map, or a failed request:
        # report the item and continue with the rest of the batch.
        print (rangeMap["id"], e)
