In [1]:
import requests,io,configparser
from IPython.display import display
import pandas as pd

In [2]:
# Get API keys and any other config details from a file that is external to the code.
config = configparser.RawConfigParser()
config.read_file(open(r'../config/stuff.py'))

In [3]:
# Build base URL with API key using input from the external config.
def getBaseURL():
    gc2APIKey = config.get('apiKeys','apiKey_GC2_BCB').replace('"','')
    apiBaseURL = "https://gc2.mapcentia.com/api/v1/sql/bcb?key="+gc2APIKey
    return apiBaseURL

This notebook repeats the same basic process I set up for the 2005 states. I made a run at processing the original Excel files directly, but there was so much variability in those, that the exceptions would have been a mess. Thanks to Abby for having already processed those into individual text files that are nicely standardized.

The 2015 lists introduced a new concept where we asked the states whether or not the given species was included on their 2005 list. I translated this into a property that I called "firstyear." I used the range of "no" values that the states used in their information to make firstyear = true. This is essentially what we are after here, a way to determine whether a species is showing up for a state the first time. Once we have all the data in the system, we can run a couple of interesting queries using these assertions:

* For firstyear = false, we can check to see if we find that species in the 2005 list for the state. If not, we might want to investigate to see if we did something wrong in getting data out of the SWAP reports (whoever did that work way back when).
* For firstyear = true, we can validate to make sure the species was not listed for 2005. We can then provide a report showing both those species that are explicitly new for a state and those that are new in our data. The former will have a greater certainty.

In [16]:
# Query ScienceBase for the 2015 states, returning the files structure along with tags (where we get state name)
sbQ = "https://www.sciencebase.gov/catalog/items?q=2015&parentId=56d720ece4b015c306f442d5&format=json&fields=files,tags&max=100"
sbR = requests.get(sbQ).json()

In [40]:
totalRecords = 0
sgcn_year = 2015

for item in sbR['items']:
    sgcn_state = item['tags'][0]['name']
    sourceid = "https://www.sciencebase.gov/catalog/item/"+item['id']
    for file in item['files']:
        if file['name'][-9:] == '_2015.txt':
            stateList = requests.get(file['url']).content
            try:
                stateListPD = pd.read_csv(io.StringIO(stateList.decode('utf-8')))
            except:
                pass

            try:
                stateListPD = pd.read_csv(io.StringIO(stateList.decode('utf-8')), sep='\t')
            except:
                pass

            try:
                stateListPD = pd.read_csv(io.StringIO(stateList.decode('iso-8859-1')), sep='\t')
            except:
                pass

    for ir in stateListPD.itertuples():
        if type(ir[1]) is float:
            scientificname_submitted = ""
        else:
            scientificname_submitted = ir[1].replace("'","''")
        
        if type(ir[2]) is float:
            commonname_submitted = ""
        else:
            commonname_submitted = ir[2].replace("'","''")
        
        taxonomicgroup_submitted = ir[3]
        
        if ir[4] in ["N","n","No","no"]:
            firstyear = True
        else:
            firstyear = False
        
        q = "INSERT INTO sgcn.sgcn \
            (sourceid,sgcn_year,sgcn_state,scientificname_submitted,commonname_submitted,taxonomicgroup_submitted,firstyear) \
            VALUES ('"+sourceid+"',"+str(sgcn_year)+",'"+sgcn_state+"','"+scientificname_submitted+"','"+commonname_submitted+"','"+taxonomicgroup_submitted+"',"+str(firstyear)+")"
        r = requests.get(getBaseURL()+"&q="+q).json()
        display (r)
        totalRecords = totalRecords+1

print ("Total Records Processed: "+str(totalRecords))            

INSERT INTO sgcn.sgcn             (sourceid,sgcn_year,sgcn_state,scientificname_submitted,commonname_submitted,taxonomicgroup_submitted,firstyear)             VALUES ('https://www.sciencebase.gov/catalog/item/56d725c7e4b015c306f4580a',2015,'Pennsylvania','Cryptobranchus alleganiensis alleganiensis','Eastern Hellbender','Amphibians',False)


{'_execution_time': 0.066,
 'affected_rows': 1,
 'auth_check': {'auth_level': None, 'session': None, 'success': True},
 'success': True}

INSERT INTO sgcn.sgcn             (sourceid,sgcn_year,sgcn_state,scientificname_submitted,commonname_submitted,taxonomicgroup_submitted,firstyear)             VALUES ('https://www.sciencebase.gov/catalog/item/56d725c7e4b015c306f4580a',2015,'Pennsylvania','Necturus maculosus','Mudpuppy','Amphibians',True)


{'_execution_time': 0.059,
 'affected_rows': 1,
 'auth_check': {'auth_level': None, 'session': None, 'success': True},
 'success': True}

INSERT INTO sgcn.sgcn             (sourceid,sgcn_year,sgcn_state,scientificname_submitted,commonname_submitted,taxonomicgroup_submitted,firstyear)             VALUES ('https://www.sciencebase.gov/catalog/item/56d725c7e4b015c306f4580a',2015,'Pennsylvania','Ambystoma jeffersonianum','Jefferson Salamander','Amphibians',False)


{'_execution_time': 0.06,
 'affected_rows': 1,
 'auth_check': {'auth_level': None, 'session': None, 'success': True},
 'success': True}

INSERT INTO sgcn.sgcn             (sourceid,sgcn_year,sgcn_state,scientificname_submitted,commonname_submitted,taxonomicgroup_submitted,firstyear)             VALUES ('https://www.sciencebase.gov/catalog/item/56d725c7e4b015c306f4580a',2015,'Pennsylvania','Ambystoma laterale','Blue-spotted Salamander','Amphibians',True)


{'_execution_time': 0.062,
 'affected_rows': 1,
 'auth_check': {'auth_level': None, 'session': None, 'success': True},
 'success': True}

INSERT INTO sgcn.sgcn             (sourceid,sgcn_year,sgcn_state,scientificname_submitted,commonname_submitted,taxonomicgroup_submitted,firstyear)             VALUES ('https://www.sciencebase.gov/catalog/item/56d725c7e4b015c306f4580a',2015,'Pennsylvania','Ambystoma opacum','Marbled Salamander','Amphibians',False)


{'_execution_time': 0.065,
 'affected_rows': 1,
 'auth_check': {'auth_level': None, 'session': None, 'success': True},
 'success': True}

Total Records Processed: 5
