# Get papers & journals by Scopus authors

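This notebook retrieves all publications by a list of Scopus author IDs, builds a list of the journals those publications appeared in, and checks each journal's full-text availability and coverage dates against the library's OpenURL link resolver.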

In [1]:
import re
import time
import xml.etree.ElementTree as ElementTree
from collections.abc import Callable

import pandas as pd
import requests
from scopus import ScopusSearch


In [36]:
# Show complete cell contents (no truncation) and up to 500 rows when a
# dataframe is displayed. `None` is the supported "no limit" value for
# max_colwidth; the legacy value -1 is deprecated since pandas 1.0.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 500)

In [2]:
def getText(elem):
    '''
    Return the text of an XML element with any literal '<br>' markers removed.

    elem: an ElementTree element, or None (e.g. the result of a find() that
    matched nothing).

    Returns an empty string when elem is None or has no text. The previous
    implementation also contained a "not available" fallback, but it could
    never be reached (a missing text was already turned into "" by the
    exception handler), so the empty-string result is kept as the contract.
    '''
    # getattr covers elem being None without needing a catch-all except
    text = getattr(elem, 'text', None)
    if not isinstance(text, str):
        return ""
    return text.replace('<br>', '')

In [3]:
def searchOpenURL(row):
    '''(pandas.Series) --> pandas.Series

    Look up one journal in the library's OpenURL link resolver.

    row is one row of a dataframe and must contain the journal's ISSN as a
    string under 'q_issn'. The function sends an HTTP request for that ISSN to
    the link resolver, parses the XML response, and collects the package name
    and coverage statement of every full-text service in the response.

    Returns a two-element pandas Series:
      0: availability statement ('Full-text available' or
         'No full-text available')
      1: dict mapping package name -> coverage statement (empty when there is
         no full-text service). Services that share a package name overwrite
         each other, so only the last one is kept.

    Intended for use with DataFrame.apply(..., axis=1). A progress line is
    printed for every journal. Raises a requests exception if the server does
    not answer within the timeout, and ElementTree.ParseError if the response
    is not well-formed XML.
    '''
    # Query the link resolver. The timeout keeps a single unresponsive
    # request from hanging the whole apply() run indefinitely.
    r = requests.get(
        'http://na01.alma.exlibrisgroup.com/view/uresolver/01UTON_UW/openurl?svc_dat=CTO&issn={}'.format(row['q_issn']),
        timeout=30,
    )
    # Parse the XML response and store it as root
    root = ElementTree.fromstring(r.content)
    # Namespace map so the XPath queries below can use the short 'resolver:' prefix
    ns = {'resolver': 'http://com/exlibris/urm/uresolver/xmlbeans/u'}
    # Coverage statements for this journal: key is the package name, value is the coverage dates
    coverage_statement = {}

    # Get all full-text services once and reuse the result
    services = root.findall('.//resolver:context_service[@service_type="getFullTxt"]', ns)

    if services:
        avail_statement = 'Full-text available'
        print('Full-text available for ' + row['q_issn'])
        for service in services:
            # Package name of this full-text service
            package = service.find('.//resolver:key[@id="package_public_name"]', ns)
            servicePackageName = getText(package)
            # Coverage date statement of this full-text service
            avail = service.find('.//resolver:key[@id="Availability"]', ns)
            serviceCoverage = getText(avail)
            coverage_statement[servicePackageName] = serviceCoverage
    else:
        avail_statement = 'No full-text available'
        print('Full-text not available for ' + row['q_issn'])

    # Return the availability and coverage statements as a pandas Series
    return pd.Series([avail_statement, coverage_statement])

In [4]:
def coverageStatement_availParser(row):
    '''
    (pd.Series) -> pd.Series

    Summarise a journal's coverage statements as a single availability label.

    row['coverage'] is a dict of package name -> coverage statement. Empty
    statements are ignored. The label is chosen in this order of precedence:

      1. 'Full-text available to present'        - some statement mentions
         neither 'Most recent' nor 'until'
      2. 'Full-text available with embargo'      - some statement mentions
         'Most recent'
      3. 'Full-text available, but not complete' - some statement mentions
         'until'

    An empty dict gives 'No full-text available'. A dict that contains only
    empty statements gives an empty label. The label is returned as a
    one-element pandas Series.
    '''
    coverage = row['coverage']

    # Journals without any full-text service
    if coverage == {}:
        return pd.Series(['No full-text available'])

    # Statements that actually carry data
    statements = [text for text in coverage.values() if text != '']

    if any('Most recent' not in text and 'until' not in text for text in statements):
        label = 'Full-text available to present'
    elif any('Most recent' in text for text in statements):
        label = 'Full-text available with embargo'
    elif any('until' in text for text in statements):
        label = 'Full-text available, but not complete'
    else:
        label = ''

    return pd.Series([label])

In [32]:
def coverageStatement_yearsParser(row):
    '''
    (pd.Series) -> pd.Series

    Turn the link resolver's coverage statements into short date ranges.

    row['coverage'] is a dict of package name -> coverage statement. For every
    statement that contains 'Available from <4-digit year>' one or more entries
    are produced:

      '<start> - present'       when the statement mentions neither
                                'Most recent' nor 'until'
      '<start> - <period> ago'  when it contains 'Most recent <period>(s)'
      '<start> - <end>'         when it contains 'until <4-digit year>'

    Statements (or parts of statements) that do not match these patterns are
    skipped instead of raising an AttributeError on a failed regex match.

    Returns a one-element pandas Series holding the list of range strings
    (an empty list when nothing could be parsed).
    '''
    tempList = []
    coverage = row['coverage']
    # Don't do this if there are no coverage statements
    if coverage != {}:
        for value in coverage.values():
            # The start year is needed for every kind of range; without it
            # the statement carries no usable date coverage.
            start = re.search(r"Available from (\d{4})", value)
            if start is None:
                continue
            start_year = start.group(1)

            has_embargo = 'Most recent' in value
            has_end = 'until' in value

            # Coverage up to the present
            if not has_embargo and not has_end:
                tempList.append(start_year + ' - present')
            # Coverage with an embargo
            if has_embargo:
                embargo = re.search(r'Most recent (.*?)\(s\)', value)
                if embargo is not None:
                    tempList.append(start_year + ' - ' + embargo.group(1) + ' ago')
            # Coverage that stops before the present
            if has_end:
                end = re.search(r"until (\d{4})", value)
                if end is not None:
                    tempList.append(start_year + ' - ' + end.group(1))
    return pd.Series([tempList])

In [29]:
# Start a timer
# Record the wall-clock start time so the elapsed time can be reported later
start_time = time.time()

# Load the list of researchers into a dataframe (path is relative to the
# working directory; later cells read its 'scopusID' column)
df_facultyMembers = pd.read_csv('Data/researchers.csv')

In [6]:
# Create an empty list for author Scopus IDs
# Collect the Scopus IDs from the faculty member dataframe into a plain list
authors = list(df_facultyMembers['scopusID'])

In [7]:
# Create the query of Scopus IDs that will be used in the API call
# Create an empty string
# Build the Scopus query: one AU-ID(...) clause per author, joined with " OR ".
# str.join only puts the separator between clauses, so the query never ends
# with "OR", and an empty author list yields an empty query string instead of
# raising an IndexError.
query = ' OR '.join('AU-ID(' + str(author) + ')' for author in authors)

In [8]:
# Search the Scopus API for all publications by these authors, in one search
# Run a single Scopus API search covering all publications by these authors
s = ScopusSearch(query, refresh=True, subscriber=True)

In [9]:
# Report the wall-clock seconds elapsed since start_time was recorded
print("--- %s seconds ---" % (time.time() - start_time))

--- 61.955257177352905 seconds ---


In [10]:
# Convert API results into a dataframe
# Load the search results into a dataframe
df = pd.DataFrame(s.results)

In [11]:
# Convert appropriate data into ints
# Convert the count/flag columns to numeric dtypes
for column in ("citedby_count", "openaccess", "author_count"):
    df[column] = pd.to_numeric(df[column])

# The earlier version of this cell wrote the converted open-access values to a
# misspelled "openacess" column and left "openaccess" itself unconverted.
# "openaccess" is now converted in place; the misspelled column is kept as an
# alias in case other cells still read it.
df["openacess"] = df["openaccess"]

In [12]:
# Add number of times a journal has been published in
# For every publication row, store how many rows share its source_id, i.e. how
# many of the retrieved publications appeared in the same source
df['count'] = df.groupby('source_id')['source_id'].transform('count')

In [37]:
df

Unnamed: 0,eid,doi,pii,pubmed_id,title,subtype,creator,afid,affilname,affiliation_city,...,pageRange,description,authkeywords,citedby_count,openaccess,fund_acr,fund_no,fund_sponsor,openacess,count
0,2-s2.0-85064014829,10.1038/s41598-019-41955-y,,30948747,Area-dependent change of response in the rat’s inferior colliculus to intracochlear electrical stimulation following neonatal cochlear damage,ar,Hatano M.,60024784;60017592;60012468,Kanazawa University;Carleton University;University of Windsor,Kanazawa;Ottawa;Windsor,...,,"© 2019, The Author(s). To understand brain changes caused by auditory sensory deprivation, we recorded local-field potentials in the inferior colliculus of young adult rats with neonatal cochlear damage produced by systemic injections of amikacin. The responses were elicited by electrical stimulation of the entire cochlea and recorded at various locations along a dorsolateral-ventromedial axis of the inferior colliculus. We found that hair cells were completely destroyed and spiral ganglion neurons were severely damaged in the basal cochleae of amikacin-treated animals. Hair cells as well as spiral ganglion neurons were damaged also in the middle and apical areas of the cochlea, with the damage being greater in the middle than the apical area. Amplitudes of local-field potentials were reduced in the ventromedial inferior colliculus, but enhanced in the dorsolateral inferior colliculus. Latencies of responses were increased over the entire structure. The enhancement of responses in the dorsolateral inferior colliculus was in contrast with the damage of hair cells and spiral ganglion cells in the apical part of the cochlea. This contrast along with the overall increase of latencies suggests that early cochlear damage can alter neural mechanisms within the inferior colliculus and/or the inputs to this midbrain structure.",,0,1,,undefined,Natural Sciences and Engineering Research Council of Canada,1,2.0
1,2-s2.0-85065527826,10.1016/j.scitotenv.2019.04.311,S0048969719318467,,"Distinguishing point and non-point sources of dissolved nutrients, metals, and legacy contaminants in the Detroit River",ar,Maguire T.,60012468;60002306,University of Windsor;University of Calgary,Windsor;Calgary,...,1-8,"© 2019 Elsevier B.V. Water quality impacts to the Laurentian Great Lakes create bi-national issues that have been subject of investigation since the 1970s. However, distinguishing upgradient sources of nutrients, metals and legacy contaminants in rivers remains a challenge, as they are derived from multiple sources and flows typically vary throughout the region. These complications are especially pertinent in the Lake Huron to Lake Erie corridor and Detroit River. The Detroit River supplies 90% of the water to the western basin of Lake Erie (5300 m 3 /s)and is subject to a variety of co-occurring potential sources (e.g., agriculture, urbanization, and upgradient water bodies)of water quality indicators that limit source disaggregation. To find the source signal in the noise we used an integrative interpretation of dissolved chemical and isotopic parameters with sediment chemical, isotopic, and contaminant indicators. The approach combines archival data to distinguish point and non-point sources, and upgradient water bodies as sources of nutrients, metals and contaminants to the Detroit River and ultimately the western basin of Lake Erie. Persistent organic pollutants and metals cluster together as an urban group. Regional dissolved orthro-phosphate (PO 4 )in the water column also groups with urban point sources rather than agricultural sources. Urbanization as the primary source of PO 4 in the Detroit River highlights the need for continued research on urban impacts and assessments of broader best management practices protecting Lake Erie.",Detroit River | Dissolved nutrients | Laurentian Great Lakes | Non-point source | Point source,0,0,,GCXE16R166,,0,11.0
2,2-s2.0-85065401518,10.1016/j.ecoleng.2019.03.014,S0925857419301077,,Capacity for bioreactors and riparian rehabilitation to enhance nitrate attenuation in agricultural streams,ar,Goeller B.,60020585;60010259;60005686,University of Canterbury;ESR - Environmental Science and Research;University of Auckland,Christchurch;Porirua;Auckland,...,65-77,"© 2019 Elsevier B.V. Globally, small agricultural waterways fed by springs, tile drains, and seeps can disproportionately contribute to downstream nutrient loading, which is associated with declines in water quality and ecosystem functions. Treating nitrate using a multiple tool, multiple-scale approach in small waterways could offer improved management of these sources. We used a before-after-control-impact design to test the suitability of three small (<30 m 3 ) edge-of-field denitrifying woodchip bioreactors and stream bank re-shaping and riparian planting. Over three-and-a-half-years, riparian rehabilitation enhanced nitrate flux attenuation compared to pre-rehabilitation, but only under relatively low flow conditions. In comparison, there were no significant changes in nitrate flux in a control waterway under any flow condition. N fluxes always increased in both the control and treatment waterways when reaches gained water downstream. Nitrate removal efficiencies for all three bioreactors ranged from <10 to >99%, with performance variations due to short residence times and fluctuations in source water chemistry. A single tile drain bioreactor removed 0.41 kg NO 3 -N d −1 , equivalent to ∼10% of the mean daily tile drain nitrate load. Greenhouse gas fluxes from the tile drain bioreactor were similar to the surrounding pasture (CO 2 -C mean: 185–286 mg C m 2 h −1 ; N 2 O-N mean: 49–90 μg N m 2 h −1 ), suggesting no negative impacts from the bioreactor. Overall, our results suggest a multiple-tool, multiple-scale application of rehabilitation tools can reduce downstream N fluxes, but only under certain flow conditions. 
Thus, local rehabilitation tools, like those trialed here, will need to be scaled appropriately if they are to significantly attenuate nutrient losses from small agricultural waterways. Moreover, these will not replace catchment-scale nutrient plans to address losses from land and legacy groundwater N pollution.",Agricultural land-use | Denitrification | Denitrifying woodchip bioreactor | Greenhouse gas | Nitrate-nitrogen flux | Tile drainage,0,0,,undefined,,0,1.0
3,2-s2.0-85063386854,10.1016/j.scitotenv.2019.03.308,S0048969719313063,30928741,Springs drive downstream nitrate export from artificially-drained agricultural headwater catchments,ar,Goeller B.,60020585,University of Canterbury,Christchurch,...,119-128,"© 2019 Elsevier B.V. Excessive nutrient loading from small agricultural headwaters can substantially degrade downstream water quality and ecological conditions. But, our understanding of the scales and locations to implement nutrient attenuation tools within these catchments is poor. To help inform farm- and catchment-scale management, we quantified nitrate export in nine one-kilometre-long lowland agricultural headwaters fed by tile and open tributary drains in a region with high groundwater nitrate (<1 to >15 mg L −1 NO 3 -N) over four years. Across-catchment differences in upstream spring water nitrate concentrations predicted differences in annual nitrate loads at catchment outlets (range <1–72 megagrams NO 3 -N 365 d −1 ), and nitrate loads were higher in wet seasons and wet years, reflecting strong groundwater influences. Partitioning the sources of variability in catchment nitrate fluxes revealed that ~60% of variation was accounted for by a combination of fluxes from up-stream springs and contributions from tile and open tributary drains (46% and 15%, respectively), with ~40% of unexplained residual variation likely due to groundwater upwellings. Although tile and open tributary drains contributed comparatively less to catchment loads (tile drains: <0.01 and up to 50 kg NO 3 -N d −1 ; open drains: <5 kg and up to 100 kg NO 3 -N d −1 ), mitigation targeted at these localised, farm-scale sources will contribute to decreasing downstream nitrate fluxes. However, high nitrate loads from groundwater mean current NO 3 -N waterway management and rehabilitation practices targeting waterway stock exclusion by fencing alone will be insufficient to reduce annual NO 3 -N export. 
Moreover, managing catchment nutrient fluxes will need to acknowledge contributions from groundwater as well as farm-scale losses from land. Overall, our results highlight how nutrient fluxes in spring-fed waterways can be highly dynamic, dominated more by groundwater than local run-off, and point to the scales and locations where nitrate attenuation tools should be implemented.",Agricultural land-use | Groundwater | Headwater catchments | Nitrate‑nitrogen flux | Subsurface drainage,1,0,,undefined,,0,11.0
4,2-s2.0-85063729352,10.1016/j.jglr.2019.03.008,S0380133019300632,,Risk-based classification and interactive map of watersheds contributing anthropogenic stress to Laurentian Great Lakes coastal ecosystems,ar,Host G.,60012468;60009875,University of Windsor;University of Minnesota Duluth,Windsor;Duluth,...,609-618,"© 2019 The Authors We describe development anthropogenic stress indices for coastal margins of the Laurentian Great Lakes basin. Indices were derived based on the response of species assemblages to watershed-scale stress from agriculture and urbanization. Metrics were calculated for five groups of wetland biota: diatoms, wetland vegetation, aquatic invertebrates, fishes, and birds. Previously published community change points of these assemblages were used to classify each watershed as ‘least-disturbed’, ‘at-risk’, or ‘degraded’ based on community response to these stressors. The end products of this work are an on-line map utility and downloadable data that characterize the degree of agricultural land use and development in all watersheds of the US and Canadian Great Lakes basin. Discrepancies between the observed biological condition and putative anthropogenic stress can be used to determine if a site is more degraded than predicted based on watershed characteristics, or if remediation efforts are having beneficial impacts on site condition. This study provides a landscape-scale evaluation of wetland condition that is a critical first step for multi-scale assessments to help prioritize conservation or restoration efforts.",Ecological thresholds | Environmental indicators | Environmental stress | Great Lakes | Watershed,0,1,,GL-00E00623-0,,1,63.0
5,2-s2.0-85059859084,10.1016/j.ecolind.2019.01.010,S1470160X19300093,,Evidence for interactions among environmental stressors in the Laurentian Great Lakes,ar,Smith S.,60032179;60032083;60025778;60025659;60012468;60011347;60009841;60009408;60002709,"University of Wisconsin-Madison;University at Buffalo, State University of New York;University of Michigan, Ann Arbor;Grand Valley State University;University of Windsor;United States Geological Survey;Central Michigan University;Wayne State University;Delaware State University",Madison;Buffalo;Ann Arbor;Allendale;Windsor;Reston;Mount Pleasant;Detroit;Dover,...,203-211,"© 2019 Elsevier Ltd Co-occurrence of environmental stressors is ubiquitous in ecosystems, but cumulative effects are difficult to predict for effective indicator development. Individual stressors can amplify (synergies) or lessen (antagonisms) each other's impacts or have fully independent effects (additive). Here we use the Laurentian Great Lakes, where a multitude of stressors have been studied for decades, as a case study for considering insights from both a systematic literature review and an expert elicitation (or structured expert judgment) to identify stressor interactions. In our literature search for pairs of stressors and interaction-related keywords, relatively few studies (9%, or 6/65) supported additive interactions with independent stressor effects. Instead, both antagonisms (42%, or 27/65) and synergies (49%, or 32/65) were common. We found substantial evidence for interactions of invasive dreissenid mussels with nutrient loading and between pairs of invasive species (predominantly dreissenids × round goby), yet both sets of records included mixtures of synergies and antagonisms. Complete quantification of individual and joint effects of stressors was rare, but effect sizes for dreissenid mussels × nutrient loading supported an antagonism. Our expert elicitation included discussion in focus groups and a follow-up survey. 
This process highlighted the potential for synergies of nutrient loading with dreissenid mussels and climate change as seen from the literature review. The elicitation also identified additional potential interactions less explored in the literature, particularly synergies of nutrient loading with hypoxia and wetland loss. To stimulate future research, we built a conceptual model describing interactions among dreissenid mussels, climate change, and nutrient loading. Our case study illustrates the value of considering results from both elicitations and systematic reviews to overcome data limitations. The simultaneous occurrence of synergies and antagonisms in a single ecosystem underscores the challenge of predicting the cumulative effects of stressors to guide indicator development and other management and restoration decisions.",Cumulative impact | Data synthesis | Global change | Meta-analysis | Structured expert judgment | Systematic review,0,0,U-M,undefined,University of Michigan,0,2.0
6,2-s2.0-85062851819,10.1016/j.aquaculture.2019.03.008,S004484861732553X,,Inter-population differences in farmed Chinook salmon product quantity and quality,ar,Lajoie C.,60012468;100476248,University of Windsor;Yellow Island Aquaculture Ltd.,Windsor,...,23-29,"© 2019 Elsevier B.V. In British Columbia, Atlantic salmon (Salmo salar) are the top finfish aquaculture export of the province, although native Chinook salmon (Oncorhynchus tshawytscha) are also farmed locally. Few commercial facilities rear Chinook salmon, limiting the availability and development of their broodstocks, potentially reducing the ability to improve product quantity and quality. Due to the potential for inbreeding in these stocks, a need to determine whether product quantity and quality can be improved through outbreeding with wild populations exists. In this study, we examined the effects of outbreeding on farmed salmon by comparing product quantity and quality metrics in six experimental populations of outbred (wild × farmed) Chinook salmon and one farmed (control) population. Specifically, we measured fillet yield, slaughter yield, lipid content and flesh colour score in three-year old market-sized salmon immediately post-slaughter. We found significant differences across populations for slaughter yield, fillet yield and flesh colour score but found no differences across populations in lipid content. For flesh colour score, slaughter and fillet yield, the control farmed population performed similarly to the highest performing outbred populations. These results suggest that outbreeding inbred farmed populations with wild populations can maintain high product quality while adding new genes to a population.",Farmed | Multiple populations | Outbreeding | Quality | Wild,0,0,,240909549,Natural Sciences and Engineering Research Council of Canada,0,9.0
7,2-s2.0-85066327778,10.3390/w11050962,,,Long-term changes in the zooplankton community of Lake Maggiore in response to multiple stressors: A functional principal components analysis,ar,Arfè A.,60021796;60021199;60012468;60012306,Università Bocconi;Consiglio Nazionale delle Ricerche;University of Windsor;University of Milano - Bicocca,Milan;Rome;Windsor;Milan,...,,"© 2019 by the authors. We describe the long-term (1981-2008) dynamics of several physico-chemical and biological variables and how their changes may have influenced zooplankton structure in Lake Maggiore (Italy). Data was available for the 1981-1992 and 1995-2008 periods. Standardized time-series for temperature and total phosphorus (TP), chlorophyll-a, phytoplankton density (cel m-3), and cell size (μm3), as well as zooplankton structure (Copepoda, Cladocera, and Rotifera density, ind m-3) were smoothed using penalized B-splines and analyzed using Functional Principal Components (FPCs) to assess their dominant modes of variation. The first four FPCs explained 55% of 1981-1992 and 65% of 1995-2008 overall variation. Results showed that temperature fluctuated during the study period, particularly during 1988-1992 with a general tendency to increase. TP showed a declining trend with some reversions in the pattern observed in the years 1992, 1999, and 2000. Phytoplankton estimators and chlorophyll-a concentration showed a variable trend along the study period. Zooplankton groups also had a variable trend along the study period with a general increase in density of large carnivorous (mainly Bythotrephes longimanus) and a decrease of large herbivorous (mainly Daphnia), and a similar increase in the ratio of raptorial to microphagous rotifers. Our results suggest that the lake experienced a strong trophic change associated with oligotrophication, followed by pronounced climate-induced changes during the latter period. 
TP concentration was strongly associated with changes in abundance of some zooplankton taxa.",B-Splines smoothing | Functional Data Analysis | Limnology | Monitoring ecological dynamics | Oligotrophication | Phytoplankton | Zooplankton,0,1,,undefined,,1,2.0
8,2-s2.0-85064043149,10.1111/jfb.13958,,30868595,Resource partitioning between two young-of-year cownose rays Rhinoptera bonasus and R. brasiliensis within a communal nursery inferred by trophic biomarkers,ar,de Sousa Rangel B.,60012468;60008088;60006028,University of Windsor;Universidade de Sao Paulo - USP;UNESP-Universidade Estadual Paulista,Windsor;Sao Paulo;Sao Paulo,...,781-788,"© 2019 The Fisheries Society of the British Isles Although interspecific trophic interactions plays a principal role within elasmobranch communal nurseries, little is known over variation in foraging strategies adopted by young-of-year of sympatric species. To test the hypothesis of dietary resource partitioning between batoids within a communal nursery, we investigated two cownose ray species, Rhinoptera bonasus and R. brasiliensis, which occur in heterospecific groups, a strategy predicted to increase survival and foraging success. Using two biochemical tracers, fatty acids (FA) and stable isotopes (δ 15 N and δ 13 C), the combined effects of maternal investment and the formation of heterospecific groups implying competition for, or partitioning of available food resources were investigated. Through univariate and multivariate analyses of biochemical tracers in several tissues (fin clip, muscle, liver, red blood cells; RBC) and plasma, our results revealed significant interspecific differences in tracers between the two species. Total FAs (∑saturated FA, ∑monounsaturated FA and ∑polyunsaturated FA) and trophic biomarkers (i.e., docosahexaenoic acid, arachidonic acid, oleic acid and δ 15 N) were the principle tracers responsible for the differences detected. These data revealed that R. brasiliensis was less enriched in physiologically important essential FAs than R. bonasus. 
Our findings suggest that these congeneric species differ in maternal investment strategy and moderately partition food resources over relatively fine spatial scales within a single nursery habitat to limit competition. These results provide further knowledge on the foraging strategies adopted by batoids in communal nursery areas, information that is required for improving spatial conservation and management planning.",elasmobranch | fatty acids | life history strategy | maternal investment | stable isotopes | trophic ecology,0,0,,FAPESP 2014/16320–7,Coordenação de Aperfeiçoamento de Pessoal de Nível Superior,0,23.0
9,2-s2.0-85061345149,10.1016/j.chemosphere.2018.12.139,S0045653518324743,30721805,"Dioxins in Great Lakes fish: Past, present and implications for future monitoring",ar,Gandhi N.,60016849;60012468;60009800,University of Toronto;University of Windsor;Ontario Ministry of the Environment,Toronto;Windsor;Toronto,...,479-488,"© 2018 Dioxins/furans are considered among the most toxic anthropogenic chemicals, and are ubiquitous in the environment including in the North American Great Lakes, which contain one fifth of the world's surface freshwater. Our exposure to dioxins/furans is mainly through contaminated diet. Elevated levels of dioxins/furans in Great Lakes fish have resulted in issuance of fish consumption advisories. Here we examine spatial/temporal trends of dioxins/furans in the edible portion (fillet) of fish from the Canadian waters of the Great Lakes using the data collected by the Province of Ontario, Canada. Our analyses show that the Toxic Equivalent (TEQ) dioxin/furan concentrations declined between 1989 and 2013 in Lake Trout from Lakes Ontario, Huron and Superior by 91%, 78% and 73%, respectively, but increased in Lake Whitefish from Lake Erie by 138%. An expanded dataset created by combining our data with historical Lake Ontario Lake Trout measurements from the literature showed a greater decline of >96% (from 64 to 2.3 pg/g) between 1977 and 2013. Measurements collected for 30 types of fish show overall low levels but local/regional concerns at some locations in Lakes Huron, Erie and Ontario. Dioxins/furans are globally present in foodstuff and “zero concentration” target is considered impractical. 
Based on the observations for the Great Lakes in the context of risk to human health from eating fish, it is concluded that comprehensive monitoring of dioxins/furans can be replaced with targeted locations and/or indicator species, and the saved resources can be more efficiently utilized for monitoring of other priority or emerging contaminants.",Advisories | Dioxins/furans/dioxin like PCB | Fish consumption | Great lakes | Health risk | Spatial and temporal trends,0,0,,undefined,Government of Ontario,0,15.0


In [14]:
# Create a new dataframe of journals, with duplicates of journals removed
# Build the journal dataframe: keep the first publication row for each source_id
df_journals = df.drop_duplicates(subset='source_id')

In [15]:
# Get rid of unnessary columns
# Keep only the columns that describe the journal itself
journal_columns = ['publicationName', 'source_id', 'issn', 'eIssn', 'aggregationType', 'count']
df_journals = df_journals[journal_columns]

In [16]:
# Sort the dataframe by publication count
# Order the journals from most to least published-in
df_journals = df_journals.sort_values('count', ascending=False)

In [17]:
# Reset the index count for this dataframe
# Replace the index left over from the publication dataframe with a fresh
# 0..n-1 index that follows the sorted order (the old index is discarded)
df_journals = df_journals.reset_index(drop=True)

In [18]:
# delete all rows where aggregate type is not a journal
# Keep only rows that
#   - have the aggregation type 'Journal',
#   - have a source ID, and
#   - have at least one of ISSN / eISSN
is_journal = df_journals['aggregationType'] == 'Journal'
has_source_id = df_journals['source_id'].notnull()
has_any_issn = df_journals['issn'].notnull() | df_journals['eIssn'].notnull()

# .copy() gives an independent dataframe so later column assignments do not
# act on a slice of the unfiltered data
df_journals = df_journals[is_journal & has_source_id & has_any_issn].copy()

In [19]:
# Create a column with an ISSN for searching, and populate with the values for the eISSN
# Create the column holding the ISSN used for searching, starting from the
# eISSN (rows without an eISSN are filled in from the ISSN afterwards)
df_journals['q_issn'] = df_journals['eIssn']

In [20]:
# For those rows where there is no eISSN, replace them with the ISSN
# Where the search ISSN is still missing (no eISSN), fall back to the ISSN
df_journals['q_issn'] = df_journals['q_issn'].fillna(df_journals['issn'])

In [21]:
df_journals

Unnamed: 0,publicationName,source_id,issn,eIssn,aggregationType,count,q_issn
0,Journal of Great Lakes Research,17510,03801330,,Journal,63.0,03801330
1,Canadian Journal of Fisheries and Aquatic Scie...,12016,0706652X,12057533,Journal,57.0,12057533
2,Environmental Toxicology and Chemistry,25094,07307268,15528618,Journal,38.0,15528618
3,Animal Behaviour,24580,00033472,,Journal,29.0,00033472
4,Biological Invasions,13257,13873547,15731464,Journal,29.0,15731464
5,Environmental Science and Technology,21537,0013936X,15205851,Journal,26.0,15205851
6,Molecular Ecology,20297,09621083,1365294X,Journal,24.0,1365294X
7,Journal of Fish Biology,22560,00221112,10958649,Journal,23.0,10958649
8,Hydrobiologia,15168,00188158,15735117,Journal,23.0,15735117
9,PLoS ONE,10600153309,,19326203,Journal,22.0,19326203


In [22]:
# Run searchOpenURL on every journal row (axis=1 passes one row at a time),
# then store its two returned values as the availability and coverage columns
resolver_results = df_journals.apply(searchOpenURL, axis=1)
df_journals[['availability', 'coverage']] = resolver_results

Full-text available for 03801330
Full-text available for 03801330
Full-text available for 12057533
Full-text available for 15528618
Full-text available for 00033472
Full-text available for 15731464
Full-text available for 15205851
Full-text available for 1365294X
Full-text available for 10958649
Full-text available for 15735117
Full-text available for 19326203
Full-text available for 14209101
Full-text available for 13652427
Full-text available for 14724642
Full-text available for 20457758
Full-text available for 1600048X
Full-text available for 14657279
Full-text available for 18791298
Full-text available for 14712954
Full-text available for 14390310
Full-text available for 14320800
Full-text available for 13652540
Full-text available for 03603199
Full-text available for 00298549
Full-text available for 19395590
Full-text available for 19395582
Full-text available for 01718630
Full-text available for 00084301
Full-text available for 15729737
Full-text available for 18791026
Full-text 

Full-text available for 00099120
Full-text available for 14320983
Full-text available for 0379864X
Full-text available for 03043770
Full-text available for 11758805
Full-text not available for 00770825
Full-text available for 14698714
Full-text available for 10970029
Full-text available for 01689525
Full-text available for 01427873
Full-text available for 20457758
Full-text available for 09258574
Full-text available for 14322048
Full-text not available for 00431370
Full-text available for 07302312
Full-text available for 15253961
Full-text available for 09218181
Full-text available for 09254773
Full-text available for 08926638
Full-text available for 14442906
Full-text available for 07349750
Full-text available for 09291903
Full-text available for 10728368
Full-text available for 00144827
Full-text available for 0006291X
Full-text available for 13861980
Full-text available for 10889051
Full-text available for 10689265
Full-text available for 07308000
Full-text not available for 2530064

In [25]:
# Refine the availability statements using the coverage dates
# (embargoed, or not available up to the present)
refined_availability = df_journals.apply(coverageStatement_availParser, axis=1)
df_journals[['availability']] = refined_availability

In [33]:
# Add a column that holds only the date ranges for each journal
coverage_ranges = df_journals.apply(coverageStatement_yearsParser, axis=1)
df_journals[['coverageRange']] = coverage_ranges

In [34]:
# Count how many journals fall under each availability statement
df_journals['availability'].value_counts()

Full-text available to present           327
Full-text available with embargo         24 
No full-text available                   19 
Full-text available, but not complete    19 
Name: availability, dtype: int64

In [38]:
# Show the journals dataframe with the availability, coverage and
# coverageRange columns added
df_journals

Unnamed: 0,publicationName,source_id,issn,eIssn,aggregationType,count,q_issn,availability,coverage,coverageRange
0,Journal of Great Lakes Research,17510,03801330,,Journal,63.0,03801330,Full-text available to present,"{'CRKN Elsevier ScienceDirect': 'Available from 1995 volume: 21 issue: 1.', 'Scholars Portal': 'Available from 1975 volume: 1 issue: 1.', 'BioOne Complete': 'Available from 2006 until 2010.'}","[1995 - present, 1975 - present, 2006 - 2010]"
1,Canadian Journal of Fisheries and Aquatic Sciences,12016,0706652X,12057533,Journal,57.0,12057533,Full-text available to present,"{'EBSCOhost Academic Search Complete': 'Available from 2001.Most recent 1 year(s) not available.', 'Gale Cengage Academic OneFile': 'Available from 2006.Most recent 1 year(s) not available.', 'Gale Cengage CPI.Q': 'Available from 2006.Most recent 1 year(s) not available.', 'Canadian Science Publishing (NRC Research Press) Current': 'Available from 1980 volume: 37 issue: 1.', 'Canadian Science Publishing (NRC Research Press) Back Issues': 'Available from 1980 volume: 37 issue: 1 until 1995 volume: 52 issue: 12.', 'Canadian Business & Current Affairs Database: Business (CBCA)': 'Available from 1998 until 2007.', 'Canadian Business & Current Affairs Database (CBCA)': 'Available from 1998 until 2007.', 'Canadian Business & Current Affairs Database: Reference (CBCA)': 'Available from 1998 until 2007.', 'SciTech Premium Collection': 'Available from 1998 until 2007.'}","[2001 - 1 year ago, 2006 - 1 year ago, 2006 - 1 year ago, 1980 - present, 1980 - 1995, 1998 - 2007, 1998 - 2007, 1998 - 2007, 1998 - 2007]"
2,Environmental Toxicology and Chemistry,25094,07307268,15528618,Journal,38.0,15528618,Full-text available to present,"{'CRKN Wiley Online Library': 'Available from 1997 volume: 16 issue: 1.', 'Scholars Portal': 'Available from 1995 volume: 14 issue: 1.', 'SciTech Premium Collection': 'Available from 2005 until 2009.'}","[1997 - present, 1995 - present, 2005 - 2009]"
3,Animal Behaviour,24580,00033472,,Journal,29.0,00033472,Full-text available to present,"{'Elsevier SD Freedom Collection': 'Available from 1993 volume: 45 issue: 1.', 'CRKN Elsevier ScienceDirect': 'Available from 1993 volume: 45 issue: 1.', 'Scholars Portal': 'Available from 1993 volume: 45 issue: 1.'}","[1993 - present, 1993 - present, 1993 - present]"
4,Biological Invasions,13257,13873547,15731464,Journal,29.0,15731464,Full-text available to present,"{'Canadian Research Knowledge Network SpringerLink Current': 'Available from 1999 volume: 1 issue: 1.', 'Scholars Portal': 'Available from 1999 volume: 1 issue: 1.', 'SciTech Premium Collection': 'Available from 1999.Most recent 1 year(s) not available.'}","[1999 - present, 1999 - present, 1999 - 1 year ago]"
5,Environmental Science and Technology,21537,0013936X,15205851,Journal,26.0,15205851,Full-text available to present,"{'American Chemical Society Legacy Archive': 'Available from 1967 volume: 1 issue: 1.', 'CRKN American Chemical Society Journals': 'Available from 1995 volume: 30 issue: 1.'}","[1967 - present, 1995 - present]"
6,Molecular Ecology,20297,09621083,1365294X,Journal,24.0,1365294X,Full-text available to present,"{'CRKN Wiley Online Library': 'Available from 1992 volume: 1 issue: 1.', 'Scholars Portal': 'Available from 1992 volume: 1 issue: 1.'}","[1992 - present, 1992 - present]"
7,Journal of Fish Biology,22560,00221112,10958649,Journal,23.0,10958649,Full-text available to present,"{'CRKN Wiley Online Library': 'Available from 1969 volume: 1 issue: 1.', 'Scholars Portal': 'Available from 1969 volume: 1 issue: 1.'}","[1969 - present, 1969 - present]"
8,Hydrobiologia,15168,00188158,15735117,Journal,23.0,15735117,Full-text available to present,"{'Canadian Research Knowledge Network SpringerLink Current': 'Available from 1997 volume: 342 issue: 1.', 'EBSCOhost Academic Search Complete': 'Available from 2003.Most recent 1 year(s) not available.', 'Gale Cengage Academic OneFile': 'Available from 2010 until 2011.Available from 2013.Most recent 1 year(s) not available.', 'SpringerLink Historical Archives Biomedical and Life Sciences': 'Available from 1948 volume: 1 until 1996 volume: 341.', 'SpringerLink Historical Archives Earth and Environmental Sciences': 'Available from 1948 volume: 1 until 1996 volume: 341.', 'Scholars Portal': 'Available from 1948 volume: 1 issue: 1.', 'SciTech Premium Collection': 'Available from 1997.Most recent 1 year(s) not available.'}","[1997 - present, 2003 - 1 year ago, 2010 - 1 year ago, 2010 - 2011, 1948 - 1996, 1948 - 1996, 1948 - present, 1997 - 1 year ago]"
9,PLoS ONE,10600153309,,19326203,Journal,22.0,19326203,Full-text available to present,"{'DOAJ Directory of Open Access Journals': 'Available from 2006.', 'EBSCOhost Academic Search Complete': 'Available from 2008.', 'PubMed Central': 'Available from 2006 volume: 1.', 'Gale Cengage Academic OneFile': 'Available from 2006.', 'SciTech Premium Collection': 'Available from 2006.', 'Nursing & Allied Health Database': 'Available from 2006.'}","[2006 - present, 2008 - present, 2006 - present, 2006 - present, 2006 - present, 2006 - present]"


In [42]:
# List the journals for which no full-text is available
df_journals.loc[df_journals['availability'].eq('No full-text available')]

Unnamed: 0,publicationName,source_id,issn,eIssn,aggregationType,count,q_issn,availability,coverage,coverageRange
108,Marine and Freshwater Research,27846,13231650,,Journal,3.0,13231650,No full-text available,{},[]
128,Archiv fur Hydrobiologie,13046,00039136,,Journal,3.0,00039136,No full-text available,{},[]
156,Fundamental and Applied Limnology,5400152636,18639135,,Journal,2.0,18639135,No full-text available,{},[]
166,Emu,21574,01584197,,Journal,2.0,01584197,No full-text available,{},[]
172,British Phycological Journal,67985,00071617,,Journal,2.0,00071617,No full-text available,{},[]
175,Zootaxa,4700151916,11755326,11755334,Journal,2.0,11755334,No full-text available,{},[]
207,Oceanological Studies,66993,1505232X,,Journal,1.0,1505232X,No full-text available,{},[]
218,Progress in cell cycle research,19022,10872957,,Journal,1.0,10872957,No full-text available,{},[]
221,American Fisheries Society Symposium,29410,08922284,,Journal,1.0,08922284,No full-text available,{},[]
235,Canadian Acoustics - Acoustique Canadienne,12971,07116659,,Journal,1.0,07116659,No full-text available,{},[]


In [44]:
# List the journals whose full-text is only available with an embargo
df_journals.loc[df_journals['availability'].eq('Full-text available with embargo')]

Unnamed: 0,publicationName,source_id,issn,eIssn,aggregationType,count,q_issn,availability,coverage,coverageRange
17,Proceedings of the Royal Society B: Biological Sciences,130030,9628452,14712954,Journal,13.0,14712954,Full-text available with embargo,"{'Highwire Press Free': 'Available from 1905 volume: 76 issue: 507 until 1947 volume: 135 issue: 878.', 'JSTOR Life Sciences Collection': 'Available from 1990 volume: 241 issue: 1300.Most recent 4 year(s) not available.', 'PubMed Central': 'Available from 1997 volume: 264.Most recent 1 year(s) not available.'}","[1905 - 1947, 1990 - 4 year ago, 1997 - 1 year ago]"
34,Journal of Experimental Biology,29605,220949,,Journal,9.0,00220949,Full-text available with embargo,{'Highwire Press Company of Biologists': 'Available from 1923 volume: 1 issue: 1.Most recent 6 month(s) not available.'},[1923 - 6 month ago]
50,Physiological and Biochemical Zoology,23318,15222152,,Journal,7.0,15222152,Full-text available with embargo,"{'EBSCOhost Academic Search Complete': 'Available from 1999.Most recent 1 year(s) not available.', 'JSTOR Life Sciences Collection': 'Available from 1999 volume: 72 issue: 1.Most recent 4 year(s) not available.'}","[1999 - 1 year ago, 1999 - 4 year ago]"
51,American Naturalist,12604,30147,,Journal,7.0,00030147,Full-text available with embargo,{'JSTOR Life Sciences Collection': 'Available from 1867 volume: 1 issue: 1.Most recent 4 year(s) not available.'},[1867 - 4 year ago]
62,Journal of Neuroscience,16764,2706474,15292401,Journal,5.0,15292401,Full-text available with embargo,"{'Highwire Press Free': 'Available from 1981.Most recent 6 month(s) not available.', 'Free E- Journals': 'Available from 1981 volume: 1 issue: 1.Most recent 6 month(s) not available.', 'PubMed Central': 'Available from 1997 volume: 17.Most recent 6 month(s) not available.'}","[1981 - 6 month ago, 1981 - 6 month ago, 1997 - 6 month ago]"
76,Biology Letters,145678,17449561,1744957X,Journal,4.0,1744957X,Full-text available with embargo,{'PubMed Central': 'Available from 2005 volume: 1.Most recent 1 year(s) not available.'},[2005 - 1 year ago]
87,Journal of Cell Biology,18555,219525,15408140,Journal,4.0,15408140,Full-text available with embargo,"{'Highwire Press Free': 'Available from 1955 volume: 1 issue: 1.Most recent 6 month(s) not available.', 'PubMed Central': 'Available from 1962 volume: 12.Most recent 6 month(s) not available.'}","[1955 - 6 month ago, 1962 - 6 month ago]"
94,Freshwater Science,21100297824,21619549,21619565,Journal,3.0,21619565,Full-text available with embargo,"{'EBSCOhost Academic Search Complete': 'Available from 2013.Most recent 1 year(s) not available.', 'Scholars Portal': 'Available from 2012 volume: 31 issue: 1 until 2014 volume: 33 issue: 2.', 'JSTOR Life Sciences Collection': 'Available from 2012 volume: 31 issue: 1.Most recent 4 year(s) not available.', 'BioOne Complete': 'Available from 2012 until 2014.'}","[2013 - 1 year ago, 2012 - 2014, 2012 - 4 year ago, 2012 - 2014]"
149,Plant Physiology,16615,320889,,Journal,2.0,00320889,Full-text available with embargo,"{'Highwire Press Free': 'Available from 1926.Most recent 1 year(s) not available.', 'JSTOR Life Sciences Collection': 'Available from 1926 volume: 1 issue: 1.Most recent 3 year(s) not available.', 'PubMed Central': 'Available from 1926 volume: 1.Most recent 1 year(s) not available.', 'JSTOR Ecology & Botany II': 'Available from 1926 volume: 1 issue: 1.Most recent 3 year(s) not available.', 'SciTech Premium Collection': 'Available from 1998 until 2012.'}","[1926 - 1 year ago, 1926 - 3 year ago, 1926 - 1 year ago, 1926 - 3 year ago, 1998 - 2012]"
151,Cancer Research,29183,85472,,Journal,2.0,00085472,Full-text available with embargo,{'Highwire Press Free': 'Available from 1941 volume: 1 issue: 1.Most recent 1 year(s) not available.'},[1941 - 1 year ago]


In [45]:
# List the journals that have full-text, but not for the complete run
df_journals.loc[df_journals['availability'].eq('Full-text available, but not complete')]

Unnamed: 0,publicationName,source_id,issn,eIssn,aggregationType,count,q_issn,availability,coverage,coverageRange
25,Marine Ecology Progress Series,12169,01718630,,Journal,12.0,01718630,"Full-text available, but not complete",{'Free E- Journals': 'Available from 1979 volume: 1 until 2011 volume: 448.'},[1979 - 2011]
59,Ornitologia Neotropical,4700152417,10754377,,Journal,5.0,10754377,"Full-text available, but not complete",{'SORA Searchable Ornithological Research Archive': 'Available from 1990 volume: 1 issue: 1 until 2014 volume: 25 issue: 4.'},[1990 - 2014]
61,Molecular Ecology Notes,20299,14718278,14718286.0,Journal,5.0,14718286,"Full-text available, but not complete","{'CRKN Wiley Online Library': 'Available from 2001 volume: 1 issue: 3 until 2007 volume: 7 issue: 6.', 'Scholars Portal': 'Available from 2001 volume: 1 issue: 1 until 2008 volume: 8 issue: 2.'}","[2001 - 2007, 2001 - 2008]"
144,Journal of Neurocytology,18573,03004864,,Journal,2.0,03004864,"Full-text available, but not complete","{'Scholars Portal': 'Available from 1972 volume: 1 issue: 1 until 2005 volume: 34 issue: 6.', 'Canadian Research Knowledge Network SpringerLink Current': 'Available from 1997 volume: 26 issue: 1 until 2005.'}","[1972 - 2005, 1997 - 2005]"
159,Developmental Genetics,38446,0192253X,,Journal,2.0,0192253X,"Full-text available, but not complete","{'CRKN Wiley Online Library': 'Available from 1996 volume: 18 issue: 1 until 1999 volume: 25 issue: 4.', 'Scholars Portal': 'Available from 1995 volume: 16 issue: 1 until 1999 volume: 25 issue: 4.'}","[1996 - 1999, 1995 - 1999]"
160,"Brain, Behavior and Evolution",14323,00068977,,Journal,2.0,00068977,"Full-text available, but not complete",{'SciTech Premium Collection': 'Available from 1998 until 2015.'},[1998 - 2015]
161,Developmental Brain Research,15034,01653806,,Journal,2.0,01653806,"Full-text available, but not complete","{'Elsevier SD Freedom Collection': '', 'CRKN Elsevier ScienceDirect': 'Available from 1995 volume: 84 issue: 1 until 2005 volume: 160 issue: 2.', 'Scholars Portal': 'Available from 1981 volume: 1 issue: 1 until 2005 volume: 160 issue: 2.'}","[1995 - 2005, 1981 - 2005]"
167,Journal of the North American Benthological Society,12904,08873593,,Journal,2.0,08873593,"Full-text available, but not complete","{'EBSCOhost Academic Search Complete': 'Available from 2011 until 2011.', 'JSTOR Life Sciences Collection': 'Available from 1986 volume: 5 issue: 1 until 2011 volume: 30 issue: 4.', 'Scholars Portal': 'Available from 1986 volume: 5 issue: 1 until 2014 volume: 33 issue: 4.', 'BioOne Complete': 'Available from 2004 until 2011.'}","[2011 - 2011, 1986 - 2011, 1986 - 2014, 2004 - 2011]"
222,Journal of Experimental Zoology Part B: Molecular and Developmental Evolution,22556,0022104X,,Journal,1.0,0022104X,"Full-text available, but not complete","{'Scholars Portal': 'Available from 1995 volume: 271 issue: 1 until 2006 volume: 306 issue: 6.', 'CRKN Wiley Online Library': 'Available from 1996 volume: 274 issue: 1 until 2002 volume: 294 issue: 4.'}","[1995 - 2006, 1996 - 2002]"
253,Mutation Research - Environmental Mutagenesis and Related Subjects,31756,01651161,,Journal,1.0,01651161,"Full-text available, but not complete","{'Elsevier SD Freedom Collection': '', 'CRKN Elsevier ScienceDirect': 'Available from 1995 volume: 334 issue: 1 until 1996 volume: 361 issue: 3.', 'Scholars Portal': 'Available from 1995 volume: 334 issue: 1 until 1996 volume: 361 issue: 2.'}","[1995 - 1996, 1995 - 1996]"


In [47]:
# Save the journal coverage table as a CSV file.
# Create the output folder first, because to_csv does not create missing
# directories and fails if 'Results' is absent; exist_ok=True makes this a
# no-op when the folder already exists.
import os

os.makedirs('Results', exist_ok=True)
df_journals.to_csv('Results/journals.csv')