In [1]:
#return unavailable message if element has no text
def getText(elem):
    try:
        msg = elem.text  
        msg = msg.replace('<br>', '')
    except:
        msg = ""

    if msg is None:
        msg = "not available"

    return msg


def searchOpenURL(row):
    '''(pandas.DataFrame) --> pandas.Series
    
    This function takes a row of a pandas DataFram and gets the ISSN of a journal. When used with the pandas apply function, this function uses the ISSNs to run a HTTP query against a library OpenURL link resolver server, retrieves the XML response, and parses out the package name and coverage dates. Using getText, this function returns two columns in a panda Series for each row: a statement of availability, and a statement of coverage (package names and the dates they cover).
    
    '''
    # Create and run an HTTP request against the open URL link resolver 
    r = requests.get('http://na01.alma.exlibrisgroup.com/view/uresolver/01UTON_UW/openurl?svc_dat=CTO&issn={}'.format(row['q_issn']))
    # Parse the XML response and store it as root
    root = ElementTree.fromstring(r.content)
    # Create a dict of namespace values for use later on, so that the queries of the stored XML response can be cleaner
    ns = {'resolver': 'http://com/exlibris/urm/uresolver/xmlbeans/u'}
    # Create an empty dict that will be used to store the coverage statements for each journal. Key will be the package name, value will be the coverage dates.
    coverage_statement = {}
    
    # get all full-text services
    
    # if there is a full-text service
    if root.findall('.//resolver:context_service[@service_type="getFullTxt"]',ns) != []:
        # set the availability statement to show that there is a full-text
        avail_statement = 'Full-text available'
        print('Full-text available for ' + row['q_issn'])
        # for each full-text service
        for service in root.findall('.//resolver:context_service[@service_type="getFullTxt"]',ns):
            # Create empty str variables to store the details of the full-text service
            servicePackageName = ''
            serviceCoverage = ''
            # get package name of the full-text service and add it to the temporary str variable
            package = service.find('.//resolver:key[@id="package_public_name"]',ns)
            servicePackageName = getText(package)
            
            # get coverage date statement of the full-text service and add it to the temptorary str variable
            avail = service.find('.//resolver:key[@id="Availability"]',ns)
            serviceCoverage = getText(avail)
            # Add the details of this full-text service to the dict
            coverage_statement[servicePackageName] = serviceCoverage
    # When there is no full-text service
    else:
        # set the availability statement to show that there is no full-text
        avail_statement = 'No full-text available'
        print('Full-text not available for ' + row['q_issn'])
    # Return the availability and coverage statements as a pandas Series
    return pd.Series([avail_statement, coverage_statement])

In [2]:
def coverageStatement_availParser(row):
    '''
    (pd.Series) -> pd.Series
    
    This function parses out info from the coverage statements for all packages, and updates the availability statements for the journals to reflect those journals that don't have full-text coverage, those that do up to the present, those with embargo and those with full-text access, but not to the present.
    
    '''
    # Create an empy str for the coverage statement value
    avail_statement = ''
    # Only do run this function if there are full-text resources
    if row['coverage'] != {}:
        # Create an empty variable that will change if the function should stop
        stop = 0
        # Check all coverage statements in the dict, and if any ONE of them doesn't contain the words 'most recent' or 'until' (i.e., its up to the current), set the availability statement to available to present and stop.
        for value in row['coverage'].values():
            # Skip values that don't contain any data
            if value != '':
                if not any(s in value for s in ('Most recent', 'until')):
                    avail_statement = 'Full-text available to present'
                    stop = 1
                    break
        # If there was no coverage statement where there was full-text to the present, continue
        if stop == 0:
            for value in row['coverage'].values():
                if value != '':
                    # If there is any ONE line coverage statement that is for an embargo
                    if 'Most recent' in value:
                        avail_statement = 'Full-text available with embargo'
                        stop = 1
                        break
        # If there is no statement up to the present, nor for an embargo, then it must be available, but not complete.
        if stop == 0:
            for value in row['coverage'].values():
                if value != '':
                    if 'until' in value:
                        avail_statement = 'Full-text available, but not complete'
    else:
        avail_statement = 'No full-text available'
    return pd.Series([avail_statement])

In [3]:
import pandas as pd
import requests
import xml.etree.ElementTree as ElementTree
import re

In [45]:
df_rank = pd.read_csv('Data/sjrOrganicChemistry.csv', sep=';')

In [46]:
df_rank = df_rank[:100]

In [47]:
df_rank

Unnamed: 0,Rank,Sourceid,Title,Type,Issn,SJR,SJR Quartile,H index,Total Docs. (2018),Total Docs. (3years),Total Refs.,Total Cites (3years),Citable Docs. (3years),Cites / Doc. (2years),Ref. / Doc.,Country,Publisher,Coverage,Categories
0,1,26465,Progress in Polymer Science,journal,00796700,6365,Q1,244,61,148,16711,3587,145,2452,27395,Netherlands,Elsevier BV,"1967, 1970-1971, 1975, 1977-1978, 1980-1986, 1...",Ceramics and Composites (Q1); Materials Chemis...
1,2,26371,Natural Product Reports,journal,"02650568, 14604752",3552,Q1,157,76,221,9913,2250,209,1070,13043,United Kingdom,Royal Society of Chemistry,1984-ongoing,Biochemistry (Q1); Drug Discovery (Q1); Organi...
2,3,26968,Journal of Photochemistry and Photobiology C: ...,journal,13895567,2943,Q1,80,23,63,3884,848,61,998,16887,Netherlands,Elsevier BV,2000-ongoing,Catalysis (Q1); Organic Chemistry (Q1); Physic...
3,4,26396,Organic Letters,journal,"15237052, 15237060",2441,Q1,220,1912,4978,90733,31118,4860,672,4745,United States,American Chemical Society,1999-ongoing,Biochemistry (Q1); Organic Chemistry (Q1); Phy...
4,5,21100779404,Macromolecules,journal,"00249297, 15205835",2243,Q1,288,1069,2956,57548,17291,2919,600,5383,United States,American Chemical Society,1968-ongoing,Inorganic Chemistry (Q1); Materials Chemistry ...
5,6,21100209317,ACS Macro Letters,journal,21611653,2201,Q1,71,269,845,10816,4899,838,574,4021,United States,American Chemical Society,2012-ongoing,Inorganic Chemistry (Q1); Materials Chemistry ...
6,7,21100242403,Redox Biology,journal,22132317,2166,Q1,57,281,538,17565,4477,523,777,6251,Netherlands,Elsevier BV,2013-ongoing,Biochemistry (Q1); Clinical Biochemistry (Q1);...
7,8,23392,Chemistry - A European Journal,journal,"15213765, 09476539",1842,Q1,220,2530,7089,160128,33600,6845,503,6329,United Kingdom,John Wiley & Sons Ltd.,1995-ongoing,Catalysis (Q1); Chemistry (miscellaneous) (Q1)...
8,9,22669,Advanced Synthesis and Catalysis,journal,16154150,1817,Q1,140,601,1502,43896,7772,1464,541,7304,United Kingdom,John Wiley & Sons Ltd.,1992-ongoing,Catalysis (Q1); Organic Chemistry (Q1)
9,10,21100778649,Proceedings - 2016 IEEE Symposium on Security ...,conference and proceedings,00002016,1777,-,18,0,58,0,737,55,1340,000,United States,,2016,Organic Chemistry


In [48]:
df_rank['q_issn'] = df_rank['Issn'].str.slice(0, 8)

In [50]:
df_rank.isnull().sum()

Rank                      0
Sourceid                  0
Title                     0
Type                      0
Issn                      0
SJR                       0
SJR Quartile              0
H index                   0
Total Docs. (2018)        0
Total Docs. (3years)      0
Total Refs.               0
Total Cites (3years)      0
Citable Docs. (3years)    0
Cites / Doc. (2years)     0
Ref. / Doc.               0
Country                   0
Publisher                 1
Coverage                  0
Categories                0
q_issn                    0
dtype: int64

In [51]:
df_rank[['availability', 'coverage']] = df_rank.apply(searchOpenURL ,axis=1)

Full-text available for 00796700
Full-text available for 00796700
Full-text available for 02650568
Full-text available for 13895567
Full-text available for 15237052
Full-text available for 00249297
Full-text available for 21611653
Full-text available for 22132317
Full-text available for 15213765
Full-text available for 16154150
Full-text not available for 00002016
Full-text available for 20524110
Full-text available for 00162361
Full-text available for 00025100
Full-text available for 17599954
Full-text available for 15206904
Full-text available for 15204812
Full-text available for 15206041
Full-text available for 18673880
Full-text not available for 00653160
Full-text available for 01448617
Full-text available for 14220067
Full-text available for 10221336
Full-text available for 01633864
Full-text available for 1520586X
Full-text available for 02235234
Full-text available for 19485875
Full-text available for 00219673
Full-text available for 14394227
Full-text available for 18614728
Fu

In [52]:
df_rank

Unnamed: 0,Rank,Sourceid,Title,Type,Issn,SJR,SJR Quartile,H index,Total Docs. (2018),Total Docs. (3years),...,Citable Docs. (3years),Cites / Doc. (2years),Ref. / Doc.,Country,Publisher,Coverage,Categories,q_issn,availability,coverage
0,1,26465,Progress in Polymer Science,journal,00796700,6365,Q1,244,61,148,...,145,2452,27395,Netherlands,Elsevier BV,"1967, 1970-1971, 1975, 1977-1978, 1980-1986, 1...",Ceramics and Composites (Q1); Materials Chemis...,00796700,Full-text available,"{'Elsevier SD Freedom Collection': '', 'CRKN E..."
1,2,26371,Natural Product Reports,journal,"02650568, 14604752",3552,Q1,157,76,221,...,209,1070,13043,United Kingdom,Royal Society of Chemistry,1984-ongoing,Biochemistry (Q1); Drug Discovery (Q1); Organi...,02650568,Full-text available,{'Free E- Journals': 'Most recent 3 year(s) no...
2,3,26968,Journal of Photochemistry and Photobiology C: ...,journal,13895567,2943,Q1,80,23,63,...,61,998,16887,Netherlands,Elsevier BV,2000-ongoing,Catalysis (Q1); Organic Chemistry (Q1); Physic...,13895567,Full-text available,"{'Elsevier SD Freedom Collection': '', 'CRKN E..."
3,4,26396,Organic Letters,journal,"15237052, 15237060",2441,Q1,220,1912,4978,...,4860,672,4745,United States,American Chemical Society,1999-ongoing,Biochemistry (Q1); Organic Chemistry (Q1); Phy...,15237052,Full-text available,{'CRKN American Chemical Society Journals': 'A...
4,5,21100779404,Macromolecules,journal,"00249297, 15205835",2243,Q1,288,1069,2956,...,2919,600,5383,United States,American Chemical Society,1968-ongoing,Inorganic Chemistry (Q1); Materials Chemistry ...,00249297,Full-text available,{'American Chemical Society Legacy Archive': '...
5,6,21100209317,ACS Macro Letters,journal,21611653,2201,Q1,71,269,845,...,838,574,4021,United States,American Chemical Society,2012-ongoing,Inorganic Chemistry (Q1); Materials Chemistry ...,21611653,Full-text available,{'CRKN American Chemical Society Journals': 'A...
6,7,21100242403,Redox Biology,journal,22132317,2166,Q1,57,281,538,...,523,777,6251,Netherlands,Elsevier BV,2013-ongoing,Biochemistry (Q1); Clinical Biochemistry (Q1);...,22132317,Full-text available,{'DOAJ Directory of Open Access Journals': 'Av...
7,8,23392,Chemistry - A European Journal,journal,"15213765, 09476539",1842,Q1,220,2530,7089,...,6845,503,6329,United Kingdom,John Wiley & Sons Ltd.,1995-ongoing,Catalysis (Q1); Chemistry (miscellaneous) (Q1)...,15213765,Full-text available,{'CRKN Wiley Online Library': 'Available from ...
8,9,22669,Advanced Synthesis and Catalysis,journal,16154150,1817,Q1,140,601,1502,...,1464,541,7304,United Kingdom,John Wiley & Sons Ltd.,1992-ongoing,Catalysis (Q1); Organic Chemistry (Q1),16154150,Full-text available,{'Scholars Portal': 'Available from 1995 volum...
9,10,21100778649,Proceedings - 2016 IEEE Symposium on Security ...,conference and proceedings,00002016,1777,-,18,0,58,...,55,1340,000,United States,,2016,Organic Chemistry,00002016,No full-text available,{}


In [60]:
# Update the availability statements based on the coverage dates (emabargo, not to the present)
df_rank[['availability']] = df_rank.apply(coverageStatement_availParser,axis=1)

In [61]:
df_rank

Unnamed: 0,Rank,Sourceid,Title,Type,Issn,SJR,SJR Quartile,H index,Total Docs. (2018),Total Docs. (3years),...,Citable Docs. (3years),Cites / Doc. (2years),Ref. / Doc.,Country,Publisher,Coverage,Categories,q_issn,availability,coverage
0,1,26465,Progress in Polymer Science,journal,00796700,6365,Q1,244,61,148,...,145,2452,27395,Netherlands,Elsevier BV,"1967, 1970-1971, 1975, 1977-1978, 1980-1986, 1...",Ceramics and Composites (Q1); Materials Chemis...,00796700,Full-text available to present,"{'Elsevier SD Freedom Collection': '', 'CRKN E..."
1,2,26371,Natural Product Reports,journal,"02650568, 14604752",3552,Q1,157,76,221,...,209,1070,13043,United Kingdom,Royal Society of Chemistry,1984-ongoing,Biochemistry (Q1); Drug Discovery (Q1); Organi...,02650568,Full-text available to present,{'Free E- Journals': 'Most recent 3 year(s) no...
2,3,26968,Journal of Photochemistry and Photobiology C: ...,journal,13895567,2943,Q1,80,23,63,...,61,998,16887,Netherlands,Elsevier BV,2000-ongoing,Catalysis (Q1); Organic Chemistry (Q1); Physic...,13895567,Full-text available to present,"{'Elsevier SD Freedom Collection': '', 'CRKN E..."
3,4,26396,Organic Letters,journal,"15237052, 15237060",2441,Q1,220,1912,4978,...,4860,672,4745,United States,American Chemical Society,1999-ongoing,Biochemistry (Q1); Organic Chemistry (Q1); Phy...,15237052,Full-text available to present,{'CRKN American Chemical Society Journals': 'A...
4,5,21100779404,Macromolecules,journal,"00249297, 15205835",2243,Q1,288,1069,2956,...,2919,600,5383,United States,American Chemical Society,1968-ongoing,Inorganic Chemistry (Q1); Materials Chemistry ...,00249297,Full-text available to present,{'American Chemical Society Legacy Archive': '...
5,6,21100209317,ACS Macro Letters,journal,21611653,2201,Q1,71,269,845,...,838,574,4021,United States,American Chemical Society,2012-ongoing,Inorganic Chemistry (Q1); Materials Chemistry ...,21611653,Full-text available to present,{'CRKN American Chemical Society Journals': 'A...
6,7,21100242403,Redox Biology,journal,22132317,2166,Q1,57,281,538,...,523,777,6251,Netherlands,Elsevier BV,2013-ongoing,Biochemistry (Q1); Clinical Biochemistry (Q1);...,22132317,Full-text available to present,{'DOAJ Directory of Open Access Journals': 'Av...
7,8,23392,Chemistry - A European Journal,journal,"15213765, 09476539",1842,Q1,220,2530,7089,...,6845,503,6329,United Kingdom,John Wiley & Sons Ltd.,1995-ongoing,Catalysis (Q1); Chemistry (miscellaneous) (Q1)...,15213765,Full-text available to present,{'CRKN Wiley Online Library': 'Available from ...
8,9,22669,Advanced Synthesis and Catalysis,journal,16154150,1817,Q1,140,601,1502,...,1464,541,7304,United Kingdom,John Wiley & Sons Ltd.,1992-ongoing,Catalysis (Q1); Organic Chemistry (Q1),16154150,Full-text available to present,{'Scholars Portal': 'Available from 1995 volum...
9,10,21100778649,Proceedings - 2016 IEEE Symposium on Security ...,conference and proceedings,00002016,1777,-,18,0,58,...,55,1340,000,United States,,2016,Organic Chemistry,00002016,No full-text available,{}


In [62]:
df_organicChem = df_rank

In [63]:
df_chemistry.availability.value_counts()

Full-text available to present           94
No full-text available                    3
Full-text available with embargo          1
Full-text available, but not complete     1
                                          1
Name: availability, dtype: int64

In [64]:
df_chemistry[df_chemistry['availability'] == "No full-text available"]

Unnamed: 0,Rank,Sourceid,Title,Type,Issn,SJR,SJR Best Quartile,H index,Total Docs. (2018),Total Docs. (3years),...,Citable Docs. (3years),Cites / Doc. (2years),Ref. / Doc.,Country,Publisher,Coverage,Categories,q_issn,availability,coverage
3,4,21100826569,Nature Reviews Chemistry,journal,23973358,13462,Q1,21,57,74,...,41,2559,7526,United Kingdom,Nature Publishing Group,2017-ongoing,Chemical Engineering (miscellaneous) (Q1); Che...,23973358,No full-text available,{}
60,61,21100778649,Proceedings - 2016 IEEE Symposium on Security ...,conference and proceedings,2016,1777,-,18,0,58,...,55,1340,0,United States,,2016,Organic Chemistry,2016,No full-text available,{}
92,93,26554,Advances in Physical Organic Chemistry,book series,653160,1426,Q1,28,4,16,...,8,525,7375,United States,Elsevier Inc.,"1963-1971, 1973, 1975-1978, 1981-1985, 1987-19...",Organic Chemistry (Q1); Physical and Theoretic...,653160,No full-text available,{}


In [65]:
df_chemistry[df_chemistry['availability'] == "Full-text available with embargo"]

Unnamed: 0,Rank,Sourceid,Title,Type,Issn,SJR,SJR Best Quartile,H index,Total Docs. (2018),Total Docs. (3years),...,Citable Docs. (3years),Cites / Doc. (2years),Ref. / Doc.,Country,Publisher,Coverage,Categories,q_issn,availability,coverage
31,32,14151,Molecular and Cellular Proteomics,journal,"15359476, 15359484",2807,Q1,169,184,703,...,692,483,6035,United States,American Society for Biochemistry and Molecula...,2002-ongoing,Analytical Chemistry (Q1); Biochemistry (Q1); ...,15359476,Full-text available with embargo,{'Free E- Journals': 'Available from 2002 volu...


In [66]:
df_organicChem.availability.value_counts()

Full-text available to present           88
No full-text available                   11
Full-text available, but not complete     1
Name: availability, dtype: int64

In [59]:
df_organicChem[df_organicChem['availability'] == "No full-text available"]

Unnamed: 0,Rank,Sourceid,Title,Type,Issn,SJR,SJR Quartile,H index,Total Docs. (2018),Total Docs. (3years),...,Citable Docs. (3years),Cites / Doc. (2years),Ref. / Doc.,Country,Publisher,Coverage,Categories,q_issn,availability,coverage
9,10,21100778649,Proceedings - 2016 IEEE Symposium on Security ...,conference and proceedings,00002016,1777,-,18,0,58,...,55,1340,0,United States,,2016,Organic Chemistry,00002016,No full-text available,{}
18,19,26554,Advances in Physical Organic Chemistry,book series,00653160,1426,Q1,28,4,16,...,8,525,7375,United States,Elsevier Inc.,"1963-1971, 1973, 1975-1978, 1981-1985, 1987-19...",Organic Chemistry (Q1); Physical and Theoretic...,00653160,No full-text available,{}
32,33,4900153219,Topics in Organometallic Chemistry,book series,"16168534, 14366002",1073,Q1,52,15,100,...,62,876,13180,Germany,Springer Verlag,2006-ongoing,Inorganic Chemistry (Q1); Organic Chemistry (Q...,16168534,No full-text available,{}
42,43,26491,Synlett,journal,"09365214, 14372096",857,Q1,125,476,1517,...,1458,242,4657,Germany,Georg Thieme Verlag,1996-ongoing,Organic Chemistry (Q1),09365214,No full-text available,{}
47,48,26420,Organometallic Chemistry,journal,03010074,797,Q2,11,0,8,...,7,186,0,United Kingdom,London Chemical Society,"1998-2002, 2004-2005, 2007-2012, 2016-ongoing,...",Inorganic Chemistry (Q2); Organic Chemistry (Q...,03010074,No full-text available,{}
49,50,25758,Advances in Heterocyclic Chemistry,journal,00652725,784,Q2,42,15,65,...,61,383,24447,United States,Elsevier Inc.,1963-ongoing,Polymers and Plastics (Q1); Biochemistry (Q2);...,00652725,No full-text available,{}
54,55,25762,Advances in Polymer Science,journal,00653195,748,Q2,105,34,101,...,51,528,11247,Germany,Springer Verlag,"1958-1961, 1963-1964, 1969, 1976-1982, 1986-19...",Chemical Engineering (miscellaneous) (Q1); Pol...,00653195,No full-text available,{}
81,82,21100267917,Carbohydrate Chemistry,book series,"2041353X, 14651963",591,Q2,9,0,20,...,18,111,0,United Kingdom,Royal Society of Chemistry,2011-ongoing,Organic Chemistry (Q2); Biochemistry (Q3),2041353X,No full-text available,{}
85,86,21744,Planta Medica,journal,"14390221, 00320943",559,Q2,103,157,547,...,520,258,4267,Germany,Georg Thieme Verlag,"1961, 1965-ongoing",Complementary and Alternative Medicine (Q1); A...,14390221,No full-text available,{}
87,88,11600153417,Current Protocols in Nucleic Acid Chemistry,journal,"19349270, 19349289",532,Q2,14,19,67,...,67,76,2953,United States,Wiley-Liss Inc,2000-ongoing,Medicine (miscellaneous) (Q2); Organic Chemist...,19349270,No full-text available,{}


In [67]:
df_organicChem[df_organicChem['availability'] == "Full-text available, but not complete"]

Unnamed: 0,Rank,Sourceid,Title,Type,Issn,SJR,SJR Quartile,H index,Total Docs. (2018),Total Docs. (3years),...,Citable Docs. (3years),Cites / Doc. (2years),Ref. / Doc.,Country,Publisher,Coverage,Categories,q_issn,availability,coverage
39,40,23488,Current Medicinal Chemistry,journal,9298673,918,Q1,158,329,843,...,788,325,10073,United Arab Emirates,Bentham Science Publishers,1994-ongoing,Drug Discovery (Q1); Organic Chemistry (Q1); B...,9298673,"Full-text available, but not complete",{'SciTech Premium Collection': 'Available from...
