diff --git a/requirements.txt b/requirements.txt index 40e4f40f..70c23b2c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ pbr requests -matplotlib diff --git a/scopus/__init__.py b/scopus/__init__.py index 08199d76..14579256 100644 --- a/scopus/__init__.py +++ b/scopus/__init__.py @@ -18,4 +18,3 @@ from scopus.author_search import * from scopus.scopus_search import * from scopus.serial_title import * -from scopus.deprecated_ import * diff --git a/scopus/deprecated_/__init__.py b/scopus/deprecated_/__init__.py deleted file mode 100644 index 9cff6b28..00000000 --- a/scopus/deprecated_/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from scopus.deprecated_.scopus_affiliation import * -from scopus.deprecated_.scopus_author import * -from scopus.deprecated_.scopus_reports import * diff --git a/scopus/deprecated_/scopus_affiliation.py b/scopus/deprecated_/scopus_affiliation.py deleted file mode 100644 index 97099c0b..00000000 --- a/scopus/deprecated_/scopus_affiliation.py +++ /dev/null @@ -1,128 +0,0 @@ -import os -import xml.etree.ElementTree as ET -import warnings - -from scopus import config -from scopus.utils import get_content, get_encoded_text - -SCOPUS_AFFILIATION_DIR = os.path.expanduser('~/.scopus/affiliation') - -if not os.path.exists(SCOPUS_AFFILIATION_DIR): - os.makedirs(SCOPUS_AFFILIATION_DIR) - - -class ScopusAffiliation: - @property - def affiliation_id(self): - """The Scopus ID of the affiliation.""" - return get_encoded_text(self.xml, 'coredata/dc:identifier').split(":")[-1] - - @property - def date_created(self): - """Date the Scopus record was created.""" - date_created = self.xml.find('institution-profile/date-created') - if date_created is not None: - date_created = (int(date_created.attrib['year']), - int(date_created.attrib['month']), - int(date_created.attrib['day'])) - else: - date_created = (None, None, None) - return date_created - - @property - def nauthors(self): - """Number of authors in the affiliation.""" - return get_encoded_text(self.xml, 'coredata/author-count') - - @property - def ndocuments(self): - """Number of documents for the affiliation.""" - return get_encoded_text(self.xml, 'coredata/document-count') - - @property - def url(self): - """URL to the affiliation's profile page.""" - url = self.xml.find('coredata/link[@rel="scopus-affiliation"]') - if url is not None: - url = url.get('href') - return url - - @property - def api_url(self): - """URL to the affiliation's API page.""" - return get_encoded_text(self.xml, 'coredata/prism:url') - - @property - def org_type(self): - """Type of the affiliation (only present if profile is org profile).""" - return get_encoded_text(self.xml, 'institution-profile/org-type') - - @property - def org_domain(self): - """Internet domain of the affiliation.""" - return get_encoded_text(self.xml, 'institution-profile/org-domain') - - @property - def org_url(self): - """Website of the affiliation.""" - return get_encoded_text(self.xml, 'institution-profile/org-URL') - - @property - def name(self): - """The name of the affiliation.""" - return get_encoded_text(self.xml, 'affiliation-name') - - @property - def address(self): - """The address of the affiliation.""" - return get_encoded_text(self.xml, 'address') - - @property - def city(self): - """The city of the affiliation.""" - return get_encoded_text(self.xml, 'city') - - @property - def state(self): - """The state (country's administrative sububunit) of the affiliation.""" - return get_encoded_text(self.xml, 'state') - - @property - def country(self): - """The country of the affiliation.""" - return get_encoded_text(self.xml, 'country') - - def __init__(self, aff_id, refresh=False): - """Class to represent an Affiliation in Scopus. - - Parameters - ---------- - aff_id : str or int - The Scopus Affiliation ID. Optionally expressed - as an Elsevier EID (i.e., in the form 10-s2.0-nnnnnnnn). - - refresh : bool (optional, default=False) - Whether to refresh the cached file if it exists or not. - - Notes - ----- - The files are cached in ~/.scopus/affiliation/{aff_id}. - """ - if config.getboolean('Warnings', 'Affiliation'): - text = config.get('Warnings', 'Text').format('ContentAffiliationRetrieval') - warnings.warn(text, DeprecationWarning) - config.set('Warnings', 'Affiliation', '0') - aff_id = str(int(str(aff_id).split('-')[-1])) - - qfile = os.path.join(SCOPUS_AFFILIATION_DIR, aff_id) - url = ('https://api.elsevier.com/content/affiliation/' - 'affiliation_id/{}'.format(aff_id)) - - self.xml = ET.fromstring(get_content(qfile, url=url, refresh=refresh)) - - def __str__(self): - s = '''{self.name} ({self.nauthors} authors, {self.ndocuments} documents) - {self.address} - {self.city}, {self.country} - {self.url}'''.format(self=self) - return s diff --git a/scopus/deprecated_/scopus_api.py b/scopus/deprecated_/scopus_api.py deleted file mode 100644 index 75f62fbf..00000000 --- a/scopus/deprecated_/scopus_api.py +++ /dev/null @@ -1,690 +0,0 @@ -import os -import sys -import xml.etree.ElementTree as ET -import warnings - -from scopus import config -from scopus.utils import get_content, get_encoded_text, ns - -SCOPUS_XML_DIR = os.path.expanduser('~/.scopus/xml') -SCOPUS_ISSN_DIR = os.path.expanduser('~/.scopus/issn') - -if not os.path.exists(SCOPUS_XML_DIR): - os.makedirs(SCOPUS_XML_DIR) - -if not os.path.exists(SCOPUS_ISSN_DIR): - os.makedirs(SCOPUS_ISSN_DIR) - - -class ScopusAbstract(object): - @property - def abstract(self): - """Return the abstract of an article.""" - return get_encoded_text(self.coredata, 'dc:description/abstract/ce:para') - - @property - def affiliations(self): - """A list of scopus_api._ScopusAffiliation objects.""" - return [_ScopusAffiliation(aff) for aff in - self.xml.findall('affiliation', ns)] - - @property - def aggregationType(self): - """Type of source the abstract is published in.""" - return get_encoded_text(self.coredata, 'prism:aggregationType') - - @property - def article_number(self): - """Article number.""" - return get_encoded_text(self.coredata, 'article-number') - - @property - def authkeywords(self): - """Return the keywords of the abstract. - Note: This may be empty. - """ - try: - return [a.text for a in self.xml.find('authkeywords', ns)] - except: - return None - - @property - def authors(self): - """A list of scopus_api._ScopusAuthor objects.""" - authors = self.xml.find('authors', ns) - try: - return [_ScopusAuthor(author) for author in authors] - except TypeError: - return None - - @property - def citationLanguage(self): - """Language of the article.""" - try: - return self.items.find( - 'bibrecord/head/citation-info/citation-language').get("language") - except: - return None - - @property - def citationType(self): - """Type (short version) of the article.""" - try: - return self.items.find( - 'bibrecord/head/citation-info/citation-type').get("code") - except: - return None - - @property - def citedby_count(self): - """Number of articles citing the abstract.""" - return int(get_encoded_text(self.coredata, 'citedby-count')) - - @property - def citedby_url(self): - """URL to Scopus page listing citing papers.""" - cite_link = self.coredata.find('link[@rel="scopus-citedby"]', ns) - try: - return cite_link.get('href') - except AttributeError: # cite_link is None - return None - - @property - def coverDate(self): - """The date of the cover the abstract is in.""" - return get_encoded_text(self.coredata, 'prism:coverDate') - - @property - def description(self): - """Return the description of a record. - Note: If this is empty, try the abstract instead. - """ - return get_encoded_text(self.coredata, 'dc:description') - - @property - def doi(self): - """DOI of article.""" - return get_encoded_text(self.coredata, 'prism:doi') - - @property - def eid(self): - """EID """ - return get_encoded_text(self.coredata, 'eid') - - @property - def endingPage(self): - """Ending page.""" - return get_encoded_text(self.coredata, 'prism:endingPage') - - @property - def issn(self): - """ISSN of the publisher. - Note: If E-ISSN is known to Scopus, this returns both - ISSN and E-ISSN in random order separated by blank space. - """ - return get_encoded_text(self.coredata, 'prism:issn') - - @property - def issueIdentifier(self): - """Issue number for abstract.""" - return get_encoded_text(self.coredata, 'prism:issueIdentifier') - - @property - def nauthors(self): - """Return number of authors listed in the abstract.""" - return len(self.authors) - - @property - def pageRange(self): - """Page range.""" - return get_encoded_text(self.coredata, 'prism:pageRange') - - @property - def publicationName(self): - """Name of source the abstract is published in.""" - return get_encoded_text(self.coredata, 'prism:publicationName') - - @property - def publisher(self): - """Name of the publisher of the abstract.""" - return get_encoded_text(self.coredata, 'dc:publisher') - - @property - def refcount(self): - """Number of references of an article. - Note: Requires the FULL view of the article. - """ - refs = self.items.find('bibrecord/tail/bibliography', ns) - try: - return refs.attrib['refcount'] - except AttributeError: # refs is None - return None - - @property - def references(self): - """Return EIDs of references of an article. - Note: Requires the FULL view of the article. - """ - refs = self.items.find('bibrecord/tail/bibliography', ns) - if refs is not None: - eids = [r.find("ref-info/refd-itemidlist/itemid", ns).text for r - in refs.findall("reference", ns)] - return ["2-s2.0-" + eid for eid in eids] - else: - return None - - @property - def source_id(self): - """Scopus source_id of the abstract.""" - return get_encoded_text(self.coredata, 'source-id') - - @property - def srctype(self): - """Type (short version) of source the abstract is published in.""" - return get_encoded_text(self.coredata, 'srctype') - - @property - def startingPage(self): - """Starting page.""" - return get_encoded_text(self.coredata, 'prism:startingPage') - - @property - def subjectAreas(self): - """List of subject areas of article. - Note: Requires the FULL view of the article. - """ - subjectAreas = self.xml.find('subject-areas', ns) - try: - return [a.text for a in subjectAreas] - except: - return None - - @property - def scopus_url(self): - """URL to the abstract page on Scopus.""" - scopus_url = self.coredata.find('link[@rel="scopus"]', ns) - try: - return scopus_url.get('href') - except AttributeError: # scopus_url is None - return None - - @property - def title(self): - """Abstract title.""" - return get_encoded_text(self.coredata, 'dc:title') - - @property - def url(self): - """URL to the API view of the abstract.""" - return get_encoded_text(self.coredata, 'prism:url') - - @property - def volume(self): - """Volume for the abstract.""" - return get_encoded_text(self.coredata, 'prism:volume') - - @property - def website(self): - """Website of article.""" - return get_encoded_text(self.items, - 'bibrecord/head/source/website/ce:e-address') - - def __init__(self, EID, view='META_ABS', refresh=False): - """Class to represent the results from a Scopus abstract. - - Parameters - ---------- - EID : str - The Scopus ID (EID) of an abstract. - - view : str (optional, default=META_ABS) - The view of the file that should be downloaded. Will not take - effect for already cached files. Supported values: META, META_ABS, - FULL, where FULL includes all information of META_ABS view and - META_ABS includes all information of the META view . See - https://dev.elsevier.com/guides/AbstractRetrievalViews.htm - for details. - - refresh : bool (optional, default=False) - Whether to refresh the cached file if it exists or not. - - Notes - ----- - The files are cached in ~/.scopus/xml/{eid}. - """ - if config.getboolean('Warnings', 'Abstract'): - text = config.get('Warnings', 'Text').format('AbstractRetrieval') - warnings.warn(text, DeprecationWarning) - config.set('Warnings', 'Abstract', '0') - allowed_views = ('META', 'META_ABS', 'FULL') - if view not in allowed_views: - raise ValueError('view parameter must be one of ' + - ', '.join(allowed_views)) - - # Get file content - qfile = os.path.join(SCOPUS_XML_DIR, EID) - url = "https://api.elsevier.com/content/abstract/eid/{}".format(EID) - params = {'view': view} - self.xml = ET.fromstring(get_content(qfile, url=url, refresh=refresh, - params=params)) - # Remove default namespace if present - remove = u'{http://www.elsevier.com/xml/svapi/abstract/dtd}' - nsl = len(remove) - for elem in self.xml.getiterator(): - if elem.tag.startswith(remove): - elem.tag = elem.tag[nsl:] - - if self.xml.tag == 'service-error': - raise Exception('\n{0}\n{1}'.format(EID, self.xml)) - - self.coredata = self.xml.find('coredata', ns) - self.items = self.xml.find('item', ns) - - def get_corresponding_author_info(self): - """Try to get corresponding author information. - - Returns (scopus-id, name, email). - """ - resp = requests.get(self.scopus_url) - from lxml import html - - parsed_doc = html.fromstring(resp.content) - for div in parsed_doc.body.xpath('.//div'): - for a in div.xpath('a'): - if '/cdn-cgi/l/email-protection' not in a.get('href', ''): - continue - encoded_text = a.attrib['href'].replace('/cdn-cgi/l/email-protection#', '') - key = int(encoded_text[0:2], 16) - email = ''.join([chr(int('0x{}'.format(x), 16) ^ key) - for x in - map(''.join, zip(*[iter(encoded_text[2:])]*2))]) - for aa in div.xpath('a'): - if 'http://www.scopus.com/authid/detail.url' in aa.get('href', ''): - scopus_url = aa.attrib['href'] - name = aa.text - else: - scopus_url, name = None, None - - return (scopus_url, name, email) - - def __str__(self): - """Return pretty text version of the abstract. - - Assumes the abstract is a journal article and was loaded with - view="META_ABS" or view="FULL". - """ - - if len(self.authors) > 1: - authors = ', '.join([str(a.initials) + - ' ' + - str(a.surname) - for a in self.authors[0:-1]]) - authors += (' and ' + - str(self.authors[-1].initials) + - ' ' + str(self.authors[-1].surname)) - else: - a = self.authors[0] - authors = str(a.given_name) + ' ' + str(a.surname) - - s = '[[{self.scopus_url}][{self.eid}]] ' - s += '{authors}, {self.title}, {self.publicationName}, ' - s += '{self.volume}' - if self.issueIdentifier: - s += '({self.issueIdentifier}), ' - else: - s += ', ' - if self.pageRange: - s += 'p. {self.pageRange}, ' - elif self.startingPage: - s += 'p. {self.startingPage}, ' - elif self.article_number: - s += 'Art. No. {self.article_number} ' - else: - s += '(no pages found) ' - - from dateutil.parser import parse - pubDate = parse(self.coverDate) - - s += '({}).'.format(pubDate.year) - s += ' https://doi.org/{self.doi},' - s += ' {self.scopus_url},' - s += ' cited {self.citedby_count} times (Scopus).\n' - s += ' Affiliations:\n ' - s += '\n '.join([str(aff) for aff in self.affiliations]) - - return s.format(authors=authors, - self=self) - - @property - def latex(self): - """Return LaTeX representation of the abstract.""" - s = ('{authors}, \\textit{{{title}}}, {journal}, {volissue}, ' - '{pages}, ({date}). {doi}, {scopus_url}.') - if len(self.authors) > 1: - authors = ', '.join([str(a.given_name) + - ' ' + str(a.surname) - for a in self.authors[0:-1]]) - authors += (' and ' + - str(self.authors[-1].given_name) + - ' ' + str(self.authors[-1].surname)) - else: - a = self.authors[0] - authors = str(a.given_name) + ' ' + str(a.surname) - title = self.title - journal = self.publicationName - volume = self.volume - issue = self.issueIdentifier - if volume and issue: - volissue = '\\textbf{{{0}({1})}}'.format(volume, issue) - elif volume: - volissue = '\\textbf{{0}}'.format(volume) - else: - volissue = 'no volume' - date = self.coverDate - if self.pageRange: - pages = 'p. {0}'.format(self.pageRange) - elif self.startingPage: - pages = 'p. {self.startingPage}'.format(self) - elif self.article_number: - pages = 'Art. No. {self.article_number}, '.format(self) - else: - pages = '(no pages found)' - doi = '\\href{{https://doi.org/{0}}}{{doi:{0}}}'.format(self.doi) - scopus_url = '\\href{{{0}}}{{scopus:{1}}}'.format(self.scopus_url, - self.eid) - - return s.format(**locals()) - - @property - def html(self): - """Returns an HTML citation.""" - s = (u'{authors}, {title}, {journal}, {volissue}, {pages}, ' - '({date}). {doi}.') - - au_link = ('{1}') - - if len(self.authors) > 1: - authors = u', '.join([au_link.format(a.auid, - (str(a.given_name) + - ' ' + str(a.surname))) - for a in self.authors[0:-1]]) - authors += (u' and ' + - au_link.format(self.authors[-1].auid, - (str(self.authors[-1].given_name) + - ' ' + - str(self.authors[-1].surname)))) - else: - a = self.authors[0] - authors = au_link.format(a.auid, - str(a.given_name) + ' ' + str(a.surname)) - - title = u'{title}'.format(link=self.scopus_url, - title=self.title) - - jname = self.publicationName - sid = self.source_id - jlink = ('{journal}') - journal = jlink.format(sid=sid, journal=jname) - - volume = self.volume - issue = self.issueIdentifier - if volume and issue: - volissue = u'{0}({1})'.format(volume, issue) - elif volume: - volissue = u'{0}'.format(volume) - else: - volissue = 'no volume' - date = self.coverDate - if self.pageRange: - pages = u'p. {0}'.format(self.pageRange) - elif self.startingPage: - pages = u'p. {self.startingPage}'.format(self=self) - elif self.article_number: - pages = u'Art. No. {self.article_number}, '.format(self=self) - else: - pages = '(no pages found)' - doi = 'doi:{0}'.format(self.doi) - - html = s.format(**locals()) - return html.replace('None', '') - - @property - def bibtex(self): - """Bibliographic entry in BibTeX format. - - Returns - ------- - bibtex : str - A string representing a bibtex entry for the item. - - Raises - ------ - ValueError : If the item's aggregationType is not Journal. - """ - if self.aggregationType != 'Journal': - raise ValueError('Only Journal articles supported.') - template = u'''@article{{{key}, - author = {{{author}}}, - title = {{{title}}}, - journal = {{{journal}}}, - year = {{{year}}}, - volume = {{{volume}}}, - number = {{{number}}}, - pages = {{{pages}}}, - doi = {{{doi}}} -}} - -''' - if self.pageRange: - pages = self.pageRange - elif self.startingPage: - pages = self.startingPage - elif self.article_number: - pages = self.article_number - else: - pages = 'no pages found' - year = self.coverDate[0:4] - first = self.title.split()[0].title() - last = self.title.split()[-1].title() - key = ''.join([self.authors[0].surname, year, first, last]) - authors = ' and '.join(["{} {}".format(a.given_name, a.surname) - for a in self.authors]) - bibtex = template.format( - key=key, author=authors, title=self.title, - journal=self.publicationName, year=year, volume=self.volume, - number=self.issueIdentifier, pages=pages, doi=self.doi) - return bibtex - - @property - def ris(self): - """Bibliographic entry in RIS (Research Information System Format) - format. - - Returns - ------- - ris : str - The RIS string representing an item. - - Raises - ------ - ValueError : If the item's aggregationType is not Journal. - """ - if self.aggregationType != 'Journal': - raise ValueError('Only Journal articles supported.') - template = u'''TY - JOUR -TI - {title} -JO - {journal} -VL - {volume} -DA - {date} -SP - {pages} -PY - {year} -DO - {doi} -UR - https://doi.org/{doi} -''' - ris = template.format( - title=self.title, journal=self.publicationName, - volume=self.volume, date=self.coverDate, pages=self.pageRange, - year=self.coverDate[0:4], doi=self.doi) - for au in self.authors: - ris += 'AU - {}\n'.format(au.indexed_name) - if self.issueIdentifier is not None: - ris += 'IS - {}\n'.format(self.issueIdentifier) - ris += 'ER - \n\n' - return ris - - -class _ScopusAuthor(object): - """An internal class for a author in a ScopusAbstract.""" - def __init__(self, author): - """author should be an xml element. - The following attributes are supported: - - author - indexed_name - given_name - surname - initials - author_url - the scopus api url to get more information - auid - the scopus id for the author - scopusid - the scopus id for the author - seq - the index of the author in the author list. - affiliations - a list of ScopusAuthorAffiliation objects - - This class is not the same as the one in scopus.scopus_author, which - uses the scopus author api. - - """ - self.author = author - self.indexed_name = get_encoded_text(author, 'ce:indexed-name') - self.given_name = get_encoded_text(author, 'ce:given-name') - self.surname = get_encoded_text(author, 'ce:surname') - self.initials = get_encoded_text(author, 'ce:initials') - self.author_url = get_encoded_text(author, 'author-url') - self.auid = author.attrib.get('auid') - self.scopusid = self.auid - self.seq = author.attrib.get('seq') - self.affiliations = [_ScopusAuthorAffiliation(aff) - for aff in author.findall('affiliation', ns)] - - def __str__(self): - s = """{0.seq}. {0.given_name} {0.surname} scopusid:{0.auid} """ - s += ' '.join([str(aff) for aff in self.affiliations]) - return s.format(self) - - -class _ScopusAffiliation(object): - """Internal class to represent the affiliations in an Abstract.""" - def __init__(self, affiliation): - """affiliation should be an xml element from the main abstract""" - self.affiliation = affiliation - self.affilname = get_encoded_text(affiliation, 'affilname') - self.city = get_encoded_text(affiliation, 'affiliation-city') - self.country = get_encoded_text(affiliation, 'affiliation-country') - self.href = affiliation.attrib.get('href', None) - self.id = affiliation.attrib.get('id', None) - - def __str__(self): - return 'id:{0.id} {0.affilname}'.format(self) - - -class _ScopusAuthorAffiliation(object): - """Internal class to represent the affiliation in an Author element""" - def __init__(self, affiliation): - """affiliation should be an xml element from an Author element.""" - self.affiliation = affiliation - self.id = affiliation.get('id', None) - self.href = affiliation.get('href', None) - - def __str__(self): - return 'affiliation_id:{0.id}'.format(self) - - -class ScopusJournal(object): - """Class to represent a journal from the Scopus API.""" - - def __init__(self, ISSN, refresh=False): - ISSN = str(ISSN) - self.issn = ISSN - - qfile = os.path.join(SCOPUS_ISSN_DIR, ISSN) - url = ("https://api.elsevier.com/content/serial/title/issn:" + ISSN) - self.xml = ET.fromstring(get_content(qfile, refresh, url)) - - self.publisher = get_encoded_text(self.xml, 'entry/dc:publisher') - self.title = get_encoded_text(self.xml, 'entry/dc:title') - self.aggregationType = get_encoded_text(self.xml, - 'entry/prism:aggregationType') - self.prism_url = get_encoded_text(self.xml, 'entry/prism:url') - - # Impact factors - SNIP = get_encoded_text(self.xml, 'entry/SNIPList/SNIP') - SNIP_year = self.xml.find('entry/SNIPList/SNIP', ns) - if SNIP_year is not None: - SNIP_year = SNIP_year.get('year') - else: - SNIP_year = -1 - - IPP = get_encoded_text(self.xml, 'entry/IPPList/IPP') - IPP_year = self.xml.find('entry/IPPList/IPP', ns) - if IPP_year is not None: - IPP_year = IPP_year.get('year') - else: - IPP_year = -1 - - SJR = get_encoded_text(self.xml, 'entry/SJRList/SJR') - SJR_year = self.xml.find('entry/SJRList/SJR', ns) - if SJR_year is not None: - SJR_year = SJR_year.get('year') - else: - SJR_year = -1 - if SNIP: - self.SNIP = float(SNIP) - self.SNIP_year = int(SNIP_year) - else: - self.SNIP = None - self.SNIP_year = None - - if IPP: - self.IPP = float(IPP) - self.IPP_year = int(IPP_year) - else: - self.IPP = None - self.IPP_year = None - - if SJR: - self.SJR = float(SJR) - self.SJR_year = int(SJR_year) - else: - self.SJR = None - self.SJR_year = None - - scopus_url = self.xml.find('entry/link[@ref="scopus-source"]') - if scopus_url is not None: - self.scopus_url = scopus_url.attrib['href'] - else: - self.scopus_url = None - - homepage = self.xml.find('entry/link[@ref="homepage"]') - if homepage is not None: - self.homepage = homepage.attrib['href'] - else: - self.homepage = None - - def __str__(self): - s = """{self.title} {self.scopus_url} - Homepage: {self.homepage} - SJR: {self.SJR} ({self.SJR_year}) - SNIP: {self.SNIP} ({self.SNIP_year}) - IPP: {self.IPP} ({self.IPP_year}) -""".format(self=self) - return s - - @property - def org(self): - """Return an org-formatted string for a Journal.""" - s = """[[{self.scopus_url}][{self.title}]] [[{self.homepage}][homepage]] -| SJR | SNIP | IPP | -| {self.SJR} | {self.SNIP} | {self.IPP} |""".format(self=self) - return s diff --git a/scopus/deprecated_/scopus_author.py b/scopus/deprecated_/scopus_author.py deleted file mode 100644 index 7c8584d9..00000000 --- a/scopus/deprecated_/scopus_author.py +++ /dev/null @@ -1,402 +0,0 @@ -import os -import sys -import textwrap -import time -import warnings -import xml.etree.ElementTree as ET -from collections import Counter, namedtuple -from operator import itemgetter - -from .scopus_api import ScopusAbstract -from ..scopus_search import ScopusSearch -from .scopus_affiliation import ScopusAffiliation -from scopus import config -from scopus.utils import download, get_content, get_encoded_text, ns - -SCOPUS_AUTHOR_DIR = os.path.expanduser('~/.scopus/author') - -if not os.path.exists(SCOPUS_AUTHOR_DIR): - os.makedirs(SCOPUS_AUTHOR_DIR) - - -class ScopusAuthor(object): - @property - def author_id(self): - """The scopus id for the author.""" - author_id = get_encoded_text(self.xml, 'coredata/dc:identifier') - return author_id.split(":")[-1] - - @property - def orcid(self): - """The author's ORCID.""" - return get_encoded_text(self.xml, 'coredata/orcid') - - @property - def hindex(self): - """The author hindex""" - hindex = get_encoded_text(self.xml, 'h-index') - return int(hindex) if hindex is not None else 0 - - @property - def ndocuments(self): - """Number of documents authored (excludes book chapters and notes).""" - ndocuments = get_encoded_text(self.xml, 'coredata/document-count') - return int(ndocuments) if ndocuments is not None else 0 - - @property - def ncited_by(self): - """Total number of citing authors.""" - ncited_by = get_encoded_text(self.xml, 'coredata/cited-by-count') - return int(ncited_by) if ncited_by is not None else 0 - - @property - def citation_count(self): - """Total number of citing items.""" - citation_count = get_encoded_text(self.xml, 'coredata/citation-count') - return int(citation_count) if citation_count is not None else 0 - - @property - def ncoauthors(self): - """Total number of coauthors.""" - ncoauthors = get_encoded_text(self.xml, 'coauthor-count') - return int(ncoauthors) if ncoauthors is not None else 0 - - @property - def current_affiliation(self): - """Current affiliation according to scopus.""" - return get_encoded_text(self.xml, 'author-profile/affiliation-current/' - 'affiliation/ip-doc/afdispname') - - @property - def affiliation_history(self): - """List of ScopusAffiliation objects representing former - affiliations of the author. Only affiliations with more than one - publication are considered. - """ - aff_ids = [e.attrib.get('affiliation-id') for e in - self.xml.findall('author-profile/affiliation-history/affiliation') - if e is not None and len(list(e.find("ip-doc").iter())) > 1] - return [ScopusAffiliation(aff_id) for aff_id in aff_ids] - - @property - def date_created(self): - """Date the Scopus record was created.""" - date_created = self.xml.find('author-profile/date-created', ns) - try: - return (int(date_created.attrib['year']), - int(date_created.attrib['month']), - int(date_created.attrib['day'])) - except AttributeError: # date_created is None - return (None, None, None) - - @property - def firstname(self): - """Author first name.""" - return (get_encoded_text(self.xml, - 'author-profile/preferred-name/given-name') or '') - - @property - def lastname(self): - """Author last name.""" - return (get_encoded_text(self.xml, - 'author-profile/preferred-name/surname') or '') - - @property - def name(self): - """Author name.""" - return ((get_encoded_text(self.xml, - 'author-profile/preferred-name/given-name') or '') + - ' ' + - (get_encoded_text(self.xml, - 'author-profile/preferred-name/surname') or '')) - - @property - def scopus_url(self): - """URL to the author's profile page.""" - scopus_url = self.xml.find('coredata/link[@rel="scopus-author"]') - try: - return scopus_url.get('href') - except AttributeError: # scopus_url is None - return None - - - @property - def citedby_url(self): - """URL to Scopus page of citing papers.""" - citedby_url = self.xml.find('coredata/link[@rel="search"]') - try: - return citedby_url.get('href') - except AttributeError: # citedby_url is None - return None - - @property - def coauthor_url(self): - """URL to Scopus coauthor page.""" - coauthor_url = self.xml.find('coredata/link[@rel="coauthor-search"]') - try: - return coauthor_url.get('href') - except AttributeError: # coauthor_url is None - return None - - @property - def subject_areas(self): - """List of tuples of author subject areas in the form - (area, frequency, abbreviation, code), where frequency is the - number of publications in this subject area. - """ - areas = self.xml.findall('subject-areas/subject-area') - freqs = self.xml.findall('author-profile/classificationgroup/' - 'classifications[@type="ASJC"]/classification') - c = {int(cls.text): int(cls.attrib['frequency']) for cls in freqs} - cats = [(a.text, c[int(a.get("code"))], a.get("abbrev"), a.get("code")) - for a in areas] - cats.sort(reverse=True, key=itemgetter(1)) - return cats - - @property - def publication_history(self): - """List of tuples of authored publications in the form - (title, abbreviation, type, issn), where issn is only given - for journals. abbreviation and issn may be None. - """ - pub_hist = self.xml.findall('author-profile/journal-history/') - hist = [] - for pub in pub_hist: - try: - issn = pub.find("issn").text - except AttributeError: - issn = None - try: - abbr = pub.find("sourcetitle-abbrev").text - except AttributeError: - abbr = None - hist.append((pub.find("sourcetitle").text, abbr, pub.get("type"), issn)) - return hist - - def __init__(self, author_id, refresh=False, refresh_aff=False, level=1): - """Class to represent a Scopus Author query by the scopus-id. - - Parameters - ---------- - author_id : str or int - The ID of the author to search for. Optionally expressed - as an Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn). - - refresh : bool (optional, default=False) - Whether to refresh the cached file (if it exists) or not. - - refresh_aff : bool (optional, default=False) - Whether to refresh the cached corresponding affiliation views - (if they exist) or not. - - level : int (optional, default=1) - Number of * to print in property __str__. - - Notes - ----- - The files are cached in ~/.scopus/author/{author_id} (without - eventually leading '9-s2.0-'). - """ - if config.getboolean('Warnings', 'Author'): - text = config.get('Warnings', 'Text').format('AuthorRetrieval') - warnings.warn(text, DeprecationWarning) - config.set('Warnings', 'Author', '0') - author_id = str(int(str(author_id).split('-')[-1])) - self.level = level - - qfile = os.path.join(SCOPUS_AUTHOR_DIR, author_id) - url = ('https://api.elsevier.com/content/author/' - 'author_id/{}').format(author_id) - params = {'author_id': author_id, 'view': 'ENHANCED'} - self.xml = ET.fromstring(get_content(qfile, url=url, refresh=refresh, - params=params)) - - def get_coauthors(self): - """Return list of coauthors, their scopus-id and research areas.""" - url = self.xml.find('coredata/link[@rel="coauthor-search"]').get('href') - xml = download(url=url).text.encode('utf-8') - xml = ET.fromstring(xml) - coauthors = [] - N = int(get_encoded_text(xml, 'opensearch:totalResults') or 0) - - AUTHOR = namedtuple('Author', - ['name', 'scopus_id', 'affiliation', 'categories']) - - count = 0 - while count < N: - params = {'start': count, 'count': 25} - xml = download(url=url, params=params).text.encode('utf-8') - xml = ET.fromstring(xml) - - for entry in xml.findall('atom:entry', ns): - - given_name = get_encoded_text(entry, - 'atom:preferred-name/atom:given-name') - surname = get_encoded_text(entry, - 'atom:preferred-name/atom:surname') - coauthor_name = u'{0} {1}'.format(given_name, surname) - - scopus_id = get_encoded_text(entry, - 'dc:identifier').replace('AUTHOR_ID:', '') - - affiliation = get_encoded_text(entry, - 'atom:affiliation-current/atom:affiliation-name') - - # get categories for this author - s = u', '.join(['{0} ({1})'.format(subject.text, - subject.attrib['frequency']) - for subject in - entry.findall('atom:subject-area', ns)]) - - coauthors += [AUTHOR(coauthor_name, scopus_id, affiliation, s)] - count += 25 - - return coauthors - - def get_document_eids(self, *args, **kwds): - """Return list of EIDs for the author using ScopusSearch.""" - search = ScopusSearch('au-id({})'.format(self.author_id), - *args, **kwds) - return search.get_eids() - - def get_abstracts(self, refresh=True): - """Return a list of ScopusAbstract objects using ScopusSearch.""" - return [ScopusAbstract(eid, refresh=refresh) - for eid in self.get_document_eids(refresh=refresh)] - - def get_journal_abstracts(self, refresh=True): - """Return a list of ScopusAbstract objects using ScopusSearch, - but only if belonging to a Journal.""" - return [abstract for abstract in self.get_abstracts(refresh=refresh) if - abstract.aggregationType == 'Journal'] - - def get_document_summary(self, N=None, cite_sort=True, refresh=True): - """Return a summary string of documents. - - Parameters - ---------- - N : int or None (optional, default=None) - Maximum number of documents to include in the summary. - If None, return all documents. - - cite_sort : bool (optional, default=True) - Whether to sort xml by number of citations, in decreasing order, - or not. - - refresh : bool (optional, default=True) - Whether to refresh the cached abstract file (if it exists) or not. - - Returns - ------- - s : str - Text summarizing an author's documents. - """ - abstracts = self.get_abstracts(refresh=refresh) - - if cite_sort: - counts = [(a, int(a.citedby_count)) for a in abstracts] - counts.sort(reverse=True, key=itemgetter(1)) - abstracts = [a[0] for a in counts] - - if N is None: - N = len(abstracts) - - s = [u'{0} of {1} documents'.format(N, len(abstracts))] - - for i in range(N): - s += ['{0:2d}. {1}\n'.format(i + 1, str(abstracts[i]))] - - return '\n'.join(s) - - def __str__(self): - """Return a summary string.""" - s = ['{} {} (updated on {})'.format( - '*' * self.level, self.name, time.asctime())] - # Links - s += ['', self.scopus_url] - if self.orcid is not None: - s += ['http://orcid.org/' + self.orcid] - # Publication stats - s += ['{} documents cited {} times by {} people ({} coauthors)'.format( - self.ndocuments, self.citation_count, self.ncited_by, - self.ncoauthors)] - s += ['#first author papers {}'.format(self.n_first_author_papers())] - s += ['#last author papers {}'.format(self.n_last_author_papers())] - s += ['h-index: {}'.format(self.hindex) + - ' AIF(2017) = ' + - '{0:1.2f}'.format(self.author_impact_factor(2017)[2])] - # Profile information - s += ['Scopus ID created on {}'.format(self.date_created)] - # Current Affiliation. Note this is what Scopus thinks is current. - s += ['\nCurrent affiliation( according to Scopus):'] - s += [' ' + (self.current_affiliation or '-')] - # Subject Areas - s += ['\nSubject areas'] - areas = ['{} ({})'.format(el[0], el[1]) for el in self.subject_areas] - s += [textwrap.fill(', '.join(areas), initial_indent=' ', - subsequent_indent=' ')] - # Journals published in - temp_s = [el[1] for el in self.publication_history] - s += ['\nPublishes in:\n' + - textwrap.fill(', '.join(temp_s), initial_indent=' ', - subsequent_indent=' ')] - # Affiliation history - s += ['\nAffiliation history:'] - s.extend([str(aff) for aff in self.affiliation_history]) - # Bibliography - s += [self.get_document_summary()] - return '\n'.join(s) - - def author_impact_factor(self, year=2014, refresh=True): - """Get author_impact_factor for the . - - Parameters - ---------- - year : int (optional, default=2014) - The year based for which the impact factor is to be calculated. - - refresh : bool (optional, default=True) - Whether to refresh the cached search file (if it exists) or not. - - Returns - ------- - (ncites, npapers, aif) : tuple of integers - The citations count, publication count, and author impact factor. - """ - scopus_abstracts = self.get_journal_abstracts(refresh=refresh) - - cites = [int(ab.citedby_count) for ab in scopus_abstracts] - years = [int(ab.coverDate.split('-')[0]) for ab in scopus_abstracts] - - data = zip(years, cites, scopus_abstracts) - data = sorted(data, key=itemgetter(1), reverse=True) - - # now get aif papers for year-1 and year-2 - aif_data = [tup for tup in data if tup[0] in (year - 1, year - 2)] - Ncites = sum([tup[1] for tup in aif_data]) - if len(aif_data) > 0: - return (Ncites, len(aif_data), Ncites / float(len(aif_data))) - else: - return (Ncites, len(aif_data), 0) - - def n_first_author_papers(self, refresh=True): - """Return number of papers with author as the first author.""" - first_authors = [1 for ab in self.get_journal_abstracts(refresh=refresh) - if ab.authors[0].scopusid == self.author_id] - return sum(first_authors) - - def n_last_author_papers(self, refresh=True): - """Return number of papers with author as the last author.""" - first_authors = [1 for ab in self.get_journal_abstracts(refresh=refresh) - if ab.authors[-1].scopusid == self.author_id] - return sum(first_authors) - - def n_journal_articles(self, refresh=True): - """Return the number of journal articles.""" - return len(self.get_journal_abstracts(refresh=refresh)) - - def n_yearly_publications(self, refresh=True): - """Number of journal publications in a given year.""" - pub_years = [int(ab.coverDate.split('-')[0]) - for ab in self.get_journal_abstracts(refresh=refresh)] - return Counter(pub_years) diff --git a/scopus/deprecated_/scopus_reports.py b/scopus/deprecated_/scopus_reports.py deleted file mode 100644 index b95e4e3c..00000000 --- a/scopus/deprecated_/scopus_reports.py +++ /dev/null @@ -1,182 +0,0 @@ -import warnings -from operator import itemgetter - -import matplotlib.pyplot as plt - -from .scopus_api import ScopusAbstract, ScopusJournal -from .scopus_author import ScopusAuthor - - -def report(scopus_search, label): - """Print out an org-mode report for search results. - - Parameters - ---------- - scopus_search : scopus.scopus_search.ScopusSearch - An object resulting from a ScopusSearch. - - label : str - The label used in the document title ("Report for ..."). - """ - text = "Development of this class has been suspended; Please use the new"\ - "package 'scopusreport' (https://scopusreport.readthedocs.io/en/latest/)"\ - "instead." - warnings.warn(text, DeprecationWarning) - - counts = {} # to count papers per author - journals = {} # to count publications per journal - author_count = [] # to count a paper's number of authors for a histogram - paper_cites = {} - Ncites = 0 - document_types = {} - - papers = 0 # to count number of publications - - for eid in scopus_search.EIDS: - a = ScopusAbstract(eid) - - # Get types of documents - try: - document_types[a.aggregationType] += 1 - except KeyError: - document_types[a.aggregationType] = 1 - - if a.aggregationType == 'Journal': - Ncites += int(a.citedby_count) # get total cites - papers += 1 - - # get count for journals - jkey = (a.publicationName, a.source_id, a.issn) - try: - journals[jkey] += 1 - except KeyError: - journals[jkey] = 1 - - # get authors per paper - author_count += [len(a.authors)] - - # now count papers per author - for author in a.authors: - key = (author.indexed_name, author.auid) - try: - counts[key] += 1 - except KeyError: - counts[key] = 1 - - # counting cites per paper - key = (a.title, a.scopus_link) - try: - paper_cites[key] += a.citedby_count - except KeyError: - paper_cites[key] = a.citedby_count - - print('*** Report for {}\n'.format(label)) - print('#+attr_latex: :placement [H] :center nil') - print('#+caption: Types of documents found for {}.'.format(label)) - print('| Document type | count |\n|-') - for key in document_types: - print('| {0} | {1} |'.format(key, document_types[key])) - - print('\n\n') - print('{0} articles ({2} citations) ' - 'found by {1} authors'.format(papers, len(counts), Ncites)) - - # Author counts {(name, scopus-id): count} - view = [('[[https://www.scopus.com/authid/detail.uri?authorId={0}][{1}]]'.format( - k[1], k[0]), # org-mode link - v, k[1]) for k, v in counts.items()] # counts, scopus-id - view.sort(reverse=True, key=itemgetter(1)) - - print('\n#+attr_latex: :placement [H] :center nil') - print('#+caption: Author publication counts for {0}.'.format(label)) - print('| name | count | categories |') - print('|-') - for name, count, scopus_id in view[0:20]: - cats = ', '.join(['{0} ({1})'.format(cat[0], cat[1]) - for cat in ScopusAuthor(scopus_id).categories[0:3]]) - print('| {0} | {1} | {2} |'.format(name, count, cats)) - - # journal view - s = '[[https://www.scopus.com/source/sourceInfo.url?sourceId={0}][{1}]]' - jview = [(s.format(k[1], k[0][0:50]), # url - k[1], # source_id - k[2], # issn - v) # count - for k, v in journals.items()] - jview.sort(reverse=True, key=itemgetter(3)) - - print('\n\n') - print('#+attr_latex: :placement [H] :center nil') - print('#+caption: Journal publication counts for {0}.'.format(label)) - print('| Journal | count | IPP |') - print('|-') - - for journal, sid, issn, count in jview[0:12]: - # issn may contain E-ISSN - issn_tokens = issn.split() - try: - JOURNAL = ScopusJournal(issn_tokens[0]) - except: - JOURNAL = ScopusJournal(issn_tokens[1]) - IPP = JOURNAL.IPP or 0 - print('| {0} | {1} | {2} |'.format(journal, count, IPP)) - - # view of journals sorted by `IPP - JVIEW = [] - for journal, sid, issn, count in jview: - issn_tokens = issn.split() - try: - JOURNAL = ScopusJournal(issn_tokens[0]) - except: - JOURNAL = ScopusJournal(issn_tokens[1]) - IPP = JOURNAL.IPP or 0 - JVIEW.append([journal, count, IPP]) - JVIEW.sort(reverse=True, key=itemgetter(2)) - - print('\n\n') - print('#+attr_latex: :placement [H] :center nil') - print('#+caption: Journal publication counts' - ' for {0} sorted by IPP.'.format(label)) - print('| Journal | count | IPP |') - print('|-') - for journal, count, IPP in JVIEW[0:12]: - print('|{0}|{1}|{2}|'.format(journal, count, IPP)) - - # top cited papers - pview = [('[[{0}][{1}]]'.format(k[1], k[0][0:60]), - int(v)) - for k, v in paper_cites.items()] - pview.sort(reverse=True, key=itemgetter(1)) - - # Compute department j-index - hindex = 0 - for i, entry in enumerate(pview): - # entry is url, source_id, count - u, count = entry - if count > i + 1: - continue - else: - hindex = i + 1 - break - - print('\n\n#+attr_latex: :placement [H] :center nil') - print('#+caption: Top cited publication' - ' counts for {0}. j-index = {1}.'.format(label, hindex)) - print('| title | cite count |\n|-') - for title, count in pview[0:10]: - print('| {0} | {1} |'.format(title, count)) - - plt.figure() - plt.hist(author_count, 20) - plt.xlabel('# authors') - plt.ylabel('frequency') - plt.savefig('{0}-nauthors-per-publication.png'.format(label)) - - print('\n\n#+caption: Number of authors ' - 'on each publication for {}.'.format(label)) - print('[[./{0}-nauthors-per-publication.png]]'.format(label)) - print('''**** Bibliography :noexport: - :PROPERTIES: - :VISIBILITY: folded - :END:''') - print(scopus_search.org_summary) diff --git a/scopus/utils/__init__.py b/scopus/utils/__init__.py index 59c01b06..65fab502 100644 --- a/scopus/utils/__init__.py +++ b/scopus/utils/__init__.py @@ -1,6 +1,5 @@ from scopus.utils.constants import * from scopus.utils.create_config import * from scopus.utils.get_content import * -from scopus.utils.get_encoded_text import * from scopus.utils.parse_content import * from scopus.utils.startup import * diff --git a/scopus/utils/get_encoded_text.py b/scopus/utils/get_encoded_text.py deleted file mode 100644 index f4c3cfe5..00000000 --- a/scopus/utils/get_encoded_text.py +++ /dev/null @@ -1,33 +0,0 @@ -# Namespaces for Scopus XML -ns = {'dtd': 'http://www.elsevier.com/xml/svapi/abstract/dtd', - 'dn': 'http://www.elsevier.com/xml/svapi/abstract/dtd', - 'ait': "http://www.elsevier.com/xml/ani/ait", - 'cto': "http://www.elsevier.com/xml/cto/dtd", - 'xocs': "http://www.elsevier.com/xml/xocs/dtd", - 'ce': 'http://www.elsevier.com/xml/ani/common', - 'prism': 'http://prismstandard.org/namespaces/basic/2.0/', - 'xsi': "http://www.w3.org/2001/XMLSchema-instance", - 'dc': 'http://purl.org/dc/elements/1.1/', - 'atom': 'http://www.w3.org/2005/Atom', - 'opensearch': 'http://a9.com/-/spec/opensearch/1.1/'} - - -def get_encoded_text(container, xpath): - """Return text for element at xpath in the container xml if it is there. - - Parameters - ---------- - container : xml.etree.ElementTree.Element - The element to be searched in. - - xpath : str - The path to be looked for. - - Returns - ------- - result : str - """ - try: - return "".join(container.find(xpath, ns).itertext()) - except AttributeError: - return None diff --git a/scopus/utils/startup.py b/scopus/utils/startup.py index 9eefde34..b87b72cf 100644 --- a/scopus/utils/startup.py +++ b/scopus/utils/startup.py @@ -17,17 +17,3 @@ warnings.warn(text, UserWarning) else: config.read(CONFIG_FILE) - -# Temporary Deprecation Warnings flags -warnings.simplefilter('always', DeprecationWarning) -try: - config.add_section('Warnings') -except configparser.DuplicateSectionError: - pass -text = "This class is deprecated and its maintenance has been suspended. "\ - "Please use {}() instead. For details see https://scopus."\ - "readthedocs.io/en/where/tips.html#migration-guide-from-0-x-to-1-x." -config.set('Warnings', 'Text', text) -config.set('Warnings', 'Affiliation', '1') -config.set('Warnings', 'Author', '1') -config.set('Warnings', 'Abstract', '1')