In [1]:
import numpy as np
import json
from pylatexenc.latexencode import unicode_to_latex

In [2]:
import inspyhep
from inspyhep.inspires_tools import InspiresRecord

In [12]:
import requests
class Author():
    """An author's INSPIRE-HEP literature records together with citation totals.

    Instantiating the class performs one HTTP request against the INSPIRE-HEP
    literature API, wraps every returned record in an ``InspiresRecord`` and
    sums up the citation counts.

    Only a single request is made, so at most ``max_papers`` records are
    retrieved. When ``num_hits`` is larger than ``len(inspires_records)`` the
    citation totals only cover the records that were actually retrieved.

    Modified from
        * https://github.com/efranzin/python
        * https://github.com/motloch/track_inspire-hep_citations
    """

    # Seconds to wait for INSPIRE-HEP before the request is abandoned; without
    # a timeout a stalled connection would block the kernel indefinitely.
    request_timeout = 60

    def __init__(self, identifier, max_papers=1000):
        """ Author()

            Parameters
            ----------
            identifier : str
                the author's identifier string (e.g. 'Steven.Weinberg.1')
            max_papers : int, optional
                Number of papers requested from INSPIRE-HEP, by default 1000

            Attributes set
            --------------
            author_query : str
                URL of the literature query for this author
            full_record : str or None
                raw text of the response, None if the query failed
            full_json_records : dict
                decoded response (an empty result set if the query failed)
            num_hits : int
                number of records INSPIRE-HEP reports for the query
            inspires_records : dict
                InspiresRecord instances keyed by texkey
            citations, citations_noself : int
                citation totals with and without self-citations
        """

        self.identifier    = identifier
        self.max_papers    = max_papers
        self.max_title_length = 100

        # Query Inspire-HEP for author's information
        _inspire_query = 'https://inspirehep.net/api/literature?sort=mostrecent'
        self.author_query = f'{_inspire_query}&size={self.max_papers}&q=a%20{self.identifier}'

        self.full_record = self.get_full_records_from_query(self.author_query)
        if self.full_record is None:
            # The query failed (a message has already been printed). Fall back
            # to an empty result set instead of crashing in json.loads(None).
            self.full_json_records = {'hits': {'total': 0, 'hits': []}}
        else:
            self.full_json_records = json.loads(self.full_record)

        # how many records found?
        self.num_hits = self.full_json_records['hits']['total']

        # Fill in information about author's papers from the website response
        self.get_records_dict(self.full_json_records)

        # total number of citations
        self.citations = self.get_total_number_of_citations(self.inspires_records)
        self.citations_noself = self.get_total_number_of_citations(self.inspires_records, cite_self=False)


    def get_total_number_of_citations(self, records: dict, cite_self=True) -> int:
        """ get_total_number_of_citations

        Parameters
        ----------
        records : dict
            the dictionary with all the InspiresRecord instances
        cite_self : bool, optional
            if True, count self citations, otherwise do not. By default True

        Returns
        -------
        int
            total number of citations of the author (0 for an empty dictionary)
        """
        # Attribute names follow what this class has always read from the
        # record objects; NOTE(review): the 'ins_' prefix appears on only one
        # of the two -- confirm against InspiresRecord.
        if cite_self:
            count_attribute = 'citation_count'
        else:
            count_attribute = 'ins_citation_count_without_self_citations'
        return sum(getattr(record, count_attribute) for record in records.values())

    def get_records_dict(self, json_records) -> dict:
        """get_records_dict get a dictionary of all inspire records for this author

        The result is also stored on the instance as ``self.inspires_records``.

        Parameters
        ----------
        json_records : dict
            decoded json output of the inspires query; the records are read
            from json_records['hits']['hits']

        Returns
        -------
        dict
            a dictionary whose values are instances of the InspiresRecord class,
            accessible with inspire texkeys (e.g., dic['weinberd:2002abc'])
        """
        self.inspires_records = {}
        for record in json_records['hits']['hits']:
            r = InspiresRecord(record['metadata'])
            self.inspires_records[f'{r.texkey}'] = r
        return self.inspires_records

    def get_full_records_from_query(self, query) -> str:
        """get_full_records_from_query get the full result of the author query to Inspires

        Parameters
        ----------
        query : str
            url string with the author query following Inspires API

        Returns
        -------
        str or None
            full string output from the Inspires query, or None (after printing
            a message) when the server does not answer with status 200
        """

        # Load the full record of the author. Use the URL that was passed in
        # rather than self.author_query, so the method honours its argument.
        response = requests.get(query, timeout=self.request_timeout)
        if response.status_code == 200:
            return (response.content).decode("utf-8")
        else:
            print(f"Could not find Inspire entry for author identified = {self.identifier}.")
            return None


In [14]:
# Build the author profile; the constructor issues a live HTTP request to INSPIRE-HEP.
me = Author('M.Hostert.1')

In [17]:
# Citation total with self-citations excluded (computed once in Author.__init__).
me.citations_noself

1047

# Importing data directly from a local JSON file

In [8]:
# Read the example JSON file into `data`; presumably a saved INSPIRE-HEP API
# response, since the cells below index it as data['hits']['hits'] -- confirm.
with open('example_inspire_record.json', 'r') as file:
    data = json.load(file)

In [11]:
# First texkey of the first record in the loaded file.
data['hits']['hits'][0]['metadata']['texkeys'][0]

'Batell:2022xau'

In [321]:
# Wrap the metadata of one record in the record class imported at the top of
# the notebook. The class is called InspiresRecord; the previous spelling
# `InspireRecord` is not defined on a fresh kernel.
rec = InspiresRecord(data['hits']['hits'][14]['metadata'])
# Render the record with non-ASCII characters replaced by LaTeX macros.
# NOTE(review): `rec` is passed as-is rather than as str(rec) -- confirm that
# unicode_to_latex accepts the record object.
unicode_to_latex(rec)

'Arg\\"uelles, Foppiani, Hostert, Phys.Rev.D 105 (2022) 9 095006, 2022, arXiv:2109.03831 [hep-ph].'

In [300]:
# Inspect the record's author list (attribute provided by the record class,
# which is not defined in this notebook).
rec.authors_

['Arg\\"uelles', 'Foppiani', 'Hostert']

In [262]:
# Print the metadata field names of every record whose first document_type
# entry is 'proceedings'.
for record in data['hits']['hits']:
    if record['metadata']['document_type'][0] == 'proceedings':
        print(record['metadata'].keys())
    

dict_keys(['report_numbers', 'authors', 'publication_info', 'documents', 'citation_count', 'citation_count_without_self_citations', 'citeable', '$schema', 'keywords', 'references', 'number_of_pages', 'legacy_creation_date', 'preprint_date', 'author_count', 'public_notes', 'earliest_date', 'external_system_identifiers', 'facet_author_name', 'core', 'license', 'journal_title_variants', 'arxiv_eprints', 'referenced_authors_bais', 'figures', 'legacy_version', 'inspire_categories', 'urls', 'first_author', 'control_number', 'dois', 'document_type', 'texkeys', 'abstracts', 'primary_arxiv_category', 'titles', 'imprints', 'curated'])


In [226]:
# Position of the record to inspect within data['hits']['hits'].
i=40
# Show the record's first document type followed by a blank line ...
print(data['hits']['hits'][i]['metadata']['document_type'][0],'\n')

# ... then the Python type of every metadata field of that record.
for key in data['hits']['hits'][i]['metadata'].keys():
    print(key, type(data['hits']['hits'][i]['metadata'][key]))
        



conference paper 

authors <class 'list'>
publication_info <class 'list'>
documents <class 'list'>
citation_count <class 'int'>
citation_count_without_self_citations <class 'int'>
citeable <class 'bool'>
$schema <class 'str'>
keywords <class 'list'>
references <class 'list'>
number_of_pages <class 'int'>
referenced_authors_bais <class 'list'>
legacy_version <class 'str'>
inspire_categories <class 'list'>
legacy_creation_date <class 'str'>
author_count <class 'int'>
urls <class 'list'>
first_author <class 'dict'>
control_number <class 'int'>
dois <class 'list'>
earliest_date <class 'str'>
document_type <class 'list'>
texkeys <class 'list'>
abstracts <class 'list'>
titles <class 'list'>
facet_author_name <class 'list'>
core <class 'bool'>
license <class 'list'>
imprints <class 'list'>
curated <class 'bool'>
journal_title_variants <class 'list'>


C. A. Argüelles, I. Esteban, M. Hostert, Kevin J. Kelly, J. Kopp, P. A. N. Machado, I. Martinez-Soler, Y. F. Perez-Gonzalez
Argüelles, C.A. and Esteban, I. and Hostert, M. and Kelly, Kevin J. and Kopp, J. and Machado, P.A.N. and Martinez-Soler, I. and Perez-Gonzalez, Y.F.
