In [29]:
import urllib
import time
import feedparser

In [None]:
# Simple Query Read
with urllib.request.urlopen(base_url+'search_query=all:electron&start=0&max_results=1') as url:
      r = url.read()
#print(r)

In [10]:
# Base api query url
base_url = 'http://export.arxiv.org/api/query?';

In [11]:
# Search parameters
search_query = 'all:electron' # search for electron in all fields
start = 0                     # retreive the first 5 results
max_results = 5

query = 'search_query=%s&start=%i&max_results=%i' % (search_query,
                                                     start,
                                                     max_results)

In [12]:
# Opensearch metadata such as totalResults, startIndex, 
# and itemsPerPage live in the opensearch namespase.
# Some entry metadata lives in the arXiv namespace.
# This is a hack to expose both of these namespaces in
# feedparser v4.1
#feedparser._FeedParserMixin.namespaces['http://a9.com/-/spec/opensearch/1.1/'] = 'opensearch'
#feedparser._FeedParserMixin.namespaces['http://arxiv.org/schemas/atom'] = 'arxiv'

d = feedparser.parse('http://arxiv.org/schemas/atom')
d.namespaces

{'': 'http://www.w3.org/2001/XMLSchema', 'a': 'http://arxiv.org/schemas/atom'}

In [14]:
# perform a GET request using the base_url and query
response = urllib.request.urlopen(base_url+query).read()

In [17]:
# parse the response using feedparser
feed = feedparser.parse(response)
feed

{'bozo': False,
 'entries': [{'id': 'http://arxiv.org/abs/cond-mat/0102536v1',
   'guidislink': True,
   'link': 'http://arxiv.org/abs/cond-mat/0102536v1',
   'updated': '2001-02-28T20:12:09Z',
   'updated_parsed': time.struct_time(tm_year=2001, tm_mon=2, tm_mday=28, tm_hour=20, tm_min=12, tm_sec=9, tm_wday=2, tm_yday=59, tm_isdst=0),
   'published': '2001-02-28T20:12:09Z',
   'published_parsed': time.struct_time(tm_year=2001, tm_mon=2, tm_mday=28, tm_hour=20, tm_min=12, tm_sec=9, tm_wday=2, tm_yday=59, tm_isdst=0),
   'title': 'Impact of Electron-Electron Cusp on Configuration Interaction Energies',
   'title_detail': {'type': 'text/plain',
    'language': None,
    'base': '',
    'value': 'Impact of Electron-Electron Cusp on Configuration Interaction Energies'},
   'summary': 'The effect of the electron-electron cusp on the convergence of configuration\ninteraction (CI) wave functions is examined. By analogy with the\npseudopotential approach for electron-ion interactions, an effect

In [22]:
# print out feed information
print ('Feed title: %s' % feed.feed.title)
print ('Feed last updated: %s' % feed.feed.updated)

Feed title: ArXiv Query: search_query=all:electron&amp;id_list=&amp;start=0&amp;max_results=5
Feed last updated: 2023-05-11T00:00:00-04:00


In [24]:
# print opensearch metadata
print ('totalResults for this query: %s' % feed.feed.opensearch_totalresults)
print ('itemsPerPage for this query: %s' % feed.feed.opensearch_itemsperpage)
print ('startIndex for this query: %s'   % feed.feed.opensearch_startindex)

totalResults for this query: 195950
itemsPerPage for this query: 5
startIndex for this query: 0


In [28]:
# Run through each entry, and print out information
for entry in feed.entries:
    print ('e-print metadata')
    print ('arxiv-id: %s' % entry.id.split('/abs/')[-1])
    print ('Published: %s' % entry.published)
    print ('Title:  %s' % entry.title)
    
    # feedparser v4.1 only grabs the first author
    author_string = entry.author
    
    # grab the affiliation in <arxiv:affiliation> if present
    # - this will only grab the first affiliation encountered
    #   (the first affiliation for the first author)
    # Please email the list with a way to get all of this information!
    try:
        author_string += ' (%s)' % entry.arxiv_affiliation
    except AttributeError:
        pass
    
    print ('Last Author:  %s' % author_string)
    
    # feedparser v5.0.1 correctly handles multiple authors, print them all
    try:
        print ('Authors:  %s' % ', '.join(author.name for author in entry.authors))
    except AttributeError:
        pass

    # get the links to the abs page and pdf for this e-print
    for link in entry.links:
        if link.rel == 'alternate':
            print ('abs page link: %s' % link.href)
        elif link.title == 'pdf':
            print ('pdf link: %s' % link.href)
    
    # The journal reference, comments and primary_category sections live under 
    # the arxiv namespace
    try:
        journal_ref = entry.arxiv_journal_ref
    except AttributeError:
        journal_ref = 'No journal ref found'
    print ('Journal reference: %s' % journal_ref)
    
    try:
        comment = entry.arxiv_comment
    except AttributeError:
        comment = 'No comment found'
    print ('Comments: %s' % comment)
    
    # Since the <arxiv:primary_category> element has no data, only
    # attributes, feedparser does not store anything inside
    # entry.arxiv_primary_category
    # This is a dirty hack to get the primary_category, just take the
    # first element in entry.tags.  If anyone knows a better way to do
    # this, please email the list!
    print ('Primary Category: %s' % entry.tags[0]['term'])
    
    # Lets get all the categories
    all_categories = [t['term'] for t in entry.tags]
    print ('All Categories: %s' % (', ').join(all_categories))
    
    # The abstract is in the <summary> element
    print ('Abstract: %s' %  entry.summary)

e-print metadata
arxiv-id: cond-mat/0102536v1
Published: 2001-02-28T20:12:09Z
Title:  Impact of Electron-Electron Cusp on Configuration Interaction Energies
Last Author:  J. C. Greer (NMRC, University College, Cork, Ireland)
Authors:  David Prendergast, M. Nolan, Claudia Filippi, Stephen Fahy, J. C. Greer
abs page link: http://arxiv.org/abs/cond-mat/0102536v1
pdf link: http://arxiv.org/pdf/cond-mat/0102536v1
Journal reference: J. Chem. Phys. 115, 1626 (2001)
Comments: 11 pages, 6 figures, 3 tables, LaTeX209, submitted to The Journal of
  Chemical Physics
Primary Category: cond-mat.str-el
All Categories: cond-mat.str-el
Abstract: The effect of the electron-electron cusp on the convergence of configuration
interaction (CI) wave functions is examined. By analogy with the
pseudopotential approach for electron-ion interactions, an effective
electron-electron interaction is developed which closely reproduces the
scattering of the Coulomb interaction but is smooth and finite at zero
electron-

In [None]:
## Paging Example

In [30]:
# Search parameters
search_query = 'all:biophysics' # search for biophysics in all fields
start = 0                       # start at the first result
total_results = 20              # want 20 total results
results_per_iteration = 5       # 5 results at a time
wait_time = 3                   # number of seconds to wait beetween calls

In [31]:
print ('Searching arXiv for %s' % search_query)

Searching arXiv for all:biophysics


In [34]:
for i in range(start,total_results,results_per_iteration):
    
    print ("Results %i - %i" % (i,i+results_per_iteration))
    
    query = 'search_query=%s&start=%i&max_results=%i' % (search_query,
                                                         i,
                                                        results_per_iteration)

    # perform a GET request using the base_url and query
    response = urllib.request.urlopen(base_url+query).read()

    # parse the response using feedparser
    feed = feedparser.parse(response)

    # Run through each entry, and print out information
    for entry in feed.entries:
        print ('arxiv-id: %s' % entry.id.split('/abs/')[-1])
        print ('Title:  %s' % entry.title)
        # feedparser v4.1 only grabs the first author
        print ('First Author:  %s' % entry.author)
    
    # Remember to play nice and sleep a bit before you call
    # the api again!
    print ('Sleeping for %i seconds' % wait_time) 
    time.sleep(wait_time)

Results 0 - 5
arxiv-id: 2303.14456v1
Title:  The African Biophysics Landscape: A Provisional Status Report
First Author:  Lawrence Norris
arxiv-id: 1303.0453v1
Title:  Biophysics software for interdisciplinary education and research
First Author:  J. M. Deutsch
arxiv-id: 1307.1009v1
Title:  Systems Biophysics of Gene Expression
First Author:  Leonor Saiz
arxiv-id: 1402.6330v2
Title:  Black Holes and Biophysical (Mem)-branes
First Author:  Troels Harmark
arxiv-id: 1510.04919v1
Title:  The Twilight of Determinism: At Least in Biophysical Novelties
First Author:  Amihud Gilead
Sleeping for 3 seconds
Results 5 - 10
arxiv-id: 1902.06557v1
Title:  Decomposing multispectral face images into diffuse and specular shading
  and biophysical parameters
First Author:  William A. P. Smith
arxiv-id: 1905.02007v1
Title:  Axonal Computations
First Author:  Ahmed El Hady
arxiv-id: 2006.10113v2
Title:  From biophysical to integrate-and-fire modelling
First Author:  Rodolphe Sepulchre
arxiv-id: 2103.17131