In [48]:
from dataclasses import dataclass
from typing import List, Optional
from datetime import datetime

@dataclass
class ResearchPaper:
    """Base data model for representing a research paper.

    A plain record type: the dataclass decorator generates ``__init__``,
    ``__repr__`` and ``__eq__`` from the fields below. The first five fields
    are required; ``pdf_url`` and ``categories`` default to ``None``.
    """

    # Identifier of the paper (this notebook fills it with the arXiv entry id,
    # which is the paper's "/abs/" URL).
    id: str
    # Title of the paper.
    title: str
    # Author names, one string per author.
    authors: List[str]
    # Abstract / summary text of the paper.
    abstract: str
    # Publication timestamp.
    publication_date: datetime
    # Link to the PDF, when one is known.
    pdf_url: Optional[str] = None
    # Category labels of the paper, when known.
    categories: Optional[List[str]] = None

In [42]:
import feedparser
import requests

BASE_URL = "http://export.arxiv.org/api/query"


def search_papers(query: str, max_results: int = 10, timeout: float = 30.0) -> list:
    """
    Search Arxiv for research papers based on a query string.

    Sends a GET request to ``BASE_URL`` asking for results sorted by
    relevance in descending order, then parses the response body with
    feedparser.

    Parameters:
        query (str): The search query (e.g., keywords, author names). It is
            sent as the API's ``search_query`` parameter.
        max_results (int): The maximum number of results to return.
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout the request could block indefinitely.

    Returns:
        list: The parsed feed's entries, one dict-like object per paper.
        These are raw feed entries, not ResearchPaper instances.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    params = {
        "search_query": query,
        "max_results": max_results,
        "sortBy": "relevance",
        "sortOrder": "descending"
    }

    response = requests.get(BASE_URL, params=params, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as a feed.
    response.raise_for_status()

    return feedparser.parse(response.text)['entries']

In [43]:
# Fetch up to 10 feed entries matching the keyword "electron".
papers = search_papers(query="electron", max_results=10)

In [49]:
import re

def _entry_to_paper(entry) -> ResearchPaper:
    """
    Build a ResearchPaper from one feedparser entry of the Arxiv feed.

    Parameters:
        entry: A dict-like feed entry as returned by ``search_papers``.

    Returns:
        ResearchPaper: The entry's id, title, author names, summary,
        publication date, PDF link and category terms.

    Raises:
        KeyError: If 'id', 'title', 'authors', 'summary' or 'published'
            is missing from the entry.
        ValueError: If 'published' is not in ``YYYY-MM-DDTHH:MM:SSZ`` form.
    """
    # Primary category first, then the terms feedparser exposes under 'tags'
    # (one per <category> element). dict.fromkeys removes duplicates while
    # keeping that order.
    primary = entry.get('arxiv_primary_category', {}).get('term')
    tagged = [tag.get('term') for tag in entry.get('tags', [])]
    categories = list(dict.fromkeys(term for term in [primary, *tagged] if term))

    return ResearchPaper(
        id=entry['id'],
        title=entry['title'],
        authors=[author['name'] for author in entry['authors']],
        abstract=entry['summary'],
        # The trailing 'Z' is matched as a literal character, so the result
        # is a naive datetime (no tzinfo attached).
        publication_date=datetime.strptime(entry['published'], '%Y-%m-%dT%H:%M:%SZ'),
        pdf_url=entry['id'].replace('/abs/', '/pdf/'),
        # The model declares a list of categories (or None), not a bare string.
        categories=categories or None,
    )


p_o = []
for p in papers:
    try:
        p_o.append(_entry_to_paper(p))
    except (KeyError, TypeError, ValueError) as exc:
        # Report and skip a malformed entry; stopping at the first failure
        # would silently truncate the list of converted papers.
        print(f"Skipping entry {p.get('id', '<no id>')!r}: {exc!r}")

In [62]:
# Display the first converted ResearchPaper to spot-check the field mapping.
p_o[0]

ResearchPaper(id='http://arxiv.org/abs/cond-mat/0102536v1', title='Impact of Electron-Electron Cusp on Configuration Interaction Energies', authors=['David Prendergast', 'M. Nolan', 'Claudia Filippi', 'Stephen Fahy', 'J. C. Greer'], abstract='The effect of the electron-electron cusp on the convergence of configuration\ninteraction (CI) wave functions is examined. By analogy with the\npseudopotential approach for electron-ion interactions, an effective\nelectron-electron interaction is developed which closely reproduces the\nscattering of the Coulomb interaction but is smooth and finite at zero\nelectron-electron separation. The exact many-electron wave function for this\nsmooth effective interaction has no cusp at zero electron-electron separation.\nWe perform CI and quantum Monte Carlo calculations for He and Be atoms, both\nwith the Coulomb electron-electron interaction and with the smooth effective\nelectron-electron interaction. We find that convergence of the CI expansion of\nthe 

In [39]:
# `search_papers` already returns the feed's entry list, so index it directly
# to inspect the raw fields of the first result.
papers[0]

{'id': 'http://arxiv.org/abs/cond-mat/0102536v1',
 'guidislink': True,
 'link': 'http://arxiv.org/abs/cond-mat/0102536v1',
 'updated': '2001-02-28T20:12:09Z',
 'updated_parsed': time.struct_time(tm_year=2001, tm_mon=2, tm_mday=28, tm_hour=20, tm_min=12, tm_sec=9, tm_wday=2, tm_yday=59, tm_isdst=0),
 'published': '2001-02-28T20:12:09Z',
 'published_parsed': time.struct_time(tm_year=2001, tm_mon=2, tm_mday=28, tm_hour=20, tm_min=12, tm_sec=9, tm_wday=2, tm_yday=59, tm_isdst=0),
 'title': 'Impact of Electron-Electron Cusp on Configuration Interaction Energies',
 'title_detail': {'type': 'text/plain',
  'language': None,
  'base': '',
  'value': 'Impact of Electron-Electron Cusp on Configuration Interaction Energies'},
 'summary': 'The effect of the electron-electron cusp on the convergence of configuration\ninteraction (CI) wave functions is examined. By analogy with the\npseudopotential approach for electron-ion interactions, an effective\nelectron-electron interaction is developed which