In [4]:
import requests
from bs4 import BeautifulSoup
from PyPDF2 import PdfReader
import io
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

def fetch_arxiv_data(search_query, max_results=10, start_date=None, end_date=None, primary_category=None, categories=None):
    api_url = "http://export.arxiv.org/api/query"
    params = {
        "search_query": search_query,
        "start": 0,
        "max_results": max_results,
    }
    if start_date:
        params["start_date"] = start_date
    if end_date:
        params["end_date"] = end_date
    if primary_category:
        params["cat"] = primary_category
    if categories:
        params["categories"] = categories

    response = requests.get(api_url, params=params)
    if response.status_code == 200:
        feed = BeautifulSoup(response.content, features="html.parser")
        entries = feed.find_all('entry')
        articles = []

        for entry in entries:
            article = {}
            article['Title'] = entry.title.text
            article['Authors'] = [author.find('name').text for author in entry.find_all('author')]
            article['Published'] = entry.published.text
            article['Updated'] = entry.updated.text
            article['Summary'] = entry.summary.text.strip()
            article['ID'] = entry.id.text
            articles.append(article)

        df = pd.DataFrame(articles)
        return df
    else:
        print("Failed to retrieve data from arXiv API")
        return None

def download_pdf_from_link(link):
    response = requests.get(link, stream=True)
    if response.status_code == 200:
        return io.BytesIO(response.content)
    else:
        print(f"Failed to download PDF from {link}")
        return None

def extract_text_from_pdf(pdf_io):
    if pdf_io:
        try:
            reader = PdfReader(pdf_io)
            text = ""
            for page in reader.pages:
                text += page.extract_text() + "\n"
            return text
        except Exception as e:
            print(f"Error occurred while extracting text from PDF: {str(e)}")
            return ""

def process_pdf_links(df):
    df['PDF_Text'] = ''
    for index, row in df.iterrows():
        link = row['ID']
        # We need to convert the '/abs/' URL to a '/pdf/' URL and ensure it ends with '.pdf'
        link = link.replace('abs', 'pdf')
        if not link.endswith('.pdf'):
            link += '.pdf'
        print(f"Processing link: {link}")
        pdf_io = download_pdf_from_link(link)
        if pdf_io:
            text = extract_text_from_pdf(pdf_io)
            df.at[index, 'PDF_Text'] = text
    return df



In [6]:
# Example usage
df = fetch_arxiv_data(search_query="quantum physics", max_results=10, start_date="2023-01-01", end_date="2023-12-31", primary_category="quant-ph", categories=["quant-ph", "cond-mat"])
df_with_pdf_text = process_pdf_links(df)
df_with_pdf_text

Processing link: http://arxiv.org/pdf/quant-ph/0302169v1.pdf
Processing link: http://arxiv.org/pdf/1212.4177v1.pdf
Processing link: http://arxiv.org/pdf/1504.03207v1.pdf
Processing link: http://arxiv.org/pdf/2208.08064v1.pdf
Processing link: http://arxiv.org/pdf/cond-mat/0601285v1.pdf
Processing link: http://arxiv.org/pdf/0811.2516v1.pdf
Processing link: http://arxiv.org/pdf/1807.11019v1.pdf
Processing link: http://arxiv.org/pdf/quant-ph/0201082v1.pdf
Processing link: http://arxiv.org/pdf/quant-ph/0309066v1.pdf
Processing link: http://arxiv.org/pdf/quant-ph/0504224v1.pdf


Unnamed: 0,Title,Authors,Published,Updated,Summary,ID,PDF_Text
0,Nonlinear Dynamics In Quantum Physics -- Quant...,[H. Kröger],2003-02-21T20:20:47Z,2003-02-21T20:20:47Z,We discuss the recently proposed quantum actio...,http://arxiv.org/abs/quant-ph/0302169v1,arXiv:quant-ph/0302169v1 21 Feb 2003PROCEEDIN...
1,Quantum spherical model,[I. Lyberg],2012-12-17T22:06:56Z,2012-12-17T22:06:56Z,"We define a ""quantum spherical model"", a quant...",http://arxiv.org/abs/1212.4177v1,arXiv:1212.4177v1 [math-ph] 17 Dec 2012A “qu...
2,Can classical physics agree with quantum physi...,[Michele Marrocco],2015-04-13T15:04:26Z,2015-04-13T15:04:26Z,Classical physics fails where quantum physics ...,http://arxiv.org/abs/1504.03207v1,1 Can classical physics agree with quant um p...
3,The Physics of Quantum Information,[John Preskill],2022-08-17T04:35:36Z,2022-08-17T04:35:36Z,Rapid ongoing progress in quantum information ...,http://arxiv.org/abs/2208.08064v1,arXiv:2208.08064v1 [quant-ph] 17 Aug 2022Aug...
4,Topologization of electron liquids with Chern-...,[Zhenghan Wang],2006-01-13T09:06:10Z,2006-01-13T09:06:10Z,"We discuss a nexus among quantum topology, qua...",http://arxiv.org/abs/cond-mat/0601285v1,arXiv:cond-mat/0601285v1 [cond-mat.mes-hall] ...
5,"Operational Quantum Mechanics, Quantum Axiomat...",[Diederik Aerts],2008-11-15T18:37:02Z,2008-11-15T18:37:02Z,"The role of operational quantum mechanics, qua...",http://arxiv.org/abs/0811.2516v1,arXiv:0811.2516v1 [physics.hist-ph] 15 Nov 2...
6,Universal Uncertainty Principle in Different Q...,"[C. Huang, Yong-Chang Huang]",2018-07-29T07:50:32Z,2018-07-29T07:50:32Z,This paper deduces universal uncertainty princ...,http://arxiv.org/abs/1807.11019v1,1 \n Universal Uncertainty Principle in Diff...
7,Quantum Computers and Quantum Computer Languag...,[Stephen Blaha],2002-01-18T15:08:05Z,2002-01-18T15:08:05Z,We show a representation of Quantum Computers ...,http://arxiv.org/abs/quant-ph/0201082v1,QQuuaannttuumm CCoommppuutteerrss aanndd QQ...
8,Probabilistic foundations of quantum mechanics...,[Andrei Khrennikov],2003-09-08T09:37:27Z,2003-09-08T09:37:27Z,We discuss foundation of quantum mechanics (in...,http://arxiv.org/abs/quant-ph/0309066v1,arXiv:quant-ph/0309066v1 8 Sep 2003Probabilis...
9,From quantum graphs to quantum random walks,[Gregor Tanner],2005-04-29T09:41:00Z,2005-04-29T09:41:00Z,We give a short overview over recent developme...,http://arxiv.org/abs/quant-ph/0504224v1,arXiv:quant-ph/0504224v1 29 Apr 2005From quan...


In [8]:
print(df_with_pdf_text['PDF_Text'].iloc[0])


arXiv:quant-ph/0302169v1  21 Feb 2003PROCEEDINGS OF THE TENTH INTERNATIONAL CONFERENCE ON
COMPUTATIONAL AND APPLIED MATHEMATICS
July 22 – 26, 2002, Leuven, Belgium pp.1–10
NONLINEAR DYNAMICS IN QUANTUM PHYSICS –
QUANTUM CHAOS AND QUANTUM INSTANTONS
Helmut Kr ¨oger
D´ epartement de Physique
Universit´ e Laval
Qu´ ebec, Qu´ ebec G1K 7P4, Canada
Abstract. We discuss the recently proposed quantum action - its interp retation, its moti-
vation, its mathematical properties and its use in physics: quantum mechanical tunneling,
quantum instantons and quantum chaos.
1.Introduction. Modern physics returns to some of its origins dating back
to the ﬁrst part of the last century. Examples are entanglement, according to
Schr¨ odinger themost peculiar property occuring inquantum mech anics, orthe
condensationofvery coldatomspredictedbyEinsteinandBose(Bo se-Einstein
condensate). Another example is nonlinear dynamics and chaos, da ting back
to the work of Poincar´ e and others and its modern descendent