# In [2]:
import os
import requests
import arxiv

def download_arxiv_pdfs(query, max_results=10, output_folder="arxiv_papers"):
    """
    Search arXiv and download the PDF of each matching paper.

    Results are requested sorted by submission date. A PDF is saved as
    ``<last path segment of the PDF URL>.pdf`` inside ``output_folder``;
    files that already exist there are not downloaded again.

    Args:
        query (str): Search term for arXiv.
        max_results (int): Maximum number of papers to fetch.
        output_folder (str): Directory to save PDFs (created if missing).

    Returns:
        list of dict: One dict per paper whose PDF is on disk, with keys
        ``title``, ``authors`` (list of author names), ``published``,
        ``categories`` and ``pdf_path``.

    Raises:
        requests.RequestException: If a download fails (connection error,
            timeout, or a non-2xx HTTP status). No partial file is left at
            the final ``pdf_path`` in that case.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Search papers
    search = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate,
    )

    papers = []
    # Search.results() is deprecated in arxiv.py 2.x; Client.results(search)
    # is the supported equivalent.
    for result in arxiv.Client().results(search):
        title = result.title
        pdf_url = result.pdf_url
        if not pdf_url:
            # Without a PDF link there is nothing to download or name the
            # file after, so the entry is left out of the returned list.
            print(f" Skipped (no PDF link): {title}")
            continue

        paper_id = pdf_url.split("/")[-1]
        file_name = f"{paper_id}.pdf"
        save_path = os.path.join(output_folder, file_name)

        # Download if not already present
        if os.path.exists(save_path):
            print(f" Skipped (already exists): {title}")
        else:
            # Stream into a temporary sibling file and rename it only once
            # the download is complete; otherwise an interrupted transfer
            # would leave a truncated PDF that later runs treat as
            # "already exists".
            tmp_path = f"{save_path}.part"
            try:
                # (connect, read) timeouts keep a stalled server from
                # hanging the whole batch; the context manager returns the
                # streamed connection to the pool.
                with requests.get(pdf_url, stream=True, timeout=(10, 60)) as response:
                    # Fail loudly instead of saving an HTTP error page as a PDF.
                    response.raise_for_status()
                    with open(tmp_path, "wb") as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            f.write(chunk)
                os.replace(tmp_path, save_path)
            finally:
                # After a successful rename tmp_path no longer exists; this
                # only removes leftovers from a failed download.
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
            print(f" Downloaded: {title}")

        papers.append({
            "title": title,
            "authors": [author.name for author in result.authors],
            "published": result.published,
            "categories": result.categories,
            "pdf_path": save_path,
        })

    return papers
