# XML to CSV

To get some sample entries to work with in the database, I query the arXiv API at [https://export.arxiv.org](https://export.arxiv.org) via its query endpoint and save the result as XML, which is then converted to CSV.

In [1]:
# Number of entries to request from the API.
n_entries = 300

# Query URL for the arXiv API. It is assembled from adjacent string literals
# (joined at compile time) rather than a triple-quoted string, so the
# resulting URL contains no embedded newlines or indentation.
url = (
    "https://export.arxiv.org/api/query"
    "?search_query=cat:cs.*"
    "&start=0"
    f"&max_results={n_entries}"
    "&sortBy=submittedDate"
    "&sortOrder=descending"
)

# Fetch the query result and save it to 'arxiv_papers.xml'.
import requests

# A timeout keeps the cell from hanging indefinitely if the server stalls.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of saving an error page as the XML feed.
response.raise_for_status()

# Write the raw bytes so the file holds exactly what the server sent,
# independent of the platform's default text encoding.
with open('arxiv_papers.xml', 'wb') as file:
    file.write(response.content)

In [2]:
import xml.etree.ElementTree as ET
import csv

# Convert the Atom feed stored in 'arxiv_papers.xml' into 'arxiv_papers.csv',
# writing one CSV row per <entry> element.

# Prefix map so element paths can be written as 'atom:tag' instead of
# repeating the full namespace URI.
ATOM_NS = {'atom': 'http://www.w3.org/2005/Atom'}

# Parse the XML file
tree = ET.parse('arxiv_papers.xml')
root = tree.getroot()

# Open a new CSV file to write. The encoding is set explicitly so non-ASCII
# characters in titles and author names do not depend on the platform default.
with open('arxiv_papers.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csvwriter = csv.writer(csvfile)

    # Write header
    csvwriter.writerow(['title', 'author', 'summary', 'published'])

    # Extract data
    for entry in root.findall('atom:entry', ATOM_NS):
        # findtext(..., default='') yields an empty string when a child
        # element is absent, instead of raising AttributeError on None.text.
        title = entry.findtext('atom:title', default='', namespaces=ATOM_NS)
        # All author names of the entry, joined into a single cell.
        author = ', '.join(
            author_el.findtext('atom:name', default='', namespaces=ATOM_NS)
            for author_el in entry.findall('atom:author', ATOM_NS)
        )
        summary = entry.findtext('atom:summary', default='', namespaces=ATOM_NS)
        published = entry.findtext('atom:published', default='', namespaces=ATOM_NS)

        # Write row to CSV
        csvwriter.writerow([title, author, summary, published])