# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Some figures are very large: raise Pillow's pixel-count limit so they can still be opened
Image.MAX_IMAGE_PIXELS = 1_000_000_000

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category reserved for affiliation-related issues."""

class AffiliationError(RuntimeError):
    """Error raised when the affiliation check of a paper fails."""

def validation(source: str):
    """Check the affiliations of a paper before its TeX code is parsed

    Passed as the ``validation`` hook of ``latex.LatexDocument`` so that
    papers failing the affiliation check are rejected before parsing.

    :param source: TeX source to check (presumably the document text --
                   confirm against ``latex.LatexDocument``)
    :raises AffiliationError: if ``mpia.affiliation_verifications`` returns
                              anything other than ``True``; the returned
                              value is included in the error message
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # Format rather than concatenate: a non-string result (e.g. False)
        # must still surface as an AffiliationError, not as a TypeError.
        raise AffiliationError(f"mpia.affiliation_verifications: {check}")

        
# Always show affiliation warnings, even if the same one was already reported
warnings.simplefilter(action='always', category=AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g. ``2509.08044``)
    :returns: HTML snippet (``<div id="qrcode">`` wrapping an ``<img>``)
              that can be appended to markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    target = f"https://arxiv.org/abs/{paper_id}"
    # Quote the whole src attribute. Quoting only the payload left the
    # attribute unquoted and made the quote characters part of the data
    # encoded in the QR code.
    txt = f'<img src="{url}{target}">'
    return '<div id="qrcode">' + txt + '</div>'


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Strip the LaTeX CJK environment commands wrapped around characters

    Only the ``CJK`` environment with the ``UTF8``/``gbsn`` arguments is
    recognised so far (the list may need to grow); the text enclosed by the
    environment is kept.

    :param text: the text to clean
    :return: the cleaned text
    """
    cjk_environment = re.compile(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})")
    # keep only the content found between the begin/end commands
    return cjk_environment.sub(lambda found: found.group(2), text)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last is reduced to its initial (hyphenated tokens
    give one initial per part); the last token is kept in full. A
    parenthesised part, often used for non western names, is moved to the
    end of the result unchanged.

    Malformed input does not raise: an empty name gives an empty string, an
    unmatched "(" is left untouched and empty parts of hyphenated tokens
    (e.g. "Anna-") are ignored.

    :param name: full name
    :returns: initials
    """
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.search(r"\((.*?)\)", name)
        if found is not None:
            suffix = found.group(1)
            name = name.replace(f"({suffix})", '')
    tokens = name.split()
    initials = []
    for token in tokens[:-1]:
        # `if part` skips the empty pieces left by leading, trailing or
        # doubled hyphens, which would otherwise fail on part[0]
        current = '-'.join(part[0] + '.' for part in token.split('-') if part)
        if current:
            initials.append(current)
    if tokens:
        initials.append(tokens[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff ("Mitarbeiter") list webpage from mpia.de to get the author names.
We then get all new papers from arXiv and match their authors against this list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Tell whether a staff-list entry is kept as a potential author

    This is a loose filter: an entry is rejected as soon as it contains (as
    a substring) one of the hard-coded names of IT, administration or
    technical staff.

    :param name: name
    :returns: False if name is not a scientist, True otherwise
    """
    excluded = ('Licht', 'Binroth', 'Witzel', 'Jordan',
                'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                'Dehen', 'Beckmann', 'Jager', 'Jäger')
    return not any(fragment in name for fragment in excluded)

def add_author_to_list(author_list: list) -> list:
    """ Make sure the manually maintained extra authors are in the list

    Missing names are appended to ``author_list`` itself (the list is
    modified in place), which is also returned.

    :param author_list: list of authors
    :returns: updated list of authors
    """
    for extra in ('T. Henning',):
        if extra in author_list:
            continue
        author_list.append(extra)
    return author_list

# Build the list of MPIA author names:
#  * start from the staff list of the MPIA website, keeping the second field of each entry
#  * drop non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering already)
#  * add the authors whose MPIA name and name on papers are inconsistent
mpia_authors = add_author_to_list(
    [entry[1]
     for entry in mpia.get_mpia_mitarbeiter_list()
     if filter_non_scientists(entry[1])]
)

In [4]:
# Get the new papers (reported below as today's arXiv papers)
new_papers = get_new_papers()
# add manual references
# (identifiers listed here are resolved with get_paper_from_identifier and
#  appended to the new papers; with an empty list nothing is added)
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    """Apply ``fn`` to ``value``, falling back to ``value`` itself on failure

    :param fn: callable invoked as ``fn(value, *args, **kwargs)``
    :param value: first argument of ``fn``, also the fallback result
    :returns: the result of the call, or ``value`` unchanged if the call
              raised any ``Exception``
    """
    try:
        outcome = fn(value, *args, **kwargs)
    except Exception:
        # deliberate best effort: keep the input when it cannot be processed
        outcome = value
    return outcome

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    # (names that cannot be converted are kept as they are by robust_call)
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # Matched authors are wrapped in <mark>...</mark>. Test for the tag itself:
    # a bare 'mark' substring also selects surnames such as "Newmark".
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if '<mark>' in hl]
    # the paper now carries the normalised, highlighted author names
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))
print("""          {0:,d} with possible author matches""".format(len(candidates)))

M. Samland  ->  M. Samland  |  ['M. Samland']
Y. Wang  ->  Y. Wang  |  ['Y. Wang']


S. Belladitta  ->  S. Belladitta  |  ['S. Belladitta']
J. Wolf  ->  D. J. Wolf  |  ['J. Wolf']
Arxiv has 55 new papers today
          3 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# documents: (paper_id, markdown text) of every paper that was processed
# failed:    (paper, reason) of every paper that was rejected or crashed
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalise the identifier: lower case, no "arxiv:" prefix, no
    # surrounding whitespace. Note that r'\n' is a raw string: it removes the
    # literal two-character sequence backslash + n, not a newline character.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']

    folder = f'tmp_{paper_id}'

    try:
        # Only retrieve the source when its folder does not exist yet
        if not os.path.isdir(folder):
            folder = retrieve_document_source(paper_id, folder)

        try:
            doc = latex.LatexDocument(folder, validation=validation)
        except AffiliationError as affilerror:
            # Affiliation check failed: record the reason, go to the next paper
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue

        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            # author counts disagree: use the (already highlighted) arXiv list
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors],
                mpia_authors, verbose=True)
        # Fall back to the arXiv abstract when none was extracted from the source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']

        doc.comment = (get_markdown_badge(paper_id) +
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"

        full_md = doc.generate_markdown_text()

        full_md += get_markdown_qrcode(paper_id)

        # replace citations
        # (best effort: on failure the text is kept as it was generated)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)

        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other failure: warn, record the reason and carry on with the
        # remaining papers
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/3 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2509.08044


extracting tarball to tmp_2509.08044... done.




Issues with the citations
list index out of range
Retrieving document from  https://arxiv.org/e-print/2509.08172
extracting tarball to tmp_2509.08172... done.
Retrieving document from  https://arxiv.org/e-print/2509.08793


extracting tarball to tmp_2509.08793... done.




Issues with the citations
list index out of range


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# The log directory may not exist yet
os.makedirs(os.path.dirname(logfile), exist_ok=True)

# Explicit UTF-8: titles and abstracts contain non-ASCII characters and the
# platform default encoding cannot always represent them
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers for which a document was produced
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sort by reason so that identical reasons share a single heading below
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are shown in green, any other failure in red
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        # paper summary followed by one extra table row holding the reason
        data = Markdown(
                paper.generate_markdown_text() +
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')

        # Every failure is displayed here (and all are in the logs); to show
        # only the important errors, guard the call with
        # `if color in ('red',):`
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2509.08044-b31b1b.svg)](https://arxiv.org/abs/2509.08044) | **spherical: A Comprehensive Database and Automated Pipeline for VLT/SPHERE High-Contrast Imaging**  |
|| <mark>M. Samland</mark> |
|*Appeared on*| *2025-09-11*|
|*Comments*| *4 pages, 0 figures, submitted to JOSS*|
|**Abstract**|            I present spherical (this https URL), a software package and database designed for the ESO VLT/SPHERE high-contrast imager. SPHERE has produced the world's largest archive of direct imaging observations of exoplanets and circumstellar disks, but its heterogeneous metadata and fragmented reduction tools make end-to-end analysis labor-intensive. spherical addresses this by combining (1) a curated, regularly updated, and searchable database of all SPHERE observations, cross-matched with stellar properties and observing conditions, and (2) a Python-based, script-driven pipeline for the Integral Field Spectrograph (IFS). The database, archived on Zenodo (this https URL) and reproducible from the ESO archive, currently includes about 6000 IRDIS dual-band imaging, about 1000 IRDIS polarimetric, and about 4500 IFS sequences, with additional modes (ZIMPOL, IRDIS-LSS, SAM) planned. The pipeline automates raw data retrieval, calibration, and IFS reduction with the adapted open-source CHARIS instrument pipeline, followed by astrometric and photometric calibration and post-processing with TRAP for companion detection and spectral extraction. spherical lowers the barrier from raw files to science-ready products, enabling homogeneous population studies, atmospheric characterization of companions, and efficient survey follow-up, while remaining interoperable with community tools such as VIP, pyKLIP, and IRDAP.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2509.08793-b31b1b.svg)](https://arxiv.org/abs/2509.08793) | **Frequent Extreme Galaxy-scale Outflows among Luminous Early Quasars**  |
|| W. Liu, et al. -- incl., <mark>S. Belladitta</mark>, <mark>J. Wolf</mark> |
|*Appeared on*| *2025-09-11*|
|*Comments*| *24 pages, 6 figures. Nature in review. This manuscript represents the original submitted version following journal policy. Comments are welcome!*|
|**Abstract**|            The existence of abundant post-starburst/quiescent galaxies just $\sim$1-2 Gyrs after the Big Bang challenges our current paradigm of galaxy evolution. Cosmological simulations suggest that quasar feedback is likely the most promising mechanism responsible for such rapid quenching. Here we report a high detection rate (6/27) of exceptionally fast and powerful galaxy-scale outflows traced by [O III] emission in z $\sim$ 5-6 luminous quasars as revealed by the James Webb Space Telescope (JWST), with velocity up to $\sim$8400 km s$^{-1}$ and order-of-magnitude kinetic energy outflow rates up to $\sim$260% the observed quasar bolometric luminosities. This fraction is $\gtrsim$6.6 times larger than that in luminosity-matched comparison samples at z $\sim$ 1.5-3.5 (0/58) and z $<$ 1 (5/148). These extreme outflows are comparable to or even faster than the most rapid [O III] outflows reported at z $\lesssim$ 3, and could reach the circumgalactic medium (CGM) or even the intergalactic medium (IGM). The average kinetic energy outflow rate of our entire sample is $\sim$230$\times$ higher than that at cosmic noon. The substantially higher frequency of outflows with energetics well above the threshold for negative feedback in our sample strongly suggests that quasar feedback plays a significant role in efficiently quenching/regulating early massive galaxies.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2509.08172-b31b1b.svg)](https://arxiv.org/abs/2509.08172) | **Short GRB 090510: a magnetized neutron star binary merger leading to a black hole**  |
|| J. A. Rueda, R. Ruffini, <mark>Y. Wang</mark> |
|*Appeared on*| *2025-09-11*|
|*Comments*| *Accepted for publication in the Journal of High Energy Astrophysics (JHEAP)*|
|**Abstract**|            We model the short gamma-ray bursts (GRB) 090510 as the product of a magnetized neutron star (NS) binary merger. Accounting for the NS critical mass constraint given by the mass of PSR J0952--0607, we infer that GRB 090510 was a highly-magnetized NS-NS merger that left as remnant a Kerr black hole (BH) of $2.4 M_\odot$ with a low-mass accretion disk. The gamma-ray precursor is powered by the magnetic energy released during the merger of the NSs. The prompt emission originates at the transparency of an ultra-relativistic $e^+e^-$ pair-plasma produced by the overcritical electric field induced by the rotating strong magnetic field around the merged object before it reaches the critical mass, the GeV emission by the extractable energy of the newborn BH, and the X-ray afterglow by accretion onto it. We derive the masses of the merging NSs, their magnetic fields, the BH mass, spin, and irreducible mass, the strength of the magnetic field, the disk mass, and obtain an estimate of the gravitational-wave emission during the merger phase preceding the prompt short GRB emission. The inferred parameters agree with up-to-date numerical relativity simulations, confirming that strong magnetic fields above $10^{14}$ G develop in NS-NS mergers and that mergers leading to a central BH remnant have low-mass disks of $\sim 10^{-2} M_\odot$. We also advance the possibility that quasi-period oscillations of tens of Hz of frequency due to Lense-Thirring precession of the matter surrounding the merged object before BH formation can explain the successive spikes following the prompt emission peak.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    Figures referenced in ``md``, either as ``[Fig...](path)`` links or as
    ``<img src="path" .../>`` tags, are copied below ``directory`` keeping
    their relative path. Online figures (``http`` in the path) and files that
    do not exist are skipped. The markdown text is then written, UTF-8
    encoded, to ``directory/md_fname``.

    :param md: markdown text to export
    :param md_fname: name of the markdown file to write
    :param directory: output directory, created (with its parents) if missing
    :raises RuntimeError: if ``directory`` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs also creates missing parents (e.g. `_build` of `_build/html/`)
        os.makedirs(directory, exist_ok=True)

    # Non-greedy patterns: several links on one line are all found, instead
    # of one match running from the first "[Fig" to the last ")".
    fig_fnames = (re.findall(r'\[Fig.*?\]\((.*?)\)', md) +
                  re.findall(r'\<img src="([^>\s]*)"[^>]*/>', md))
    print("found figures", fig_fnames)
    exported = []
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
        exported.append(destfname)
    md_path = os.path.join(directory, md_fname)
    # Explicit UTF-8: the text may contain non-ASCII characters
    with open(md_path, 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", md_path)
    # list only the figures that were actually copied
    for destfname in exported:
        print("    + " + destfname)

In [8]:
# Write one markdown file per processed paper, named after its identifier,
# together with the figures it references
for paper_id, md in documents:
    export_markdown_summary(md, "{0:s}.md".format(paper_id), '_build/html/')

found figures []
exported in  _build/html/2509.08044.md
found figures ['tmp_2509.08793/./J1620+52022panel.png', 'tmp_2509.08793/./J0759+18002panel.png', 'tmp_2509.08793/./J0829+03032panel.png', 'tmp_2509.08793/./J0840+56242panel.png', 'tmp_2509.08793/./J0859+25202panel.png', 'tmp_2509.08793/./J1141+71192panel.png', 'tmp_2509.08793/./spec_compare_outflow_zoom.png', 'tmp_2509.08793/./o3prof_compare_outflow_zoom.png', 'tmp_2509.08793/./cumulative_v98.png', 'tmp_2509.08793/./cumulative_w90.png']
copying  tmp_2509.08793/./J1620+52022panel.png to _build/html/
copying  tmp_2509.08793/./J0759+18002panel.png to _build/html/
copying  tmp_2509.08793/./J0829+03032panel.png to _build/html/
copying  tmp_2509.08793/./J0840+56242panel.png to _build/html/
copying  tmp_2509.08793/./J0859+25202panel.png to _build/html/
copying  tmp_2509.08793/./J1141+71192panel.png to _build/html/
copying  tmp_2509.08793/./spec_compare_outflow_zoom.png to _build/html/
copying  tmp_2509.08793/./o3prof_compare_outflow_zoom

## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render the markdown text of every exported document for a quick check
for document in documents:
    display(Markdown(document[1]))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\tightlist}{$
$  \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}$
$\newcommand{\CSLBlock}[1]{\hfill\break\parbox[t]{\linewidth}{\strut\ignorespaces#1\strut}}$
$\newcommand{\CSLLeftMargin}[1]{\parbox[t]{\csllabelwidth}{\strut#1\strut}}$
$\newcommand{\CSLRightInline}[1]{\parbox[t]{\linewidth - \csllabelwidth}{\strut#1\strut}}$
$\newcommand{\CSLIndent}[1]{\hspace{\cslhangindent}#1}$
$\newcommand{\rorlogo}{$
$\begin{tikzpicture}[y=1cm, x=1cm, yscale=\rorglobalscale,xscale=\rorglobalscale, every node/.append style={scale=\rorglobalscale}, inner sep=0pt, outer sep=0pt]$
$  \begin{scope}[even odd rule,line join=round,miter limit=2.0,shift={(-0.025, 0.0216)}]$
$    \path[fill=c53baa1,nonzero rule,line join=round,miter limit=2.0] (1.8164, 3.012) -- (1.4954, 2.5204) -- (1.1742, 3.012) -- (1.8164, 3.012) -- cycle;$
$    \path[fill=c53baa1,nonzero rule,line join=round,miter limit=2.0] (3.1594, 3.012) -- (2.8385, 2.5204) -- (2.5172, 3.012) -- (3.1594, 3.012) -- cycle;$
$    \path[fill=c53baa1,nonzero rule,line join=round,miter limit=2.0] (1.1742, 0.0669) -- (1.4954, 0.5588) -- (1.8164, 0.0669) -- (1.1742, 0.0669) -- cycle;$
$    \path[fill=c53baa1,nonzero rule,line join=round,miter limit=2.0] (2.5172, 0.0669) -- (2.8385, 0.5588) -- (3.1594, 0.0669) -- (2.5172, 0.0669) -- cycle;$
$    \path[fill=c202826,nonzero rule,line join=round,miter limit=2.0] (3.8505, 1.4364).. controls (3.9643, 1.4576) and (4.0508, 1.5081) .. (4.1098, 1.5878).. controls (4.169, 1.6674) and (4.1984, 1.7642) .. (4.1984, 1.8777).. controls (4.1984, 1.9719) and (4.182, 2.0503) .. (4.1495, 2.1132).. controls (4.1169, 2.1762) and (4.0727, 2.2262) .. (4.0174, 2.2635).. controls (3.9621, 2.3006) and (3.8976, 2.3273) .. (3.824, 2.3432).. controls (3.7505, 2.359) and (3.6727, 2.367) .. (3.5909, 2.367) -- (2.9676, 2.367) -- (2.9676, 1.8688).. controls (2.9625, 1.8833) and (2.9572, 1.8976) .. (2.9514, 1.9119).. controls (2.9083, 2.0164) and (2.848, 2.1056) .. (2.7705, 2.1791).. controls (2.6929, 2.2527) and (2.6014, 2.3093) .. (2.495, 2.3487).. controls (2.3889, 2.3881) and (2.2728, 2.408) .. (2.1468, 2.408).. controls (2.0209, 2.408) and (1.905, 2.3881) .. (1.7986, 2.3487).. controls (1.6925, 2.3093) and (1.6007, 2.2527) .. (1.5232, 2.1791).. controls (1.4539, 2.1132) and (1.3983, 2.0346) .. (1.3565, 1.9436).. controls (1.3504, 2.009) and (1.3351, 2.0656) .. (1.3105, 2.1132).. controls (1.2779, 2.1762) and (1.2338, 2.2262) .. (1.1785, 2.2635).. controls (1.1232, 2.3006) and (1.0586, 2.3273) .. (0.985, 2.3432).. controls (0.9115, 2.359) and (0.8337, 2.367) .. (0.7519, 2.367) -- (0.1289, 2.367) -- (0.1289, 0.7562) -- (0.4837, 0.7562) -- (0.4837, 1.4002) -- (0.6588, 1.4002) -- (0.9956, 0.7562) -- (1.4211, 0.7562) -- (1.0118, 1.4364).. controls (1.1255, 1.4576) and (1.2121, 1.5081) .. (1.2711, 1.5878).. controls (1.2737, 1.5915) and (1.2761, 1.5954) .. (1.2787, 1.5991).. controls (1.2782, 1.5867) and (1.2779, 1.5743) .. (1.2779, 1.5616).. controls (1.2779, 1.4327) and (1.2996, 1.3158) .. (1.3428, 1.2113).. controls (1.3859, 1.1068) and (1.4462, 1.0176) .. (1.5237, 0.944).. controls (1.601, 0.8705) and (1.6928, 0.8139) .. (1.7992, 0.7744).. controls (1.9053, 0.735) and (2.0214, 0.7152) .. (2.1474, 0.7152).. controls (2.2733, 0.7152) and (2.3892, 0.735) .. (2.4956, 0.7744).. 
controls (2.6016, 0.8139) and (2.6935, 0.8705) .. (2.771, 0.944).. controls (2.8482, 1.0176) and (2.9086, 1.1068) .. (2.952, 1.2113).. controls (2.9578, 1.2253) and (2.9631, 1.2398) .. (2.9681, 1.2544) -- (2.9681, 0.7562) -- (3.3229, 0.7562) -- (3.3229, 1.4002) -- (3.4981, 1.4002) -- (3.8349, 0.7562) -- (4.2603, 0.7562) -- (3.8505, 1.4364) -- cycle(0.9628, 1.7777).. controls (0.9438, 1.7534) and (0.92, 1.7357) .. (0.8911, 1.7243).. controls (0.8623, 1.7129) and (0.83, 1.706) .. (0.7945, 1.7039).. controls (0.7588, 1.7015) and (0.7252, 1.7005) .. (0.6932, 1.7005) -- (0.4839, 1.7005) -- (0.4839, 2.0667) -- (0.716, 2.0667).. controls (0.7477, 2.0667) and (0.7805, 2.0643) .. (0.8139, 2.0598).. controls (0.8472, 2.0553) and (0.8768, 2.0466) .. (0.9025, 2.0336).. controls (0.9282, 2.0206) and (0.9496, 2.0021) .. (0.9663, 1.9778).. controls (0.9829, 1.9534) and (0.9914, 1.9209) .. (0.9914, 1.8799).. controls (0.9914, 1.8362) and (0.9819, 1.8021) .. (0.9628, 1.7777) -- cycle(2.6125, 1.3533).. controls (2.5889, 1.2904) and (2.5553, 1.2359) .. (2.5112, 1.1896).. controls (2.4672, 1.1433) and (2.4146, 1.1073) .. (2.3529, 1.0814).. controls (2.2916, 1.0554) and (2.2228, 1.0427) .. (2.1471, 1.0427).. controls (2.0712, 1.0427) and (2.0026, 1.0557) .. (1.9412, 1.0814).. controls (1.8799, 1.107) and (1.8272, 1.1433) .. (1.783, 1.1896).. controls (1.7391, 1.2359) and (1.7052, 1.2904) .. (1.6817, 1.3533).. controls (1.6581, 1.4163) and (1.6465, 1.4856) .. (1.6465, 1.5616).. controls (1.6465, 1.6359) and (1.6581, 1.705) .. (1.6817, 1.7687).. controls (1.7052, 1.8325) and (1.7388, 1.8873) .. (1.783, 1.9336).. controls (1.8269, 1.9799) and (1.8796, 2.0159) .. (1.9412, 2.0418).. controls (2.0026, 2.0675) and (2.0712, 2.0804) .. (2.1471, 2.0804).. controls (2.223, 2.0804) and (2.2916, 2.0675) .. (2.3529, 2.0418).. controls (2.4143, 2.0161) and (2.467, 1.9799) .. (2.5112, 1.9336).. controls (2.5551, 1.8873) and (2.5889, 1.8322) .. (2.6125, 1.7687).. 
controls (2.636, 1.705) and (2.6477, 1.6359) .. (2.6477, 1.5616).. controls (2.6477, 1.4856) and (2.636, 1.4163) .. (2.6125, 1.3533) -- cycle(3.8015, 1.7777).. controls (3.7825, 1.7534) and (3.7587, 1.7357) .. (3.7298, 1.7243).. controls (3.701, 1.7129) and (3.6687, 1.706) .. (3.6333, 1.7039).. controls (3.5975, 1.7015) and (3.5639, 1.7005) .. (3.5319, 1.7005) -- (3.3226, 1.7005) -- (3.3226, 2.0667) -- (3.5547, 2.0667).. controls (3.5864, 2.0667) and (3.6192, 2.0643) .. (3.6526, 2.0598).. controls (3.6859, 2.0553) and (3.7155, 2.0466) .. (3.7412, 2.0336).. controls (3.7669, 2.0206) and (3.7883, 2.0021) .. (3.805, 1.9778).. controls (3.8216, 1.9534) and (3.8301, 1.9209) .. (3.8301, 1.8799).. controls (3.8301, 1.8362) and (3.8206, 1.8021) .. (3.8015, 1.7777) -- cycle;$
$  \end{scope}$
$\end{tikzpicture}$
$}$
$\newcommand{\@}{biblabel#1 $
$ }$
$\newcommand{\@}{cite#1#2}$
$\newcommand{\citeproctext}{#2}$
$\newcommand\languageshorthands{#1 $
$}$
$\newcommand\rorglobalscale{0.1}$
$\newcommand{\labelenumi}{\arabic{enumi}.}$
$\newcommand{\labelenumi}{\arabic{enumi}.}$</div>



<div id="title">

# spherical: A Comprehensive Database and Automated Pipeline forVLT/SPHERE High-Contrast Imaging

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2509.08044-b31b1b.svg)](https://arxiv.org/abs/2509.08044)<mark>Appeared on: 2025-09-11</mark> -  _4 pages, 0 figures, submitted to JOSS_

</div>
<div id="authors">

<mark>M. Samland</mark>

</div>
<div id="abstract">

**Abstract:**            I present spherical (this https URL), a software package and database designed for the ESO VLT/SPHERE high-contrast imager. SPHERE has produced the world's largest archive of direct imaging observations of exoplanets and circumstellar disks, but its heterogeneous metadata and fragmented reduction tools make end-to-end analysis labor-intensive. spherical addresses this by combining (1) a curated, regularly updated, and searchable database of all SPHERE observations, cross-matched with stellar properties and observing conditions, and (2) a Python-based, script-driven pipeline for the Integral Field Spectrograph (IFS). The database, archived on Zenodo (this https URL) and reproducible from the ESO archive, currently includes about 6000 IRDIS dual-band imaging, about 1000 IRDIS polarimetric, and about 4500 IFS sequences, with additional modes (ZIMPOL, IRDIS-LSS, SAM) planned. The pipeline automates raw data retrieval, calibration, and IFS reduction with the adapted open-source CHARIS instrument pipeline, followed by astrometric and photometric calibration and post-processing with TRAP for companion detection and spectral extraction. spherical lowers the barrier from raw files to science-ready products, enabling homogeneous population studies, atmospheric characterization of companions, and efficient survey follow-up, while remaining interoperable with community tools such as VIP, pyKLIP, and IRDAP.         

</div>

<div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2509.08044"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\bibinfo}[2]{#2}$
$\newcommand{\eprint}[2][]{\url{#2}}$
$\newcommand{\doi}[1]{\url{https://doi.org/#1}}$
$\newcommand{\bibinfo}[2]{#2}$
$\newcommand{\eprint}[2][]{\url{#2}}$
$\newcommand{\doi}[1]{\url{https://doi.org/#1}}$
$\newcommand{\vdag}{(v)^\dagger}$
$\newcommand$
$\newcommand$
$\newcommand{\xh}{<\chi_{H I}>}$
$\newcommand{\TQ}{t_{Q}}$
$\newcommand{\paa}{{Pa\alpha}}$
$\newcommand{\pab}{{Pa\beta}}$
$\newcommand{\av}{{A_{V}}}$
$\newcommand{\ebv}{E(B-V)}$
$\newcommand{\siv}{[S~{\sc iv}] 10.51 \mum}$
$\newcommand{\oiiitext}{[O~{\sc iii}]}$
$\newcommand{\sivtext}{[S~{\sc iv}]}$
$\newcommand{\lya}{Ly\alpha}$
$\newcommand{\Lbol}{L_{\rm bol}}$
$\newcommand{\edd}{\lambda_{Edd}}$
$\newcommand{\arcsec}{^{\prime\prime}}$
$\newcommand{\cii}{[C~{\sc ii}] 158 \mum}$
$\newcommand{\ciitext}{[C~{\sc ii}]}$
$\newcommand{\mum}{\ifmmode{\rm \mu m}\else{\mum}\fi}$
$\newcommand{\vdisp}{\vdisp}$
$\newcommand{\wba}{w_{80}}$
$\newcommand{\wjiu}{w_{90}}$
$\newcommand{\vwu}{{v_{50}}}$
$\newcommand{\vjiu}{{v_{90}}}$
$\newcommand{\vyi}{{v_{10}}}$
$\newcommand{\vbasi}{{v_{84}}}$
$\newcommand{\vyiliu}{{v_{16}}}$
$\newcommand{\flux}{erg cm^{-2} s^{-1}}$
$\newcommand{\fsb}{erg cm^{-2} s^{-1} }$
$\newcommand{\Lwu}{{\lambda L_{\lambda}(5100)}}$
$\newcommand{\vjiuba}{{v_{98}}}$
$\newcommand{\vlingwu}{{v_{05}}}$
$\newcommand{\vjiuwu}{{v_{95}}}$
$\newcommand{\ajiuyi}{{A_{91}}}$
$\newcommand{\ajiuyiha}{{A_{91,H\alpha}}}$
$\newcommand{\ajiuyio}{{A_{91,[O~\sc{III}]}}}$
$\newcommand{\spi}{{\it Spitzer}}$
$\newcommand{\her}{{\it Herschel}}$
$\newcommand{\oi}{\hbox{[O {\scriptsize I}]}}$
$\newcommand{\oii}{{[O {\scriptsize II}]}}$
$\newcommand{\oiii}{{[O~{\sc iii}] \lambda5007}}$
$\newcommand{\oiiiab}{{[O~{\sc iii}] \lambda\lambda4959,5007}}$
$\newcommand{\oiiib}{{[O~{\sc iii}] \lambda4959}}$
$\newcommand{\nv}{\hbox{N {\scriptsize V}}}$
$\newcommand{\civ}{\hbox{C {\scriptsize IV} \lambda1549}}$
$\newcommand{\nev}{\hbox{[Ne {\scriptsize V}]}}$
$\newcommand{\nii}{\hbox{[N {\scriptsize II}]}}$
$\newcommand{\sii}{\hbox{[S {\scriptsize II}]}}$
$\newcommand{\siitext}{\hbox{[S {\scriptsize II}]}}$
$\newcommand{\ha}{\hbox{H\alpha}}$
$\newcommand{\hb}{\hbox{H\beta}}$
$\newcommand{\hg}{\hbox{H\gamma}}$
$\newcommand{\hd}{\hbox{H\delta}}$
$\newcommand{\mgii}{\hbox{Mg {\scriptsize II}}}$
$\newcommand{\feii}{\hbox{Fe {\scriptsize II}}}$
$\newcommand{\red}[1]{\textcolor{red}{#1}}$
$\newcommand{\blue}[1]{\textcolor{blue}{#1}}$
$\newcommand{\kms}{km s^{-1}}$
$\newcommand{\msun}{M_{\odot}}$
$\newcommand{\msunyr}{{M_{\odot} yr^{-1}}}$
$\newcommand{\lsun}{\ensuremath{\mathrm{L}_{\odot}}}$
$\newcommand{\eden}{cm^{-3}}$
$\newcommand{\momfluxsfr}{\dot{P}_{SFR} }$
$\newcommand{\momfluxagn}{\dot{P}_{QSO} }$
$\newcommand{\momfluxout}{\dot{P}_{outflow} }$
$\newcommand{\momfluxratio}{\frac{\dot{P}_{outflow}}{\dot{P}_{AGN}}}$
$\newcommand{\msigma}{M_{\bullet}-\sigma~}$
$\newcommand{\ergs}{erg s^{-1}}$
$\newcommand{\ergscm}{erg s^{-1} cm^{-2}}$
$\newcommand{\myr}{M_\odot~yr^{-1}}$
$\newcommand{\loghn}{log(\nii/\ha) }$
$\newcommand{\logohb}{log(\oiii/\hb) }$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand\url{#1}$
$\newcommand{\urlprefix}{URL }$
$\newcommand\url{#1}$
$\newcommand{\urlprefix}{URL }$</div>



<div id="title">

# Frequent Extreme Galaxy-scale Outflows among Luminous Early Quasars

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2509.08793-b31b1b.svg)](https://arxiv.org/abs/2509.08793)<mark>Appeared on: 2025-09-11</mark> -  _24 pages, 6 figures. Nature in review. This manuscript represents the original submitted version following journal policy. Comments are welcome!_

</div>
<div id="authors">

W. Liu, et al. -- incl., <mark>S. Belladitta</mark>, <mark>J. Wolf</mark>

</div>
<div id="abstract">

**Abstract:**            The existence of abundant post-starburst/quiescent galaxies just $\sim$1-2 Gyrs after the Big Bang challenges our current paradigm of galaxy evolution. Cosmological simulations suggest that quasar feedback is likely the most promising mechanism responsible for such rapid quenching. Here we report a high detection rate (6/27) of exceptionally fast and powerful galaxy-scale outflows traced by [O III] emission in z $\sim$ 5-6 luminous quasars as revealed by the James Webb Space Telescope (JWST), with velocity up to $\sim$8400 km s$^{-1}$ and order-of-magnitude kinetic energy outflow rates up to $\sim$260% the observed quasar bolometric luminosities. This fraction is $\gtrsim$6.6 times larger than that in luminosity-matched comparison samples at z $\sim$ 1.5-3.5 (0/58) and z $<$ 1 (5/148). These extreme outflows are comparable to or even faster than the most rapid [O III] outflows reported at z $\lesssim$ 3, and could reach the circumgalactic medium (CGM) or even the intergalactic medium (IGM). The average kinetic energy outflow rate of our entire sample is $\sim$230$\times$ higher than that at cosmic noon. The substantially higher frequency of outflows with energetics well above the threshold for negative feedback in our sample strongly suggests that quasar feedback plays a significant role in efficiently quenching/regulating early massive galaxies.         

</div>

<div id="div_fig1">

<img src="tmp_2509.08793/./J1620+52022panel.png" alt="Fig2" width="100%"/>

**Figure 2. -** **Top:** The object with the fastest $\oiiitext$ outflow ($|$\vjiuba$|$$\sim$ 8400 $\kms$) discovered in our sample, with the JWST spectrum (black), best-fit emission line profiles (blue), iron emission (cyan), continuum (orange), and residual (gray dotted line). The best-fit individual Gaussian components for $\hb$ and $\hg$ are shown in red and those for $\oi$iiab are shown in green. Systemic velocities of individual emission lines are shown in vertical black dotted lines. The spectral windows adopted for fitting the quasar pseudo continuum are marked by the gray thick bars. The detector gap and adjacent noisy regions not used in the fitting are masked by the vertical gray shaded region. **Bottom:**$\hb$(left) and $\oi$ii(right) line profiles with their best-fit models (blue solid lines) and individual components (dashed lines). (*fig:profile*)

</div>
<div id="div_fig2">

<img src="tmp_2509.08793/./J0759+18002panel.png" alt="Fig1.1" width="20%"/><img src="tmp_2509.08793/./J0829+03032panel.png" alt="Fig1.2" width="20%"/><img src="tmp_2509.08793/./J0840+56242panel.png" alt="Fig1.3" width="20%"/><img src="tmp_2509.08793/./J0859+25202panel.png" alt="Fig1.4" width="20%"/><img src="tmp_2509.08793/./J1141+71192panel.png" alt="Fig1.5" width="20%"/>

**Figure 1. -** Same as Fig. \ref{fig:profile} but for the remaining 5 extreme outflows discovered in our sample. (*fig:samples*)

</div>
<div id="div_fig3">

<img src="tmp_2509.08793/./spec_compare_outflow_zoom.png" alt="Fig3.1" width="25%"/><img src="tmp_2509.08793/./o3prof_compare_outflow_zoom.png" alt="Fig3.2" width="25%"/><img src="tmp_2509.08793/./cumulative_v98.png" alt="Fig3.3" width="25%"/><img src="tmp_2509.08793/./cumulative_w90.png" alt="Fig3.4" width="25%"/>

**Figure 3. -** **Upper Left:**
Mean rest-frame spectra of all outflows (objects with $\vwu$$<$0 $\kms$, orange) and extreme outflows ($|$\vjiuba$|>$2700 $\kms$; red) in our sample, zoomed in to the $\hb$--$\oiiitext$ region, in comparison with outflow sources in _Shen sample_(blue) and _Wu-SDSS sample_(gray) with bolometric luminosity ranges matched to our sample. The spectra of individual objects are binned to 2 Å/pixel. The mean spectra are normalized with the mean flux density within 5080--5100Å. **Upper Right:** Same as left but for the best-fit $\oi$ii model profile, which are instead normalized at maximum flux density.
**Bottom:**
Cumulative distribution functions of $\oiiitext$ non-parametric kinematics measurements (left: $\vjiu$ba; right: $\wjiu$) for our sample (red), _Shen sample_(blue) and _Wu-SDSS sample_(gray). The 68\% confidence intervals are indicated by the shaded regions. (*fig:hist*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2509.08793"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# Select the generated markdown pages whose on-disk timestamp falls within
# the last `days` days.
files = glob('_build/html/*.md')
days = 7
# Timezone-aware "now" in UTC: the file timestamps below are also converted
# to UTC, so the difference is a true elapsed time whatever the local
# timezone of the machine is (naive local time minus naive UTC time would be
# off by the UTC offset).
now = datetime.now(timezone.utc)
max_age = timedelta(days=days)
res = []
for fk in files:
    # NOTE(review): st_ctime is the metadata-change time on Unix and the
    # creation time on Windows, not strictly the last content modification.
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc)
    delta = now - modified
    if delta <= max_age:
        res.append(fk)
# Reverse lexicographic order of the paths (glob yields each path once, so
# there are no ties to worry about).
res = sorted(res, reverse=True)
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")

129  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    Dates are compared as strings against ``str(date)`` of the cutoff day
    (``YYYY-MM-DD``), so the entries are expected to carry ISO formatted
    dates for the comparison to be chronological.

    :param lst: iterable of ``(filename, date)`` pairs, ``date`` being a string
    :param days: how many days back from today (local date) to keep
    :returns: generator over the selected filenames, latest date first
    """
    # The cutoff does not depend on the entry: compute it once, not per item.
    cutoff = str(datetime.date.today() - datetime.timedelta(days=days))
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date >= cutoff:
            yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date written in each document

    Only the first line containing ``Appeared on`` is used in each file;
    files without such a line contribute no entry.

    :param lst_file: iterable of paths of the files to scan
    :returns: list of ``(filename, date)`` tuples, where ``date`` is the
              stripped text found between ``Appeared on:`` and ``</mark>``
    """

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument, not over a notebook-level global, so the
    # function works for any list it is given.
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # first match wins: no need to read the rest of the file
                    break
    return dates

from glob import glob
# Select the documents from the "Appeared on" date written inside each page
# rather than from the file timestamps.
# Same '*.md' pattern as the timestamp-based listing, so only markdown pages
# are scanned (a bare '*md' also matches any name merely ending in "md").
lst = glob('_build/html/*.md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

8  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel holding `npub` slides

    :param npub: number of slides; their ids run from ``slide1`` to
                 ``slide<npub>``
    :returns: the HTML code as one newline-joined string
    """
    # Opening tag, split over two lines; the second one carries the
    # `data-flickity` options and the element id.
    opening = (
        '  <div class="carousel" ',
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    )
    cell_template = '    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>'
    cells = (cell_template.format(k=index) for index in range(1, npub + 1))
    return '\n'.join([*opening, *cells, "  </div>"])

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid holding `npub` slides

    :param npub: number of grid items; their ids run from ``slide1`` to
                 ``slide<npub>``
    :returns: the HTML code as one newline-joined string
    """
    rows = ['  <div class="grid"> ']
    rows += [
        f'    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>'
        for k in range(1, npub + 1)
    ]
    rows += ["  </div>"]
    return '\n'.join(rows)

In [13]:
# 7-day archive page: one carousel slide per selected document.
carousel = create_carousel(npub)

# Comma-separated lists of double-quoted items injected into the template:
# the base name of every selected document and the matching slide ids.
basenames = (path.split('/')[-1] for path in res)
docs = ', '.join(f'"{name:s}"' for name in basenames)
slides = ', '.join('"slide{0}"'.format(index) for index in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Fill the template placeholders one after the other, in this order.
for placeholder, content in (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "7-day archives"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
):
    page = page.replace(placeholder, content)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Daily page: same procedure as the 7-day archive, restricted to the
# documents that appeared within the last day.
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), " publications in the last day.")

carousel = create_carousel(npub)
# Comma-separated lists of double-quoted items injected into the template:
# the base name of every selected document and the matching slide ids.
basenames = (path.split('/')[-1] for path in res)
docs = ', '.join(f'"{name:s}"' for name in basenames)
slides = ', '.join('"slide{0}"'.format(index) for index in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Fill the template placeholders one after the other, in this order.
for placeholder, content in (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "Daily"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
):
    page = page.replace(placeholder, content)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

4  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

# Upper bound on the number of papers shown in the grid.
max_pub = 6
# Most recent appearance dates first; reversed(sorted(...)) is kept so that
# documents sharing a date come out in the same order as before.
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), max_pub)]
# Size the grid from what was actually selected: with fewer than `max_pub`
# documents available, the slide list would otherwise be longer than the
# document list.
npub = len(res)
print(f"{npub:d} publications selected (at most {max_pub:d} requested).")

grid = create_grid(npub)
# Comma-separated lists of double-quoted items injected into the template:
# the base name of every selected document and the matching slide ids.
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
