# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for affiliation-related issues."""


class AffiliationError(RuntimeError):
    """Error raised when the affiliation check of a paper source fails."""

def validation(source: str):
    """Check the affiliations of a source before the TeX code gets parsed

    Delegates to `mpia.affiliation_verifications`; any result other than
    ``True`` is taken as the reason of the failure.

    :param source: the source to check (passed on unchanged to
                   `mpia.affiliation_verifications`)
    :raises AffiliationError: if the verification does not return ``True``
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # formatting (instead of `+`) keeps the intended exception type even
        # when `check` is not a string (e.g. False or None)
        raise AffiliationError(f"mpia.affiliation_verifications: {check}")

        
# 'always': report every AffiliationWarning, not only the first occurrence
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g. ``2502.15581``)
    :returns: HTML snippet (usable in markdown) embedding the QR code image
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # the quotes must enclose the whole `src` value: quoting only the arXiv
    # address makes the quote characters part of the QR code payload
    txt = f"""<img src="{url}https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Strip the LaTeX commands wrapping non-western encoded characters

    Only the CJK (UTF8, gbsn) environment is handled so far: its begin/end
    commands are dropped and the enclosed characters are kept.
    The list of handled commands may need to grow.

    :param text: the text to clean
    :return: the cleaned text
    """
    cjk_environment = r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})"
    return re.sub(cjk_environment, r"\2", text)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    A parenthesised part of the name (often the non-western spelling) is
    removed before computing the initials and appended again at the end.

    :param name: full name
    :returns: initials of all tokens but the last one, followed by the last
              token; an empty string when `name` contains no token
    """
    initials = []
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.findall(r"\((.*?)\)", name)
        # an unbalanced '(' gives no match: keep the name as it is
        if found:
            suffix = found[0]
            name = name.replace(f"({suffix})", '')
    split = name.split()
    if split:
        for token in split[:-1]:
            # stray hyphens ("Jean-", "-") produce empty parts without initial
            parts = [k for k in token.split('-') if k]
            if parts:
                initials.append('-'.join(k[0] + '.' for k in parts))
        initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff ("Mitarbeiter") list webpage from mpia.de to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Rejects IT, administration and technical staff. The test is a plain
    substring match of each listed entry against `name`.

    :param name: name
    :returns: False if name is not a scientist
    """
    non_scientists = ('Licht', 'Binroth', 'Witzel', 'Jordan',
                      'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                      'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                      'Dehen', 'Beckmann', 'Jager', 'Jäger')
    return not any(entry in name for entry in non_scientists)

def add_author_to_list(author_list: list) -> list:
    """ Make sure the manually maintained extra authors are in the list

    The given list is updated in place and returned as well.

    :param author_list: list of authors
    :returns: updated list of authors
    """
    for extra_author in ('T. Henning',):
        if extra_author in author_list:
            continue
        author_list.append(extra_author)
    return author_list

# Author names come from the MPIA website (mpia.get_mpia_mitarbeiter_list()
# already does some filtering); non-scientists are dropped here and the
# authors whose MPIA name differs from the one used on papers are added.
mpia_authors = add_author_to_list([
    entry[1]
    for entry in mpia.get_mpia_mitarbeiter_list()
    if filter_non_scientists(entry[1])
])

In [4]:
# all new papers from arXiv
new_papers = get_new_papers()
# add manual references
# (every identifier listed here is fetched individually and appended)
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    """ Apply `fn` to `value`, falling back to `value` itself on failure

    :param fn: callable invoked as ``fn(value, *args, **kwargs)``
    :param value: first argument of `fn`, also the fallback result
    :returns: the result of the call, or `value` if the call raised an Exception
    """
    try:
        result = fn(value, *args, **kwargs)
    except Exception:
        result = value
    return result

# papers with at least one author matching the MPIA author list
candidates = []
for paperk in new_papers:
    # Check author list with their initials
    # (robust_call keeps the original name when the conversion raises)
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # keep the entries whose highlighted form contains 'mark'
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    # the paper record now carries the highlighted names instead of the originals
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

J. Olofsson  ->  J. Olofsson  |  ['J. Olofsson']
G. Chauvin  ->  G. Chauvin  |  ['G. Chauvin']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
J. Li  ->  J. Li  |  ['J. Li']
H. Jiang  ->  H. Jiang  |  ['H. Jiang']
K. Jahnke  ->  K. Jahnke  |  ['K. Jahnke']


Arxiv has 56 new papers today
          5 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# (paper_id, markdown text) of every paper processed without error
documents = []
# (paper, reason) of every paper that was rejected or raised an error
failed = []
for paper in tqdm(candidates):
    # normalise the identifier: lower case, 'arxiv:' prefix removed, literal
    # backslash-n sequences removed (raw string, i.e. not the newline
    # character), surrounding blanks stripped
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        # the source is only retrieved when its folder is not there yet
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` raises AffiliationError when the affiliation check fails
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is built but not used afterwards
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        # (a different number of authors means the arXiv author list is used instead)
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # fall back on the arXiv abstract when none was found in the document
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # header line: arXiv badge, appearance date and, if any, the arXiv comments
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        # (best effort: on failure the error is printed and the text is kept as is)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # any other error: warn, record the paper as failed and go on with the next one
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/5 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2502.15081


not a gzip file


Retrieving document from  https://arxiv.org/e-print/2502.15299


extracting tarball to tmp_2502.15299...

 done.
Retrieving document from  https://arxiv.org/e-print/2502.15446


extracting tarball to tmp_2502.15446... done.
Retrieving document from  https://arxiv.org/e-print/2502.15447


extracting tarball to tmp_2502.15447...

 done.
Retrieving document from  https://arxiv.org/e-print/2502.15581



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2502.15581...

 done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


Found 119 bibliographic references in tmp_2502.15581/main.bbl.
Issues with the citations
syntax error in line 228: '=' expected


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# the log folder is not guaranteed to exist yet: create it on demand
os.makedirs(os.path.dirname(logfile), exist_ok=True)

# titles and abstracts contain non-ASCII characters: the encoding is set
# explicitly instead of relying on the platform default
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    # same heading in the log as the one displayed in the notebook
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sorting by reason groups the papers under one heading per reason
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation mismatches are expected rejections (green),
        # anything else is a processing error (red)
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2502.15581-b31b1b.svg)](https://arxiv.org/abs/2502.15581) | **Euclid: Galaxy morphology and photometry from bulge-disc decomposition of Early Release Observations**  |
|| L. Quilley, et al. -- incl., <mark>K. Jahnke</mark> |
|*Appeared on*| *2025-02-24*|
|*Comments*| *Submitted to A&A, comments welcome. 27 pages, 19 figures, 6 tables (+ appendix of 6 pages, 18 figures - exemples gallery, 1 table)*|
|**Abstract**|            The background galaxies in Euclid Early Release Observations images of the Perseus cluster make up a remarkable sample in its combination of 0.57 deg$^2$ area, 25.3 and 23.2 AB mag depth, as well as 0.1" and 0.3" angular resolutions, in optical and near-IR bands, respectively. Towards characterising the history of the Hubble sequence, we perform a preliminary morphological analysis of 2445 and 12,786 galaxies with $I_E < 21$ and $I_E < 23$, respectively. We use single-Sérsic profiles and the sums of a Sérsic bulge and an exponential disc to model these galaxies with SourceXtractor++ and analyse their parameters in order to assess their consistencies and biases. The fitted galaxies to $I_E < 21$ span the various Hubble types with ubiquitous bulge and disc components, and a bulge-to-total light ratio B/T taking all values from 0 to 1. The median effective radius of the single-Sérsic profile is a biased estimate of galaxy size, intermediate between the bulge and disc effective radii, depending on B/T. The axis ratio of the single-Sérsic profile overestimates that of the disc, increasingly so with B/T. The model impacts the photometry with -0.08 to 0.01 mag median systematic $I_E$ offsets between single-Sérsic and bulge+disc total magnitudes, and a 0.05 to 0.15 mag dispersion, from low to high B/T. We measure a median 0.4 mag bulge-disc colour difference in $I_E - J_E$ that originates from the disc-dominated galaxies, whereas bulge-dominated galaxies have similar median colours of their components. Remarkably, we also measure redder-inside disc colour gradients, based on 5 to 10% systematic variations of disc effective radii between the optical and near-IR bands. This analysis demonstrates the usefulness and limits of single-Sérsic profile modelling, and the power of bulge-disc decomposition for reliably characterising the morphology of lenticulars and spirals in Euclid images.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2502.15447-b31b1b.svg)](https://arxiv.org/abs/2502.15447) | **Ultra-high-energy γ-ray emission associated with the tail of a bow-shock pulsar wind nebula**  |
|| Z. Cao, et al. -- incl., <mark>H. Jiang</mark> |
|*Appeared on*| *2025-02-24*|
|*Comments*| **|
|**Abstract**|            In this study, we present a comprehensive analysis of an unidentified point-like ultra-high-energy (UHE) $\gamma$-ray source, designated as 1LHAASO J1740+0948u, situated in the vicinity of the middle-aged pulsar PSR J1740+1000. The detection significance reached 17.1$\sigma$ (9.4$\sigma$) above 25$\,$TeV (100$\,$TeV). The source energy spectrum extended up to 300$\,$TeV, which was well fitted by a log-parabola function with $N0 = (1.93\pm0.23) \times 10^{-16} \rm{TeV^{-1}\,cm^{-2}\,s^{-2}}$, $\alpha = 2.14\pm0.27$, and $\beta = 1.20\pm0.41$ at E0 = 30$\,$TeV. The associated pulsar, PSR J1740+1000, resides at a high galactic latitude and powers a bow-shock pulsar wind nebula (BSPWN) with an extended X-ray tail. The best-fit position of the gamma-ray source appeared to be shifted by $0.2^{\circ}$ with respect to the pulsar position. As the (i) currently identified pulsar halos do not demonstrate such offsets, and (ii) centroid of the gamma-ray emission is approximately located at the extension of the X-ray tail, we speculate that the UHE $\gamma$-ray emission may originate from re-accelerated electron/positron pairs that are advected away in the bow-shock tail.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: '69117' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2502.15299-b31b1b.svg)](https://arxiv.org/abs/2502.15299) | **On type 1 active galactic nuclei with double-peaked [O~{\sc iii}]. I. data sample and basic results**  |
|| Q. Zheng, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2025-02-24*|
|*Comments*| *38 pages, 15 figures, accepted to be published in ApJS*|
|**Abstract**|            Double-peaked narrow emission lines (DPNELs) might be evidence for the existence of kpc-scale dual AGNs. There are so far large samples of objects with DPNELs in narrow emission line galaxies. Here, a systematic search is made to build a sample of type 1 AGNs with double-peaked [O~{\sc~iii}] from Data Release 16 of the Sloan Digital Sky Survey (SDSS). Through visually inspecting and fitting [O~{\sc~iii}], fitting broad H$\alpha$ emission lines, performing F-test for [O~{\sc~iii}] profiles, and checking broad H$\beta$ and [O~{\sc~iii}] emission lines, we select 62 type 1 AGNs with reliable double-peaked [O~{\sc~iii}] from 11557 QSOs with z < 0.3. After visually checking the 62 SDSS multi-color images, we find only seven objects with signs of merging. Four possible models for the double-peaked [O~{\sc~iii}] observed in our sample are discussed: the superposition model, AGN outflow model, dual AGN model, and rotating disk model. However, the current results can not provide any one explanation conclusively, and additional observational data are needed to provide the details of narrow line regions. But at least 22 objects with different velocity offsets between double-peaked [O~{\sc~iii}] and narrow H$\alpha$ emission lines could be excluded as dual AGN candidates. The relative velocity offsets of the [O~{\sc~iii}] blue-shifted/red-shifted components are negative to their line flux ratios, which is consistent with dual AGN model. This work provides a new sample of 62 type 1 AGNs with double-peaked [O~{\sc~iii}] for further study.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2502.15446-b31b1b.svg)](https://arxiv.org/abs/2502.15446) | **Development and Performance Validation of a Versatile VLBI Digital Backend Using the ROACH2 Platform**  |
|| <mark>J. Li</mark>, et al. |
|*Appeared on*| *2025-02-24*|
|*Comments*| *12 pages, 8 figures*|
|**Abstract**|            Customized digital backends for Very Long Baseline Interferometry (VLBI) are critical components for radio astronomy observatories. There are several serialized products such as the Digital Baseband Converter (DBBC), Reconfigurable Open Architecture Computing Hardware (ROACH) Digital BackEnd (RDBE), and Chinese Data Acquisition System (CDAS). However, the reliance on high-speed analog-to-digital converters (ADC) and Field Programmable Gate Arrays (FPGAs) often necessitates dedicated hardware platforms with long development cycles and prohibitive cost, limiting scalability and adaptability to evolving observational needs. To address these challenges, we propose a design leveraging the versatile and cost-effective ROACH2 hardware platform, developed by CASPER (Collaboration for Astronomy Signal Processing and Electronics Research). ROACH2's mature technology and streamlined firmware development capabilities significantly reduce the hardware platform's development cycle and cost, making it ideal for modern astronomical applications. This VLBI digital backend, based on the ROACH2 platform, incorporates key technologies such as Polyphase Filter Banks (PFB) algorithm implementation, digital complex-to-real baseband signal conversion, Mark5B data formatter design and two-bit optimal threshold quantization. These features ensure compatibility with existing systems while providing enhanced performance. The backend's performance was validated through multi-station VLBI experiments, demonstrating its ability to achieve good correlation fringes compared to the customized CDAS2-D system. Furthermore, this platform offers flexibility for rapid deployment of additional digital backends, such as those for spectral line observations, showcasing its potential for broader astronomical applications.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2502.15081-b31b1b.svg)](https://arxiv.org/abs/2502.15081) | **Evidence for a sub-jovian planet in the young TWA7 disk**  |
|| A.-M. Lagrange, et al. -- incl., <mark>J. Olofsson</mark>, <mark>G. Chauvin</mark> |
|*Appeared on*| *2025-02-24*|
|*Comments*| *25 pages, 9 figures, submitted to Nature*|
|**Abstract**|            Planets are thought to form from dust and gas in protoplanetary disks, and debris disks are the remnants of planet formation. Aged a few Myr up to a few Gyr, debris disks have lost their primordial gas, and their dust is produced by steady-state collisions between larger, rocky bodies. Tens of debris disks, with sizes of tens, sometimes hundreds of au, have been resolved with high spatial resolution, high contrast imagers at optical/near-IR or (sub)-millimeter interferometers. They commonly show cavities, ring-like structures, and gaps, which are often regarded as indirect signatures of the presence of planets that gravitationally interact with unseen planetesimals. However, no planet responsible for these features has been detected yet, probably because of the limited sensitivity (typically 2-10 MJ) of high contrast imaging instruments prior to JWST. We have used the unprecedented sensitivity of JWST/MIRI in the thermal IR to search for such planets in the disk of the ~ 6.4 Myr old star TWA 7. With its pole-on orientation, this three-ring debris disk is indeed ideally suited for such a detection. We unambiguously detected a source 1.5 arsec from the star, that is best interpreted as a cold, sub-Jupiter mass planet. Its estimated mass (~ 0.3 MJ) and position (~ 52 au, de-projected) can thoroughly account for the main disk structures.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error not a gzip file</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    Local figures referenced in `md` (markdown links labelled ``[Fig...]``
    and ``<img src="..."/>`` tags) are copied below `directory`, keeping
    their relative path; online figures and missing files are skipped.
    The markdown text itself is written to ``directory/md_fname``.

    :param md: markdown content to export
    :param md_fname: file name of the exported markdown document
    :param directory: destination directory (created if missing)
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs (not mkdir): the destination may be nested, e.g. `_build/html/`
        os.makedirs(directory, exist_ok=True)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # the figure keeps its relative path below the destination directory
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
    # the documents contain non-ASCII characters: do not depend on the
    # platform default encoding
    with open(os.path.join(directory, md_fname), 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# write every generated document to `_build/html/<paper_id>.md`
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2502.15581/./Figures/Re_disk_bulge_1p_cmap_BT_with_errors.png', 'tmp_2502.15581/./Figures/nsersic_1p_vs_BT_cmap_nsersic_B.png', 'tmp_2502.15581/./Figures/nsersic_B_vs_nsersic_1p_cmap_BT.png', 'tmp_2502.15581/./Figures/nsersic_B_vs_BT_cmap_nsersic_1p.png', 'tmp_2502.15581/./Figures/Re_1p_ratio_corner_plot_cropped.png']
copying  tmp_2502.15581/./Figures/Re_disk_bulge_1p_cmap_BT_with_errors.png to _build/html/
copying  tmp_2502.15581/./Figures/nsersic_1p_vs_BT_cmap_nsersic_B.png to _build/html/
copying  tmp_2502.15581/./Figures/nsersic_B_vs_nsersic_1p_cmap_BT.png to _build/html/
copying  tmp_2502.15581/./Figures/nsersic_B_vs_BT_cmap_nsersic_1p.png to _build/html/
copying  tmp_2502.15581/./Figures/Re_1p_ratio_corner_plot_cropped.png to _build/html/
exported in  _build/html/2502.15581.md
    + _build/html/tmp_2502.15581/./Figures/Re_disk_bulge_1p_cmap_BT_with_errors.png
    + _build/html/tmp_2502.15581/./Figures/nsersic_1p_vs_BT_cmap_nsersic_B.png
    + _build/html/tmp_2

## Display the papers

Not necessary but allows for a quick check.

In [9]:
# render every generated document inline for a quick visual check
for paper_id, md in documents:
    display(Markdown(md))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\orcid}[1]{\orcidlink{#1}}$
$\newcommand{\rms}{RMS~}$
$\newcommand{\resp}{resp{.}}$
$\newcommand{\ie}{i{.}e{.}~}$
$\newcommand{\eg}{e{.}g{.}~}$
$\newcommand{\eq}{Eq{.}~}$
$\newcommand{\eqs}{Eqs{.}~}$
$\newcommand{\tab}{Table~}$
$\newcommand{\fg}{Fig{.}~}$
$\newcommand{\fgs}{Figs{.}~}$
$\newcommand{\sct}{Sect{.}~}$
$\newcommand{\scts}{Sects{.}~}$
$\newcommand{\col}{Col{.}}$
$\newcommand{\cols}{Cols{.}}$
$\newcommand{\magn}{^\mathrm{mag}}$
$\newcommand{\pg}{p{.}~}$
$\newcommand{\ppg}{pp{.}~}$
$\newcommand{\hmpc}{~h^{-1}~Mpc}$
$\newcommand{\hkpc}{~h^{-1}~kpc}$
$\newcommand{\hmpcs}{~h^{-2}~Mpc^2}$
$\newcommand{\hmpcc}{~h^{-3}~Mpc^3}$
$\newcommand{\phiunit}{~h^3~Mpc^{-3}~mag^{-1}}$
$\newcommand{\etal}{{\it et\thinspace al.} }$
$\newcommand{\lo}[2]{\noindent{\color{royalblue} \bf[ \triangle \st{#1} #2]}}$
$\newcommand{\val}[2]{\noindent{\bf[\color{darkred} \spadesuit \st{#1} #2]}}$
$\newcommand{\va}[2]{\noindent{\bf[\color{magenta} \spadesuit \st{#1} #2]}}$
$\newcommand{\vl}[2]{\noindent{\bf[\color{violet} \spadesuit \st{#1} #2]}}$
$\newcommand{\redcom}[2]{\noindent{\bf[\color{darkred} \st{#1} #2]}}$
$\newcommand{\id}[2]{\noindent{\bf[\color{brown} \st{#1} IVANA: #2]}}$
$\newcommand{\chg}[2]{\noindent{#2}}$
$\newcommand{\chgs}[2]$
$\newcommand{\chgc}[2]$</div>



<div id="title">

# $\Euclid$: Galaxy morphology and photometry from bulge-disc decomposition of Early Release Observations$\thanks{This paper is published on behalf of the Euclid Consortium}$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2502.15581-b31b1b.svg)](https://arxiv.org/abs/2502.15581)<mark>Appeared on: 2025-02-24</mark> -  _Submitted to A&A, comments welcome. 27 pages, 19 figures, 6 tables (+ appendix of 6 pages, 18 figures - exemples gallery, 1 table)_

</div>
<div id="authors">

L. Quilley, et al. -- incl., <mark>K. Jahnke</mark>

</div>
<div id="abstract">

**Abstract:** The background galaxies in $\Euclid$ Early Release Observations (ERO) images of the Perseus cluster make up a remarkable sample in its combination of $0.57$ deg $^2$ area, 25.3 and 23.2 AB mag depth, as well as $\ang{;;0.1}$ and $\ang{;;0.3}$ angular resolutions, in optical and near-infrared bands, respectively.Towards characterising the history of the Hubble sequence, we perform a preliminary morphological analysis of $2445$ and $12 786$ galaxies with $\IE\le21$ and $\IE\le23$ , respectively.We use single-Sérsic profiles and the sums of a Sérsic bulge and an exponential disc to model these galaxies with \texttt{SourceXtractor++} and analyse their positional, structural, and flux parameters in order to assess their consistencies and biases.The fitted galaxies to $\IE\le21$ span the various Hubble types with ubiquitous bulge and disc components, and a bulge-to-total light ratio $B/T$ taking all values from 0 to 1. The median effective radius of the single-Sérsic profile is a biased estimate of galaxy size, intermediate between the bulge and disc effective radii, depending on $B/T$ . The axis ratio of the single-Sérsic profile overestimates that of the disc, increasingly so with $B/T$ . The type of model impacts the photometry with $-0.08$ to 0.01 mag median systematic $\IE$ offsets between single-Sérsic and bulge-disc total magnitudes, and a 0.05 to 0.15 mag dispersion, from low to high $B/T$ . We measure a median $0.4$ mag bulge-disc colour difference in $\IE-\JE$ that originates from the disc-dominated galaxies, whereas bulge-dominated galaxies have similar median colours of their components. 
Remarkably, we also measure redder-inside disc colour gradients, based on 5 to 10 \% systematic variations of disc effective radii between the optical and near-infrared bands.This analysis demonstrates the usefulness and limits of single-Sérsic profile modelling, and the power of bulge-disc decomposition for reliably characterising the morphology of lenticulars and spirals in $\Euclid$ images.

</div>

<div id="div_fig1">

<img src="tmp_2502.15581/./Figures/Re_disk_bulge_1p_cmap_BT_with_errors.png" alt="Fig13" width="100%"/>

**Figure 13. -** Ratios of the disc-to-bulge effective radii as a function of the ratios of the disc-to-single-Sérsic effective radii, all in the \IE band, for the $2445$ galaxies with $\IE\le21$. In the upper vertical concentration of disc-dominated galaxies (in blue, $B/T\approx0$) and on the right diagonal concentration of bulge-dominated galaxies (in red, $B/T\approx1$), the single-Sérsic effective radius is consistent with that of either a dominating disc or bulge component. The single-Sérsic effective radius of the $74.7\%$ of galaxies in the top-right cone is intermediate between the disc effective radius (the largest) and the bulge effective radius (the smallest), with a smooth gradient in the ratio of the disc to single-Sérsic effective radius, while $B/T$ varies from zero to one. These galaxies are visually indistinguishable from the types in the present-time Hubble sequence, whereas objects in other regions of the diagram are identified as either non-physical bulge-disc modelling, or biased bulge fits due to bars. (*all-radii-related-cmap-BT*)

</div>
<div id="div_fig2">

<img src="tmp_2502.15581/./Figures/nsersic_1p_vs_BT_cmap_nsersic_B.png" alt="Fig4.1" width="33%"/><img src="tmp_2502.15581/./Figures/nsersic_B_vs_nsersic_1p_cmap_BT.png" alt="Fig4.2" width="33%"/><img src="tmp_2502.15581/./Figures/nsersic_B_vs_BT_cmap_nsersic_1p.png" alt="Fig4.3" width="33%"/>

**Figure 4. -** Relations between the Sérsic index $n$ from the single-Sérsic modelling, the bulge-to-total light ratio $B/T$, and the bulge Sérsic index $n_\mathrm{B}$ from the bulge-disc decomposition, for the $2445$ galaxies with $\IE\le21$. Each plot shows one projection of this 3D parameter space, and incorporates the third parameter as a colour map on the points. Grey horizontal dashed lines indicate Sérsic index values of 1 and 4. The inclined purple dotted lines in the left panel delimit above and below the visually inspected samples (see text). Once spurious fits and bulges that are too small or too faint to be modelled are identified and discarded, there remains an overall correlation between $n$, $B/T$, and $n_\mathrm{B}$, which all increase jointly, as galaxy types change from late to earlier types along the Hubble sequence, and as their bulges grow from small pseudo-bulges to more prominent classical bulges. In this latter regime, the increase in $n_\mathrm{B}$\chg{with $B/T$} flattens, $\chg${as shown by the black solid line displaying the median values and bootstrap uncertainties of $n_\mathrm{bulge}$ in $B/T$ intervals of $0.1$ in width from $0.2$ to $1.0$ (the grey shaded area shows the 10--90\% percentile range around these values)} (*nsersic-nsersicB-BT-3d-space*)

</div>
<div id="div_fig3">

<img src="tmp_2502.15581/./Figures/Re_1p_ratio_corner_plot_cropped.png" alt="Fig17" width="100%"/>

**Figure 17. -** Ratios of the single-Sérsic effective radius $R_\mathrm{e,1p}$ between different bands as a function of the Sérsic index measured in the \IE VIS band for the $2328$ galaxies to $\IE\le21$, with $R_\mathrm{e,disc} /R_\mathrm{e,bulge} \ge 1$ or $R_\mathrm{e,disc}/R_\mathrm{e,1p} > 0.9$ in the \IE band. All possible pairs of \Euclid bands are shown in the six panels, with the band whose $R_\mathrm{e,1p}$ is the numerator (denominator) appearing on the left (top). Overall median values of the $R_\mathrm{e,1p}$ ratio appear on each panel as a red dotted horizontal line, whereas the red points and error bars correspond to the median and standard error for bins of $n_{\IE}$ listed in $\tab$\ref{tab-Re-ratio-1p}. Redder bands have smaller $R_\mathrm{e,1p}$ than bluer bands for the majority of galaxies, and this effect is stronger for bands that are further apart in wavelength. Most galaxies have blue to red inward gradients, which are the largest between the VIS band and any NISP bands, and are interpreted as resulting mostly from the colour dichotomy of bulges and discs shown in $\fg$\ref{bulge-disk-colors}, which in turn originates from the different stellar populations in the bulge and disc components of spiral galaxies. (*color-grad-Re-1p*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2502.15581"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# List the rendered paper pages whose file status changed within the last
# `days` days, and report how many there are.
files = glob('_build/html/*.md')
days = 7
# Take "now" once, as a timezone-aware UTC instant, so it is directly
# comparable with the UTC timestamps built below (a naive local `today()`
# would skew the window by the machine's UTC offset).
now = datetime.now(timezone.utc)
res = []
for fk in files:
    # NOTE: st_ctime is the last metadata change on Unix (creation time on
    # Windows), not the content modification time (st_mtime).
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc)
    delta = now - modified
    if delta <= timedelta(days=days):
        res.append(fk)
# Reverse alphabetical order of the file paths.
res = sorted(res, reverse=True)
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")

340  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: iterable of ``(fname, date)`` pairs; ``date`` is compared as a
                string against ``str(date.today() - timedelta(days))``, so it
                is assumed to be written as ``YYYY-MM-DD`` (TODO confirm
                against the generated files)
    :param days: size of the look-back window, in days
    :returns: generator over the file names whose date is on or after the
              cutoff, most recent date first
    """
    # Function-scope import: the notebook binds the global name `datetime`
    # to the class in one cell and to the module in another, so relying on
    # the global breaks depending on which cell ran last.
    from datetime import date as _date, timedelta as _timedelta

    # The cutoff does not depend on the entry: compute it once.
    cutoff = str(_date.today() - _timedelta(days=days))
    for fname, appeared in sorted(lst, key=lambda x: x[1], reverse=True):
        if appeared >= cutoff:
            yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date recorded in each file

    Only the first line containing "Appeared on" is used for a given file;
    files without such a line are left out of the result.

    :param lst_file: iterable of file paths to scan
    :returns: list of ``(fname, date)`` tuples, ``date`` being the raw text
              found after ``Appeared on:`` (up to ``</mark>`` if present)
    """
    def get_date(line):
        """ Text between 'Appeared on:' and '</mark>', stripped """
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate the argument: the previous code looped over a global named
    # `lst` and silently ignored `lst_file`.
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # first occurrence only; no need to read further
                    break
    return dates

from glob import glob
# Select the papers that appeared within the last `days` days, based on the
# "Appeared on" date written in each rendered page.
# '*.md' (with the dot) matches the pattern used when listing the same
# directory earlier in this notebook.
lst = glob('_build/html/*.md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

6  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel holding `npub` slides

    :param npub: number of slides; they get the ids ``slide1`` ... ``slide<npub>``
    :returns: the markup as one newline-joined string
    """
    opening = (
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    )
    cell_template = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    # One placeholder cell per slide, numbered from 1.
    cells = [cell_template.format(k=index) for index in range(1, npub + 1)]
    return '\n'.join([*opening, *cells, "  </div>"])

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid holding `npub` slides

    :param npub: number of slides; they get the ids ``slide1`` ... ``slide<npub>``
    :returns: the markup as one newline-joined string
    """
    cell_template = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    rows = ["""  <div class="grid"> """]
    # One placeholder cell per slide, numbered from 1.
    rows.extend(cell_template.format(k=index) for index in range(1, npub + 1))
    rows.append("  </div>")
    return '\n'.join(rows)

In [13]:
# Build the 7-day archive page: one carousel slide per selected paper.
carousel = create_carousel(npub)
# The page refers to the documents by bare file name; os.path.basename
# strips the directory whatever path separator glob returned.
docs = ', '.join('"{0:s}"'.format(os.path.basename(k)) for k in res)
slides = ', '.join(f'"slide{k}"' for k in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Fill in the placeholders of the template.
page = page.replace("{%-- carousel:s --%}", carousel)\
           .replace("{%-- suptitle:s --%}",  "7-day archives" )\
           .replace("{%-- docs:s --%}", docs)\
           .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), " publications in the last day.")

# Build the daily page: one carousel slide per selected paper.
carousel = create_carousel(npub)
# The page refers to the documents by bare file name; os.path.basename
# strips the directory whatever path separator glob returned.
docs = ', '.join('"{0:s}"'.format(os.path.basename(k)) for k in res)
slides = ', '.join(f'"slide{k}"' for k in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Fill in the placeholders of the template.
page = page.replace("{%-- carousel:s --%}", carousel)\
           .replace("{%-- suptitle:s --%}",  "Daily" )\
           .replace("{%-- docs:s --%}", docs)\
           .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

1  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

n_requested = 6
# Most recent date first; ties on the date are broken by file name so the
# selection does not depend on the order in which the files were listed.
ranked = sorted(dates, key=lambda x: (x[1], x[0]), reverse=True)
res = [k[0] for k in islice(ranked, n_requested)]
# Fewer papers than requested may exist: size the grid (and the title) on
# what was actually selected so that every slide has a document.
npub = len(res)
print(len(res), f" publications selected (up to {n_requested} requested).")

grid = create_grid(npub)
# The page refers to the documents by bare file name; os.path.basename
# strips the directory whatever path separator glob returned.
docs = ', '.join('"{0:s}"'.format(os.path.basename(k)) for k in res)
slides = ', '.join(f'"slide{k}"' for k in range(1, npub + 1))

with open("grid_template.html", "r") as tpl:
    page = tpl.read()

# Fill in the placeholders of the template.
page = page.replace("{%-- grid-content:s --%}", grid)\
           .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
           .replace("{%-- docs:s --%}", docs)\
           .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
