# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Some paper figures are really big: allow images of up to 10^9 pixels.
Image.MAX_IMAGE_PIXELS = 1_000_000_000

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for affiliation-related issues.

    Registered with ``warnings.simplefilter('always', ...)`` in this cell so
    that such warnings are never de-duplicated.
    """

class AffiliationError(RuntimeError):
    """Error raised when a paper does not pass the affiliation check."""

def validation(source: str):
    """Check the affiliations found in a source file before parsing it.

    The check itself is delegated to ``mpia.affiliation_verifications``;
    anything other than ``True`` is treated as the textual reason of the
    failure.

    :param source: content of the TeX source to verify
    :returns: None when the verification succeeds
    :raises AffiliationError: when the verification does not return True;
        the message carries the reason reported by the verification
    """
    outcome = mpia.affiliation_verifications(source, verbose=True)
    if outcome is True:
        return
    raise AffiliationError("mpia.affiliation_verifications: " + outcome)

        
# Always report affiliation warnings, even when the same one was already shown.
warnings.simplefilter(action='always', category=AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g. ``2412.17672``)
    :returns: HTML snippet (usable in markdown): a ``<div id="qrcode">``
              wrapping an ``<img>`` that points to the QR-code service
    """
    api_url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    target = f"https://arxiv.org/abs/{paper_id}"
    # Quote the *whole* src attribute. Quoting only the target URL leaves the
    # attribute unquoted and puts literal '"' characters into the encoded data.
    img = f'<img src="{api_url}{target}">'
    return '<div id="qrcode">' + img + '</div>'


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Remove the LaTeX CJK environment wrappers from a string

    ``\\begin{CJK}{<encoding>}{<font>} ... \\end{CJK}`` (and the starred
    ``CJK*`` form) is replaced by its content. Any encoding / font family is
    accepted, not only ``{UTF8}{gbsn}``, and the content may span several
    lines.
    List may need to grow.

    :param text: the text to clean
    :return: the cleaned text
    """
    cjk_environment = (r"\\begin\{CJK\*?\}\{[^{}]*\}\{[^{}]*\}"
                       r"(.*?)"
                       r"\\end\{CJK\*?\}")
    # DOTALL: the wrapped characters may be split over several source lines
    return re.sub(cjk_environment, r"\1", text, flags=re.DOTALL)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last is reduced to its initial(s); hyphenated tokens
    keep their hyphen (``Anna-Bella`` -> ``A.-B.``). The last token is kept
    as is. A parenthesised part (often the non western spelling of the name)
    is moved, unchanged, to the end of the result.

    Degenerate inputs (empty name, unbalanced parenthesis, stray hyphens) are
    handled without raising.

    :param name: full name
    :returns: initials
    """
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.search(r"\((.*?)\)", name)
        # an unbalanced '(' has no match: leave the name untouched then
        if found is not None:
            suffix = found.group(1)
            name = name.replace(f"({suffix})", '')

    tokens = name.split()
    initials = []
    for token in tokens[:-1]:
        # drop empty pieces produced by leading/trailing/double hyphens
        parts = [part for part in token.split('-') if part]
        if parts:
            initials.append('-'.join(part[0] + '.' for part in parts))
    if tokens:
        initials.append(tokens[-1])
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff list (Mitarbeiter list) web page from mpia.de to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Rejects IT, administration and technical staff. The test is a plain
    substring match of each excluded family name against `name`.

    :param name: name
    :returns: False if name is not a scientist, True otherwise
    """
    excluded = ('Licht', 'Binroth', 'Witzel', 'Jordan',
                'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                'Dehen', 'Beckmann', 'Jager', 'Jäger')
    return not any(fragment in name for fragment in excluded)

def add_author_to_list(author_list: list) -> list:
    """ Make sure a fixed set of extra authors is part of the list

    The list is completed in place: each extra author that is not present
    yet is appended, and the very same list object is returned.

    :param author_list: list of authors
    :returns: updated list of authors
    """
    for extra_author in ('T. Henning',):
        if extra_author in author_list:
            continue
        author_list.append(extra_author)
    return author_list

# Names come from the MPIA website (mpia.get_mpia_mitarbeiter_list() already
# does some filtering); the second field of each entry is used as the name.
staff_names = (entry[1] for entry in mpia.get_mpia_mitarbeiter_list())
# Keep the expected scientists only, then add the authors whose MPIA name and
# name on papers are inconsistent.
mpia_authors = add_author_to_list(
    [name for name in staff_names if filter_non_scientists(name)]
)

In [4]:
# Today's arXiv listing...
new_papers = get_new_papers()
# ... plus papers added by hand through their identifier (none by default).
add_paper_refs = []
manual_papers = [get_paper_from_identifier(ref) for ref in add_paper_refs]
new_papers.extend(manual_papers)

def robust_call(fn, value, *args, **kwargs):
    """ Best-effort call of `fn` on `value`

    :param fn: callable invoked as ``fn(value, *args, **kwargs)``
    :param value: first argument of `fn`, also the fallback result
    :returns: the result of the call, or `value` unchanged when the call
              raises any ``Exception``
    """
    try:
        result = fn(value, *args, **kwargs)
    except Exception:
        # deliberate fallback: keep the input as is
        result = value
    return result

# Select the papers that have at least one author from the MPIA list.
candidates = []
for paperk in new_papers:
    # Check author list with their initials
    # (the raw name is kept when mpia.get_initials fails on it)
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # Matched authors show up wrapped in <mark>...</mark> in the displayed
    # author lists. Test for the tag itself: the bare substring 'mark' also
    # matches unrelated author names that merely contain "mark".
    matches = [hl for hl in hl_authors if '<mark>' in hl]
    # the paper keeps the normalised/highlighted names from here on
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))
print("""          {0:,d} with possible author matches""".format(len(candidates)))

X. Zhang  ->  X. Zhang  |  ['X. Zhang']
J. Liu  ->  J. Liu  |  ['J. Liu']
R. Klein  ->  R. Klein  |  ['R. Klein']
M. Schirmer  ->  M. Schirmer  |  ['M. Schirmer']
K. Jahnke  ->  K. Jahnke  |  ['K. Jahnke']
J. Liu  ->  J. Liu  |  ['J. Liu']
Arxiv has 55 new papers today
          5 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Process every candidate paper: fetch its sources, validate the affiliation,
# parse the TeX and build the markdown summary.
#   documents: (paper_id, markdown text) of the papers that went through
#   failed:    (paper, reason) of the papers that were rejected or crashed
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalise the identifier: lower case, no 'arxiv:' prefix, no surrounding
    # whitespace. Note that r'\n' is a raw string: it removes the two
    # characters backslash + 'n', not an actual newline.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        # sources are only retrieved when the folder does not exist yet
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` is handed to the parser; it raises AffiliationError
            # when the affiliation check fails
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is built but never used afterwards
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well:
        # when the number of parsed authors differs from the arXiv listing,
        # the (already highlighted) arXiv author list is used instead
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # fall back on the arXiv abstract when none could be parsed
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # The "Appeared on: <date></mark>" marker written here is what
        # extract_appearance_dates() looks for when building the index pages.
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations (best effort: on failure the issue is printed and
        # the markdown is kept without the replacement)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other failure is reported as a warning and recorded, so that the
        # remaining papers are still processed
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/5 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2412.16853


extracting tarball to tmp_2412.16853...

 done.
Retrieving document from  https://arxiv.org/e-print/2412.16930


extracting tarball to tmp_2412.16930...

 done.
Retrieving document from  https://arxiv.org/e-print/2412.17645


extracting tarball to tmp_2412.17645...

 done.
Retrieving document from  https://arxiv.org/e-print/2412.17672


extracting tarball to tmp_2412.17672...

 done.


Found 173 bibliographic references in tmp_2412.17672/main.bbl.
Issues with the citations
syntax error in line 403: '=' expected
Retrieving document from  https://arxiv.org/e-print/2412.17716


extracting tarball to tmp_2412.17716...

 done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


### Export the logs

Throughout, we also keep track of the logs per paper. See `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
import os

# Date stamp used both in the log file name and in its title.
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# The logs folder may not exist yet (e.g. on a fresh build tree).
os.makedirs(os.path.dirname(logfile), exist_ok=True)

# Explicit UTF-8: author names and abstracts regularly contain non-ASCII
# characters and the default encoding of open() depends on the platform.
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers for which a document was generated
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sorting by reason groups the papers failing for the same reason under
    # a single sub-heading
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are expected (green), anything else is a
        # processing problem (red)
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2412.17672-b31b1b.svg)](https://arxiv.org/abs/2412.17672) | **Euclid: Early Release Observations of diffuse stellar structures and globular clusters as probes of the mass assembly of galaxies in the Dorado group**  |
|| M. Urbano, et al. -- incl., <mark>M. Schirmer</mark>, <mark>K. Jahnke</mark> |
|*Appeared on*| *2024-12-24*|
|*Comments*| *25 pages, 20 figures*|
|**Abstract**|            Deep surveys reveal tidal debris and associated compact stellar systems. Euclid's unique combination of capabilities (spatial resolution, depth, and wide sky coverage) will make it a groundbreaking tool for galactic archaeology in the local Universe, bringing low surface brightness (LSB) science into the era of large-scale astronomical surveys. Euclid's Early Release Observations (ERO) demonstrate this potential with a field of view that includes several galaxies in the Dorado group. In this paper, we aim to derive from this image a mass assembly scenario for its main galaxies: NGC 1549, NGC 1553, and NGC 1546. We detect internal and external diffuse structures, and identify candidate globular clusters (GCs). By analysing the colours and distributions of the diffuse structures and candidate GCs, we can place constraints on the galaxies' mass assembly and merger histories. The results show that feature morphology, surface brightness, colours, and GC density profiles are consistent with galaxies that have undergone different merger scenarios. We classify NGC 1549 as a pure elliptical galaxy that has undergone a major merger. NGC 1553 appears to have recently transitioned from a late-type galaxy to early type, after a series of radial minor to intermediate mergers. NGC 1546 is a rare specimen of galaxy with an undisturbed disk and a prominent diffuse stellar halo, which we infer has been fed by minor mergers and then disturbed by the tidal effect from NGC 1553. Finally, we identify limitations specific to the observing conditions of this ERO, in particular stray light in the visible and persistence in the near-infrared bands. Once these issues are addressed and the extended emission from LSB objects is preserved by the data-processing pipeline, the Euclid Wide Survey will allow studies of the local Universe to be extended to statistical ensembles over a large part of the extragalactic sky.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2412.16853-b31b1b.svg)](https://arxiv.org/abs/2412.16853) | **Application of 3D U-Net Neural Networks in Extracting the Epoch of Reionization Signal from SKA-Low Observations Based on Real Observations of NCP Field from LOFAR**  |
|| L.-Y. Gao, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2024-12-24*|
|*Comments*| *18 pages, 17 figures*|
|**Abstract**|            Neutral hydrogen serves as a crucial probe for the Cosmic Dawn and the Epoch of Reionization (EoR). Actual observations of the 21-cm signal often encounter challenges such as thermal noise and various systematic effects. To overcome these challenges, we simulate SKA-Low-depth images and process them with a deep learning method. We utilized foreground residuals acquired by LOFAR during actual North Celestial Pole field observations, thermal and excess variances calculated via Gaussian process regression, and 21-cm signals generated with 21cmFAST for signal extraction tests. Our approach to overcome these foreground, thermal noise, and excess variance components employs a 3D U-Net neural network architecture for image analysis. When considering thermal noise corresponding to 1400 hours of integration, U-Net provides reliable 2D power spectrum predictions, and robustness tests ensure that we get realistic EoR signals. Adding foreground residuals, however, causes inconsistencies below the horizon delay-line. Lastly, evaluating both thermal and excess variance with observations up to 3700 and 14000 hours ensures reliable power spectrum estimations within the EoR window and across nearly all scales, respectively.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2412.16930-b31b1b.svg)](https://arxiv.org/abs/2412.16930) | **Photometric Stellar Parameters for 195,478 Kepler Input Catalog (KIC) Stars**  |
|| B. Zhang, et al. -- incl., <mark>J. Liu</mark> |
|*Appeared on*| *2024-12-24*|
|*Comments*| *26 pages, 22 figures. To be published in ApJS. For associated catalogs of the result, see this https URL*|
|**Abstract**|            The stellar atmospheric parameters and physical properties of stars in the Kepler Input Catalog (KIC) are of great significance for the study of exoplanets, stellar activity, and asteroseismology. However, despite extensive effort over the past decades, accurate spectroscopic estimates of these parameters are available for only about half of the stars in the full KIC catalog. In our work, by training relationships between photometric colors and spectroscopic stellar parameters from Gaia DR3, the Kepler Issac-Newton Survey, LAMOST DR10, and APOGEE DR17, we have obtained atmospheric-parameter estimates for over 195,000 stars, accounting for 97% of the total sample of KIC stars. We obtain 1{\sigma} uncertainties of 0.1 dex on metallicity [Fe/H], 100 K on effective temperature $T_{\mathrm{eff}}$ , and 0.2 dex on surface gravity log $g$. In addition, based on these atmospheric parameters, we estimated the ages, masses, radii, and surface gravities of these stars using the commonly adopted isochrone-fitting approach. The resulting precisions are 20% for ages, 0.1 $M_{\odot}$ for masses, 0.01 $R_{\odot}$ for radii and 0.1 dex for surface gravities. These accurate parameters are expected to provide valuable insights for future studies on various fields.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2412.17645-b31b1b.svg)](https://arxiv.org/abs/2412.17645) | **Full disc [CII] mapping of nearby star-forming galaxies: SOFIA FIFI/LS observations of NGC 3627, NGC 4321, and NGC 6946**  |
|| I. Kovačić, et al. -- incl., <mark>R. Klein</mark> |
|*Appeared on*| *2024-12-24*|
|*Comments*| *17 pages, 10 figures*|
|**Abstract**|            As a major cooling line of interstellar gas, the far-infrared 158 {\mu}m line from singly ionised carbon [CII] is an important tracer of various components of the interstellar medium in galaxies across all spatial and morphological scales. Yet, there is still not a strong constraint on the origins of [CII] emission. In this work, we derive the resolved [CII] star formation rate relation and aim to unravel the complexity of the origin of [CII]. We used the Field-Imaging Far-Infrared Line Spectrometer on board the Stratospheric Observatory for Infrared Astronomy to map [CII] in three nearby star-forming galaxies at sub-kiloparsec scales, namely, NGC 3627, NGC 4321, and NGC 6946, and we compared these [CII] observations to the galactic properties derived from complementary data from the literature. We find that the relationship between the [CII] fine structure line and star formation rate shows variations between the galaxies as well as between different environments within each galaxy. Our results show that the use of [CII] as a tracer for star formation is much more tangled than has previously been suggested within the extragalactic literature, which typically focuses on small regions of galaxies and/or uses large-aperture sampling of many different physical environments. As found within resolved observations of the Milky Way, the picture obtained from [CII] observations is complicated by its local interstellar medium conditions. Future studies will require a larger sample and additional observational tracers, obtained on spatial scales within galaxies, in order to accurately disentangle the origin of [CII] and calibrate its use as a star formation tracer.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2412.17716-b31b1b.svg)](https://arxiv.org/abs/2412.17716) | **A Tale of Three: Magnetic Fields along the Orion Integral-Shaped Filament as Revealed by JCMT BISTRO survey**  |
|| J. Wu, et al. -- incl., <mark>J. Liu</mark> |
|*Appeared on*| *2024-12-24*|
|*Comments*| *published in the ApJ Letters*|
|**Abstract**|            As part of the BISTRO survey, we present JCMT 850 $\mu$m polarimetric observations towards the Orion Integral-Shaped Filament (ISF) that covers three portions known as OMC-1, OMC-2, and OMC-3. The magnetic field threading the ISF seen in the JCMT POL-2 map appears as a tale of three: pinched for OMC-1, twisted for OMC-2, and nearly uniform for OMC-3. A multi-scale analysis shows that the magnetic field structure in OMC-3 is very consistent at all the scales, whereas the field structure in OMC-2 shows no correlation across different scales. In OMC-1, the field retains its mean orientation from large to small scales, but shows some deviations at small scales. Histograms of relative orientations between the magnetic field and filaments reveal a bimodal distribution for OMC-1, a relatively random distribution for OMC-2, and a distribution with a predominant peak at 90$^\circ$ for OMC-3. Furthermore, the magnetic fields in OMC-1 and OMC-3 both appear to be aligned perpendicular to the fibers, which are denser structures within the filament, but the field in OMC-2 is aligned along with the fibers. All these suggest that gravity, turbulence, and magnetic field are each playing a leading role in OMC-1, 2, and 3, respectively. While OMC-2 and 3 have almost the same gas mass, density, and non-thermal velocity dispersion, there are on average younger and fewer young stellar objects in OMC-3, providing evidence that a stronger magnetic field will induce slower and less efficient star formation in molecular clouds.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    The figures referenced in the markdown text, either as ``[Fig...](path)``
    links or as ``<img src="path" .../>`` tags, are copied below `directory`
    under the same relative path. Online figures (path containing ``http``)
    and missing files are skipped. The markdown text itself is written
    (UTF-8) to ``directory/md_fname``.

    :param md: markdown text to export
    :param md_fname: name of the markdown file to create in `directory`
    :param directory: destination directory, created if needed
                      (intermediate directories included)
    :raises RuntimeError: if `directory` exists and is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs: the destination may be nested (e.g. '_build/html/')
        os.makedirs(directory, exist_ok=True)

    # Non-greedy link pattern: a greedy one swallows everything between the
    # first '[Fig' and the last ')' of a line holding several links.
    fig_fnames = (re.findall(r'\[Fig.*?\]\((.*?)\)', md) +
                  re.findall(r'\<img src="([^>\s]*)"[^>]*/>', md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.isfile(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # keep the relative layout so the paths in the markdown stay valid
        destdir = os.path.join(directory, os.path.dirname(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, os.path.join(destdir, os.path.basename(fname)))
    # explicit encoding: the text contains non-ASCII characters
    with open(os.path.join(directory, md_fname), 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# One markdown file (plus its figures) per successfully processed paper.
for paper_id, md in documents:
    export_markdown_summary(md=md,
                            md_fname=f"{paper_id:s}.md",
                            directory='_build/html/')

found figures ['tmp_2412.17672/./figures/tidal_features_contours.png', 'tmp_2412.17672/./figures/tidal_features_SB.png', 'tmp_2412.17672/./figures/color_features.png', 'tmp_2412.17672/./figures/color_features_GCs.png', 'tmp_2412.17672/./figures/EGCs.png', 'tmp_2412.17672/./figures/zoom_EGCs.png', 'tmp_2412.17672/./figures/zoom_EGCs2.png', 'tmp_2412.17672/./figures/blue_EGCs.png', 'tmp_2412.17672/./figures/zoom_blue_EGCs.png', 'tmp_2412.17672/./figures/zoom_blue_EGCs2.png', 'tmp_2412.17672/./figures/red_EGCs.png', 'tmp_2412.17672/./figures/zoom_red_EGCs.png', 'tmp_2412.17672/./figures/zoom_red_EGCs2.png', 'tmp_2412.17672/./figures/color-color_age.png', 'tmp_2412.17672/./figures/color-color_metal.png']
copying  tmp_2412.17672/./figures/tidal_features_contours.png to _build/html/
copying  tmp_2412.17672/./figures/tidal_features_SB.png to _build/html/
copying  tmp_2412.17672/./figures/color_features.png to _build/html/
copying  tmp_2412.17672/./figures/color_features_GCs.png to _build/html

## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render the markdown text (second field) of every generated document.
for doc_entry in documents:
    display(Markdown(doc_entry[1]))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\AL}[1]{\textcolor{teal}{#1}}$
$\newcommand{\orcid}[1]$
$\newcommand{\hms}[3]{#1\textsuperscript{h} #2\textsuperscript{m} #3\textsuperscript{s}}$
$\newcommand{\arraystretch}{1.2}$
$\newcommand{\arraystretch}{1.2}$
$\newcommand{\arraystretch}{1.2}$
$\newcommand{\arraystretch}{1.2}$
$\newcommand{\arraystretch}{1.2}$</div>



<div id="title">

# $\Euclid$: Early Release Observations of diffuse stellar structures and globular clusters as probes of the mass assembly of galaxies in the Dorado group$\thanks{This paper is published on behalf of the Euclid Consortium}$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2412.17672-b31b1b.svg)](https://arxiv.org/abs/2412.17672)<mark>Appeared on: 2024-12-24</mark> -  _25 pages, 20 figures_

</div>
<div id="authors">

M. Urbano, et al. -- incl., <mark>M. Schirmer</mark>, <mark>K. Jahnke</mark>

</div>
<div id="abstract">

**Abstract:** Deep surveys have helped unveil the history of past and present galaxy mergers. In particular, they reveal their tidal debris and associated compact stellar systems. $\Euclid$ 's unique combination of capabilities (spatial resolution, depth, and wide sky coverage) will make it a groundbreaking tool for galactic archaeology in the local Universe, bringing low surface brightness (LSB) science into the era of large-scale astronomical surveys. $\Euclid$ 's Early Release Observations (ERO) demonstrate this potential with a field of view that includes several galaxies in the Dorado group. In this paper, we aim to derive from this image a mass assembly scenario for its main galaxies: NGC 1549, NGC 1553, and NGC 1546. We detect internal and external diffuse structures, and identify candidate globular clusters (GCs). By analysing the colours and distributions of the diffuse structures and candidate GCs, we can place constraints on the galaxies' mass assembly and merger histories. The results show that feature morphology, surface brightness, colours, and GC density profiles are consistent with galaxies that have undergone different merger scenarios. We classify NGC 1549 as a pure elliptical galaxy that has undergone a major merger. NGC 1553 appears to have recently transitioned from a late-type galaxy to early type, after a series of radial minor to intermediate mergers. NGC 1546 is a rare specimen of galaxy with an undisturbed disk and a prominent diffuse stellar halo, which we infer has been fed by minor mergers and then disturbed by the tidal effect from NGC 1553. Finally, we identify limitations specific to the observing conditions of this ERO, in particular stray light in the visible and persistence in the near-infrared bands. 
Once these issues are addressed and the extended emission from LSB objects is preserved by the data-processing pipeline, the Euclid Wide Survey will allow studies of the local Universe to be extended to statistical ensembles over a large part of the extragalactic sky.

</div>

<div id="div_fig1">

<img src="tmp_2412.17672/./figures/tidal_features_contours.png" alt="Fig14.1" width="25%"/><img src="tmp_2412.17672/./figures/tidal_features_SB.png" alt="Fig14.2" width="25%"/><img src="tmp_2412.17672/./figures/color_features.png" alt="Fig14.3" width="25%"/><img src="tmp_2412.17672/./figures/color_features_GCs.png" alt="Fig14.4" width="25%"/>

**Figure 14. -** *Upper left*: features detected for each galaxy with the help of the $\IE$ images and residuals. *Upper right*: $\IE$ surface brightness map of the features. Those for which $\IE$ photometry can be performed are coloured according to their surface brightness and numbered. Those for which this study is not possible are delineated in black. The photometry of the shells, of the uncertain features between NGC 1549 and NGC 1553, and of the features close to the galactic centres was not estimated. These features are delineated in black. *Lower left*: $\IE-\JE$ integrated fluxes colour map. The features with uncertain detection in the NIR bands appear in grey.  *Lower right*: $\IE-\JE$ GCs colour map. The average colours are weighted based on the uncertainties of the magnitudes that contribute to them. The features that encompass less than three bright GC candidates appear in grey. (*features_colours*)

</div>
<div id="div_fig2">

<img src="tmp_2412.17672/./figures/EGCs.png" alt="Fig12.1" width="11%"/><img src="tmp_2412.17672/./figures/zoom_EGCs.png" alt="Fig12.2" width="11%"/><img src="tmp_2412.17672/./figures/zoom_EGCs2.png" alt="Fig12.3" width="11%"/><img src="tmp_2412.17672/./figures/blue_EGCs.png" alt="Fig12.4" width="11%"/><img src="tmp_2412.17672/./figures/zoom_blue_EGCs.png" alt="Fig12.5" width="11%"/><img src="tmp_2412.17672/./figures/zoom_blue_EGCs2.png" alt="Fig12.6" width="11%"/><img src="tmp_2412.17672/./figures/red_EGCs.png" alt="Fig12.7" width="11%"/><img src="tmp_2412.17672/./figures/zoom_red_EGCs.png" alt="Fig12.8" width="11%"/><img src="tmp_2412.17672/./figures/zoom_red_EGCs2.png" alt="Fig12.9" width="11%"/>

**Figure 12. -** Bright GC candidates density maps of the full ERO-D FoV (left column), with zooms on the NGC 1549-NGC 1553 pair (middle column) and on NGC 1546 (right column). The colour scale is the same for both zooms, where the density fields were re-evaluated locally using the samples within the cutouts. All GCs, blue GCs, and red GCs are respectively represented in the upper, middle and lower rows. The threshold used to differentiate between blue and red GCs is the median value of the $\IE-\HE$ GC candidate colours. (*GC_distribution*)

</div>
<div id="div_fig3">

<img src="tmp_2412.17672/./figures/color-color_age.png" alt="Fig15.1" width="50%"/><img src="tmp_2412.17672/./figures/color-color_metal.png" alt="Fig15.2" width="50%"/>

**Figure 15. -** Colour-colour plot of the ERO Dorado features, for both total flux photometry (square labels) and GCs photometry (round labels). The interpolated metallicities and ages are obtained using the PEGASE \citep{2004A&A...425..881L} Single Stellar Population (SSP) model, however, the colour-coded area is consistent in age and metallicity across all other SSP models tested. (*fig:colour-colour*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2412.17672"></div>

# Create HTML index

In [10]:
# NOTE: this import binds the name `datetime` to the class, not the module.
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# Count the exported markdown files that changed during the last `days` days.
files = glob('_build/html/*.md')
days = 7
# Reference time taken once, as an aware UTC datetime, so that it is
# comparable with the (UTC) file times below whatever the local timezone is.
now = datetime.now(timezone.utc)
max_age = timedelta(days=days)
res = []
for fk in files:
    # NOTE(review): st_ctime is the metadata-change time on Unix (creation
    # time on Windows); st_mtime may be what "modified" is meant to be.
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc)
    delta = now - modified
    if delta <= max_age:
        # total_seconds(): `.seconds` alone ignores the whole days
        res.append((delta.total_seconds(), fk))
# file names only, in reverse alphabetical order
res = sorted((fk for age, fk in res), reverse=True)
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")

291  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: sequence of ``(fname, date)`` pairs; dates are compared as
                strings, which assumes ISO ``YYYY-MM-DD`` dates
    :param days: how many days to look back from today
    :returns: generator of the file names whose date is not older than
              ``today - days``, most recent first
    """
    # Imported locally so that the function does not depend on what the
    # global name `datetime` is bound to (module or class).
    from datetime import date, timedelta

    # the threshold does not change during the iteration: compute it once
    oldest = str(date.today() - timedelta(days=days))
    for fname, appeared in sorted(lst, key=lambda x: x[1], reverse=True):
        if appeared >= oldest:
            yield fname

def extract_appearance_dates(lst_file):
    """ Extract the appearance date of each document

    The date is read from the first line containing ``Appeared on``: it is
    the text between ``Appeared on:`` and ``</mark>``. Files without such a
    line are left out of the result.

    :param lst_file: iterable of markdown file names
    :returns: list of ``(fname, date)`` pairs, in the order of `lst_file`
    """
    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # iterate over the argument (not over a global list of files)
    for fname in lst_file:
        with open(fname, 'r', encoding='utf-8') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # only the first occurrence counts
                    break
    return dates

from glob import glob

# Appearance dates of all exported documents; '*.md' is the same pattern as
# the one used for the modification-time scan of the same folder.
lst = glob('_build/html/*.md')
days = 7
dates = extract_appearance_dates(lst)
# documents that appeared during the last `days` days
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

8  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides

    :param npub: number of slides; slide ids run from ``slide1`` to
                 ``slide<npub>``
    :returns: HTML text, one line per element
    """
    opening = ["""  <div class="carousel" """,
               """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""]
    cell = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    cells = [cell.format(k=index) for index in range(1, npub + 1)]
    return '\n'.join(opening + cells + ["  </div>"])

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides

    :param npub: number of grid items; slide ids run from ``slide1`` to
                 ``slide<npub>``
    :returns: HTML text, one line per element
    """
    item = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    items = [item.format(k=index) for index in range(1, npub + 1)]
    return '\n'.join(["""  <div class="grid"> """] + items + ["  </div>"])

In [13]:
# 7-day archive page: one carousel slide per recent document.
carousel = create_carousel(npub)
# quoted, comma separated lists injected in the template:
# the document file names (no folder) and the matching slide ids
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

# Explicit UTF-8 on both sides: the default encoding of open() depends on the
# platform.
with open("daily_template.html", "r", encoding="utf-8") as tpl:
    page = tpl.read()

# fill in the template placeholders
page = page.replace("{%-- carousel:s --%}", carousel)\
           .replace("{%-- suptitle:s --%}",  "7-day archives" )\
           .replace("{%-- docs:s --%}", docs)\
           .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_7days.html", 'w', encoding="utf-8") as fout:
    fout.write(page)

In [14]:
# redo for today: same page as the 7-day archive, restricted to the last day
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), " publications in the last day.")

carousel = create_carousel(npub)
# quoted, comma separated lists injected in the template:
# the document file names (no folder) and the matching slide ids
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

# Explicit UTF-8 on both sides: the default encoding of open() depends on the
# platform.
with open("daily_template.html", "r", encoding="utf-8") as tpl:
    page = tpl.read()

# fill in the template placeholders
page = page.replace("{%-- carousel:s --%}", carousel)\
           .replace("{%-- suptitle:s --%}",  "Daily" )\
           .replace("{%-- docs:s --%}", docs)\
           .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_daily.html", 'w', encoding="utf-8") as fout:
    fout.write(page)

1  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice

# maximum number of papers shown in the grid
npub = 6
# most recent appearance dates first, at most `npub` documents
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), npub)]
# there may be fewer documents than requested: keep the number of grid slots
# equal to the number of documents
npub = len(res)
print(len(res), " publications selected.")

grid = create_grid(npub)
# quoted, comma separated lists injected in the template:
# the document file names (no folder) and the matching slide ids
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

# Explicit UTF-8 on both sides: the default encoding of open() depends on the
# platform.
with open("grid_template.html", "r", encoding="utf-8") as tpl:
    page = tpl.read()

# fill in the template placeholders
page = page.replace("{%-- grid-content:s --%}", grid)\
           .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
           .replace("{%-- docs:s --%}", docs)\
           .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_npub_grid.html", 'w', encoding="utf-8") as fout:
    fout.write(page)

6  6 publications selected.
