# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Remove non-western encoded characters from a string
    List may need to grow.
    
    :param text: the text to clean
    :return: the cleaned text
    """
    text = re.sub(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})", r"\2", text)
    return text


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName
    :param name: full name
    :returns: initials
    """
    initials = []
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        suffix = re.findall(r"\((.*?)\)", name)[0]
        name = name.replace(f"({suffix})", '')
    else:
        suffix = ''
    split = name.split()
    for token in split[:-1]:
        if '-' in token:
            current = '-'.join([k[0] + '.' for k in token.split('-')])
        else:
            current = token[0] + '.'
        initials.append(current)
    initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    removing IT, administration, technical staff
    :param name: name
    :returns: False if name is not a scientist
    """
    remove_list = ['Licht', 'Binroth', 'Witzel', 'Jordan',
                   'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                   'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                   'Dehen', 'Beckmann', 'Jager', 'Jäger'
                  ]

    for k in remove_list:
        if k in name:
            return False
    return True

def add_author_to_list(author_list: list) -> list:
    """ Add author to list if not already in list
    
    :param author: author name
    :param author_list: list of authors
    :returns: updated list of authors
    """
    add_list = ['T. Henning']

    for author in add_list:
        if author not in author_list:
            author_list.append(author)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    try:
        return fn(value, *args, **kwargs)
    except Exception:
        return value

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

E. Schinnerer  ->  E. Schinnerer  |  ['E. Schinnerer']
I. J. M. Crossfield  ->  I. J. M. Crossfield  |  ['I. J. M. Crossfield']
S. Kraus  ->  S. Kraus  |  ['S. Kraus']
A. Pillepich  ->  A. Pillepich  |  ['A. Pillepich']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']


X. Zhang  ->  X. Zhang  |  ['X. Zhang']
J. Li  ->  J. Li  |  ['J. Li']
J. Liu  ->  J. Liu  |  ['J. Liu']
K. Jahnke  ->  K. Jahnke  |  ['K. Jahnke']
Arxiv has 105 new papers today
          8 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates):
    # debug crap
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/8 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2501.06333


extracting tarball to tmp_2501.06333...

 done.
Retrieving document from  https://arxiv.org/e-print/2501.06342



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2501.06342...

 done.
Retrieving document from  https://arxiv.org/e-print/2501.06982



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2501.06982...

 done.
  0: tmp_2501.06982/ms1.tex, 746 lines
  1: tmp_2501.06982/aassymbols.tex, 579 lines
Retrieving document from  https://arxiv.org/e-print/2501.07151



  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2501.07151...

 done.


Found 96 bibliographic references in tmp_2501.07151/draft.bbl.
Retrieving document from  https://arxiv.org/e-print/2501.07340


extracting tarball to tmp_2501.07340...

 done.
Retrieving document from  https://arxiv.org/e-print/2501.07361


extracting tarball to tmp_2501.07361... done.
Retrieving document from  https://arxiv.org/e-print/2501.07362


extracting tarball to tmp_2501.07362...

 done.
Retrieving document from  https://arxiv.org/e-print/2501.07559


extracting tarball to tmp_2501.07559...

 done.




Found 89 bibliographic references in tmp_2501.07559/tomographic_binning.bbl.


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.07151-b31b1b.svg)](https://arxiv.org/abs/2501.07151) | **The diverse physical origins of stars in the dynamically hot bulge: CALIFA vs. IllustrisTNG**  |
|| L. Zhang, et al. -- incl., <mark>A. Pillepich</mark> |
|*Appeared on*| *2025-01-14*|
|*Comments*| *18 pages, 15 figures*|
|**Abstract**|            We compare the internal stellar structures of central galaxies in the TNG50 and TNG100 simulations and field galaxies in the CALIFA survey. The luminosity fractions of the dynamically cold, warm, and hot components in both TNG50 and TNG100 galaxies exhibit general consistency with those observed in CALIFA galaxies. For example, they all exhibit a minimum luminosity fraction of the dynamically hot component in galaxies with intermediate stellar masses, and the morphology of each orbital component in the TNG50 and TNG100 galaxies closely resembles that found in the CALIFA galaxies. We therefore use the simulations to quantify the physical origins of the different components, focusing on the dynamically hot component in TNG50. We identify three primary regimes and thus physical processes: (1) in low mass galaxies that have not experienced major mergers, stars are born with a wide range of circularity distributions and have remained relatively unchanged until the present day. Consequently, hot stars in such galaxies at redshift 0 are predominantly born hot. (2) In higher mass galaxies lacking major mergers, most stars are initially born cold but are subsequently heated through secular evolution. (3) In galaxies across the entire mass range, mergers, if they occurred, significantly increased the hot orbital fraction. As a result, the dynamically hot bulge within $R_e$ of present-day galaxies does not indicate their past merger histories; instead, the hot stars in the outer regions are mostly heated or accreted by mergers, thus indicating galaxy merger history. The massive galaxies are initially born with cold, rotationally supported structures, consistent with recent observations from the James Webb Space Telescope (JWST) regarding high-redshift galaxies.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.07559-b31b1b.svg)](https://arxiv.org/abs/2501.07559) | **Euclid: Optimising tomographic redshift binning for 3$\times$2pt power spectrum constraints on dark energy**  |
|| J. H. W. Wong, et al. -- incl., <mark>K. Jahnke</mark> |
|*Appeared on*| *2025-01-14*|
|*Comments*| *Euclid Consortium paper. 28 pages, 17 figures. For submission to A&A*|
|**Abstract**|            We present a simulation-based method to explore the optimum tomographic redshift binning strategy for 3x2pt analyses with Euclid, focusing on the expected configuration of its first major data release (DR1). To do this, we 1) simulate a Euclid-like observation and generate mock shear catalogues from multiple realisations of the 3x2pt fields on the sky, and 2) measure the 3x2pt Pseudo-Cl power spectra for a given tomographic configuration and derive the constraints that they place on the standard dark energy equation of state parameters (w0, wa). For a simulation including Gaussian-distributed photometric redshift uncertainty and shape noise under a LambdaCDM cosmology, we find that bins equipopulated with galaxies yield the best constraints on (w0, wa) for an analysis of the full 3x2pt signal, or the angular clustering component only. For the cosmic shear component, the optimum (w0, wa) constraints are achieved by bins equally spaced in fiducial comoving distance. However, the advantage with respect to alternative binning choices is only a few percent in the size of the $1\,\sigma\,$(w0, wa) contour, and we conclude that the cosmic shear is relatively insensitive to the binning methodology. We find that the information gain extracted on (w0, wa) for any 3x2pt component starts to saturate at $\gtrsim$ 7-8 bins. Any marginal gains resulting from a greater number of bins is likely to be limited by additional uncertainties present in a real measurement, and the increasing demand for accuracy of the covariance matrix. Finally, we consider a 5% contamination from catastrophic photometric redshift outliers and find that, if these errors are not mitigated in the analysis, the bias induced in the 3x2pt signal for 10 equipopulated bins results in dark energy constraints that are inconsistent with the fiducial LambdaCDM cosmology at $>5\,\sigma$.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.06333-b31b1b.svg)](https://arxiv.org/abs/2501.06333) | **VLA+VLBA to ngVLA Transition Option Concepts**  |
|| A. Corsi, et al. -- incl., <mark>E. Schinnerer</mark> |
|*Appeared on*| *2025-01-14*|
|*Comments*| *This report reflects an initial set of recommendations by the Transition Advisory Group for the ngVLA Project and is distributed for the purposes of obtaining community comment. Modification of this report in response to community comment is expected. Please submit your feedback at ngvla-transition-feedback@listmgr.this http URL*|
|**Abstract**|            The next-generation Very Large Array (ngVLA) is intended to be the premier centimeter-wavelength facility for astronomy and astrophysics, building on the substantial scientific legacies of the Karl G. Jansky Very Large Array (VLA) and the Very Long Baseline Array (VLBA). The ngVLA would open a new window on the Universe through ultra-sensitive imaging of thermal line and continuum emission to milliarcsecond resolution, while delivering unprecedented broad-band continuum imaging and polarimetry of non-thermal emission. The ngVLA would provide a critical electromagnetic complement to a suite of particle detectors and gravitational-wave observatories, as well as space- and ground-based telescopes operating from infrared to gamma-ray wavelengths, hence enabling multi-messenger and multi-band astronomy and astrophysics. Current construction plans call for the ngVLA to leverage some of the physical infrastructure of both the VLA and the VLBA, potentially drawing on overlapping personnel and information infrastructure. Multiple options can be envisioned for a VLA+VLBA to ngVLA transition. In order to assess risks and benefits of possible transition plans, the ngVLA project established the VLA+VLBA to ngVLA Transition Advisory Group (TAG). The primary deliverable from the TAG is a ``VLA+VLBA to ngVLA Transition Option Concepts'' report (this report) that includes a prioritized list of transition options.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.06342-b31b1b.svg)](https://arxiv.org/abs/2501.06342) | **The TESS-Keck Survey XXIV: Outer Giants may be More Prevalent in the Presence of Inner Small Planets**  |
|| J. V. Zandt, et al. -- incl., <mark>I. J. M. Crossfield</mark> |
|*Appeared on*| *2025-01-14*|
|*Comments*| *32 pages, 20 figures, 4 tables. Comments welcome*|
|**Abstract**|            We present the results of the Distant Giants Survey, a three-year radial velocity (RV) campaign to search for wide-separation giant planets orbiting Sun-like stars known to host an inner transiting planet. We defined a distant giant to have $a$ = 1--10 AU and $M_{p} \sin i = 70-4000$ \mearth~ = 0.2-12.5 \mj, and required transiting planets to have $a<1$ AU and $R_{p} = 1-4$ \rearth. We assembled our sample of 47 stars using a single selection function, and observed each star at monthly intervals to obtain $\approx$30 RV observations per target. The final catalog includes a total of twelve distant companions: four giant planets detected during our survey, two previously known giant planets, and six objects of uncertain disposition identified through RV/astrometric accelerations. Statistically, half of the uncertain objects are planets and the remainder are stars/brown dwarfs. We calculated target-by-target completeness maps to account for missed planets. We found evidence for a moderate enhancement of distant giants (DG) in the presence of close-in small planets (CS), P(DG|CS) = $30^{+14}_{-12}\%$, over the field rate of P(DG) = $16^{+2}_{-2}\%$. No enhancement is disfavored ($p \sim$ 8%). In contrast to a previous study, we found no evidence that stellar metallicity enhances P(DG|CS). We found evidence that distant giant companions are preferentially found in systems with multiple transiting planets and have lower eccentricities than randomly selected giant planets. This points toward dynamically cool formation pathways for the giants that do not disturb the inner systems.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.06982-b31b1b.svg)](https://arxiv.org/abs/2501.06982) | **A Spectroscopic and Interferometric Study of W Serpentis Stars. I. Circumbinary Outflow in the Interacting Binary W Serpentis**  |
|| K. Shepard, et al. -- incl., <mark>S. Kraus</mark> |
|*Appeared on*| *2025-01-14*|
|*Comments*| *Published in ApJ*|
|**Abstract**|            W Serpentis is an eclipsing binary system and the prototype of the Serpentid class of variable stars. These are interacting binaries experiencing intense mass transfer and mass loss. However, the identities and properties of both stars in W Ser remain a mystery. Here we present an observational analysis of high quality, visible-band spectroscopy made with the Apache Point Observatory 3.5 m telescope and ARCES spectrograph plus the first near-IR, long-baseline interferometric observations obtained with the CHARA Array. We present examples of the appearance and radial velocities of the main spectral components: prominent emission lines, strong shell absorption lines, and weak absorption lines. We show that some of the weak absorption features are associated with the cool mass donor, and we present the first radial velocity curve for the donor star. The donor's absorption lines are rotationally broadened, and we derive a ratio of donor to gainer mass of 0.36 +/- 0.09 based on the assumptions that the donor fills its Roche lobe and rotates synchronously with the orbit. We use a fit of the ASAS light curve to determine the orbital inclination and mass estimates of 2.0 and 5.7 solar masses for the donor and gainer, respectively. The partially resolved interferometric measurements of orbital motion are consistent with our derived orbital properties and the distance from Gaia EDR3. Spectroscopic evidence indicates that the gainer is enshrouded in an opaque disk that channels the mass transfer stream into an outflow through the L3 region and into a circumbinary disk.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.07340-b31b1b.svg)](https://arxiv.org/abs/2501.07340) | **The Spectrum of C/2023 A3 Indicates A Depleted Composition**  |
|| Y. Tang, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2025-01-14*|
|*Comments*| *Published on RNAAS, Oct 2024. 4 pages, 1 figure. Further work in progress and comments are welcome*|
|**Abstract**|            We report a spectroscopic observation of comet C/2023 A3 using an 80 mm apochromatic (apo) refractor equipped with a custom-built spectrometer with a resolution of R~2,500 on the night of 4 October 2024. Sodium D lines were detected prominently, while no other emission lines, particularly carbon-bearing species, were observed, which suggests that comet C/2023 A3 may be carbon-depleted. The mobility and flexibility of our observational setup highlight the value of amateur telescopes in observing low-altitude targets like C/2023 A3 as a complement to professional facilities.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.07361-b31b1b.svg)](https://arxiv.org/abs/2501.07361) | **Probing the sign-changeable interaction between dark energy and dark matter with DESI baryon acoustic oscillations and DES supernovae data**  |
|| T.-N. Li, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2025-01-14*|
|*Comments*| *10 pages, 3 figures*|
|**Abstract**|            There is a possibility of interaction between dark energy and dark matter, and this interaction may also undergo a sign change during the evolution of the universe. In this paper, we utilize the latest observational data to constrain models of a sign-changeable interaction. The data we employ, in addition to the cosmic microwave background data, also encompass the first-year baryon acoustic oscillation data from DESI and the type Ia supernova data of the full 5-year observation from DES. To achieve high generality, we investigate four interacting dark energy (IDE) models with different forms of the interaction term $Q$: (i) IDE1 with $Q = \beta(a)H\rho_{\rm de}$; (ii) IDE2 with $Q = \beta(a)H\rho_{\rm c}$; (iii) IDE3 with $Q = \beta(a)H_0\rho_{\rm de}$; (iv) IDE4 with $Q = \beta(a)H_0\rho_{\rm c}$. From the analysis, we observe that $\beta(z) > 0$ at early times and $\beta(z) < 0$ at late times, with the coupling $\beta(z)$ crossing the non-interacting line $\beta(z) = 0$ during cosmic evolution at the 2$\sigma$ confidence level for the IDE1, IDE3, and IDE4 models. However, for the IDE2 model, $\beta(z)$ remains consistently negative and does not cross $\beta(z) = 0$ at the 2$\sigma$ confidence level. Our findings indicate that the energy transfer is from dark matter to dark energy when dark matter dominates the universe, and from dark energy to dark matter when dark energy dominates, for the IDE1 and IDE3 models. Furthermore, Bayesian evidence suggests that the IDE1 and IDE3 models are moderately preferred over the $\Lambda$CDM model. The overall outcomes of this study clearly indicate that, based on current observational data, the sign-changeable IDE models are quite compelling and merit further attention.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2501.07362-b31b1b.svg)](https://arxiv.org/abs/2501.07362) | **Science objectives of the Einstein Probe mission**  |
|| W. Yuan, et al. -- incl., <mark>J. Li</mark>, <mark>J. Liu</mark> |
|*Appeared on*| *2025-01-14*|
|*Comments*| *67 pages, 24 figures, accepted for publication in SCIENCE CHINA Physics, Mechanics & Astronomy*|
|**Abstract**|            The Einstein Probe (EP) is an interdisciplinary mission of time-domain and X-ray astronomy. Equipped with a wide-field lobster-eye X-ray focusing imager, EP will discover cosmic X-ray transients and monitor the X-ray variability of known sources in 0.5-4 keV, at a combination of detecting sensitivity and cadence that is not accessible to the previous and current wide-field monitoring missions. EP can perform quick characterisation of transients or outbursts with a Wolter-I X-ray telescope onboard. In this paper, the science objectives of the Einstein Probe mission are presented. EP is expected to enlarge the sample of previously known or predicted but rare types of transients with a wide range of timescales. Among them, fast extragalactic transients will be surveyed systematically in soft X-rays, which include {\gamma}-ray bursts and their variants, supernova shock breakouts, and the predicted X-ray transients associated with binary neutron star mergers. EP will detect X-ray tidal disruption events and outbursts from active galactic nuclei, possibly at an early phase of the flares for some. EP will monitor the variability and outbursts of X-rays from white dwarfs, neutron stars and black holes in our and neighbouring galaxies at flux levels fainter than those detectable by the current instruments, and is expected to discover new objects. A large sample of stellar X-ray flares will also be detected and characterised. In the era of multi-messenger astronomy, EP has the potential of detecting the possible X-ray counterparts of gravitational wave events, neutrino sources, and ultra-high energy {\gamma}-ray and cosmic ray sources. EP is expected to help advance the studies of extreme objects/phenomena and their underlying physical processes revealed in the dynamic X-ray universe, as well as studies in other areas of X-ray astronomy.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [8]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2501.07151/./figures/Flum.png', 'tmp_2501.07151/./figures/qt_y_age.png', 'tmp_2501.07151/./figures/qe.png']
copying  tmp_2501.07151/./figures/Flum.png to _build/html/
copying  tmp_2501.07151/./figures/qt_y_age.png to _build/html/
copying  tmp_2501.07151/./figures/qe.png to _build/html/
exported in  _build/html/2501.07151.md
    + _build/html/tmp_2501.07151/./figures/Flum.png
    + _build/html/tmp_2501.07151/./figures/qt_y_age.png
    + _build/html/tmp_2501.07151/./figures/qe.png
found figures ['tmp_2501.07559/././figs/w0wa_NONOISE_1x2ptE.png', 'tmp_2501.07559/././figs/w0wa_NOISE_3x2pt.png', 'tmp_2501.07559/././figs/nz_diagram_final.png']
copying  tmp_2501.07559/././figs/w0wa_NONOISE_1x2ptE.png to _build/html/
copying  tmp_2501.07559/././figs/w0wa_NOISE_3x2pt.png to _build/html/
copying  tmp_2501.07559/././figs/nz_diagram_final.png to _build/html/
exported in  _build/html/2501.07559.md
    + _build/html/tmp_2501.07559/././figs/w0wa_NONOISE_1x2ptE.png
    + _build/htm

## Display the papers

Not necessary but allows for a quick check.

In [9]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\LZ}[1]{{\color{red} #1}}$
$\newcommand{\revision}[1]{{\color{black} #1}}$
$\newcommand{\ap}[1]{{\color{cyan} #1}}$
$\newcommand{\kms}{km s^{-1}}$
$\newcommand{\dgr}{^\circ}$
$\newcommand{\kmsM}{km s^{-1} Mpc^{-1}}$
$\newcommand{\Msun}{M_\odot}$
$\newcommand{\Msunpcsq}{M_\odot pc^{-2}}$
$\newcommand{\Msunpccube}{M_\odot pc^{-3}}$
$\newcommand{\Lsun}{L_\odot}$
$\newcommand{\Lsunpcsq}{L_\odot pc^{-2}}$
$\newcommand{\Lsunpccube}{L_\odot pc^{-3}}$
$\newcommand{\MLsun}{M_\odot/L_\odot}$
$\newcommand{\magarcsq}{\mathrm{mag arcsec^{-2}}}$</div>



<div id="title">

# The diverse physical origins of stars in the dynamically hot bulge: CALIFA vs. IllustrisTNG

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2501.07151-b31b1b.svg)](https://arxiv.org/abs/2501.07151)<mark>Appeared on: 2025-01-14</mark> -  _18 pages, 15 figures_

</div>
<div id="authors">

L. Zhang, et al. -- incl., <mark>A. Pillepich</mark>

</div>
<div id="abstract">

**Abstract:** We compare the internal stellar structures of central galaxies in the TNG50 and TNG100 simulations and field galaxies in the CALIFA survey. The luminosity fractions of the dynamically cold, warm, and hot components in both TNG50 and TNG100 galaxies exhibit general consistency with those observed in CALIFA galaxies. For example, they all exhibit a minimum luminosity fraction ( $f_{\rm hot} \sim$ 0.18) of the dynamically hot component in galaxies with stellar masses of $M_*\sim 1-2 \times 10^{10}$ $\Msun$ , and the morphology of each orbital component in the TNG50 and TNG100 galaxies closely resembles that found in the CALIFA galaxies. We therefore use the simulations to quantify the physical origins of the different components, focusing on the dynamically hot component in TNG50. We identify three primary regimes and thus physical processes: (1) in low mass galaxies ( $M_*\lesssim 10^{10}$ $\Msun$ ) that have not experienced major mergers, stars are born with a wide range of circularity distributions and have remained relatively unchanged until the present day. Consequently, hot stars in such galaxies at redshift $z = 0$ are predominantly born hot. (2) In higher mass galaxies ( $M_*\gtrsim 10^{10}$ $\Msun$ ) lacking major mergers, most stars are initially born cold but are subsequently heated through secular evolution. (3) In galaxies across the entire mass range, mergers, if they occurred, significantly increased the hot orbital fraction. As a result, the dynamically hot bulge within $R_e$ of present-day galaxies does not indicate their past merger histories; instead, the hot stars in the outer regions are mostly heated or accreted by mergers, thus indicating galaxy merger history. Massive galaxies are initially born with cold, rotationally supported structures, consistent with recent observations from the James Webb Space Telescope (JWST) regarding high-redshift galaxies.

</div>

<div id="div_fig1">

<img src="tmp_2501.07151/./figures/Flum.png" alt="Fig7" width="100%"/>

**Figure 7. -** Comparison of luminosity fractions of four orbital components among CALIFA, TNG50, and TNG100 galaxies at $z\sim 0$. The trend across the four panels each represents the luminosity fraction of the cold, warm, hot, and CR components as functions of stellar mass. Each red solid curves represent the CALIFA galaxies from [ and Zhu (2018)](), and the associated error bars represent the $1 \sigma$ uncertainties. The black and blue curves represent the mean values of luminosity fraction as a function of stellar mass for TNG50 and TNG100 galaxies, respectively. The solid lines are for the whole sample and the dashed lines for the sample matched CALIFA in mass, size, and sSFR. The shaded areas represent the corresponding 1$\sigma$ scatters, indicating that $68\%$ of the galaxies fall within these regions. Both TNG50 and TNG100 broadly replicate the luminosity fractions of the four components and their dependence on stellar mass, as observed in the CALIFA galaxies. (*fig:Flum*)

</div>
<div id="div_fig2">

<img src="tmp_2501.07151/./figures/qt_y_age.png" alt="Fig3" width="100%"/>

**Figure 3. -** The luminosity fraction of hot orbits heated by secular evolution $f_{\rm hot,secular-heated}$ vs. the cosmic time when the galaxy quenched (upper panel), and vs. the average stellar age of this group of stars (lower panel). All TNG50 massive galaxies (stellar mass $M_*\ge 10^{10.5}$\Msun) with quiescent histories (merger ratio of < 1:10) are included. For galaxies that are still with star formation at $z=0$, we set quench time to be the age of the universe (13.8 Gyr). The Pearson correlation coefficients of the two panels are labeled. Galaxies quenched at an earlier stage are more significantly heated by secular processes, while the average ages of their stars exhibit a weaker correlation. (*fig:qt_y_age*)

</div>
<div id="div_fig3">

<img src="tmp_2501.07151/./figures/qe.png" alt="Fig8" width="100%"/>

**Figure 8. -** The intrinsic flattening ($q_{\rm R_e}$) of each component as functions of the galaxy's stellar mass $M_*$. The three columns, from left to right, represent the cold, warm, and hot components, respectively. Line styles and colors are same as \ref{fig:Flum}. (*fig:qe*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2501.07151"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\orcid}[1]$
$\newcommand{\arraystretch}{1.5}$
$\newcommand{\arraystretch}{1}$</div>



<div id="title">

# $\Euclid$: Optimising tomographic redshift binning for 3$\times$2pt power spectrum constraints on dark energy$\thanks{This paper is published on behalf of the Euclid Consortium.}$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2501.07559-b31b1b.svg)](https://arxiv.org/abs/2501.07559)<mark>Appeared on: 2025-01-14</mark> -  _Euclid Consortium paper. 28 pages, 17 figures. For submission to A&A_

</div>
<div id="authors">

J. H. W. Wong, et al. -- incl., <mark>K. Jahnke</mark>

</div>
<div id="abstract">

**Abstract:** The tomographic approach to analyse the 3 $\times$ 2pt signal involves dividing the observed galaxy sample into a configuration of redshift bins. We present a simulation-based method to explore the optimum tomographic binning strategy for _Euclid_ , focusing on the expected configuration of its first major data release (DR1). To do this, we 1) simulate a _Euclid_ -like observation and generate mock shear catalogues from multiple realisations of the 3 $\times$ 2pt fields on the sky, and 2) measure the 3 $\times$ 2pt Pseudo- $C_{\ell}$ power spectra for a given tomographic configuration and derive the constraints that they place on the standard dark energy equation of state parameters $(w_{0},w_{a})$ . For a simulation including Gaussian-distributed photometric redshift uncertainty and shape noise under a $\Lambda$ CDM cosmology, we find that bins equipopulated with galaxies yield the best constraints on $(w_{0},w_{a})$ for an analysis of the full 3 $\times$ 2pt signal, or the angular clustering component only. For the cosmic shear component, the optimum $(w_{0},w_{a})$ constraints are achieved by bins equally spaced in fiducial comoving distance. However, the advantage with respect to alternative binning choices is only a few percent in the size of the $1 \sigma (w_{0},w_{a})$ contour, and we conclude that the cosmic shear is relatively insensitive to the binning methodology. We find that the information gain extracted on $(w_{0},w_{a})$ for any 3 $\times$ 2pt component starts to saturate at $\gtrsim$ 7--8 bins. Any marginal gains resulting from a greater number of bins is likely to be limited by additional uncertainties present in a real measurement, and the increasing demand for accuracy of the covariance matrix. Finally, we consider a $5\%$ contamination from catastrophic photometric redshift outliers and find that, if these errors are not mitigated in the analysis, the bias induced in the 3 $\times$ 2pt signal for 10 equipopulated bins results in dark energy constraints that are inconsistent with the fiducial $\Lambda$ CDM cosmology at $>5 \sigma$ .

</div>

<div id="div_fig1">

<img src="tmp_2501.07559/././figs/w0wa_NONOISE_1x2ptE.png" alt="Fig3" width="100%"/>

**Figure 3. -** The areas enclosed by the $1 \sigma$(Blue) and $2 \sigma$ contours in the $(w_{0}, w_{a})$ plane, for different numbers of redshift bins used in a tomographic analysis of the cosmic shear signal measured from 400 realisations of our simulation. We show in circular markers joined with solid lines the areas measured for the equipopulated binning choice; in square markers joined with dashed lines the equal comoving distance bins; and in triangular markers joined with dotted lines the equal redshift width binning choice. Since the cosmic shear component alone is relatively weakly constraining, the 1 bin measurement does not yield a closed contour in $(w_{0}, w_{a})$ within the ranges of the parameter grid. Hence, the data point for this case represents a lower bound of the true value, which we represent by using a vertical arrow. (*fig:w0wa_NONOISE_1x2ptE*)

</div>
<div id="div_fig2">

<img src="tmp_2501.07559/././figs/w0wa_NOISE_3x2pt.png" alt="Fig8" width="100%"/>

**Figure 8. -** The contour areas enclosed by the 1 and 2 $\sigma$ constraints on the dark energy $(w_{0}, w_{a})$ parameters, measured for the full 3$\times$2pt signal in the presence of Gaussian shape noise and photo-$z$ uncertainty. We vary the number of redshift bins used for each of the equipopulated, equally spaced in fiducial comoving distance, and equal redshift width binning strategies. (*fig:w0wa_NOISE_3x2pt*)

</div>
<div id="div_fig3">

<img src="tmp_2501.07559/././figs/nz_diagram_final.png" alt="Fig2" width="100%"/>

**Figure 2. -** Illustration of the method used to simulate a mock weak lensing survey. 2D maps of the correlated 3$\times$2pt fields are generated at finely-sampled points in redshift. These 2D maps are used to approximate the full 3D cosmological information of the 3$\times$2pt signal. From the overdensity fields, we Poisson sample a galaxy population that traces the underlying $n(z)$ distribution. We then assign the correlated weak lensing observables to each galaxy from the shear field values at the galaxy's angular position on the sky at a given redshift. We show an early approximation of the _Euclid_ DR1 footprint at the bottom of the figure which we have used to create our _Euclid_-like simulations. The `observed' region is shown in yellow. (The actual _Euclid_ DR1 footprint will be significantly different to that shown here.) (*fig:nz_diagram*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2501.07559"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

307  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

14  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [13]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

5  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
