# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big: set Pillow's pixel-count threshold to 1e9
# so that large figures can still be opened.
Image.MAX_IMAGE_PIXELS = 1_000_000_000

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category dedicated to affiliation issues."""

class AffiliationError(RuntimeError):
    """Raised by :func:`validation` when the affiliation check does not pass."""

def validation(source: str):
    """Check the affiliations of a paper before its TeX code is parsed.

    Meant to be given as the ``validation`` callback of
    :class:`latex.LatexDocument`, so that papers failing the check are
    rejected before any parsing happens.

    :param source: source handed over by the parser (presumably the TeX
                   source text -- confirm against ``latex.LatexDocument``)
    :raises AffiliationError: if :func:`mpia.affiliation_verifications` returns
                              anything other than ``True``; the returned value
                              is appended to the error message.
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # `check` is normally an explanatory message; str() keeps the intended
        # AffiliationError (instead of a TypeError) for any other return value.
        raise AffiliationError("mpia.affiliation_verifications: " + str(check))

        
# Never silence repeated affiliation warnings: report every occurrence.
warnings.simplefilter(action='always', category=AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g. ``2405.18086``)
    :returns: HTML snippet: a ``<div id="qrcode">`` wrapping the QR-code image
    """
    from urllib.parse import quote

    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # The arXiv link is the value of the `data` query parameter: percent-encode
    # it, and quote the whole `src` attribute (not only the link) so that the
    # HTML is well formed and the QR code does not embed literal double quotes.
    target = quote(f"https://arxiv.org/abs/{paper_id}", safe="")
    return f'<div id="qrcode"><img src="{url}{target}"></div>'

## get list of arxiv paper candidates

We use the MPIA staff (Mitarbeiter) list webpage from mpia.de to get author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Rejects any name containing one of the listed surnames (IT,
    administration and technical staff). The test is a plain substring match.

    :param name: name to check
    :returns: False if name contains an excluded surname, True otherwise
    """
    excluded_surnames = (
        'Wolf', 'Licht', 'Binroth', 'Witzel', 'Jordan',
        'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
        'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
        'Dehen', 'Beckmann', 'Jager', 'Jäger',
    )
    return not any(surname in name for surname in excluded_surnames)

def add_author_to_list(author_list: list) -> list:
    """ Append the manually maintained extra authors that are not listed yet

    The list is updated in place and also returned.

    :param author_list: list of authors
    :returns: updated list of authors (same object as `author_list`)
    """
    extra_authors = ('T. Henning',)
    missing = [name for name in extra_authors if name not in author_list]
    author_list.extend(missing)
    return author_list

# Build the reference author list:
#  1. take the names (second field of each entry) from the MPIA website list
#     (mpia.get_mpia_mitarbeiter_list() already does some filtering),
#  2. drop the non-scientists,
#  3. add authors whose MPIA name and name on papers are inconsistent.
mpia_authors = add_author_to_list(
    list(filter(filter_non_scientists,
                (entry[1] for entry in mpia.get_mpia_mitarbeiter_list())))
)

In [4]:
new_papers = get_new_papers()
# add manual references (identifiers given to get_paper_from_identifier)
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # A matched author is wrapped in a <mark> tag. Test for the tag itself: a
    # bare 'mark' substring also selects unrelated names such as "J. Newmark".
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if '<mark>' in hl]
    # keep the highlighted names on the paper: they are reused when rendering
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print(f"Arxiv has {len(new_papers):,d} new papers today")
print(f"          {len(candidates):,d} with possible author matches")

K. El-Badry  ->  K. El-Badry  |  ['K. El-Badry']
E. Schinnerer  ->  E. Schinnerer  |  ['E. Schinnerer']
E. Bañados  ->  E. Bañados  |  ['E. Bañados']
K. Jahnke  ->  K. Jahnke  |  ['K. Jahnke']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
Arxiv has 54 new papers today
          4 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non-trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Parse every candidate paper and build its markdown summary.
#   documents: (paper_id, markdown text) pairs of the papers that went through
#   failed:    (paper, reason) pairs of the papers that were rejected or crashed
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalise the identifier: lowercase, no 'arxiv:' prefix, no surrounding
    # whitespace.
    # NOTE(review): r'\n' is a raw string, i.e. it removes a literal backslash
    # followed by 'n', not a newline character -- confirm this is intended.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        # Only retrieve the source when its folder does not exist yet
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` raises AffiliationError when the affiliation check fails
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # Not an error of the pipeline: record the reason and skip the paper.
            # NOTE(review): `msg` is built but never used (nor printed).
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well:
        # when the number of parsed authors differs from the arXiv listing,
        # use the arXiv author list (already highlighted in the previous cell)
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # Fall back on the arXiv abstract when none was extracted from the source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # Comment line: arXiv badge, appearance date and, when available, the
        # arXiv comments in italics.
        # NOTE(review): the " - " separator is added even without comments.
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations (best effort: on failure the error is printed and
        # the markdown text is kept unchanged)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other failure: warn, record the paper as failed and carry on
        # with the next candidate.
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/4 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2405.17560


extracting tarball to tmp_2405.17560...

 done.
Retrieving document from  https://arxiv.org/e-print/2405.18086
extracting tarball to tmp_2405.18086... done.


E. Schinnerer  ->  E. Schinnerer  |  ['E. Schinnerer']


list index out of range
Retrieving document from  https://arxiv.org/e-print/2405.18126
extracting tarball to tmp_2405.18126...

 done.


Found 190 bibliographic references in tmp_2405.18126/main.bbl.
syntax error in line 1194: '=' expected
Retrieving document from  https://arxiv.org/e-print/2405.18228


extracting tarball to tmp_2405.18228...

 done.
  0: tmp_2405.18228/aassymbols.tex, 579 lines
  1: tmp_2405.18228/natnotes.tex, 332 lines
  2: tmp_2405.18228/NGC6517_Pulsars.tex, 916 lines
  3: tmp_2405.18228/natbib.tex, 96 lines



  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# The logs folder may not exist yet (e.g. first run on a clean build tree);
# without it `open` below fails with FileNotFoundError.
os.makedirs(os.path.dirname(logfile), exist_ok=True)

# Explicit utf-8: author names and abstracts routinely contain non-ASCII
# characters, which a platform-dependent default encoding may reject.
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write(f"* Arxiv had {len(new_papers):,d} new papers\n")
    logs.write(f"    * {len(candidates):,d} with possible author matches\n\n")
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers for which a document was generated
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sort by reason so that papers sharing a reason end up under one heading
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are expected (green); anything else is a
        # processing failure (red)
        color = 'green' if 'affiliation' in reason else 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')

        # every failure is displayed here as well as written to the log file
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2405.18086-b31b1b.svg)](https://arxiv.org/abs/2405.18086) | **A$^3$COSMOS: Measuring the cosmic dust-attenuated star formation rate density at $4 < z < 5$**  |
|| B. Magnelli, et al. -- incl., <mark>E. Schinnerer</mark> |
|*Appeared on*| *2024-05-29*|
|*Comments*| *Accepted for publication in A&A; 14 pages and 8 figures*|
|**Abstract**|            [Abridged] In recent years, conflicting results have provided an uncertain view of the dust-attenuated properties of $z>4$ star-forming galaxies (SFGs). To solve this, we used the deepest data publicly available in COSMOS to build a mass-complete ($>10^{9.5}\,M_{\odot}$) sample of SFGs at $4<z<5$ and measured their dust-attenuated properties by stacking all archival ALMA band 6 and 7 observations available. Combining this information with their rest-frame ultraviolet emission from the COSMOS2020 catalog, we constrained the IRX ($\equiv L_{\rm IR}/L_{\rm UV}$)--$\beta_{\rm UV}$, IRX--$M_\ast$, and SFR--$M_\ast$ relations at $z\sim4.5$. Finally, using these relations and the stellar mass function of SFGs at $z\sim4.5$, we inferred the unattenuated and dust-attenuated SFRD at this epoch. SFGs at $z\sim4.5$ follow an IRX--$\beta_{\rm UV}$ relation that is consistent with that of local starbursts, while they follow a steeper IRX--$M_\ast$ relation than observed locally. The grain properties of dust in these SFGs seems thus similar to those in local starbursts but its mass and geometry result in lower attenuation in low-mass SFGs. SFGs at $z\sim4.5$ lie on a linear SFR--$M_\ast$ relation, whose normalization varies by 0.3 dex, when we exclude or include from our stacks the ALMA primary targets. The cosmic SFRD$(>M_\ast)$ converges at $M_\ast<10^{9}\,M_\odot$ and is dominated by SFGs with $M_\ast\sim10^{9.5-10.5}\,M_\odot$. The fraction of the cosmic SFRD that is attenuated by dust, ${\rm SFRD}_{\rm IR}(>M_\ast)/ {\rm SFRD}(>M_\ast)$, is $90\pm4\%$ for $M_\ast\,=\,10^{10}\,M_\odot$, $68\pm10\%$ for $M_\ast=10^{8.9}\,M_\odot$ (i.e., $0.03\times M^\star$; $M^\star$ being the characteristic stellar mass of SFGs) and this value converges to $60\pm10\%$ for $M_\ast=10^{8}\,M_\odot$. Even at this early epoch, the fraction of the cosmic SFRD that is attenuated by dust remains thus significant.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2405.18126-b31b1b.svg)](https://arxiv.org/abs/2405.18126) | **Euclid preparation. Observational expectations for redshift z<7 active galactic nuclei in the Euclid Wide and Deep surveys**  |
|| E. Collaboration, et al. -- incl., <mark>E. Bañados</mark>, <mark>K. Jahnke</mark> |
|*Appeared on*| *2024-05-29*|
|*Comments*| *36 pages, 21 figures, submitted to A&A*|
|**Abstract**|            We forecast the expected population of active galactic nuclei (AGN) observable in the Euclid Wide Survey (EWS) and Euclid Deep Survey (EDS). Starting from an X-ray luminosity function (XLF) we generate volume-limited samples of the AGN expected in the survey footprints. Each AGN is assigned an SED appropriate for its X-ray luminosity and redshift, with perturbations sampled from empirical distributions. The photometric detectability of each AGN is assessed via mock observation of the assigned SED. We estimate 40 million AGN will be detectable in at least one band in the EWS and 0.24 million in the EDS, corresponding to surface densities of 2.8$\times$10$^{3}$ deg$^{-2}$ and 4.7$\times$10$^{3}$ deg$^{-2}$. Employing colour selection criteria on our simulated data we select a sample of 4.8$\times$10$^{6}$ (331 deg$^{-2}$) AGN in the EWS and 1.7$\times$10$^{4}$ (346 deg$^{-2}$) in the EDS, amounting to 10% and 8% of the AGN detectable in the EWS and EDS. Including ancillary Rubin/LSST bands improves the completeness and purity of AGN selection. These data roughly double the total number of selected AGN to comprise 21% and 15% of the detectable AGN in the EWS and EDS. The total expected sample of colour-selected AGN contains 6.0$\times$10$^{6}$ (74%) unobscured AGN and 2.1$\times$10$^{6}$ (26%) obscured AGN, covering $0.02 \leq z \lesssim 5.2$ and $43 \leq \log_{10} (L_{bol} / erg s^{-1}) \leq 47$. With this simple colour selection, expected surface densities are already comparable to the yield of modern X-ray and mid-infrared surveys of similar area. The relative uncertainty on our expectation for detectable AGN is 6.7% for the EWS and 12.5% for the EDS, driven by the uncertainty of the XLF.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2405.17560-b31b1b.svg)](https://arxiv.org/abs/2405.17560) | **The Symbiotic X-ray Binary IGR J16194-2810: A Window on the Future Evolution of Wide Neutron Star Binaries From Gaia**  |
|| P. Nagarajan, <mark>K. El-Badry</mark>, C. Lam, H. Reggiani |
|*Appeared on*| *2024-05-29*|
|*Comments*| *20 pages, 13 figures, Submitted to PASP*|
|**Abstract**|            We present optical follow-up of IGR J16194-2810, a hard X-ray source discovered by the INTEGRAL mission. The optical counterpart is a $\sim500\,L_\odot$ red giant at a distance of $2.1$ kpc. We measured 16 radial velocities (RVs) of the giant over a period of $\sim 300$ days. Fitting these RVs with a Keplerian model, we find an orbital period of $P_{\rm orb} = 192.73 \pm 0.01$ days and a companion mass function $f(M_2) = 0.361 \pm 0.005 \,M_{\odot}$. We detect ellipsoidal variability with the same period in optical light curves from the ASAS-SN survey. Joint fitting of the RVs, light curves, and the broadband SED allows us to robustly constrain the masses of both components. We find a giant mass of $M_\star = 1.02\pm 0.01\,M_{\odot}$ and a companion mass of $M_{2} = 1.35^{+0.09}_{-0.07}\,M_{\odot}$, implying that the companion is a neutron star (NS). We recover a $4.06$-hour period in the system's TESS light curve, which we tentatively associate with the NS spin period. The giant does not yet fill its Roche lobe, suggesting that current mass transfer is primarily via winds. MESA evolutionary models predict that the giant will overflow its Roche lobe in $5$-$10$ Myr, eventually forming a recycled pulsar + white dwarf binary with a $\sim 900$ day period. IGR J16194-2810 provides a window on the future evolution of wide NS + main sequence binaries recently discovered via Gaia astrometry. As with those systems, the binary's formation history is uncertain. Before the formation of the NS, it likely survived a common envelope episode with a donor-to-accretor mass ratio $\gtrsim 10$ and emerged in a wide orbit. The NS likely formed with a weak kick ($v_{\rm kick}\lesssim 100\,\rm km\,s^{-1}$), as stronger kicks would have disrupted the orbit.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2405.18228-b31b1b.svg)](https://arxiv.org/abs/2405.18228) | **FAST Discovery of Eight Isolated Millisecond Pulsars in NGC 6517**  |
|| D. Yin, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2024-05-29*|
|*Comments*| *21 pages, 2 figures, accepted for publication in The Astrophysical Journal Letters*|
|**Abstract**|            We present the discovery of 8 isolated millisecond pulsars in Globular Cluster (GC) NGC 6517 using the Five-Hundred-meter Aperture Spherical radio Telescope (FAST). The spin periods of those pulsars (namely PSR J1801-0857K to R, or, NGC 6517K to R) are all shorter than 10 ms. With these discoveries, NGC 6517 is currently the GC with the most known pulsars in the FAST sky. The largest difference in dispersion measure of the pulsars in NGC 6517 is 11.2 cm$^{-3}$ pc, the second among all GCs. The fraction of isolated pulsars in this GC (16 of 17, 94$\%$) is consistent with previous studies indicating an overabundance of isolated pulsars in the densest GCs, especially in those undergoing cluster core collapse. Considering the FAST GC pulsar discoveries, we modeled the GC pulsar population using the empirical Bayesian method described by Turk and Lorimer with the recent counts. Using this approach, we find that the expected number of potential pulsars in GCs seems to be correlated with the central escape velocity, hence, the GCs Liller 1, NGC 6441, M54 (NGC 6715), and $\omega$-Cen (NGC 5139) are expected to host the largest numbers of pulsars.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    Local figures referenced in `md` are copied below `directory`, keeping
    their relative path; figures whose path contains 'http' are left alone.
    The markdown text itself is written (utf-8) to `directory/md_fname`.

    :param md: markdown text of the document
    :param md_fname: name of the markdown file to write
    :param directory: destination folder, created with its parents if missing
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs rather than mkdir: a nested destination such as
        # '_build/html/' must also work when its parent does not exist yet
        os.makedirs(directory, exist_ok=True)

    # figures come either as markdown links `[Fig...](path)` or `<img src="path" .../>` tags
    fig_fnames = (re.findall(r'\[Fig.*\]\((.*)\)', md) +
                  re.findall(r'\<img src="([^>\s]*)"[^>]*/>', md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, os.path.join(destdir, os.path.basename(fname)))
    # explicit utf-8: the text routinely contains non-ASCII characters
    with open(os.path.join(directory, md_fname), 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# Write one markdown file (and its local figures) per generated document
for paper_id, md in documents:
    export_markdown_summary(md, "{:s}.md".format(paper_id), '_build/html/')

exported in  _build/html/2405.18086.md
    + _build/html/tmp_2405.18086/./Fraction_SFRD_z_4.png
    + _build/html/tmp_2405.18086/./IRX_Beta_z_4.png
    + _build/html/tmp_2405.18086/./IRX_M_z_4.png
    + _build/html/tmp_2405.18086/./SFR_M_z_4.png
exported in  _build/html/2405.18126.md
    + _build/html/tmp_2405.18126/./figures/selectionsinfo.png
    + _build/html/tmp_2405.18126/./figures/lf_consistency_refereeupdate.png
    + _build/html/tmp_2405.18126/./figures/filter_sed_zevo.png


## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render the markdown text of every generated document, in order
display(*(Markdown(entry[1]) for entry in documents))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\acosmos}{A^3COSMOS }$
$\newcommand\natexlab{#1}$</div>



<div id="title">

# A$^3$COSMOS: Measuring the cosmic dust-attenuated star formation rate density at $4 < z < 5$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2405.18086-b31b1b.svg)](https://arxiv.org/abs/2405.18086)<mark>Appeared on: 2024-05-29</mark> -  _Accepted for publication in A&A; 14 pages and 8 figures_

</div>
<div id="authors">

B. Magnelli, et al. -- incl., <mark>E. Schinnerer</mark>

</div>
<div id="abstract">

**Abstract:** In recent years, conflicting results have provided an uncertain view of the dust-attenuated star-forming properties of $z\gtrsim4$ galaxies. To solve this, we need to accurately measure the mean dust-attenuated properties of star-forming galaxies (SFGs) at $4<z<5$ and therefore constrain the cosmic dust-attenuated star formation rate density (SFRD) of the Universe 1.3 Giga-years after the Big Bang. We used the deepest optical-to-near-infrared data publicly available in the Cosmic Evolution Survey (COSMOS) field to build a mass-complete ( $>10^{9.5} M_{\odot}$ ) sample of SFGs at $4<z<5$ .   Then, we measured their mean dust-attenuated properties (i.e., infrared luminosity, $\langle L_{\rm IR}\rangle$ ; dust-attenuated star formation rate, $\langle{\rm SFR}_{\rm IR}\rangle$ ) by dividing our sample in three stellar mass ( $M_\ast$ ) bins (i.e., $10^{9.5} < M_\ast/M_\odot<10^{10}$ , $10^{10} < M_\ast/M_\odot<10^{10.5}$ , and $10^{10.5} < M_\ast/M_\odot<10^{11.5}$ ) and by stacking in the $uv$ domain all archival Atacama Large Millimeter/submillimeter Array (ALMA) band 6 and 7 observations available for these galaxies.   Then, we combined this information with their mean rest-frame ultraviolet (UV) emission measured from the COSMOS2020 catalog (i.e., UV luminosity, $\langle L_{\rm UV}\rangle$ ; UV spectral slope, $\langle \beta_{\rm UV}\rangle$ ; and unattenuated SFR, $\langle{\rm SFR}_{\rm UV}\rangle$ ), and constrained the IRX ( $\equiv L_{\rm IR}/L_{\rm UV}$ )-- $\beta_{\rm UV}$ , IRX-- $M_\ast$ , and SFR-- $M_\ast$ relations at $z\sim4.5$ .   Finally, using these relations and the stellar mass function of SFGs at $z\sim4.5$ , we inferred the unattenuated and dust-attenuated SFRD at this epoch. SFGs follow an IRX-- $\beta_{\rm UV}$ relation that is consistent with that observed in local starbursts.   
Our measurements favors a steepening of the IRX-- $M_\ast$ relation at $z\sim4.5$ , compared to the redshift-independent IRX-- $M_\ast$ relation observed at $z\sim1-3$ .   Our galaxies lie on a linear SFR-- $M_\ast$ relation, whose normalization varies by 0.3 dex, when we exclude or include from our stacks the ALMA primary targets (i.e., sources within 3 $\arcsec$ from the ALMA phase center).   The cosmic SFRD $(>M_\ast)$ converges at $M_\ast\lesssim10^{9} M_\odot$ , with SFGs at $10^8<M_\ast/M_\odot<10^9$ contributing already less than 15 \% of the SFRD from all SFGs with $M_\ast>10^8 M_\odot$ .   The cosmic SFRD at $z\sim4.5$ is dominated by SFGs with a stellar mass of $10^{9.5-10.5} M_\odot$ .   Finally, the fraction of the cosmic SFRD that is attenuated by dust, ${\rm SFRD}_{\rm IR}(>M_\ast)/ {\rm SFRD}(>M_\ast)$ , is $90\pm4\%$ for $M_\ast = 10^{10} M_\odot$ , $68\pm10\%$ for $M_\ast=10^{8.9} M_\odot$ (i.e., $0.03\times M^\star$ ; $M^\star$ being the characteristic stellar mass of SFGs at this epoch) and this value converges to $60\pm10\%$ for $M_\ast=10^{8} M_\odot$ . A non-evolving IRX-- $\beta_{\rm UV}$ relation suggests that the grain properties (e.g., size distribution, composition) of dust in SFGs at $z\sim4.5$ are similar to those in local starbursts.   However, the mass and geometry of this dust result in lower attenuation in low-mass SFGs ( $\lesssim10^{10} M_\odot$ ) at $z\sim4.5$ than at $z\lesssim3$ .   Nevertheless, the fraction of the cosmic SFRD that is attenuated by dust remains significant ( $\sim68\pm10\%$ ) even at such an early cosmic epoch.

</div>

<div id="div_fig1">

<img src="tmp_2405.18086/./Fraction_SFRD_z_4.png" alt="Fig5" width="100%"/>

**Figure 5. -** Evolution with stellar mass of the cosmic SFRD at $4<z<5$.
      (_top left_) The fiducial SMF used in our calculations is represented by the gray line and shaded region.
      This is the combination of a Schechter and power-law function fit to the SMF of \citet[][gray and black circles]{Weaver.2023}.
      A Schechter function fit to these data points is shown by the gray dotted line.
      (_bottom left_) Cosmic SFRD (thick black line) and dust-attenuated SFRD (red line) above a given stellar mass, as inferred using the fiducial SMF.
      The dotted gray and red lines present the same quantities but using a simple Schechter SMF in our calculations.
      (_top right_) Fraction of the $M_\ast>10^{8} M_\odot$ SFRD (gray histogram) and dust-attenuated SFRD (red histogram) that is attributed to a given stellar mass bin.
      (_bottom right_) Fraction of the cosmic SFRD above a given stellar mass that is dust-attenuated (red region) and unattenuated (blue region).
      The dotted gray line present the same quantity but using a simple Schechter SMF in our calculations.
      In all panels, the vertical line represents the $0.03\times M^\star$ lower integration limits commonly used to infer the "total" cosmic SFRD \citep[e.g.,][]{Madau.2014}.
      All quantities in the bottom-left, top-right, and bottom-right panels were calculated using our fiducial SMF, IRX--$M_\ast$, MS relations.
      The propagation of the 1-$\sigma$ uncertainties on the SMF and the IRX--$M_\ast$ relation are represented by shaded regions.
      The uncertainties on the MS would move the black and red lines of the bottom-left panel up and down by 0.15 dex, but would have no effect on the other quantities displayed in this figure. (*fig:Frac. SFRD*)

</div>
<div id="div_fig2">

<img src="tmp_2405.18086/./IRX_Beta_z_4.png" alt="Fig4.1" width="50%"/><img src="tmp_2405.18086/./IRX_M_z_4.png" alt="Fig4.2" width="50%"/>

**Figure 4. -** Dust attenution properties of SFGs at $z\sim4.5$.
      (_left_) IRX--$\beta_{\rm UV}$ relation derived at $4<z<5$ by stacking all SFGs with a $\acosmos$ coverage (dark-green hexagons) or by stacking all SFGs except the ALMA primary targets (lime-green circles).
      Hexagons and circles with faded colors correspond to our $\langle L_{\rm UV}^{\rm phot.}\rangle$ and $\langle\beta_{\rm UV}^{\rm phot.}\rangle$ estimates, i.e., inferred from a COSMOS2020 photometry as opposed to their best-fit SED (see Sect. \ref{subsec:LUV}).
      For each stellar mass bin, the fraction of galaxies for which we were able to make such photometric measurement (see text for details) is given in the bottom part of the panel.
      In parenthesis, we provide the same number but after excluding the ALMA primary targets.
      The gray shaded squares present measurements for UV-selected SFGs at $z\sim4-8$ from \citet{Bowler.2024}.
      The black solid, dash-dotted, dashed, and dotted lines show the IRX--$\beta_{\rm UV}$ relation observed in local starbursts by \citet{Meurer.1999} and by \citet{Calzetti.2000}, an updated version for local galaxies \citep{Overzier.2011}, and an SMC-like dust attenuation relation, respectively.
      The orange dash-dot-dotted line shows the relation observed in a UV-selected sample of SFGs at $z\sim4.5$\citep{Fudamoto.2020}.
      (_right_) IRX--$M_{\ast}$ relation at $4<z<5$.
      Symbols are the same as in the left panel.
      Dark-green and lime-green solid lines are linear fits to these data points, while the associated shaded regions show the 1$\sigma$ uncertainties on these fits (i.e., the 16th to 84th ranges).
      The pink dotted line shows the redshift-independent IRX--$M_{\ast}$ relation derived by \citet{Pannella.2015} for $z\lesssim4$ SFGs.
      The black dash-dotted line shows the IRX--$M_{\ast}$ relation at $z\sim4$ inferred by \citet{Koprowski.2018}.
      The blue dashed line shows the IRX--$M_{\ast}$ relation at $z\sim3$ and $z\sim4.5$ found in \citet{Fudamoto.2019,Fudamoto.2020}.
       (*fig:IRX*)

</div>
<div id="div_fig3">

<img src="tmp_2405.18086/./SFR_M_z_4.png" alt="Fig1" width="100%"/>

**Figure 1. -** SFR--$M_{\ast}$ relation at $4<z<5$.
      Dark-gray hexagons show the dust-attenuated SFRs (i.e., SFR$_{\rm IR}$) inferred by stacking all SFGs with a $\acosmos$ coverage, while dark-green hexagons show their total SFRs, i.e., adding the contribution of their unattenuated SFRs measured in the UV (i.e., SFR$_{\rm UV}$; symbols are slightly offset along the stellar mass axis for clarity).
      Light-gray and lime-green circles display the same quantities but for our stacking analysis which excludes the ALMA primary targets (symbols are slightly offset along the stellar mass axis for clarity).
      The dark-green solid and red dotted lines represent the MS and its $\pm0.3 $dex dispersion from \citet{Schreiber.2015}, whose normalization happens to be perfectly consistent with our dark-green hexagons, i.e., MS$_{\rm All,  Fit}\equiv{\rm MS}_{\rm S15}$.
      The lime-green line is obtained by renormalizing the MS of \citet{Schreiber.2015} to fit our lime-green circles, i.e., MS$_{\rm Offcenter, Fit}\equiv{\rm MS}_{\rm S15}-0.3 $dex.
      For each stellar mass bin, we give the fraction of ALMA primary targets and in parenthesis their weights in our stacks.
      Finally, pink squares and diamonds show the total and dust-attenuated SFRs found in \citet{Khusanova.2021} by stacking in three stellar mass bins, ALMA observations of all (detected and undetected) $z\sim4.5$ SFGs from the ALMA ALPINE survey.
       (*fig:MS*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2405.18086"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\head}[2]{\multicolumn{1}{>{\centering\arraybackslash}p{#1}}{#2}}$
$\newcommand{\ebv}{{E(B-V)}}$
$\newcommand{\qsfit}{{\tt QSFIT}}$
$\newcommand{\red}{\textcolor{red}}$
$\newcommand{\green}{\textcolor{orange}}$
$\newcommand{\purp}{\textcolor{cyan}}$
$\newcommand{\orcid}[1]$
$\newcommand{\NUMDetectableIeEWS}{3.1 \times 10^{7} }$
$\newcommand{\NUMDetectableYeEWS}{2.2 \times 10^{7} }$
$\newcommand{\NUMDetectableJeEWS}{3.0 \times 10^{7} }$
$\newcommand{\NUMDetectableHeEWS}{3.5 \times 10^{7} }$
$\newcommand{\NUMDetectableOneFiltEWS}{4.0 \times 10^{7} }$
$\newcommand{\NUMDetectableAllFiltEWS}{2.1 \times 10^{7} }$
$\newcommand{\SDDetectableIeEWS}{2.2 \times 10^{3} }$
$\newcommand{\SDDetectableYeEWS}{1.5 \times 10^{3} }$
$\newcommand{\SDDetectableJeEWS}{2.1 \times 10^{3} }$
$\newcommand{\SDDetectableHeEWS}{2.4 \times 10^{3} }$
$\newcommand{\SDDetectableOneFiltEWS}{2.8 \times 10^{3} }$
$\newcommand{\SDDetectableAllFiltEWS}{1.4 \times 10^{3} }$
$\newcommand{\NUMDetectableIeEDS}{1.9 \times 10^{5} }$
$\newcommand{\NUMDetectableYeEDS}{1.6 \times 10^{5} }$
$\newcommand{\NUMDetectableJeEDS}{2.0 \times 10^{5} }$
$\newcommand{\NUMDetectableHeEDS}{2.3 \times 10^{5} }$
$\newcommand{\NUMDetectableOneFiltEDS}{2.4 \times 10^{5} }$
$\newcommand{\NUMDetectableAllFiltEDS}{1.6 \times 10^{5} }$
$\newcommand{\SDDetectableIeEDS}{3.8 \times 10^{3} }$
$\newcommand{\SDDetectableYeEDS}{3.2 \times 10^{3} }$
$\newcommand{\SDDetectableJeEDS}{4.0 \times 10^{3} }$
$\newcommand{\SDDetectableHeEDS}{4.5 \times 10^{3} }$
$\newcommand{\SDDetectableOneFiltEDS}{4.7 \times 10^{3} }$
$\newcommand{\SDDetectableAllFiltEDS}{3.1 \times 10^{3} }$
$\newcommand{\NUMSelectedEuclidOnlyTypeOneEWS}{4.8 \times 10^{6} }$
$\newcommand{\SDSelectedEuclidOnlyTypeOneEWS}{331 }$
$\newcommand{\CSelectedEuclidOnlyTypeOneEWS}{0.23}$
$\newcommand{\CTypeOneSelectedEuclidOnlyTypeOneEWS}{0.52}$
$\newcommand{\NUMSelectedEuclidLSSTTypeOneEWS}{5.7 \times 10^{6} }$
$\newcommand{\SDSelectedEuclidLSSTTypeOneEWS}{393 }$
$\newcommand{\CSelectedEuclidLSSTTypeOneEWS}{0.45}$
$\newcommand{\CTypeOneSelectedEuclidLSSTTypeOneEWS}{0.75}$
$\newcommand{\NUMSelectedEuclidLSSTAllEWS}{6.0 \times 10^{6} }$
$\newcommand{\SDSelectedEuclidLSSTAllEWS}{413 }$
$\newcommand{\CSelectedEuclidLSSTAllEWS}{0.51}$
$\newcommand{\CTypeOneSelectedEuclidLSSTAllEWS}{0.65}$
$\newcommand{\CTypeTwoSelectedEuclidLSSTAllEWS}{0.33}$
$\newcommand{\NUMSelectedEuclidOnlyTypeOneEDS}{1.7 \times 10^{4} }$
$\newcommand{\SDSelectedEuclidOnlyTypeOneEDS}{346 }$
$\newcommand{\CSelectedEuclidOnlyTypeOneEDS}{0.11}$
$\newcommand{\CTypeOneSelectedEuclidOnlyTypeOneEDS}{0.40}$
$\newcommand{\NUMSelectedEuclidLSSTTypeOneEDS}{2.0 \times 10^{4} }$
$\newcommand{\SDSelectedEuclidLSSTTypeOneEDS}{392 }$
$\newcommand{\CSelectedEuclidLSSTTypeOneEDS}{0.45}$
$\newcommand{\CTypeOneSelectedEuclidLSSTTypeOneEDS}{0.76}$
$\newcommand{\NUMSelectedEuclidLSSTAllEDS}{2.9 \times 10^{4} }$
$\newcommand{\SDSelectedEuclidLSSTAllEDS}{579 }$
$\newcommand{\CSelectedEuclidLSSTAllEDS}{0.32}$
$\newcommand{\CTypeOneSelectedEuclidLSSTAllEDS}{0.51}$
$\newcommand{\CTypeTwoSelectedEuclidLSSTAllEDS}{0.18}$</div>



<div id="title">

# $\Euclid$ preparation

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2405.18126-b31b1b.svg)](https://arxiv.org/abs/2405.18126)<mark>Appeared on: 2024-05-29</mark> -  _36 pages, 21 figures, submitted to A&A_

</div>
<div id="authors">

Euclid Collaboration, et al. -- incl., <mark>E. Bañados</mark>, <mark>K. Jahnke</mark>

</div>
<div id="abstract">

**Abstract:** We forecast the expected population of active galactic nuclei (AGN) observable in the Euclid Wide Survey (EWS) and Euclid Deep Survey (EDS). Starting from an X-ray luminosity function (XLF) we generate volume-limited samples of the AGN expected in the $\Euclid$ survey footprints. Each AGN is assigned an SED appropriate for its X-ray luminosity and redshift, with perturbations sampled from empirical distributions. The photometric detectability of each AGN is assessed via mock observation of the assigned SED. We estimate 40 million AGN will be detectable in at least one $\Euclid$ band in the EWS and 0.24 million in the EDS, corresponding to surface densities of $\SDDetectableOneFiltEWS$ deg $^{-2}$ and $\SDDetectableOneFiltEDS$ deg $^{-2}$. Employing $\Euclid$-only colour selection criteria on our simulated data we select a sample of $\NUMSelectedEuclidOnlyTypeOneEWS$ ( $\SDSelectedEuclidOnlyTypeOneEWS$ deg $^{-2}$ ) AGN in the EWS and $\NUMSelectedEuclidOnlyTypeOneEDS$ ( $\SDSelectedEuclidOnlyTypeOneEDS$ deg $^{-2}$ ) in the EDS, amounting to 10 \% and 8 \% of the AGN detectable in the EWS and EDS. Including ancillary Rubin/LSST bands improves the completeness and purity of AGN selection. These data roughly double the total number of selected AGN to comprise 21 \% and 15 \% of the $\Euclid$ detectable AGN in the EWS and EDS. The total expected sample of colour-selected AGN contains 6.0 $\times$ 10 $^{6}$ (74 \% ) unobscured AGN and 2.1 $\times$ 10 $^{6}$ (26 \% ) obscured AGN, covering $0.02 \leq z \lesssim 5.2$ and $43 \leq \log_{10} (L_{\rm bol} / {\rm erg}  {\rm s}^{-1}) \leq 47$. With this simple colour selection, expected surface densities are already comparable to the yield of modern X-ray and mid-infrared surveys of similar area. The relative uncertainty on our expectation for $\Euclid$ detectable AGN is 6.7 \% for the EWS and 12.5 \% for the EDS, driven by the uncertainty of the XLF.

</div>

<div id="div_fig1">

<img src="tmp_2405.18126/./figures/selectionsinfo.png" alt="Fig9" width="100%"/>

**Figure 9. -** Redshift distributions (top), redshift-dependent completeness (middle), and $L_{\rm bol}$-$z$ planes (bottom) for the selected AGN in the EWS (left) and EDS (right). AGN selected with *Euclid*-only photometric criteria (blue) and the total selected sample defined as the union ($\cup$) of all *Euclid* and ancillary $ugrz$ photometric criteria discussed in Sect. \ref{sec:euclidselection} are plotted for each panel. In all plots redshift is binned with width $\delta z = 0.5$. In the $L_{\rm bol}$-$z$ plane points represent the median, vertical lines represent $1\sigma$ standard deviation and horizontal lines represent the width of the redshift bin. (*fig:selectioninfo*)

</div>
<div id="div_fig2">

<img src="tmp_2405.18126/./figures/lf_consistency_refereeupdate.png" alt="Fig15" width="100%"/>

**Figure 15. -** Comparison of different AGN LFs homogenised to the 2--10 keV X-ray band. Corresponding absolute UV magnitudes at 1450 Å, $M_{1450}$, are displayed on the upper axes. In each panel the LFs are realised for the central redshift value. The hard X-ray LF of \citet{fotopoulou2016xlf} employed in this work is shown in black. The grey shaded regions depict the 1$\sigma$ uncertainty. For reference, we plot the \citet{fotopoulou2016xlf} XLF evaluated at $z=0.1$ as orange dotted lines in each panel. The hard XLF of \citet{ueda2014} is shown in green, with the green shaded regions corresponding to the 1$\sigma$ uncertainty generated with sampling from the published parameter uncertainties. The magenta lines portray the bolometric quasar LF of \citet{shen2020}, converted to the X-ray domain. In the final panel the \citet{jiang2016} $z>6$ SDSS quasar LF is represented by the blue curve. The solid orange curve gives the \citet{schindler2023} $z\sim6$ quasar LF derived from Pan-STARRS1 and SHELLQs observations. The red uncertainty interval represents eROSITA high-redshift constraints on the XLF \citep{wolf2021}. In all cases dashed curves and hatched uncertainty intervals indicate extrapolation. (*fig:lf_comparison*)

</div>
<div id="div_fig3">

<img src="tmp_2405.18126/./figures/filter_sed_zevo.png" alt="Fig6" width="100%"/>

**Figure 6. -** All SEDs (unobscured and obscured) assigned to AGN in this work, normalised at 1 $\mu$m. The redshift evolution of the effective wavelength for each *Euclid* filter over the redshift range probed in this work is depicted below as black lines. The seven SED classes in this figure are: Unobscured AGN ('Unobsc.'; blue), Passive ('PASS'; pink), Star-forming ('SFG'; brown), Starburst ('SB'; purple), High-luminosity obscured AGN ('QSO2'; orange), Seyfert 2 ('SEY2'; green), and Starburst-AGN composite ('SB-AGN'; red). (*fig:filter_sed_zevo*)

</div><div id="qrcode"><img src="https://api.qrserver.com/v1/create-qr-code/?size=100x100&data=https://arxiv.org/abs/2405.18126"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# List the markdown extractions whose files changed within the last `days` days.
files = glob('_build/html/*.md')
days = 7
# One timezone-aware reference time for every file: comparing a naive local
# "now" with a naive UTC timestamp would skew each age by the UTC offset.
now = datetime.now(timezone.utc)
max_age = timedelta(days=days)
res = []
for fk in files:
    # NOTE(review): st_ctime is the metadata-change time on Unix and the
    # creation time on Windows (see os.stat docs) -- confirm this is the
    # intended notion of "modified"; st_mtime may be what is wanted.
    changed = datetime.fromtimestamp(os.stat(fk).st_ctime, tz=timezone.utc)
    if now - changed <= max_age:
        res.append(fk)
# Keep the listing ordered by file name, in descending order.
res = sorted(res, reverse=True)
npub = len(res)
print(npub, f" publications files modified in the last {days:d} days.")

81  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: iterable of ``(fname, date)`` pairs; ``date`` is compared as a
                string against a ``YYYY-MM-DD`` cutoff
    :param days: how many days back from today a document may be dated
    :returns: generator over the selected ``fname``, latest date first
    """
    # Local import: at module level the name `datetime` is bound to the class
    # in one cell and to the module in another, so which one is visible
    # depends on the order the cells were run.
    from datetime import date as _date, timedelta as _timedelta

    # The cutoff does not change while iterating: compute it once.
    cutoff = str(_date.today() - _timedelta(days=days))
    for fname, doc_date in sorted(lst, key=lambda x: x[1], reverse=True):
        if doc_date >= cutoff:
            yield fname

def extract_appearance_dates(lst_file):
    """ Extract the appearance date recorded in each file

    Each file is read line by line up to the first line containing
    "Appeared on"; the text between "Appeared on:" and "</mark>" on that line
    is kept as the date. Files without such a line are left out of the result.

    :param lst_file: iterable of paths of the files to scan
    :returns: list of ``(fname, date)`` tuples, in the order of `lst_file`
    """
    def get_date(line):
        # keep what sits between the label and the closing </mark> tag
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument, not over a module-level list that happens
    # to exist when the function is called.
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    break  # only the first match of a file is used
    return dates

from glob import glob
# Read the appearance date of every generated markdown page and keep the
# pages dated within the last `days` days.
# Same '*.md' pattern as the file scan above ('*md' would also match any
# file whose name merely ends in "md").
lst = glob('_build/html/*.md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

5  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel holding `npub` slides

    The slides are numbered from 1; slide ``k`` is a ``carousel-cell`` wrapping
    a ``<div id="slide{k}" class="md_view">`` placeholder.

    :param npub: number of slides to create
    :returns: the markup as a single newline-joined string
    """
    opening = [
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    ]
    cells = [
        f"""    <div class="carousel-cell"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    closing = ["  </div>"]
    return '\n'.join(opening + cells + closing)

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid holding `npub` slides

    The slides are numbered from 1; slide ``k`` is a ``grid-item`` wrapping a
    ``<div id="slide{k}" class="md_view">`` placeholder.

    :param npub: number of slides to create
    :returns: the markup as a single newline-joined string
    """
    rows = ["""  <div class="grid"> """]
    rows.extend(
        f"""    <div class="grid-item"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    )
    rows.append("  </div>")
    return '\n'.join(rows)

In [13]:
# Build the 7-day archive page: one carousel slide per selected document.
carousel = create_carousel(npub)
# os.path.basename strips the directory whatever separator the platform
# uses, where splitting on '/' only handles POSIX-style paths.
docs = ', '.join(f'"{os.path.basename(k)}"' for k in res)
slides = ', '.join(f'"slide{k}"' for k in range(1, npub + 1))

# Only the read needs the template file open.
with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Fill in the template placeholders.
page = page.replace("{%-- carousel:s --%}", carousel)\
           .replace("{%-- suptitle:s --%}",  "7-day archives" )\
           .replace("{%-- docs:s --%}", docs)\
           .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Redo the selection for the last day only and build the daily page.
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
# os.path.basename strips the directory whatever separator the platform
# uses, where splitting on '/' only handles POSIX-style paths.
docs = ', '.join(f'"{os.path.basename(k)}"' for k in res)
slides = ', '.join(f'"slide{k}"' for k in range(1, npub + 1))

# Only the read needs the template file open.
with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# Fill in the template placeholders.
page = page.replace("{%-- carousel:s --%}", carousel)\
           .replace("{%-- suptitle:s --%}",  "Daily" )\
           .replace("{%-- docs:s --%}", docs)\
           .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

3  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

# Upper bound on the number of papers shown in the grid.
max_pub = 6
# Latest appearance dates first. reversed(sorted(...)) is kept as is so that
# documents sharing a date come out in the same order as before.
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), max_pub)]
# Size the grid (and its title) on what was actually selected, so that slides
# and documents stay paired when fewer than `max_pub` documents exist.
npub = len(res)
print(f"{npub} publications selected (at most {max_pub} requested).")

grid = create_grid(npub)
# os.path.basename strips the directory whatever separator the platform
# uses, where splitting on '/' only handles POSIX-style paths.
docs = ', '.join(f'"{os.path.basename(k)}"' for k in res)
slides = ', '.join(f'"slide{k}"' for k in range(1, npub + 1))

# Only the read needs the template file open.
with open("grid_template.html", "r") as tpl:
    page = tpl.read()

# Fill in the template placeholders.
page = page.replace("{%-- grid-content:s --%}", grid)\
           .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
           .replace("{%-- docs:s --%}", docs)\
           .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
