# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Figures shipped with arXiv sources can be very large: raise Pillow's
# pixel-count limit so that opening them is not refused.
Image.MAX_IMAGE_PIXELS = 1_000_000_000

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category reserved for affiliation-related issues."""

class AffiliationError(RuntimeError):
    """Error raised when the affiliation check rejects a paper."""

def validation(source: str):
    """Check the affiliations of a paper before its TeX code gets parsed.

    Intended to be used as a pre-parsing hook (it is passed as the
    ``validation`` argument of ``latex.LatexDocument``).

    :param source: TeX source handed over by the caller
    :raises AffiliationError: if ``mpia.affiliation_verifications`` returns
        anything other than ``True``; the returned value is reported as the
        reason of the failure
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # `check` is expected to be a message; str() keeps a non-string value
        # (e.g., False or None) from turning this into a TypeError.
        raise AffiliationError("mpia.affiliation_verifications: " + str(check))

        
# Always report AffiliationWarning, even if the same warning was already
# issued before (warnings are otherwise de-duplicated by default).
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g., ``2411.00091``)
    :returns: HTML snippet (usable in markdown) embedding the QR code image
    """
    from urllib.parse import quote

    # The arXiv page is passed as a query-string value: percent-encode it so
    # that the QR code encodes the bare URL (no stray quotes around it).
    data = quote(f"https://arxiv.org/abs/{paper_id}", safe='')
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # the whole `src` value must sit inside a single pair of quotes
    txt = f'<img src="{url}{data}">'
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Remove non-western encoding commands from a string

    Strips the LaTeX ``CJK`` / ``CJK*`` environment wrappers
    (``\\begin{CJK}{<encoding>}{<family>} ... \\end{CJK}``), whatever the
    encoding and font family, and keeps the text they enclose.
    List may need to grow.

    :param text: the text to clean
    :return: the cleaned text
    """
    pattern = (r"\\begin\{CJK\*?\}"      # environment start, starred or not
               r"(?:\[[^\]]*\])?"        # optional [font encoding] argument
               r"\{[^{}]*\}\{[^{}]*\}"   # {encoding}{family}
               r"(.*?)"                  # enclosed text, kept as is
               r"\\end\{CJK\*?\}")
    # DOTALL: the enclosed text may span several lines
    return re.sub(pattern, r"\1", text, flags=re.DOTALL)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last one is reduced to its initial (hyphenated tokens
    keep their hyphen, e.g., ``Hans-Walter`` -> ``H.-W.``); the last token is
    kept in full. A parenthesized part (often a non western name) is moved,
    unchanged, to the end of the result.

    :param name: full name
    :returns: initials (an empty string for an empty or blank name)
    """
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.search(r"\((.*?)\)", name)
        # an opening parenthesis without its closing one is left untouched
        if found is not None:
            suffix = found.group(1)
            name = name.replace(f"({suffix})", '')
    tokens = name.split()
    initials = []
    for token in tokens[:-1]:
        # drop the empty chunks produced by stray hyphens ("Jean-", "--")
        parts = [part for part in token.split('-') if part]
        if parts:
            initials.append('-'.join(part[0] + '.' for part in parts))
    if tokens:
        initials.append(tokens[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff (*Mitarbeiter*) list from the mpia.de website to get the author names.
We then fetch all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Rejects the names of IT, administration and technical staff. The test is
    a plain substring match against a fixed list of names.

    :param name: name
    :returns: False if name is not a scientist, True otherwise
    """
    non_scientists = (
        'Licht', 'Binroth', 'Witzel', 'Jordan',
        'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
        'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
        'Dehen', 'Beckmann', 'Jager', 'Jäger',
    )
    return not any(fragment in name for fragment in non_scientists)

def add_author_to_list(author_list: list) -> list:
    """ Make sure a fixed set of extra authors is part of the list

    The given list is updated in place and returned.

    :param author_list: list of authors
    :returns: updated list of authors (same object as `author_list`)
    """
    extra_authors = ('T. Henning',)
    missing = [author for author in extra_authors if author not in author_list]
    author_list.extend(missing)
    return author_list

# Author names of reference:
#  - taken from the MPIA website (mpia.get_mpia_mitarbeiter_list() already
#    does some filtering), keeping only the entries accepted by
#    filter_non_scientists;
#  - completed with the authors whose name on papers is inconsistent with
#    their MPIA name.
mpia_authors = add_author_to_list(
    [entry[1]
     for entry in mpia.get_mpia_mitarbeiter_list()
     if filter_non_scientists(entry[1])]
)

In [4]:
# New papers, as returned by `get_new_papers`
new_papers = get_new_papers()
# add manual references: each identifier listed here is fetched individually
# and appended to the list of new papers (none by default)
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    """ Best-effort call of `fn` on `value`

    :param fn: callable, invoked as ``fn(value, *args, **kwargs)``
    :param value: first argument of `fn`, also used as fallback result
    :returns: the result of the call, or `value` unchanged if the call raised
    """
    try:
        outcome = fn(value, *args, **kwargs)
    except Exception:
        # deliberate fallback: keep the input when it cannot be processed
        outcome = value
    return outcome

# Keep the papers having at least one author matching the MPIA author list.
candidates = []
for paperk in new_papers:
    # Check author list with their initials
    # (robust_call: a name that cannot be processed is kept as is)
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # An entry containing 'mark' counts as a match -- presumably the <mark>
    # tag added by highlight_authors_in_list (TODO confirm).
    # NOTE(review): `orig` is never used, only the emptiness of `matches` is.
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    # the author list of the paper is replaced by the highlighted version,
    # whether or not the paper is selected
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

K. El-Badry  ->  K. El-Badry  |  ['K. El-Badry']
H.-W. Rix  ->  H.-W. Rix  |  ['H.-W. Rix']
R. Hviding  ->  R. Hviding  |  ['R. Hviding']
H. Linz  ->  H. Linz  |  ['H. Linz']
T. Henning  ->  T. Henning  |  ['T. Henning']
M. Flock  ->  M. Flock  |  ['M. Flock']
R. Zhang  ->  R. Zhang  |  ['R. Zhang']
J. Li  ->  J. Li  |  ['J. Li']
K. El-Badry  ->  K. El-Badry  |  ['K. El-Badry']
Arxiv has 54 new papers today
          6 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Process every candidate: get its source, check the affiliations, parse the
# TeX code and generate the markdown summary.
#   documents: (paper_id, markdown text) of the papers processed successfully
#   failed:    (paper, reason) of the papers rejected or that raised an error
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalize the identifier: lowercase, no 'arxiv:' prefix, no surrounding
    # whitespace. The normalized value is written back into `paper`.
    # NOTE(review): r'\n' is a raw string, i.e., it removes a literal
    # backslash followed by 'n', not a newline character -- confirm intent.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        # the source is only retrieved if its folder does not exist yet
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` is given as a hook; an AffiliationError coming out
            # of this call is recorded as a failure and the paper is skipped
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is built but never used
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well:
        # when the number of parsed authors differs from the arXiv record,
        # the (already highlighted) arXiv author list is used instead
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # fall back on the arXiv abstract if none was extracted
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # Comment line of the summary: badge, appearance date and, if any,
        # the arXiv comments in italics
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations (best effort: on failure the issue is printed and
        # the markdown text is kept without the replacement)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # any other error: warn, record the failure and go on with the next paper
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/6 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2411.00088


extracting tarball to tmp_2411.00088...

 done.


bad escape \o at position 11


Retrieving document from  https://arxiv.org/e-print/2411.00091


extracting tarball to tmp_2411.00091...

 done.
  1: tmp_2411.00091/aassymbols.tex, 579 lines
  2: tmp_2411.00091/pucha2024.tex, 1,015 lines



  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


Found 152 bibliographic references in tmp_2411.00091/pucha2024.bbl.
Issues with the citations
syntax error in line 697: '=' expected
Retrieving document from  https://arxiv.org/e-print/2411.00277


extracting tarball to tmp_2411.00277...

 done.


H. Linz  ->  H. Linz  |  ['H. Linz']
T. Henning  ->  T. Henning  |  ['T. Henning']
M. Flock  ->  M. Flock  |  ['M. Flock']


Unable to locate Ghostscript on paths


Retrieving document from  https://arxiv.org/e-print/2411.00333
extracting tarball to tmp_2411.00333...

 done.
Retrieving document from  https://arxiv.org/e-print/2411.00619


extracting tarball to tmp_2411.00619...

 done.
Retrieving document from  https://arxiv.org/e-print/2411.00654


extracting tarball to tmp_2411.00654...

 done.


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
# Log of the day: one markdown file listing the successful papers and the
# failed ones (grouped by failure reason); everything is displayed as well.
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"
# the log directory may not exist yet (e.g., fresh build tree)
os.makedirs(os.path.dirname(logfile), exist_ok=True)


# explicit encoding: titles and abstracts may contain non-ASCII characters
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sorted by reason so that identical reasons share a single heading
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections in green, any other failure in red
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # every failure is displayed (and logged); restrict the display to
        # color == 'red' to only show the important errors here
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2411.00091-b31b1b.svg)](https://arxiv.org/abs/2411.00091) | **Tripling the Census of Dwarf AGN Candidates Using DESI Early Data**  |
|| R. Pucha, et al. -- incl., <mark>R. Hviding</mark> |
|*Appeared on*| *2024-11-04*|
|*Comments*| *35 pages, 22 figures, Submitted to AAS Journals, Comments are welcome*|
|**Abstract**|            Using early data from the Dark Energy Spectroscopic Instrument (DESI) survey, we search for AGN signatures in 410,757 line-emitting galaxies. By employing the BPT emission-line ratio diagnostic diagram, we identify AGN in 75,928/296,261 ($\approx$25.6%) high-mass ($\log (M_{\star}/\rm M_{\odot}) >$ 9.5) and 2,444/114,496 ($\approx$2.1%) dwarf ($\log (M_{\star}/\rm M_{\odot}) \leq$ 9.5) galaxies. Of these AGN candidates, 4,181 sources exhibit a broad H$\alpha$ component, allowing us to estimate their BH masses via virial techniques. This study more than triples the census of dwarf AGN as well as that of intermediate-mass black hole (IMBH; $M_{\rm BH} \le 10^6~\rm M_{\odot}$) candidates, spanning a broad discovery space in stellar mass (7 $< \log (M_{\star}/\rm M_{\odot}) <$ 12) and redshift (0.001 $< \rm z <$ 0.45). The observed AGN fraction in dwarf galaxies ($\approx$2.1%) is nearly four times higher than prior estimates, primarily due to DESI's smaller fiber size, which enables the detection of lower luminosity dwarf AGN candidates. We also extend the $M_{\rm BH}$ - $M_{\star}$ scaling relation down to $\log (M_{\star}/\rm M_{\odot}) \approx$ 8.5 and $\log (M_{\rm BH}/M_{\odot}) \approx$ 4.4, with our results aligning well with previous low-redshift studies. The large statistical sample of dwarf AGN candidates from current and future DESI releases will be invaluable for enhancing our understanding of galaxy evolution at the low-mass end of the galaxy mass function.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2411.00333-b31b1b.svg)](https://arxiv.org/abs/2411.00333) | **Multi-Layer Perceptron for Predicting Galaxy Parameters (MLP-GaP): stellar masses and star formation rates**  |
|| X. Guo, G. Fang, H. Feng, <mark>R. Zhang</mark> |
|*Appeared on*| *2024-11-04*|
|*Comments*| *13 pages, 6 figures, 3 tables. Accepted in Research in Astronomy and Astrophysics*|
|**Abstract**|            The large-scale imaging survey will produce massive photometric data in multi-bands for billions of galaxies. Defining strategies to quickly and efficiently extract useful physical information from this data is mandatory. Among the stellar population parameters for galaxies, their stellar masses and star formation rates (SFRs) are the most fundamental. We develop a novel tool, \textit{Multi-Layer Perceptron for Predicting Galaxy Parameters} (MLP-GaP), that uses a machine-learning (ML) algorithm to accurately and efficiently derive the stellar masses and SFRs from multi-band catalogs. We first adopt a mock dataset generated by the \textit{Code Investigating GALaxy Emission} (CIGALE) for training and testing datasets. Subsequently, we used a multi-layer perceptron model to build MLP-GaP and effectively trained it with the training dataset. The results of the test performed on the mock dataset show that MLP-GaP can accurately predict the reference values. Besides MLP-GaP has a significantly faster processing speed than CIGALE. To demonstrate the science-readiness of the MLP-GaP, we also apply it to a real data sample and compare the stellar masses and SFRs with CIGALE. Overall, the predicted values of MLP-GaP show a very good consistency with the estimated values derived from SED fitting. Therefore, the capability of MLP-GaP to rapidly and accurately predict stellar masses and SFRs makes it particularly well-suited for analyzing huge amounts of galaxies in the era of large sky surveys.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2411.00619-b31b1b.svg)](https://arxiv.org/abs/2411.00619) | **The Flattest Infrared Extinction Curve in Four Isolated Dense Molecular Cloud Cores**  |
|| <mark>J. Li</mark>, et al. |
|*Appeared on*| *2024-11-04*|
|*Comments*| *Accepted for publication in The Astrophysical Journal Letters (15 pages, 8 figures, 3 tables)*|
|**Abstract**|            The extinction curve of interstellar dust in the dense molecular cloud cores is crucial for understanding dust properties, particularly size distribution and composition. We investigate the infrared extinction law in four nearby isolated molecular cloud cores, L429, L483, L673, and L1165, across the 1.2 - 8.0 $\mu$m wavelength range, using deep near-infrared (NIR) and mid-infrared (MIR) photometric data from UKIDSS and Spitzer Space Telescope. These observations probe an unprecedented extinction depth, reaching $A_V\sim$ 40-60 mag in these dense cloud cores. We derive color-excess ratios $E(K-\lambda)/E(H-K)$ by fitting color-color diagrams of $(K-\lambda)$ versus $(H-K)$, which are subsequently used to calculate the extinction law $A_\lambda/A_K$. Our analysis reveals remarkably similar and exceptionally flat infrared extinction curves for all four cloud cores, exhibiting the most pronounced flattening reported in the literature to date. This flatness is consistent with the presence of large dust grains, suggesting significant grain growth in dense environments. Intriguingly, our findings align closely with the Astrodust model for a diffuse interstellar environment proposed by Hensley \& Draine. This agreement between dense core observations and a diffuse medium model highlights the complexity of dust evolution and the need for further investigation into the processes governing dust properties in different interstellar environments.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2411.00654-b31b1b.svg)](https://arxiv.org/abs/2411.00654) | **A Fast, Analytic Empirical Model of the Gaia Data Release 3 Astrometric Orbit Catalog Selection Function**  |
|| C. Y. Lam, <mark>K. El-Badry</mark>, J. D. Simon |
|*Appeared on*| *2024-11-04*|
|*Comments*| *24 pages, 14 figures, 2 appendices. Submitted to ApJ, comments welcome. Selection function code will be available here: this https URL*|
|**Abstract**|            In June 2022, the Gaia mission released a catalog of astrometric orbital solutions for 168,065 binary systems, by far the largest such catalog to date. Unlike previous binary stars catalogs, which were heterogeneous collections of orbits from different surveys and instruments, these orbits were derived using Gaia data alone. Despite this homogeneity, the selection function is difficult to characterize because of choices made in the construction of the catalog. Understanding the catalog's selection function is required to model and interpret its contents. We use a combination of analytic and empirical prescriptions to construct a function that computes the probability that a binary with a given set of properties would have been published in the Gaia Data Release 3 astrometric orbit catalog. We also construct a binary population synthesis model based on Moe & Di Stefano (2017) to validate our characterization of the selection function, finding good agreement with the actual Gaia NSS catalog, with the exception of the orbital eccentricity distribution. The NSS catalog suggests high-eccentricity orbits are relatively uncommon at intermediate periods $100 \lesssim P_{orb} \lesssim 1000$ days. As an example application of the selection function, we estimate the Gaia DR3 detection probabilities of the star + BH binaries Gaia BH1, BH2, and BH3. We also estimate the population of Sun-like star + BH binaries in the Galaxy to be $\sim 5000$ for $100 < P_{orb} < 400$ day, $\lesssim 2,000$ for $400 < P_{orb} < 1000$ day, and $ \lesssim 20,000$ for $1000 < P_{orb} < 2000$ days.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2411.00277-b31b1b.svg)](https://arxiv.org/abs/2411.00277) | **Dust mass in protoplanetary disks with porous dust opacities**  |
|| Y. Liu, et al. -- incl., <mark>H. Linz</mark>, <mark>T. Henning</mark>, <mark>M. Flock</mark> |
|*Appeared on*| *2024-11-04*|
|*Comments*| *11 pages, 7 figures, Accepted for publication in A&A*|
|**Abstract**|            ALMA surveys have suggested that protoplanetary disks are not massive enough to form the known exoplanet population, under the assumption that the millimeter continuum emission is optically thin. In this work, we investigate how the mass determination is influenced when the porosity of dust grains is considered in radiative transfer models. The results show that disks with porous dust opacities yield similar dust temperature, but systematically lower millimeter fluxes compared to disks incorporating compact dust grains. Moreover, we recalibrate the relation between dust temperature and stellar luminosity for a wide range of stellar parameters, and calculate the dust masses of a large sample of disks using the traditionally analytic approach. The median dust mass from our calculation is about 6 times higher than the literature result, and this is mostly driven by the different opacities of porous and compact grains. A comparison of the cumulative distribution function between disk dust masses and exoplanet masses show that the median exoplanet mass is about 2 times lower than the median dust mass, if grains are porous, and there are no exoplanetary systems with masses higher than the most massive disks. Our analysis suggests that adopting porous dust opacities may alleviate the mass budget problem for planet formation. As an example illustrating the combined effects of optical depth and porous dust opacities on the mass estimation, we conduct new IRAM/NIKA-2 observations toward the IRAS 04370+2559 disk and perform a detailed radiative transfer modeling of the spectral energy distribution. The best-fit dust mass is roughly 100 times higher than the value from the traditionally analytic calculation. Future spatially resolved observations at various wavelengths are required to better constrain the dust mass.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error Unable to locate Ghostscript on paths</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2411.00088-b31b1b.svg)](https://arxiv.org/abs/2411.00088) | **A generative model for Gaia astrometric orbit catalogs: selection functions for binary stars, giant planets, and compact object companions**  |
|| <mark>K. El-Badry</mark>, et al. -- incl., <mark>H.-W. Rix</mark> |
|*Appeared on*| *2024-11-04*|
|*Comments*| *22 pages, 17 figures, accepted to OJAp. Code at this https URL*|
|**Abstract**|            Astrometry from Gaia DR3 has produced a sample of $\sim$170,000 Keplerian orbital solutions, with many more anticipated in the next few years. These data have enormous potential to constrain the population of binary stars, giant planets, and compact objects in the Solar neighborhood. But in order to use the published orbit catalogs for statistical inference, it is necessary to understand their selection function: what is the probability that a binary with a given set of properties ends up in a catalog? We show that such a selection function for the Gaia DR3 astrometric binary catalog can be forward-modeled from the Gaia scanning law, including individual 1D astrometric measurements, the fitting of a cascade of astrometric models, and quality cuts applied in post-processing. We populate a synthetic Milky Way model with binary stars and generate a mock catalog of astrometric orbits. The mock catalog is quite similar to the DR3 astrometric binary sample, suggesting that our selection function is a sensible approximation of reality. Our fitting also produces a sample of spurious astrometric orbits similar to those found in DR3; these are mainly the result of scan angle-dependent astrometric biases in marginally resolved wide binaries. We show that Gaia's sensitivity to astrometric binaries falls off rapidly at high eccentricities, but only weakly at high inclinations. We predict that DR4 will yield $\sim 1$ million astrometric orbits, mostly for bright ($G \lesssim 15$) systems with long periods ($P_{\rm orb} \gtrsim 1000$ d). We provide code to simulate and fit realistic Gaia epoch astrometry for any data release and determine whether any hypothetical binary would receive a cataloged orbital solution.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error bad escape \o at position 11</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    The figures referenced in `md` (markdown ``[Fig...](path)`` links and
    ``<img src="path" .../>`` tags) are copied below `directory`, keeping
    their relative path; online figures and missing files are skipped. The
    markdown text itself is written to ``directory/md_fname``.

    :param md: markdown text to export
    :param md_fname: name of the markdown file to write
    :param directory: destination directory (created if needed)
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs: the destination may be nested (e.g., '_build/html/')
        os.makedirs(directory, exist_ok=True)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # the relative path of the figure is reproduced below `directory`
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
    md_path = os.path.join(directory, md_fname)
    # explicit encoding: the text may contain non-ASCII characters
    with open(md_path, 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", md_path)
    # every figure found is listed, including the ones that were skipped
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# One markdown file per successfully processed paper, named after its
# identifier, exported (with its figures) into the build directory.
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2411.00091/./07_agnfrac_mstar.png', 'tmp_2411.00091/./11_mbh-mstar.png', 'tmp_2411.00091/./04_bpt_all.png']
copying  tmp_2411.00091/./07_agnfrac_mstar.png to _build/html/
copying  tmp_2411.00091/./11_mbh-mstar.png to _build/html/
copying  tmp_2411.00091/./04_bpt_all.png to _build/html/
exported in  _build/html/2411.00091.md
    + _build/html/tmp_2411.00091/./07_agnfrac_mstar.png
    + _build/html/tmp_2411.00091/./11_mbh-mstar.png
    + _build/html/tmp_2411.00091/./04_bpt_all.png


## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render the markdown text of every document; the trailing ';' hides the
# list of None values produced by the comprehension.
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\todo}[1]{\noindent \textcolor{red}{{ [TODO:~{#1}]}}}$
$\newcommand$
$\newcommand$
$\newcommand$
$\newcommand{\lya}{\textrm{Ly}\ensuremath{\alpha}}$
$\newcommand{\cii}{\textrm{C}\textsc{ii}]}$
$\newcommand{\ciii}{\textrm{C}\textsc{iii}]}$
$\newcommand{\civ}{\textrm{C}\textsc{iv}}$
$\newcommand{\ha}{\textrm{H}\ensuremath{\alpha}}$
$\newcommand{\hb}{\textrm{H}\ensuremath{\beta}}$
$\newcommand{\hgam}{\textrm{H}\ensuremath{\gamma}}$
$\newcommand{\oi}{[\textrm{O} \textsc{i}]}$
$\newcommand{\oii}{[\textrm{O} \textsc{ii}]}$
$\newcommand{\oiilam}{[\textrm{O}~\textsc{ii}]\ensuremath{\lambda\lambda}3726,3729}$
$\newcommand{\oiii}{[\textrm{O}\textsc{iii}]}$
$\newcommand{\nii}{[\textrm{N}\textsc{ii}]}$
$\newcommand{\sii}{[\textrm{S}\textsc{ii}]}$
$\newcommand{\mgii}{\textrm{Mg}\textsc{ii}}$
$\newcommand{\oiiilam}{[\textrm{O}\textsc{iii}]\ensuremath{\lambda}5007}$
$\newcommand{\niilam}{[\textrm{N}\textsc{ii}]\ensuremath{\lambda}6584}$
$\newcommand{\nevlam}{[\textrm{Ne}\textsc{iv}]\ensuremath{\lambda}3425}$
$\newcommand{\oilam}{[\textrm{O}\textsc{i}]\ensuremath{\lambda}6300}$
$\newcommand{\siilam}{[\textrm{S}\textsc{ii}]\ensuremath{\lambda\lambda}6717,6731}$
$\newcommand{\niilamlam}{[\textrm{N}\textsc{ii}]\ensuremath{\lambda\lambda}6548,6584}$
$\newcommand{\zspec}{z_{\rm spec}}$
$\newcommand{\mbh}{{M}\ensuremath{_{\rm BH}}}$
$\newcommand{\mstar}{{M}\ensuremath{_{\star}}}$
$\newcommand{\msun}{{\rm M}\ensuremath{_{\odot}}}$
$\newcommand{\logmass}{\ensuremath{\log (\mstar/\msun)}}$
$\newcommand{\logmbh}{\ensuremath{\log (\mbh/\msun)}}$
$\newcommand{\ergscmsq}{\textrm{ergs s^{-1} cm^{-2}}}$
$\newcommand{\imtxt}[1]{\textcolor{magenta}{#1}}$
$\newcommand{\imsout}[1]{\textcolor{magenta}{\sout{#1}}}$
$\newcommand{\deg}{^{\circ}}$</div>



<div id="title">

# Tripling the Census of Dwarf AGN Candidates Using DESI Early Data

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2411.00091-b31b1b.svg)](https://arxiv.org/abs/2411.00091)<mark>Appeared on: 2024-11-04</mark> -  _35 pages, 22 figures, Submitted to AAS Journals, Comments are welcome_

</div>
<div id="authors">

Ragadeepika~Pucha, et al.

</div>
<div id="abstract">

**Abstract:** Using early data from the Dark Energy Spectroscopic Instrument (DESI) survey, we search for AGN signatures in 410,757 line-emitting galaxies. By employing the BPT emission-line ratio diagnostic diagram, we identify AGN in 75,928/296,261 ( $\approx$ 25.6 \% ) high-mass ( $\logmass >$ 9.5) and 2,444/114,496 ( $\approx$ 2.1 \% ) dwarf ( $\logmass \leq$ 9.5) galaxies. Of these AGN candidates, 4,181 sources exhibit a broad $\ha$ component, allowing us to estimate their BH masses via virial techniques. This study more than triples the census of dwarf AGN as well as that of intermediate-mass black hole (IMBH; $\mbh \le 10^6 \msun$ ) candidates, spanning a broad discovery space in stellar mass (7 $< \logmass <$ 12) and redshift (0.001 $< \rm z <$ 0.45). The observed AGN fraction in dwarf galaxies ( $\approx$ 2.1 \% ) is nearly four times higher than prior estimates, primarily due to DESI's smaller fiber size, which enables the detection of lower luminosity dwarf AGN candidates. We also extend the $\mbh - \mstar$ scaling relation down to $\logmass \approx$ 8.5 and $\logmbh \approx$ 4.4, with our results aligning well with previous low-redshift studies. The large statistical sample of dwarf AGN candidates from current and future DESI releases will be invaluable for enhancing our understanding of galaxy evolution at the low-mass end of the galaxy mass function.

</div>

<div id="div_fig1">

<img src="tmp_2411.00091/./07_agnfrac_mstar.png" alt="Fig12" width="100%"/>

**Figure 12. -** BPT-AGN Fraction as a function of stellar mass: The fraction of BPT-AGN candidates in line-emitting galaxies is shown as red squares, while the fraction of BPT-AGN candidates considering all galaxies is shown as black circles. _ Right:_ BPT-AGN Fraction as a function of stellar mass in the dwarf galaxy regime. The overall observed BPT-AGN fraction from our study and \citet{Reines+2013} are shown as a dashed-dotted red line and dashed pink line, respectively. (*fig:agnfrac-mstar*)

</div>
<div id="div_fig2">

<img src="tmp_2411.00091/./11_mbh-mstar.png" alt="Fig14" width="100%"/>

**Figure 14. -** $\mbh - $\mstar$$ scaling relation of _ confident_ BL-AGN candidates (_ Left_) and all BL-AGN candidates(_ Right_), excluding point sources. The color bar displays the number of sources within each bin of the 2D distribution. In each of the panels, our empirical fits for confident and all BL-AGN candidates are plotted as solid and dashed red lines, respectively. The fits from \citet{Reines&Volonteri2015} and \citet{Suh+2020} are shown as black and orange lines, respectively. (*fig:mbh-mstar*)

</div>
<div id="div_fig3">

<img src="tmp_2411.00091/./04_bpt_all.png" alt="Fig10" width="100%"/>

**Figure 10. -** BPT $\oi$ii/$\hb$ vs $\nii$/$\ha$ narrow-line diagnostic diagram for high-mass ($\logmass > 9.5$; _ Top_) and dwarf ($\logmass \leq 9.5$; _ Bottom_) galaxies, divided as NL (_ Left_) and BL (_ Right_) candidates. The solid line in all the panels is from \citet{Kauffmann+2003}, which separates the pure star-forming sources and those with AGN contribution. The dashed line represents the "maximum starburst line" using stellar photoionization models \citep{Kewley+2001}. (*fig:bpt_all*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2411.00091"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# List the markdown exports whose filesystem timestamp is at most `days` days
# old, ordered by file name in descending order.
# NOTE: `days`, `res` and `npub` are reassigned by the next cell, which selects
# the publications from their "Appeared on" date instead.
files = glob('_build/html/*.md')
days = 7
# Timezone-aware reference instant, taken once: subtracting a UTC-derived
# timestamp from a naive local "now" skews the age by the local UTC offset.
now = datetime.now(tz=timezone.utc)
max_age = timedelta(days=days)
res = []
for fk in files:
    # st_ctime is the metadata-change time on Unix and the creation time on
    # Windows (see the os.stat_result documentation).
    changed = datetime.fromtimestamp(os.stat(fk).st_ctime, tz=timezone.utc)
    if now - changed <= max_age:
        res.append(fk)
# glob() returns each path once, so a descending sort on the names yields the
# same order as reversing an ascending sort.
res = sorted(res, reverse=True)
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")

238  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: iterable of ``(fname, date)`` pairs. ``date`` is compared as
                text against ``str(date.today() - timedelta(days))``, i.e. a
                ``YYYY-MM-DD`` string, so it must use that same format.
    :param days: how many days back from today are kept (cutoff day included)
    :returns: generator over the ``fname`` of the selected pairs, ordered by
              decreasing ``date``
    """
    # Function-local import: another cell of this notebook binds the global
    # name `datetime` to the *class* (`from datetime import datetime`), which
    # would break `datetime.date.today()` depending on cell execution order.
    from datetime import date as _date, timedelta as _timedelta

    # The cutoff does not depend on the item: compute it once.
    cutoff = str(_date.today() - _timedelta(days=days))
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date >= cutoff:
            yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date of each file

    Each file is read line by line up to the first line containing
    ``Appeared on``; files without such a line contribute nothing.

    :param lst_file: iterable of paths of the files to scan
    :returns: list of ``(fname, date)`` tuples, where ``date`` is the stripped
              text found between ``Appeared on:`` and ``</mark>`` on that line
    """

    def get_date(line):
        """ Isolate the date text from an "Appeared on" line """
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument (not over a notebook-level global list).
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # only the first occurrence matters
                    break
    return dates

from glob import glob
# Select the publications from the "Appeared on" date written in each markdown
# export. `dates`, `res` and `npub` are reused by the cells that follow.
# Pattern aligned with the `*.md` used by the previous cell: `*md` would also
# pick up names that merely end in "md".
lst = glob('_build/html/*.md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

9  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML snippet of a carousel made of `npub` placeholder slides

    :param npub: number of slides; slide ids run from ``slide1`` to
                 ``slide<npub>``
    :returns: HTML text, one line per element, joined with newlines
    """
    # Opening tag (spread over two lines) carrying the data-flickity settings.
    opening = ('  <div class="carousel" \n'
               """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""")
    cells = [
        f'    <div class="carousel-cell"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>'
        for idx in range(1, npub + 1)
    ]
    return '\n'.join([opening, *cells, "  </div>"])

def create_grid(npub=4):
    """ Build the HTML snippet of a flat grid made of `npub` placeholder slides

    :param npub: number of grid items; slide ids run from ``slide1`` to
                 ``slide<npub>``
    :returns: HTML text, one line per element, joined with newlines
    """
    items = [
        f'    <div class="grid-item"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>'
        for idx in range(1, npub + 1)
    ]
    # The opening tag keeps its trailing blank.
    return '\n'.join(['  <div class="grid"> ', *items, "  </div>"])

In [13]:
# Render the 7-day archive page: one carousel slide per selected document.
carousel = create_carousel(npub)
# Quoted, comma-separated lists handed over to the template: the file names
# (directory part dropped) and the ids of the slides receiving them.
docs = ', '.join(f'"{path.rsplit("/", 1)[-1]}"' for path in res)
slides = ', '.join('"slide{}"'.format(idx) for idx in range(1, npub + 1))

# Placeholder -> replacement text, applied in this order.
substitutions = (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "7-day archives"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
)

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
for placeholder, value in substitutions:
    page = page.replace(placeholder, value)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Same page as the 7-day archive, restricted to the last day.
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
# Quoted, comma-separated lists handed over to the template: the file names
# (directory part dropped) and the ids of the slides receiving them.
docs = ', '.join(f'"{path.rsplit("/", 1)[-1]}"' for path in res)
slides = ', '.join('"slide{}"'.format(idx) for idx in range(1, npub + 1))

# Placeholder -> replacement text, applied in this order.
substitutions = (
    ("{%-- carousel:s --%}", carousel),
    ("{%-- suptitle:s --%}", "Daily"),
    ("{%-- docs:s --%}", docs),
    ("{%-- slides:s --%}", slides),
)

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
for placeholder, value in substitutions:
    page = page.replace(placeholder, value)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

1  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice

# Number of grid slots; it drives both the selection and the generated slides.
npub = 6
# Most recent appearance date first. reversed(sorted(...)) is kept as is so
# that entries sharing a date come out in the same order as before.
# The selection is capped with `npub` rather than a second literal, so the
# documents and the slides cannot get out of sync when the number is changed.
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), npub)]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
# Quoted, comma-separated lists handed over to the template: the file names
# (directory part dropped) and the ids of the slides receiving them.
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()

# Fill the template placeholders.
page = page.replace("{%-- grid-content:s --%}", grid)\
           .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
           .replace("{%-- docs:s --%}", docs)\
           .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
