# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Remove non-western encoded characters from a string
    List may need to grow.
    
    :param text: the text to clean
    :return: the cleaned text
    """
    text = re.sub(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})", r"\2", text)
    return text


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName
    :param name: full name
    :returns: initials
    """
    initials = []
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        suffix = re.findall(r"\((.*?)\)", name)[0]
        name = name.replace(f"({suffix})", '')
    else:
        suffix = ''
    split = name.split()
    for token in split[:-1]:
        if '-' in token:
            current = '-'.join([k[0] + '.' for k in token.split('-')])
        else:
            current = token[0] + '.'
        initials.append(current)
    initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    removing IT, administration, technical staff
    :param name: name
    :returns: False if name is not a scientist
    """
    remove_list = ['Licht', 'Binroth', 'Witzel', 'Jordan',
                   'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                   'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                   'Dehen', 'Beckmann', 'Jager', 'Jäger'
                  ]

    for k in remove_list:
        if k in name:
            return False
    return True

def add_author_to_list(author_list: list) -> list:
    """ Add author to list if not already in list
    
    :param author: author name
    :param author_list: list of authors
    :returns: updated list of authors
    """
    add_list = ['T. Henning']

    for author in add_list:
        if author not in author_list:
            author_list.append(author)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    try:
        return fn(value, *args, **kwargs)
    except Exception:
        return value

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

X. Zhang  ->  X. Zhang  |  ['X. Zhang']
A. Hughes  ->  A. Hughes  |  ['A. Hughes']
Arxiv has 91 new papers today
          2 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates):
    # debug crap
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/2 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2504.10032


extracting tarball to tmp_2504.10032...

 done.
Retrieving document from  https://arxiv.org/e-print/2504.10145


extracting tarball to tmp_2504.10145...

 done.


A. Hughes  ->  A. Hughes  |  ['A. Hughes']


Found 78 bibliographic references in tmp_2504.10145/main.bbl.


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2504.10145-b31b1b.svg)](https://arxiv.org/abs/2504.10145) | **Estimating the dense gas mass of molecular clouds using spatially unresolved 3 mm line observations**  |
|| A. Zakardjian, et al. -- incl., <mark>A. Hughes</mark> |
|*Appeared on*| *2025-04-15*|
|*Comments*| *18 pages, 16 figures, submitted to A&A*|
|**Abstract**|            We aim to develop a new method to infer the sub-beam probability density function (PDF) of H2 column densities and the dense gas mass within molecular clouds using spatially unresolved observations of molecular emission lines in the 3 mm band. We model spatially unresolved line integrated intensity measurements as the average of an emission function weighted by the sub-beam column density PDF. The emission function, which expresses the line integrated intensity as a function of the gas column density, is an empirical fit to high resolution (< 0.05 pc) multi-line observations of the Orion B molecular cloud. The column density PDF is assumed to be parametric, composed of a lognormal distribution at moderate column densities and a power law distribution at higher column densities. To estimate the sub-beam column density PDF, the emission model is combined with a Bayesian inversion algorithm (the Beetroots code), which takes account of thermal noise and calibration errors. We validate our method by demonstrating that it recovers the true column density PDF of the Orion B cloud, reproducing the observed emission line integrated intensities. We apply the method to 12CO(J=1-0), 13CO(J=1-0), C18O(J=1-0), HCN(J=1-0), HCO+(J=1-0) and N2H+(J=1-0) observations of a 700 x 700 pc2 field of view (FoV) in the nearby galaxy M51. On average, the model reproduces the observed intensities within 30%. The column density PDFs obtained for the spiral arm region within our test FoV are dominated by a power-law tail at high column densities, with slopes that are consistent with gravitational collapse. Outside the spiral arm, the column density PDFs are predominantly lognormal, consistent with supersonic isothermal turbulence. We calculate the mass associated with the powerlaw tail of the column density PDFs and observe a strong, linear correlation between this mass and the 24$\mu$m surface brightness.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2504.10032-b31b1b.svg)](https://arxiv.org/abs/2504.10032) | **Broadband Polarized Radio Emission Detected from Starlink Satellites Below 100 MHz with NenuFAR**  |
|| <mark>X. Zhang</mark>, et al. |
|*Appeared on*| *2025-04-15*|
|*Comments*| **|
|**Abstract**|            This study evaluates the impact of Starlink satellites on low-frequency radio astronomy below 100 MHz, focusing on challenges on data processing and scientific goals. We conducted 40 hours of imaging observations using NenuFAR, in the 30.8-78.3 MHz range. Observations included both targeted tracking of specific satellites based on orbital predictions and untargeted searches focused on high-elevation regions of the sky. Images in total intensity and polarimetry were obtained, and full Stokes dynamic spectra were generated for several hundred directions within the Field of View. Detected signals were cross-matched with satellite orbital data to confirm satellite associations. Detailed analyses of the observed spectra, polarization, and temporal characteristics were performed to investigate the origin and properties of the detected emissions. We detected broadband emissions from Starlink satellites, predominantly between 54-66 MHz, with flux densities exceeding 500 Jy. These signals are highly polarized and unlikely to originate from ground-based RFI or reflected astronomical sources. Instead, they are likely intrinsic to the satellites, with distinct differences in emission properties observed between satellite generations. These findings highlight significant challenges to data processing and scientific discoveries at these low frequencies, emphasizing the need for effective mitigation strategies, particularly through collaboration between astronomers and satellite operators.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [8]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2504.10145/./figures/N0_sigv_emission_model.jpg', 'tmp_2504.10145/./figures/degen_sigma.jpg', 'tmp_2504.10145/./figures/degen_sigma_rthres.jpg']
copying  tmp_2504.10145/./figures/N0_sigv_emission_model.jpg to _build/html/
copying  tmp_2504.10145/./figures/degen_sigma.jpg to _build/html/
copying  tmp_2504.10145/./figures/degen_sigma_rthres.jpg to _build/html/
exported in  _build/html/2504.10145.md
    + _build/html/tmp_2504.10145/./figures/N0_sigv_emission_model.jpg
    + _build/html/tmp_2504.10145/./figures/degen_sigma.jpg
    + _build/html/tmp_2504.10145/./figures/degen_sigma_rthres.jpg


## Display the papers

Not necessary but allows for a quick check.

In [9]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\tweco}{^{12}CO(J=1\rightarrow0)}$
$\newcommand{\thico}{^{13}CO(J=1\rightarrow0)}$
$\newcommand{\eigco}{C^{18}O(J=1\rightarrow0)}$
$\newcommand{\hcn}{HCN(J=1\rightarrow0)}$
$\newcommand{\hnc}{HNC(J=1\rightarrow0)}$
$\newcommand{\so}{SO(J_K=3_2\rightarrow2_1)}$
$\newcommand{\cs}{^{12}CS(J=2\rightarrow1)}$
$\newcommand{\hcop}{HCO^+(J=1\rightarrow0)}$
$\newcommand{\nhp}{N_2H^+(J=1\rightarrow0)}$
$\newcommand{\N}{N_0}$
$\newcommand{\NH}{N_{\rm{H_2}}}$
$\newcommand{\addref}{\textcolor{red}{ref}}$
$\newcommand{\rthres}{r_{\rm{thresh}}}$
$\newcommand{\rthresh}{r_{\rm{thresh}}}$
$\newcommand{\todo}[1]{\textbf{\textcolor{red}{TODO:}} \textcolor{red}{#1}}$
$\newcommand{\modifi}[1]{\textcolor{black}{#1}}$
$\newcommand{\modifj}[1]{\textcolor{black}{#1}}$
$\newcommand{\VertCaption}[2]{$
$  \rotatebox{90}{\parbox{#1}{$
$      \centering  \textbf{#2}}}}$
$\newcommand{\HoriCaption}[2]{$
$  \parbox{#1}{\centering  \textbf{#2}}}$
$\newcommand{\FigEmissionFunction}{$
$    \begin{figure}$
$        \centering$
$        \includegraphics[width = 1\linewidth]{figures/NH2_Int_pl.jpg}$
$    \caption{Binned trends of line integrated intensity as a function of column density. The data is binned in 30 equally sized bins of column density. Black circles correspond to the bin average, while the grey shading indicates the standard deviation in each bin. The black solid line is a smoothly varying double PL fit to the trends, specific to each emission line. The red dashed line shows for comparison the empirical fit to nearby clouds by [Tafalla, Usero and Hacar (2023)](), assuming a kinetic temperature of 11 K. Each panel shows a different emission line: \tweco , \thico  and \hcop  (left to right, top row);  \eigco , \hcn  and \cs  (middle); \hnc  \so  and \nhp  (bottom). The standard Milky Way CO-to-H_2 conversion factor and its typical uncertainty  ([Bolatto, Wolfire and Leroy 2013]())  is indicated in the top left panel. An \hcn  dense gas conversion factor of 60 M_\odot (K km s^{-1})^{-1} is indicated in the central panel. }$
$    \label{fig:emission-function}$
$    \end{figure}$
$}$
$\newcommand{\PostPDFExampleCorr}{$
$    \begin{figure}$
$        \centering$
$        \includegraphics[width = 1\linewidth]{figures/beetroots_test_orionb_post_pdf.jpg}$
$    \caption{Two-dimensional projections of the posterior PDF in the form of a scatter plot matrix. The matrix's diagonal shows the posterior PDF of each estimated parameter. The MAP estimation is represented as a vertical cyan line on the histograms and as a cyan square in the scatter plot. The true N-PDF parameters obtained by fitting the dust derived Orion B N-PDF is shown are shown as red crosses. The black dashed line show the range in PL index \alpha of the N-PDF expected for gravitational collapse. The estimations closely match the reference values, although clear degeneracies are present in the posterior PDF. }$
$    \label{fig:orionb-post-pdf-corr}$
$    \end{figure}$
$}$
$\newcommand{\PostPDFExample}{$
$    \begin{figure}$
$        \centering$
$        \includegraphics[width = 0.8\linewidth]{figures/beetroots_test_OrionB.jpg}$
$    \caption{A comparison of the reference and estimated$
$     N-PDFs when inverting the N-PDF on the spatially and spectrally$
$     averaged ORION-B data. The thick red line indicates the N-PDF as a histogram constructed directly from the dust-derived Orion B column densities, and the green line represents a \chi^2 fit to the red histogram. The estimated N-PDFs from the 10 000 MCMC iterations to sample the Bayesian posterior are shown with blue$
$     circles. The dashed orange line is the MAP estimation for the N-PDF. The vertical dotted black line indicates the limit below which the line intensities$
$     predicted by the emission function fall below the typical noise level$
$     of the data, that is 0.1 K km s^{-1}.}$
$    \label{fig:otrionb-post-pdf}$
$    \end{figure}$
$}$
$\newcommand{\ParamsMap}{$
$    \begin{figure}$
$        \centering$
$        \includegraphics[width = 1\linewidth]{figures/map_params_arrow.png}$
$    \caption{MAP estimations of the sub-beam N-PDF parameters across our M51 test region. Clockwise from top left, the panels show the mean column density of the LN part of N-PDF (N_0), the width of the log-normal (\sigma), the power-law index (\alpha), and the column density of transition between the log-normal and power-law parts of the N-PDF (\rthres). Red contours in each panel indicate \thico  integrated intensities of 4~and 12~K km s^{-1} (dashed and solid contours, respectively). The white arrow indicates the direction to the galactic centre. To first order, the gas is denser and more gravitationally unstable inside the spiral arm$
$     than outside the arm.$
$  }$
$  \label{fig:m51:param-map}$
$    \end{figure}$
$}$
$\newcommand{\MdenseMap}{$
$    \begin{figure}$
$        \centering$
$        \includegraphics[width = 1\linewidth]{figures/map_Mdense.jpg}$
$    \caption{The spatial distribution of$
$     the mass of dense gas (left), the gas mass in the power-law$
$     part of the N-PDF (middle), and the 24 \mum surface brightness in our M51 target region. We use the 24 \mum emission as a proxy for star formation. The masses are derived from the MAP estimate of the N-PDF, using equations$
$     \ref{eq:sigma_dense} and \ref{eq:sigma_pl}. The red contours are the samed as in Figure~\ref{fig:m51:param-map}. The masses of dense and PL gas appear highly correlated, with a similar spatial distribution as the 24\mum emission. }$
$    \label{fig:m51:Mdense-map}$
$    \end{figure}$
$}$
$\newcommand{\MdenseSFR}{$
$    \begin{figure}$
$        \centering$
$        \includegraphics[width = 1\linewidth]{figures/mdense_mpl_I24.jpg} \     \caption{Correlation between the 24\mum integrated intensity and the mass of dense gas (\textit{left}) and mass of gas in the power-law part of the N-PDF (\textit{right}) for pixels within our M51 test region. Each data point corresponds to a pixel within our field. The symbol size and grey shading represent f_{\rm PL}, the mass fraction of the gas in the power-law part of the N-PDF. Symbols with a red outline identify pixels where f_\text{PL} \geq 25\% and the slope of the power-law \alpha \in [2.5,5]. The dotted line is a linear fit to the pixels where f_\text{PL} < 5\%. The thick red line is a fit to the points where f_\text{PL} \geq 25\% and \alpha \in [2.5,5]. The latter fit has a correlation coefficient r = 0.85 and slope s = 1.0.}$
$    \label{fig:m51:mdense-sfr-corr}$
$    \end{figure}$
$}$
$\newcommand{\FigRatiosNSig}{$
$    \begin{figure*}[h]$
$        \centering$
$        \includegraphics[width = 0.9\linewidth]{figures/N0_sigv_emission_model.jpg}$
$    \caption{Model predicted line integrated intensities ratios over \tweco  as a function \sigma and N_0 for a purely LN N-PDF. Top row shows the ratios of \thico , \hcop , \hnc  and \hcn  over \tweco , from left to right. In this top row dashed lines show ratio isocontours increasing by factors of two. Bottom row shows the ratios of \cs , \eigco , \so  and \nhp  over \tweco , from left to right. In the bottom row isocontours represent factors of tens.}$
$    \label{fig:emission-function:N0-sig0}$
$    \end{figure*}$
$}$
$\newcommand{\FigRatiosAlphaN}{$
$    \begin{figure*}[h]$
$        \centering$
$        \includegraphics[width = 0.9\linewidth]{figures/ralpha_N0_emission_model.jpg}$
$    \caption{Same as Figure~\ref{fig:emission-function:N0-sig0}, except that the N-PDF is composed of a LN and a PL, with varying N_0 and \alpha while r_{\rm{thres}} and \sigma  are fixed to r_{\rm{thres}} = 3 and \sigma=0.8}$
$    \label{fig:emission-function:N0-alpha}$
$    \end{figure*}$
$}$
$\newcommand{\FigRatiosAlphaSig}{$
$    \begin{figure*}[h]$
$        \centering$
$        \includegraphics[width = 0.9\linewidth]{figures/ralpha_sigv_emission_model.jpg}$
$    \caption{Same as Figure~\ref{fig:emission-function:N0-sig0}, except that the N-PDF is composed of a LN and a PL, with varying \sigma and \alpha while r_{\rm{thres}} and N_0 are fixed to N_0 = 5\times10^{22} cm^{-2} and r_{\rm{thres}}=5.}$
$    \label{fig:emission-function:alpha-sig0}$
$    \end{figure*}$
$}$
$\newcommand{\FigRatiosRthresSig}{$
$    \begin{figure*}[h]$
$        \centering$
$        \includegraphics[width = 0.9\linewidth]{figures/rthres_sigv_emission_model.jpg}$
$    \caption{Same as Figure~\ref{fig:emission-function:N0-sig0}, except that the N-PDF is composed of a LN and a PL, with varying r_{\rm{thres}} and \sigma while \alpha and N_0 are fixed to N_0 = 5\times10^{22} cm^{-2} and \alpha=2.}$
$    \label{fig:emission-function:rthres-sig0}$
$    \end{figure*}$
$}$
$\newcommand{\FigRatiosRthresAlpha}{$
$    \begin{figure*}[h]$
$        \centering$
$        \includegraphics[width = 0.9\linewidth]{figures/rthres_ralpha_emission_model.jpg}$
$    \caption{Same as Figure~\ref{fig:emission-function:N0-sig0}, except that the N-PDF is composed of a LN and a PL, with varying r_{\rm{thres}} and \alpha while \sigma and N_0 are fixed to N_0 = 5\times10^{22} cm^{-2} and \sigma=0.6.}$
$    \label{fig:emission-function:rthres-alpha}$
$    \end{figure*}$
$}$</div>



<div id="title">

# Estimating the dense gas mass of molecular clouds using spatially unresolved 3 mm line observations

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2504.10145-b31b1b.svg)](https://arxiv.org/abs/2504.10145)<mark>Appeared on: 2025-04-15</mark> -  _18 pages, 16 figures, submitted to A&A_

</div>
<div id="authors">

A. Zakardjian, et al. -- incl., <mark>A. Hughes</mark>

</div>
<div id="abstract">

**Abstract:** Emission lines such as $\hcn$ are commonly used by extragalactic studies to trace high density molecular gas (n ${_{\rm{H_2}}}> \sim10^{4}$ cm $^{-3}$ ). Recent Milky Way studies have challenged their utility as unambiguous dense gas tracers, suggesting that a large fraction of their emission in nearby clouds is excited in low density gas. We aim to develop a new method to infer the sub-beam probability density function (PDF) of $H_2$ column densities and the dense gas mass within molecular clouds using spatially unresolved observations of molecular emission lines in the 3 mm band. We model spatially unresolved line integrated intensity measurements as the average of an emission function weighted by the sub-beam column density PDF. The emission function, which expresses the line integrated intensity as a function of the gas column density, is an empirical fit to high resolution ( $<0.05$ pc) multi-line observations of the Orion B molecular cloud. The column density PDF is assumed to be parametric, composed of a lognormal distribution at moderate column densities and a power law distribution at higher column densities. To estimate the sub-beam column density PDF, the emission model is combined with a Bayesian inversion algorithm (implemented in the Beetroots code), which takes account of thermal noise and calibration errors. $\modifi{We validate our method by demonstrating that it recovers the true column density PDF of the Orion B cloud, reproducing the observed emission line integrated intensities within noise and calibration uncertainties.    We apply the method to \tweco , \thico , \eigco , \hcn, \hcop  and \nhp  observations of a $700\times700$ pc$^2$ field of view (FoV) in the nearby galaxy M51. On average, the model reproduces the observed intensities within 30\%.    The column density PDFs obtained for the spiral arm region within our test FoV are dominated by a power-law tail at high column densities, with slopes that are consistent with gravitational collapse. Outside the spiral arm, the column density PDFs are predominantly lognormal, consistent with supersonic isothermal turbulence setting the dynamical state of the molecular gas.    We calculate the mass associated with the power-law tail of the column density PDFs and observe a strong, linear correlation between this mass and the 24$\mu$m surface brightness.}$ $\modifi{Our method is a promising approach to infer the physical conditions within extragalactic molecular clouds using spectral line observations that are feasible with current millimetre facilities. Future work will extend the method to include additional physical parameters that are relevant for the dynamical state and star formation activity of molecular clouds.}$

</div>

<div id="div_fig1">

<img src="tmp_2504.10145/./figures/N0_sigv_emission_model.jpg" alt="Fig12" width="100%"/>

**Figure 12. -** Model predicted line integrated intensities ratios over $\tweco$  as a function $\sigma$ and $N_0$ for a purely LN $N$-PDF. Top row shows the ratios of $\thico$ , $\hcop$ , $\hnc$  and $\hcn$  over $\tweco$ , from left to right. In this top row dashed lines show ratio isocontours increasing by factors of two. Bottom row shows the ratios of $\cs$ , $\eigco$ , $\so$  and $\nhp$  over $\tweco$ , from left to right. In the bottom row isocontours represent factors of tens. (*fig:emission-function:N0-sig0*)

</div>
<div id="div_fig2">

<img src="tmp_2504.10145/./figures/degen_sigma.jpg" alt="Fig7" width="100%"/>

**Figure 7. -** _Left:_ Model predicted integrated intensities for a piecewise LN and PL $N$-PDF, with $N_0 = 2\times 10^{21}$ cm$^{-2}$, $r_{\rm{thres}}$=1.5, $\alpha=3$ and $\sigma$ increasing from 0.3 to 0.8. All predicted integrated intensities vary by 10\% at most, which illustrates the degeneracy on the parameter $\sigma$ in the particular case of low $N_0$ and strong PL component. The right panels displays the corresponding $N$-PDF with varying $\sigma$. The dashed vertical line shows the column density limit below which the emission of all lines is less than 0.1 K km s$^{-1}$. (*fig:degeneracies:sigma*)

</div>
<div id="div_fig3">

<img src="tmp_2504.10145/./figures/degen_sigma_rthres.jpg" alt="Fig8" width="100%"/>

**Figure 8. -** Same as Figure \ref{fig:degeneracies:sigma}, except two specific $N$-PDF are illustrated: the first has a narrow LN width but a strong PL, while the other has a large LN width but a smaller PL. This illustrates how two $N$-PDF with notably different parameters can appear alike and give comparable average integrated intensities. (*fig:degeneracies:sigma-rthres*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2504.10145"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

439  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

10  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [13]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

4  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
