# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for affiliation-related issues."""

class AffiliationError(RuntimeError):
    """Error signalling that an affiliation check did not pass."""

def validation(source: str):
    """Check the affiliations of a source before the TeX code is parsed.

    Allows checks before parsing TeX code: the argument is forwarded
    unchanged to ``mpia.affiliation_verifications``.

    :param source: content to verify
    :raises AffiliationError: if ``mpia.affiliation_verifications`` returns
                              anything other than ``True``; the returned value
                              is appended to the error message.
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # Formatting instead of `+` keeps the error meaningful even when
        # `check` is not a string (concatenation would raise TypeError).
        raise AffiliationError(f"mpia.affiliation_verifications: {check}")

        
# Always show AffiliationWarning, instead of only the first occurrence
# per location (which is what the default warning filter does).
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g. ``2412.10506``)
    :returns: HTML snippet (a ``<div id="qrcode">`` wrapping an ``<img>``)
              that can be appended to the markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    target = f"https://arxiv.org/abs/{paper_id}"
    # Quote the complete src value as a single attribute. Quoting only the
    # arXiv URL (in the middle of an unquoted value) makes the quote
    # characters part of the data encoded in the QR code.
    txt = f'<img src="{url}{target}">'
    return '<div id="qrcode">' + txt + '</div>'


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Strip LaTeX ``CJK`` environment wrappers, keeping the enclosed text

    Handles ``\\begin{CJK}{UTF8}{<font>} ... \\end{CJK}`` (and the starred
    ``CJK*`` variant) for any font family, including content spanning
    several lines. The list of handled commands may need to grow.

    :param text: the text to clean
    :return: the cleaned text
    """
    # group 1: environment name (CJK or CJK*), reused to match the closing tag
    # group 2: the wrapped text, which is the only part kept
    pattern = r"\\begin\{(CJK\*?)\}\{UTF8\}\{[^{}]*\}(.*?)\\end\{\1\}"
    return re.sub(pattern, r"\2", text, flags=re.DOTALL)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last is reduced to its initial (hyphenated tokens
    keep their hyphen, e.g. ``Jean-Luc`` -> ``J.-L.``); the last token is
    kept as is. A parenthesised part of the name, if any, is moved to the
    end of the result.

    :param name: full name
    :returns: initials followed by the last token (empty string for an
              empty or blank name)
    """
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.search(r"\((.*?)\)", name)
        # an opening parenthesis without a closing one is left untouched
        if found:
            suffix = found.group(1)
            name = name.replace(f"({suffix})", '')
    split = name.split()
    initials = []
    for token in split[:-1]:
        # drop empty pieces produced by leading/trailing/double hyphens
        parts = [k for k in token.split('-') if k]
        if not parts:
            continue
        initials.append('-'.join(k[0] + '.' for k in parts))
    if split:
        initials.append(split[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff ("Mitarbeiter") list webpage from mpia.de to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Rejects IT, administration and technical staff, identified by a
    substring match against a fixed list of names.

    :param name: name
    :returns: False if name is not a scientist, True otherwise
    """
    excluded_names = (
        'Licht', 'Binroth', 'Witzel', 'Jordan',
        'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
        'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
        'Dehen', 'Beckmann', 'Jager', 'Jäger',
    )
    return not any(fragment in name for fragment in excluded_names)

def add_author_to_list(author_list: list) -> list:
    """ Append the hard-coded extra authors that are not yet in the list

    The list is modified in place and also returned.

    :param author_list: list of authors
    :returns: updated list of authors
    """
    extra_authors = ('T. Henning',)
    missing = [name for name in extra_authors if name not in author_list]
    author_list.extend(missing)
    return author_list

# Build the list of MPIA author names from the institute staff list:
#  - `filter_non_scientists` drops known non-scientific staff
#    (mpia.get_mpia_mitarbeiter_list() does some filtering as well);
#  - `add_author_to_list` adds authors missing because their MPIA name and
#    their author name on papers are inconsistent.
mpia_authors = add_author_to_list(
    [entry[1]
     for entry in mpia.get_mpia_mitarbeiter_list()
     if filter_non_scientists(entry[1])]
)

In [4]:
# Papers to screen: whatever `get_new_papers` returns...
new_papers = get_new_papers()
# ...plus manual references: identifiers listed in `add_paper_refs` are
# fetched one by one and appended (empty list = nothing added).
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    """Apply ``fn`` to ``value``, falling back to ``value`` on failure.

    :param fn: callable invoked as ``fn(value, *args, **kwargs)``
    :param value: first argument of ``fn``; returned unchanged if ``fn`` raises
    :returns: the result of ``fn``, or ``value`` if any ``Exception`` was raised
    """
    try:
        result = fn(value, *args, **kwargs)
    except Exception:
        # deliberate best effort: keep the untouched input
        result = value
    return result

# Select the papers with at least one author matching the MPIA author list.
candidates = []
for paperk in new_papers:
    # Check author list with their initials
    # (names that cannot be normalized are kept as they are)
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # A match is an author wrapped in a <mark> tag. Test for the tag itself:
    # the bare substring 'mark' also matches names such as "Newmark".
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if '<mark' in hl]
    # the paper keeps the normalized + highlighted author list from now on
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))
print("""          {0:,d} with possible author matches""".format(len(candidates)))

E. Schinnerer  ->  E. Schinnerer  |  ['E. Schinnerer']
K. Doi  ->  K. Doi  |  ['K. Doi']


J. Liu  ->  J. Liu  |  ['J. Liu']
J. Shi  ->  J. Shi  |  ['J. Shi']
J. Bouwman  ->  J. Bouwman  |  ['J. Bouwman']
T. Henning  ->  T. Henning  |  ['T. Henning']
O. Krause  ->  O. Krause  |  ['O. Krause']
S. Scheithauer  ->  S. Scheithauer  |  ['S. Scheithauer']
K. Jahnke  ->  K. Jahnke  |  ['K. Jahnke']
Arxiv has 87 new papers today
          6 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Build the markdown summary of every candidate paper.
#   documents: (paper_id, markdown text) for each paper processed successfully
#   failed:    (paper, reason) for each paper that was rejected or raised
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalize the identifier: lower case, no 'arxiv:' prefix, no surrounding whitespace.
    # NOTE(review): r'\n' is a raw string, i.e. a backslash followed by 'n',
    # not a newline character -- confirm this is what is intended.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        # Only retrieve the source if the folder is not already there
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` raises AffiliationError when the affiliation check fails
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is built but never used afterwards.
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            # rejected paper: skip the rest of the processing
            continue
        
        # Hack because sometimes author parsing does not work well
        # (when the number of authors differs, use the list from the arXiv entry)
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # Fall back to the abstract of the arXiv entry when none was parsed
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # Comment line: arXiv badge, appearance date and, if any, the arXiv comments
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        # (best effort: on failure the text is kept as is and the issue is printed)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other problem: warn, record the failure and move on to the next paper
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/6 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2412.10506
extracting tarball to tmp_2412.10506... done.


Found 84 bibliographic references in tmp_2412.10506/aanda_arxiv.bbl.
Retrieving document from  https://arxiv.org/e-print/2412.10731


extracting tarball to tmp_2412.10731...

 done.
  0: tmp_2412.10731/sample631.tex, 1,187 lines
  1: tmp_2412.10731/cover_letter.tex, 24 lines



  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


K. Doi  ->  K. Doi  |  ['K. Doi']


Found 68 bibliographic references in tmp_2412.10731/sample631.bbl.
Issues with the citations
syntax error in line 39: '=' expected
Retrieving document from  https://arxiv.org/e-print/2412.11460


extracting tarball to tmp_2412.11460... done.
Retrieving document from  https://arxiv.org/e-print/2412.11545


extracting tarball to tmp_2412.11545...

 done.
Retrieving document from  https://arxiv.org/e-print/2412.11627


extracting tarball to tmp_2412.11627...

 done.


Unable to locate Ghostscript on paths


Retrieving document from  https://arxiv.org/e-print/2412.11945


extracting tarball to tmp_2412.11945...

 done.


Found 216 bibliographic references in tmp_2412.11945/aa.bbl.
Issues with the citations
syntax error in line 5: unbalanced braces


 item = \bibitem[{{Abazajian} {et~al.}(2016){Abazajian}, {Adshead}, {Ahmed}, {Allen},  {Alonso}, {Arnold}, {Baccigalupi}, {Bartlett}, {Battaglia}, {Benson},  {Bischoff}, {Borrill}, {Buza}, {Calabrese}, {Caldwell}, {Carlstrom}, {Chang},  {Crawford}, {Cyr-Racine}, {De Bernardis}, {de Haan}, {di Serego Alighieri},  {Dunkley}, {Dvorkin}, {Errard}, {Fabbian}, {Feeney}, {Ferraro}, {Filippini},  {Flauger}, {Fuller}, {Gluscevic}, {Green}, {Grin}, {Grohs}, {Henning},  {Hill}, {Hlozek}, {Holder}, {Holzapfel}, {Hu}, {Huffenberger}, {Keskitalo},  {Knox}, {Kosowsky}, {Kovac}, {Kovetz}, {Kuo}, {Kusaka}, {Le Jeune}, {Lee},  {Lilley}, {Loverde}, {Madhavacheril}, {Mantz}, {Marsh}, {McMahon},  {Meerburg}, {Meyers}, {Miller}, {Munoz}, {Nguyen}, {Niemack}, {Peloso},  {Peloton}, {Pogosian}, {Pryke}, {Raveri}, {Reichardt}, {Rocha}, {Rotti},  {Schaan}, {Schmittfull}, {Scott}, {Sehgal}, {Shandera}, {Sherwin}, {Smith},  {Sorbo}, {Starkman}, {Story}, {van Engelen}, {Vieira}, {Watson}, {Whitehorn},   {Kimmy Wu}}

### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# Create the log directory if needed so that the export also works on a
# fresh checkout (open() does not create missing directories).
os.makedirs(os.path.dirname(logfile), exist_ok=True)

# Titles, abstracts and author names may contain non-ASCII characters:
# write UTF-8 explicitly instead of relying on the platform default.
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers that produced a document
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sort by reason so that identical reasons are contiguous and
    # each reason header is written only once
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are shown in green, other failures in red
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() +
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')

        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2412.10506-b31b1b.svg)](https://arxiv.org/abs/2412.10506) | **Dense gas scaling relations at kiloparsec scale across nearby galaxies with the ALMA ALMOND and IRAM 30-m EMPIRE surveys**  |
|| L. Neumann, et al. -- incl., <mark>E. Schinnerer</mark> |
|*Appeared on*| *2024-12-17*|
|*Comments*| *6 pages, 3 figures. Accepted for publication in Astronomy & Astrophysics Letters*|
|**Abstract**|            Dense, cold gas is the key ingredient for star formation. Over the last two decades, HCN(1-0) emission has been utilised as the most accessible dense gas tracer to study external galaxies. We present new measurements tracing the relationship between dense gas tracers, bulk molecular gas tracers, and star formation in the ALMA ALMOND survey, the largest sample of resolved (1-2 kpc resolution) HCN maps of galaxies in the local universe (d < 25 Mpc). We measure HCN/CO, a line ratio sensitive to the physical density distribution, and SFR/HCN, a proxy for the dense gas star formation efficiency, as a function of molecular gas surface density, stellar mass surface density, and dynamical equilibrium pressure across 31 galaxies, increasing the number of galaxies by a factor of > 3 over the previous largest such study (EMPIRE). HCN/CO increases (slope of ~ 0.5 and scatter of ~ 0.2 dex), while SFR/HCN decreases (slope of ~ -0.6 and scatter of ~ 0.4 dex) with increasing molecular gas surface density, stellar mass surface density and pressure. Galaxy centres with high stellar mass surface density show a factor of a few higher HCN/CO and lower SFR/HCN compared to the disc average, but both environments follow the same average trend. Our results emphasise that molecular gas properties vary systematically with the galactic environment and demonstrate that the scatter in the Gao-Solomon relation (SFR against HCN) is of physical origin.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2412.10731-b31b1b.svg)](https://arxiv.org/abs/2412.10731) | **Dust Scattering Albedo at Millimeter-Wavelengths in the TW Hya Disk**  |
|| T. C. Yoshida, et al. -- incl., <mark>K. Doi</mark> |
|*Appeared on*| *2024-12-17*|
|*Comments*| *19 pages, 16 figures, accepted for publication in ApJ*|
|**Abstract**|            Planetary bodies are formed by coagulation of solid dust grains in protoplanetary disks. Therefore, it is crucial to constrain the physical and chemical properties of the dust grains. In this study, we measure the dust albedo at mm-wavelength, which depends on dust properties at the disk midplane. Since the albedo and dust temperature are generally degenerate in observed thermal dust emission, it is challenging to determine them simultaneously. We propose to break this degeneracy by using multiple optically-thin molecular lines as a dust-albedo independent thermometer. In practice, we employ pressure-broadened CO line wings that provide an exceptionally high signal-to-noise ratio as an optically thin line. We model the CO $J=2-1$ and $3-2$ spectra observed by the Atacama Large Millimeter/sub-millimeter Array (ALMA) at the inner region ($r<6\ {\rm au}$) of the TW Hya disk and successfully derived the midplane temperature. Combining multi-band continuum observations, we constrain the albedo spectrum at $0.9-3$ mm for the first time without assuming a dust opacity model. The albedo at these wavelengths is high, $\sim0.5-0.8$, and broadly consistent with the Ricci et al. (2010), DIANA, and DSHARP dust models. Even without assuming dust composition, we estimate the maximum grain size to be $\sim 340\ \mu m$, the power law index of the grain size distribution to be $>-4.1$, and porosity to be $<0.96$. The derived dust size may suggest efficient fragmentation with the threshold velocity of $\sim 0.08\ {\rm m\ s^{-1}}$. We also note that the absolute flux uncertainty of $\sim10\%$ ($1\sigma$) is measured and used in the analysis, which is approximately twice the usually assumed value.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2412.11945-b31b1b.svg)](https://arxiv.org/abs/2412.11945) | **Euclid: Field-level inference of primordial non-Gaussianity and cosmic initial conditions**  |
|| A. Andrews, et al. -- incl., <mark>K. Jahnke</mark> |
|*Appeared on*| *2024-12-17*|
|*Comments*| *31 pages and 26 figures, 3 tables. Comments are welcome!*|
|**Abstract**|            A primary target of the \Euclid space mission is to constrain early-universe physics by searching for deviations from a primordial Gaussian random field. A significant detection of primordial non-Gaussianity would rule out the simplest models of cosmic inflation and transform our understanding of the origin of the Universe. This paper forecasts how well field-level inference of galaxy redshift surveys can constrain the amplitude of local primordial non-Gaussianity ($f_{NL}$), within a Bayesian hierarchical framework, in the upcoming \Euclid data. We design and simulate mock data sets and perform Markov chain Monte Carlo analyses using a full-field forward modelling approach. By including the formation history of the cosmic matter field in the analysis, the method takes into account all available probes of primordial non-Gaussianity, and goes beyond statistical summary estimators of $f_{NL}$. Probes include, for example, two-point and higher-order statistics, peculiar velocity fields, and scale-dependent galaxy biases. Furthermore, the method simultaneously handles systematic survey effects, such as selection effects, survey geometries, and galaxy biases. The forecast shows that the method can reach precision levels of up to $\sigma (f_{NL}) = 2.3$ (68.3\% CI, and at the grid resolution $\Delta L = 62.5\,h^{-1}$Mpc) with \Euclid data. We also provide data products, including realistic $N$-body simulations with nonzero values of $f_{NL}$ and maps of adiabatic curvature fluctuations. The results underscore the feasibility and advantages of field-level inference to constrain $f_{NL}$ in galaxy redshift surveys. Our approach consistently captures all the information available in the large-scale structure to constrain $f_{NL}$, and resolves the degeneracy between early-universe physics and late-time gravitational effects, while mitigating the impact of systematic and observational effects.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2412.11460-b31b1b.svg)](https://arxiv.org/abs/2412.11460) | **Observation of a spectral hardening in cosmic ray boron spectrum with the DAMPE space mission**  |
|| D. Collaboration, et al. -- incl., <mark>J. Liu</mark> |
|*Appeared on*| *2024-12-17*|
|*Comments*| *10 pages, 10 figures, submitted to PRL*|
|**Abstract**|            Secondary cosmic ray fluxes are important probes of the propagation and interaction of high-energy particles in the Galaxy. Recent measurements of primary and secondary cosmic ray nuclei have revealed unexpected spectral features that demand a deeper understanding. In this work we report the direct measurement of the cosmic ray boron spectrum from 10 TeV/n to 8 TeV/n with eight years of data collected by the Dark Matter Particle Explorer (DAMPE) mission. The measured spectrum shows an evident hardening at $182\pm24$ GeV/n with a spectral power index of $\gamma_1 = 3.02 \pm 0.01$ before the break and an index change of $\Delta \gamma = 0.31 \pm 0.05$ after the break. A simple power law model is disfavored at a confidence level of 8$\sigma$. Compared with the hardenings measured in the DAMPE proton and helium spectra, the secondary boron spectrum hardens roughly twice as much as these primaries, which is consistent with a propagation related mechanism to interpret the spectral hardenings of cosmic rays observed at hundreds of GeV/n.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2412.11545-b31b1b.svg)](https://arxiv.org/abs/2412.11545) | **Minute-cadence Observations of the LAMOST Fields with the TMTS: VI. Absolute Physical Parameters of Contact Binaries**  |
|| Q. Xia, et al. -- incl., <mark>J. Shi</mark> |
|*Appeared on*| *2024-12-17*|
|*Comments*| **|
|**Abstract**|            With the development of wide-field surveys, a large amount of data on short-period W UMa contact binaries have been obtained. Continuous and uninterrupted light curves as well as high-resolution spectroscopic data are crucial in determining the absolute physical parameters. Targets with both TMTS light curves and LAMOST medium-resolution spectra were selected. The absolute physical parameters were inferred with the W-D code for ten systems, all of them are W-type shallow or medium contact binaries. The O'Connell effect observed in the light curves can be explained by adding a spot on the primary or secondary component in the models. According to O-C analysis, the orbital periods exhibit a long-term increasing or decreasing trend, amongst which J0132, J1300, and J1402 show periodic variations that may be attributed to the presence of a third body or magnetic activity cycles. Spectral subtraction analysis revealed that the equivalent width of H$\alpha$ indicates strong magnetic activity in J0047, J0305, J0638, and J1402. Among the 10 selected binary systems, except for J0132 and J0913, the more massive components are found to be main-sequence stars while the less massive components have evolved off the main sequence. In J0132, both components are in the main sequence, whereas both components of J0913 lie above the terminal-age main sequence. Based on the relationship between orbital angular momentum and total mass for these two systems, as well as their low fill-out factors, it is possible that these two systems are newly formed contact binaries, having recently evolved from the detached configuration.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2412.11627-b31b1b.svg)](https://arxiv.org/abs/2412.11627) | **Combined analysis of the 12.8 and 15 $\mu m$ JWST/MIRI eclipse observations of TRAPPIST-1 b**  |
|| E. Ducrot, et al. -- incl., <mark>J. Bouwman</mark>, <mark>T. Henning</mark>, <mark>O. Krause</mark>, <mark>S. Scheithauer</mark> |
|*Appeared on*| *2024-12-17*|
|*Comments*| *49 pages, 3 main text figure, 2 extended figures, 10 supplementary figures, accepted for publication in Nature Astronomy on October 29, 2024*|
|**Abstract**|            The first JWST/MIRI photometric observations of TRAPPIST-1 b allowed for the detection of the thermal emission of the planet at 15 $\mu m$, suggesting that the planet could be a bare rock with a zero albedo and no redistribution of heat. These observations at 15 $\mu m$ were acquired as part of GTO time that included a twin program at 12.8 $\mu m$ in order to have a measurement in and outside the CO$_2$ absorption band. Here we present five new occultations of TRAPPIST-1 b observed with MIRI in an additional photometric band at 12.8 $\mu m$. We perform a global fit of the 10 eclipses and derive a planet-to-star flux ratio and 1-$\sigma$ error of 452 $\pm$ 86 ppm and 775 $\pm$ 90 ppm at 12.8 $\mu m$ and 15 $\mu m$, respectively. We find that two main scenarios emerge. An airless planet model with an unweathered (fresh) ultramafic surface, that could be indicative of relatively recent geological processes fits well the data. Alternatively, a thick, pure-CO2 atmosphere with photochemical hazes that create a temperature inversion and result in the CO2 feature being seen in emission also works, although with some caveats. Our results highlight the challenges in accurately determining a planet's atmospheric or surface nature solely from broadband filter measurements of its emission, but also point towards two very interesting scenarios that will be further investigated with the forthcoming phase curve of TRAPPIST-1 b.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error Unable to locate Ghostscript on paths</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    Figures referenced in the markdown text, either as ``[Fig...](path)``
    links or as ``<img src="path" .../>`` tags, are copied below
    ``directory`` keeping their relative path. Online figures (path
    containing ``http``) and missing files are skipped.

    :param md: markdown text to export
    :param md_fname: name of the markdown file written in ``directory``
    :param directory: destination directory (created if missing, including
                      intermediate directories)
    :raises RuntimeError: if ``directory`` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs: also create missing parents (e.g. `_build` for `_build/html/`)
        os.makedirs(directory, exist_ok=True)

    # Negated character classes instead of greedy `.*`, so that several links
    # on the same line are captured separately rather than as one bogus path.
    fig_fnames = (re.compile(r'\[Fig[^\]\n]*\]\(([^)\n]*)\)').findall(md) +
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # keep the relative location of the figure below `directory`
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
    # explicit encoding: the text may contain non-ASCII characters
    with open(os.path.join(directory, md_fname), 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# Write one `<paper_id>.md` file per successful paper into the build
# directory; figures found in the markdown are copied along with it.
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2412.10506/./Figures/HCN_scaling_relations_centre_vs_disc_compressed_v2.png', 'tmp_2412.10506/./Figures/GS_relation_literature_compilation_compressed.png', 'tmp_2412.10506/./Figures/ALMOND_vs_EMPIRE_spectra_compressed_v2.png']
copying  tmp_2412.10506/./Figures/HCN_scaling_relations_centre_vs_disc_compressed_v2.png to _build/html/
copying  tmp_2412.10506/./Figures/GS_relation_literature_compilation_compressed.png to _build/html/
copying  tmp_2412.10506/./Figures/ALMOND_vs_EMPIRE_spectra_compressed_v2.png to _build/html/
exported in  _build/html/2412.10506.md
    + _build/html/tmp_2412.10506/./Figures/HCN_scaling_relations_centre_vs_disc_compressed_v2.png
    + _build/html/tmp_2412.10506/./Figures/GS_relation_literature_compilation_compressed.png
    + _build/html/tmp_2412.10506/./Figures/ALMOND_vs_EMPIRE_spectra_compressed_v2.png
found figures ['tmp_2412.10731/././figure/spectra.png', 'tmp_2412.10731/././figure/fg.png', 'tmp_2412.10731/././figure/fg.png']
copying  tm

## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render the markdown text (second item) of every exported document;
# the trailing `;` suppresses the output of the list itself.
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\cmark}{\textcolor{green}{\ding{51}}}$
$\newcommand{\xmark}{\textcolor{red}{\ding{55}}}$</div>



<div id="title">

# Dense gas scaling relations at kiloparsec scale across nearby galaxies with the ALMA ALMOND and IRAM 30-m EMPIRE surveys

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2412.10506-b31b1b.svg)](https://arxiv.org/abs/2412.10506)<mark>Appeared on: 2024-12-17</mark> -  _6 pages, 3 figures. Accepted for publication in Astronomy & Astrophysics Letters_

</div>
<div id="authors">

L. Neumann, et al. -- incl., <mark>E. Schinnerer</mark>

</div>
<div id="abstract">

**Abstract:** Dense, cold gas is the key ingredient for star formation.   Over the last two decades, HCN(1--0) emission has been utilised as the most accessible dense gas tracer to study external galaxies.   We present new measurements tracing the relationship between dense gas tracers, bulk molecular gas tracers, and star formation in the ALMA ALMOND survey, the largest sample of resolved ( $1-2$ kpc resolution) HCN maps of galaxies in the local universe ( $d < 25 $ Mpc).   We measure HCN/CO, a line ratio sensitive to the physical density distribution, and SFR/HCN, a proxy for the dense gas star formation efficiency, as a function of molecular gas surface density, stellar mass surface density, and dynamical equilibrium pressure across 31 galaxies, increasing the number of galaxies by a factor of $>3$ over the previous largest such study (EMPIRE).    HCN/CO increases (slope of $\approx\num{0.5}$ and scatter of $\approx\SI{0.2}{\dex}$ ), while SFR/HCN decreases (slope of $\approx\num{-0.6}$ and scatter of $\approx\SI{0.4}{\dex}$ ) with increasing molecular gas surface density, stellar mass surface density and pressure.    Galaxy centres with high stellar mass surface density show a factor of a few higher HCN/CO and lower SFR/HCN compared to the disc average, but both environments follow the same average trend.   Our results emphasise that molecular gas properties vary systematically with the galactic environment and demonstrate that the scatter in the Gao-Solomon relation (SFR against HCN) is of physical origin.

</div>

<div id="div_fig1">

<img src="tmp_2412.10506/./Figures/HCN_scaling_relations_centre_vs_disc_compressed_v2.png" alt="Fig6" width="100%"/>

**Figure 6. -** Dense gas relations with kiloparsec-scale environment.
HCN/CO _(top)_, a proxy of \fdense, and SFR/HCN _(bottom)_, a proxy of \sfedense, as a function of stellar mass surface density (\sigstar), molecular gas surface density (\sigmol), and dynamical equilibrium pressure (\pde) across 31 galaxies from ALMOND and EMPIRE.
The markers denote significant ($\snr\geq 3$) stacked measurements across disc (circle) and centre (triangle) spaxels.
The downward and upward pointing arrows denote upper (HCN/CO) and lower limits (SFR/HCN).
Filled contours show 25, 50, and 75 percentile kernel density estimates.
Across centres, we indicate the presence of an AGN (cross).
All relations have been fitted with \texttt{LinMix} taking into account measurement uncertainties and upper/lower limits (parameters in Table \ref{tab:environment}).
The black solid line shows the best-fit line and the grey-shaded area indicates the 1-sigma scatter of $\snr\geq 3$ data.
The right panels show violin plots of the HCN/CO and SFR/HCN distribution across the respective samples (disc, centre, centre with AGN), where the black bar and white markers indicate the 25th to 75th percentile range and the median, respectively, across the $\snr\geq 3$ data.
The vertical cyan lines in the disc violins mark the median computed from all \snr data.
 (*fig:hcn_scaling_relations*)

</div>
<div id="div_fig2">

<img src="tmp_2412.10506/./Figures/GS_relation_literature_compilation_compressed.png" alt="Fig5" width="100%"/>

**Figure 5. -** Gao--Solomon relation. Star formation rate (SFR, top) and $\mathrm{SFR}/\lhcn$(proxy of \sfedense, bottom) as a function of HCN luminosity, across a literature compilation and the ALMOND (blue circles) and EMPIRE (red circles) surveys.
Note that we re-calculate SFR across EMPIRE galaxies using a combination of IR and FUV data (see Sect. \ref{sec:data}).
Our literature compilation contains HCN observations that include Galactic clumps and clouds (squares), resolved nearby galaxies (circles) and unresolved entire galaxies (diamonds).
For more details on the compilation see Appendix \ref{sec:app:literature}.
The plotted data points show all (3-sigma) detected sightlines.
The black solid line shows the median SFR/HCN computed from these data points across all data sets (without duplicates across targets), and the dashed lines mark the 1-sigma scatter (Table \ref{tab:gao_solomon}).
The bottom panel shows the ratio SFR/HCN as a function of \lhcn, grouping the data into the same sub-samples where the 10-percentile density contours of the respective sub-samples are shown.
We plot ALMOND and EMPIRE separately, and the blue and red contours present the 10-percentile levels of these surveys.
 (*fig:gao_solomon_relation*)

</div>
<div id="div_fig3">

<img src="tmp_2412.10506/./Figures/ALMOND_vs_EMPIRE_spectra_compressed_v2.png" alt="Fig2" width="100%"/>

**Figure 2. -** EMPIRE versus ALMOND: \hcnone average spectra.
The blue and red lines show average HCN brightness temperatures within $\rgal\leq\SI{5}{\kilo\parsec}$ obtained from spatially and spectrally matched ALMOND and EMPIRE observations, respectively, across the three galaxies NGC 628, NGC 2903, NGC 4321 from top to bottom.
The grey dashed line shows (homogenised) \cotwo intensities from PHANGS--ALMA  ([Leroy, Schinnerer and Hughes 2021]()) , scaled down by a factor of 20.
The grey-shaded area indicates the velocity-integration window constructed using the highly significant \cotwo data.
The resulting integrated intensities are quoted in the text.
 (*fig:empire_vs_almond_spectra*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2412.10506"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\vdag}{(v)^\dagger}$
$\newcommand$
$\newcommand$
$\newcommand{\kms}{{\rm km s^{-1}}}$</div>



<div id="title">

# Dust Scattering Albedo at Millimeter-Wavelengths in the TW Hya Disk

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2412.10731-b31b1b.svg)](https://arxiv.org/abs/2412.10731)<mark>Appeared on: 2024-12-17</mark> -  _19 pages, 16 figures, accepted for publication in ApJ_

</div>
<div id="authors">

T. C. Yoshida, et al. -- incl., <mark>K. Doi</mark>

</div>
<div id="abstract">

**Abstract:** Planetary bodies are formed by coagulation of solid dust grains in protoplanetary disks. Therefore, it is crucial to constrain the physical and chemical properties of the dust grains. In this study, we measure the dust albedo at mm-wavelength, which depends on dust properties at the disk midplane. Since the albedo and dust temperature are generally degenerate in observed thermal dust emission, it is challenging to determine them simultaneously. We propose to break this degeneracy by using multiple optically-thin molecular lines as a dust-albedo independent thermometer. In practice, we employ pressure-broadened CO line wings that provide an exceptionally high signal-to-noise ratio as an optically thin line. We model the CO $J=2-1$ and $3-2$ spectra observed by the Atacama Large Millimeter/sub-millimeter Array (ALMA) at the inner region ($r<6 {\rm au}$) of the TW Hya disk and successfully derived the midplane temperature. Combining multi-band continuum observations, we constrain the albedo spectrum at $0.9-3$ mm for the first time without assuming a dust opacity model. The albedo at these wavelengths is high, $\sim0.5-0.8$, and broadly consistent with the \citet{ricc10}, DIANA, and DSHARP dust models. Even without assuming dust composition, we estimate the maximum grain size to be $\sim 340 \mu m$, power law index of the grain size distribution to be $>-4.1$, and porosity to be $<0.96$. The derived dust size may suggest efficient fragmentation with the threshold velocity of $\sim 0.08 {\rm m s^{-1}}$. We also note that the absolute flux uncertainty of $\sim10\%$ ($1\sigma$) is measured and used in the analysis, which is approximately twice the usually assumed value.

</div>

<div id="div_fig1">

<img src="tmp_2412.10731/././figure/spectra.png" alt="Fig14" width="100%"/>

**Figure 14. -**  CO $J=2-1$ (blue) and $J=3-2$ (orange) spectra at the center ($r<6 {\rm au}$) of the TW Hya disk. The bottom panels are zoomed-in versions of the top panel. The grey lines indicate the models created from parameters randomly sampled from the MCMC chain (Section \ref{sec:res1}). The grey-shaded regions ($|v| < 4 {\rm km s^{-1}}$) are not used for fitting. (*fig:spec*)

</div>
<div id="div_fig2">

<img src="tmp_2412.10731/././figure/fg.png" alt="Fig9" width="100%"/>

**Figure 9. -**  Line emerging factor $f_g$ as a function of $\omega$ and $B(T_g)/B(T_d)$. (*fig:fg*)

</div>
<div id="div_fig3">

<img src="tmp_2412.10731/././figure/fg.png" alt="Fig12" width="100%"/>

**Figure 12. -**  Line emerging factor $f_g$ as a function of $\omega$ and $B(T_g)/B(T_d)$. (*fig:fg*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2412.10731"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\borg}{\texttt{BORG}\xspace}$
$\newcommand{\fnl}{{f_\mathrm{NL}}}$
$\newcommand{\bpd}{{b_{\phi ,   \delta}}}$
$\newcommand{\euclid}{\textit{Euclid}}$
$\newcommand{\aanote}[1]{\textbf{\textcolor{purple}{\textbf{AA:} #1\\}}}$
$\newcommand{\fnll}{f_{\mathrm{NL}}^{\mathrm{local}}}$
$\newcommand{\Mpch}{\ensuremath{h^{-1} \text{Mpc}}}$
$\newcommand{\hMpc}{\ensuremath{h\;\text{Mpc}^{-1}}}$
$\newcommand{\mvec}[1]{{\mathbf{#1}}}$
$\newcommand{\GL}[1]{{\bf\color{darkgreen} #1}}$
$\newcommand{\GLrep}[2]{{\bf\color{darkgreen} [GL: \sout{#1} #2]}}$
$\newcommand{\replace}[2]{#2}$
$\newcommand{\new}[1]{#1}$
$\newcommand{\orcid}[1]$
$\newcommand\mb{#1}$
$\newcommand\MB{#1}$
$\newcommand{\UrlFont}{\rmfamily}$</div>



<div id="title">

# $\Euclid$: Field-level inference of primordial non-Gaussianity and cosmic initial conditions$\thanks{This paper is published on behalf of the Euclid Consortium.}$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2412.11945-b31b1b.svg)](https://arxiv.org/abs/2412.11945)<mark>Appeared on: 2024-12-17</mark> -  _31 pages and 26 figures, 3 tables. Comments are welcome!_

</div>
<div id="authors">

A. Andrews, et al. -- incl., <mark>K. Jahnke</mark>

</div>
<div id="abstract">

**Abstract:** A primary target of the $\Euclid$ space mission is to constrain early-universe physics by searching for deviations from a primordial Gaussian random field. A significant detection of primordial non-Gaussianity would rule out the simplest models of cosmic inflation and transform our understanding of the origin of the Universe.  This paper forecasts how well field-level inference of galaxy redshift surveys can constrain the amplitude of local primordial non-Gaussianity ( $\fnll$ ), within a Bayesian hierarchical framework, in the upcoming $\Euclid$ data. We design and simulate mock data sets and perform Markov chain Monte Carlo analyses using a full-field forward modelling approach.  By including the formation history of the cosmic matter field in the analysis, the method takes into account all available probes of primordial non-Gaussianity, and goes beyond statistical summary estimators of $\fnll$ . Probes include, for example, two-point and higher-order statistics, peculiar velocity fields, and scale-dependent galaxy biases. Furthermore, the method simultaneously handles systematic survey effects, such as selection effects, survey geometries, and galaxy biases.  The forecast shows that the method can reach precision levels of up to $\sigma \left( \fnll \right) = 2.3$ (68.3 \% confidence interval, and at the grid resolution $\Delta L = 62.5 \Mpch$ ) with $\Euclid$ data. We also provide data products, including realistic $N$ -body simulations with nonzero values of $\fnll$ and maps of adiabatic curvature fluctuations.  The results underscore the feasibility and advantages of field-level inference to constrain $\fnll$ in galaxy redshift surveys. Our approach consistently captures all the information available in the large-scale structure to constrain $\fnll$ , and resolves the degeneracy between early-universe physics and late-time gravitational effects, while mitigating the impact of systematic and observational effects.

</div>

<div id="div_fig1">

<img src="tmp_2412.11945/./Images/Results/pyramid_cat1_temp.png" alt="Fig18" width="100%"/>

**Figure 18. -** Corner plot for $\fnl$ and bias parameters, for Run \#3, catalogue 1. The corner plot displays the joint distributions and marginal distributions of the variables in the multidimensional data set covered by $\fnl$ and the bias parameters. Each subplot captures the relationships between pairs of variables, offering an overview of the data set structure and dependencies. For the main run, there are few to no degeneracies in the bias parameters. (*fig:pyramid_3_1*)

</div>
<div id="div_fig2">

<img src="tmp_2412.11945/./Images/Results/test_one_2250.png" alt="Fig6" width="100%"/>

**Figure 6. -** Mollweide projection of the **ground truth** adiabatic curvature fluctuation map. The projection is computed for a distance of $r=2250 \, \Mpch$, for an observer placed in the centre of the cube, and multiplied by the window selection function. (*fig:acf_projection_one*)

</div>
<div id="div_fig3">

<img src="tmp_2412.11945/./Images/Pedagogical/physics_fc2.png" alt="Fig10" width="100%"/>

**Figure 10. -** Flow chart illustrating the forward model implemented into $\borg$\citep{jasche_bayesian_2013,andrews_bayesian_2023}. The forward model connects a set of initial conditions to a model prediction. This output can then be compared to the data at the field level through a likelihood evaluation. The parameter under each box represents the output of the box and what is provided to the next step of the forward model. The parameters above some boxes represent the additional input of each computation, especially highlighting the inclusion of the $f_{\mathrm{NL}}^{\mathrm{local}}$ parameter. (*fig:flowchart*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2412.11945"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# List the generated markdown pages whose filesystem timestamp falls within
# the last `days` days, sorted by file name in descending order.
files = glob('_build/html/*.md')
days = 7
# Timezone-aware "now": the file timestamps below are expressed in UTC, so the
# reference time must be UTC as well (a naive local `datetime.today()` would
# shift every age by the local UTC offset).
now = datetime.now(timezone.utc)
max_age = timedelta(days=days)
res = []
for fk in files:
    # NOTE(review): st_ctime is the metadata-change time on Unix and the
    # creation time on Windows; st_mtime may be what "modified" should mean.
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc)
    delta = now - modified
    if delta <= max_age:
        # total_seconds() keeps the day component, unlike `delta.seconds`
        res.append((delta.total_seconds(), fk))
# keep only the file names, in reverse lexicographic order
res = [k[1] for k in reversed(sorted(res, key=lambda x: x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")

286  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: iterable of ``(fname, date)`` pairs; ``date`` is compared as a
                string against an ISO ``YYYY-MM-DD`` cutoff
    :param days: size of the look-back window in days (cutoff day included)
    :returns: generator over the matching ``fname``, most recent date first
    """
    # Local import: the notebook binds the global name `datetime` to the class
    # in one cell and to the module in another, so relying on the global makes
    # this function depend on the cell execution order.
    from datetime import date, timedelta

    # The cutoff does not depend on the item: compute it once.
    cutoff = (date.today() - timedelta(days=days)).isoformat()
    for fname, appeared in sorted(lst, key=lambda x: x[1], reverse=True):
        if appeared < cutoff:
            # entries are sorted by decreasing date: nothing newer follows
            break
        yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date of each document

    Each file is read line by line; the first line containing ``Appeared on``
    provides the date, taken as the text between ``Appeared on:`` and the
    following ``</mark>`` tag. Files without such a line are left out.

    :param lst_file: iterable of paths of the files to scan
    :returns: list of ``(fname, date)`` tuples, in the order of `lst_file`
    """
    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # iterate over the argument (not over a notebook-level global)
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # only the first occurrence matters: stop reading
                    break
    return dates

# Gather every generated markdown page, read its appearance date and keep
# the documents that appeared within the last `days` days.
from glob import glob

days = 7
lst = glob('_build/html/*md')
dates = extract_appearance_dates(lst)
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

15  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel holding `npub` cells

    Cells are numbered from 1; each one wraps a ``slide<k>`` placeholder div.

    :param npub: number of carousel cells to generate
    :returns: the HTML code as a single newline-joined string
    """
    cell_template = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    opening = [
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    ]
    cells = [cell_template.format(k=num) for num in range(1, npub + 1)]
    closing = ["  </div>"]
    return '\n'.join(opening + cells + closing)

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid holding `npub` items

    Items are numbered from 1; each one wraps a ``slide<k>`` placeholder div.

    :param npub: number of grid items to generate
    :returns: the HTML code as a single newline-joined string
    """
    item_template = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    items = [item_template.format(k=num) for num in range(1, npub + 1)]
    return '\n'.join(["""  <div class="grid"> """] + items + ["  </div>"])

In [13]:
# Render the 7-day archive page: one carousel cell per selected document.
carousel = create_carousel(npub)
# base names of the documents, as a comma-separated list of quoted strings
docs = ', '.join('"{0:s}"'.format(path.split('/')[-1]) for path in res)
# matching slide identifiers ("slide1", "slide2", ...)
slides = ', '.join(f'"slide{num}"' for num in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# fill in the template placeholders
page = (
    page
    .replace("{%-- carousel:s --%}", carousel)
    .replace("{%-- suptitle:s --%}", "7-day archives")
    .replace("{%-- docs:s --%}", docs)
    .replace("{%-- slides:s --%}", slides)
)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Same page as the 7-day archive, restricted to the last day.
days = 1
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
# base names of the documents, as a comma-separated list of quoted strings
docs = ', '.join('"{0:s}"'.format(path.split('/')[-1]) for path in res)
# matching slide identifiers ("slide1", "slide2", ...)
slides = ', '.join(f'"slide{num}"' for num in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# fill in the template placeholders
page = (
    page
    .replace("{%-- carousel:s --%}", carousel)
    .replace("{%-- suptitle:s --%}", "Daily")
    .replace("{%-- docs:s --%}", docs)
    .replace("{%-- slides:s --%}", slides)
)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

5  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6  # maximum number of papers displayed in the grid
# Latest appearance dates first, truncated to `npub` entries.
# (`reversed(sorted(...))` and `sorted(..., reverse=True)` order same-date
# entries differently, so the two forms are not interchangeable here.)
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), npub)]
# Fewer documents than requested may be available: size the grid on what was
# actually selected so that every slide has a matching document.
npub = len(res)
print(f"{npub} publications selected.")

grid = create_grid(npub)
# base names of the documents, as a comma-separated list of quoted strings
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
# matching slide identifiers ("slide1", "slide2", ...)
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    # fill in the template placeholders
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
