# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# get list from MPIA website
# it automatically filters identified non-scientists :func:`mpia.filter_non_scientists`
mpia_authors = mpia.get_mpia_mitarbeiter_list()
normed_mpia_authors = [k[1] for k in mpia_authors]   # initials + fullname
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, normed_mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

Arxiv has 46 new papers today
          4 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [4]:
documents = []
failed = []
for paper in tqdm(candidates[:-1]):
    paper_id = paper['identifier'].lower().replace('arxiv:', '')
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                normed_mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/3 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2304.06904


extracting tarball to tmp_2304.06904...

 done.
Retrieving document from  https://arxiv.org/e-print/2304.07158



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2304.07158...

 done.


Found 35 bibliographic references in tmp_2304.07158/aa_2023_46196.bbl.
Retrieving document from  https://arxiv.org/e-print/2304.07237


extracting tarball to tmp_2304.07237...

 done.


Found 138 bibliographic references in tmp_2304.07237/ALMA_HMSF_paper1_draft_5.0.bbl.


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [5]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2304.07158-b31b1b.svg)](https://arxiv.org/abs/arXiv:2304.07158) | **Investigating Gaia EDR3 parallax systematics using asteroseismology of  Cool Giant Stars observed by Kepler, K2, and TESS I. Asteroseismic distances  to 12,500 red-giant stars**  |
|| S. Khan, et al. -- incl., <mark>T. Cantat-Gaudin</mark> |
|*Appeared on*| *2023-04-17*|
|*Comments*| *11 pages, 8 figures, Accepted for publication in A&A*|
|**Abstract**| Gaia EDR3 has provided unprecedented data that generate a lot of interest in the astrophysical community, despite the fact that systematics affect the reported parallaxes at the level of ~ 10 muas. Independent distance measurements are available from asteroseismology of red-giant stars with measurable parallaxes, whose magnitude and colour ranges more closely reflect those of other stars of interest. In this paper, we determine distances to nearly 12,500 red-giant branch and red clump stars observed by Kepler, K2, and TESS. This is done via a grid-based modelling method, where global asteroseismic observables, constraints on the photospheric chemical composition, and on the unreddened photometry are used as observational inputs. This large catalogue of asteroseismic distances allows us to provide a first comparison with Gaia EDR3 parallaxes. Offset values estimated with asteroseismology show no clear trend with ecliptic latitude or magnitude, and the trend whereby they increase (in absolute terms) as we move towards redder colours is dominated by the brightest stars. The correction model proposed by Lindegren et al. (2021) is not suitable for all the fields considered in this study. We find a good agreement between asteroseismic results and model predictions of the red clump magnitude. We discuss possible trends with the Gaia scan law statistics, and show that two magnitude regimes exist where either asteroseismology or Gaia provides the best precision in parallax. |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2304.07237-b31b1b.svg)](https://arxiv.org/abs/arXiv:2304.07237) | **Physical and chemical complexity in high-mass star-forming regions with  ALMA. I. Overview and evolutionary trends of physical properties**  |
|| C. Gieser, et al. -- incl., <mark>H. Beuther</mark>, <mark>D. Semenov</mark>, <mark>M. Wells</mark> |
|*Appeared on*| *2023-04-17*|
|*Comments*| *21 pages, 10 figures, submitted to A&A*|
|**Abstract**| In this study, we investigate how physical properties, such as the density and temperature profiles, evolve on core scales through the evolutionary sequence during high-mass star formation ranging from protostars in cold infrared dark clouds to evolved UCHII regions. We observed 11 high-mass star-forming regions with ALMA at 3 mm wavelengths. Based on the 3 mm continuum morphology and recombination line emission, tracing locations with free-free (ff) emission, the fragmented cores analyzed in this study are classified into either dust or dust+ff cores. In addition, we resolve three cometary UCHII regions with extended 3 mm emission that is dominated by free-free emission. The temperature structure and radial profiles (T~r^-q ) are determined by modeling molecular emission of CH3CN and CH313CN with XCLASS and by using the HCN-to- HNC intensity ratio as probes for the gas kinetic temperature. The density profiles (n~r^-p ) are estimated from the 3 mm continuum visibility profiles. The masses M and H2 column densities N(H2) are then calculated from the 3 mm dust continuum emission. Results. We find a large spread in mass and peak H2 column density in the detected sources ranging from 0.1-150 Msun and 10^23 - 10^26 cm-2 , respectively. Including the results of the CORE and CORE-extension studies (Gieser et al. 2021, 2022) to increase the sample size, we find evolutionary trends on core scales for the temperature power-~law index q increasing from 0.1 to 0.7 from infrared dark clouds to UCHII regions, while for the the density power-law index p on core scales, we do not find strong evidence for an evolutionary trend. However, we find that on the larger clump scales throughout these evolutionary phases the density profile flattens from p = 2.2 to p = 1.2. (abridged) |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2304.06904-b31b1b.svg)](https://arxiv.org/abs/arXiv:2304.06904) | **The Kinematics, Metallicities, and Orbits of Six Recently Discovered  Galactic Star Clusters with Magellan/M2FS Spectroscopy**  |
|| A. B. Pace, et al. -- incl., <mark>T. S. Li</mark> |
|*Appeared on*| *2023-04-17*|
|*Comments*| *19 pages, 15 figures, submitted to MNRAS, associated data products available at this https URL*|
|**Abstract**| We present Magellan/M2FS spectroscopy of four recently discovered Milky Way star clusters (Gran 3, Gran 4, Garro 01, LP 866) and two newly discovered open clusters (Gaia 9, Gaia 10) at low Galactic latitudes. We measure line-of-sight velocities and stellar parameters ([Fe/H], $\log{g}$, $T_{\rm eff}$, [Mg/Fe]) from high resolution spectroscopy centered on the Mg triplet and identify 20-80 members per star cluster. We determine the kinematics and chemical properties of each cluster and measure the systemic proper motion and orbital properties by utilizing Gaia astrometry. We find Gran 3 to be an old, metal-poor (mean metallicity of [Fe/H]=-1.84) globular cluster located in the Galactic bulge on a retrograde orbit. Gran 4 is an old, metal-poor ([Fe/H]}=-1.84) globular cluster with a halo-like orbit that happens to be passing through the Galactic plane. The orbital properties of Gran 4 are consistent with the proposed LMS-1/Wukong and/or Helmi streams merger events. Garro 01 is an old, metal-rich ([Fe/H]=-0.30) globular cluster on a near circular orbit in the outer disk. Gaia 9 and Gaia 10 are among the most distant known open clusters at $R_{GC}\sim 18, 21.2~kpc$ and most metal-poor with [Fe/H]~-0.50,-0.46 for Gaia 9 and Gaia 10, respectively. LP 866 is a nearby, metal-rich open cluster ([Fe/H]$=+0.1$). The discovery and confirmation of multiple star clusters in the Galactic plane shows the power of {\it Gaia} astrometry and the star cluster census remains incomplete. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: '69117' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [6]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [7]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

exported in  _build/html/2304.07158.md
    + _build/html/tmp_2304.07158/./images/2_Gmag_vs_plxdiff_E20_APODR17.png
    + _build/html/tmp_2304.07158/./images/1_mw_skymap_details_APOGEEDR17_final.png
    + _build/html/tmp_2304.07158/./images/3_plxdiff_beta_G_nueff_E20_min10.png
exported in  _build/html/2304.07237.md
    + _build/html/tmp_2304.07237/./Overview_CORNISH_IRDC_G1111.png
    + _build/html/tmp_2304.07237/./Overview_AGAL_IRDC_G1111.png
    + _build/html/tmp_2304.07237/./Overview_Herschel_SPIRE_IRDC_G1111.png
    + _build/html/tmp_2304.07237/./Overview_Herschel_PACS_IRDC_G1111.png
    + _build/html/tmp_2304.07237/./Overview_Spitzer_MIPS_IRDC_G1111.png
    + _build/html/tmp_2304.07237/./Overview_Spitzer_IRAC_IRDC_G1111.png
    + _build/html/tmp_2304.07237/./Continuum_all.png
    + _build/html/tmp_2304.07237/./visibility_fit_HMC_G0962_2.png


## Display the papers

Not necessary but allows for a quick check.

In [8]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\diff}{{\hbox{d}}}$
$\newcommand{\numax}{\mbox{\nu_{\rm max}}\xspace}$
$\newcommand{\deltanu}{\mbox{\langle \Delta\nu \rangle}\xspace}$
$\newcommand{\teff}{\mbox{T_{\rm eff}}\xspace}$
$\newcommand{\logg}{\mbox{\log g}\xspace}$
$\newcommand{\feh}{\mbox{\rm{[Fe/H]}}\xspace}$
$\newcommand{\mh}{\mbox{\rm{[M/H]}}\xspace}$
$\newcommand{\afe}{\mbox{\rm{[\alpha/Fe]}}\xspace}$
$\newcommand{\msun}{\mbox{\mathrm{M}_{\odot}}\xspace}$
$\newcommand{\lsun}{\mbox{\mathrm{L}_{\odot}}\xspace}$
$\newcommand{\mearth}{\mbox{\mathrm{M}_{\oplus}}\xspace}$
$\newcommand{\rsun}{\mbox{\mathrm{R}_{\odot}}\xspace}$
$\newcommand{\muas}{\mbox{\mu \rm as}\xspace}$
$\newcommand{\kepler}{\emph{Kepler}\xspace}$
$\newcommand{\gaia}{\emph{Gaia}\xspace}$
$\newcommand{\ktwo}{K2\xspace}$
$\newcommand{\tess}{TESS\xspace}$</div>



<div id="title">

# Investigating $\gaia$ EDR3 parallax systematics using asteroseismology of Cool Giant Stars observed by $\$$\kepler$, $\ktwo$, and $\tess$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2304.07158-b31b1b.svg)](https://arxiv.org/abs/2304.07158)<mark>Appeared on: 2023-04-17</mark> -  _11 pages, 8 figures, Accepted for publication in A&A_

</div>
<div id="authors">

S. Khan, et al. -- incl., <mark>T. Cantat-Gaudin</mark>

</div>
<div id="abstract">

**Abstract:** $\gaia$ EDR3 has provided unprecedented data that generate a lot of interest in the astrophysical community, despite the fact that systematics affect the reported parallaxes at the level of $\sim 10   \rm \mu as$ . Independent distance measurements are available from asteroseismology of red-giant stars with measurable parallaxes, whose magnitude and colour ranges more closely reflect those of other stars of interest. In this paper, we determine distances to nearly 12,500 red-giant branch and red clump stars observed by $\kepler$ , $\ktwo$ , and $\tess$ . This is done via a grid-based modelling method, where global asteroseismic observables, constraints on the photospheric chemical composition, and on the unreddened photometry are used as observational inputs. This large catalogue of asteroseismic distances allows us to provide a first comparison with $\gaia$ EDR3 parallaxes. Offset values estimated with asteroseismology show no clear trend with ecliptic latitude or magnitude, and the trend whereby they increase (in absolute terms) as we move towards redder colours is dominated by the brightest stars. The correction model proposed by [Lindegren, Bastian and Biermann (2021)]() is not suitable for all the fields considered in this study. We find a good agreement between asteroseismic results and model predictions of the red clump magnitude. We discuss possible trends with the $\gaia$ scan law statistics, and show that two magnitude regimes exist where either asteroseismology or $\gaia$ provides the best precision in parallax.

</div>

<div id="div_fig1">

<img src="tmp_2304.07158/./images/2_Gmag_vs_plxdiff_E20_APODR17.png" alt="Fig6" width="100%"/>

**Figure 6. -** Parallax difference $\varpi_{\rm EDR3}-\varpi_{\rm PARAM}$ as a function of the $G$ magnitude for the full sample (top), $\kepler$(bottom left), $\ktwo$(bottom middle), and $\tess$(bottom right panel), using \citetalias{Elsworth2020} and APOGEE DR17. The colour scale indicates the density of stars, increasing from black to white. The red, yellow, and blue-shaded areas show the median parallax difference binned by magnitude for $\kepler$, $\ktwo$, and $\tess$, respectively. (*fig:trend_G*)

</div>
<div id="div_fig2">

<img src="tmp_2304.07158/./images/1_mw_skymap_details_APOGEEDR17_final.png" alt="Fig5" width="100%"/>

**Figure 5. -** Skymap in Galactic coordinates, showing the location and coverage resulting from the crossmatch between the various asteroseismic fields considered in this study and APOGEE DR17. This figure has been generated using the \texttt{python} package \texttt{mw-plot}(\url{milkyway-plot.readthedocs.io}). The background image comes from ESA/Gaia/DPAC. (*fig:skymap*)

</div>
<div id="div_fig3">

<img src="tmp_2304.07158/./images/3_plxdiff_beta_G_nueff_E20_min10.png" alt="Fig1" width="100%"/>

**Figure 1. -** _Top_: Median parallax offsets as estimated from asteroseismology (\citetalias{Elsworth2020}+APOGEE), as a function of the sine of ecliptic latitude. $\kepler$ and $\tess$ are plotted as white and black symbols, respectively. The coloured symbols correspond to the various $\ktwo$ fields, and follow the colour scheme adopted in Fig. \ref{fig:skymap}. _Middle and bottom_: Median parallax difference binned by $G$ magnitude (middle) and effective wavenumber (bottom panel). $\kepler$ and $\tess$ are plotted as black solid and dashed lines, respectively. The median uncertainty on the parallax difference is shown in the lower part of each panel. C15 does not appear in the two bottom panels as there are not enough stars to bin in $G$ and $\nu_{\rm eff}$. (*fig:offset_summary*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2304.07158"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\arraystretch}{1.1}$
$\newcommand{\arraystretch}{1.1}$
$\newcommand{\arraystretch}{1.1}$
$\newcommand{\arraystretch}{1.1}$
$\newcommand{\}{as}$</div>



<div id="title">

# Physical and chemical complexity in high-mass star-forming regions with ALMA.

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2304.07237-b31b1b.svg)](https://arxiv.org/abs/2304.07237)<mark>Appeared on: 2023-04-17</mark> -  _21 pages, 10 figures, submitted to A&A_

</div>
<div id="authors">

C. Gieser, et al. -- incl., <mark>H. Beuther</mark>, <mark>D. Semenov</mark>

</div>
<div id="abstract">

**Abstract:** High-mass star formation is a hierarchical process from cloud ( $>$ 1 pc), to clump (0.1-1 pc) to core scales ( $<$ 0.1 pc). Modern interferometers achieving high angular resolutions at mm wavelengths allow us to probe the physical and chemical properties of the gas and dust of protostellar cores in the earliest evolutionary formation phases. In this study, we investigate how physical properties, such as the density and temperature profiles, evolve on core scales through the evolutionary sequence during high-mass star formation ranging from protostars in cold infrared dark clouds to evolved UCH ${\sc ii}$ regions. We observed 11 high-mass star-forming regions with the Atacama Large Millimeter/submillimeter Array (ALMA) at 3 mm wavelengths. Based on the 3 mm continuum morphology and H(40) $\alpha$ recombination line emission, tracing locations with free-free (ff) emission, the fragmented cores analyzed in this study are classified into either "dust" or "dust+ff" cores. In addition, we resolve three cometary UCH ${\sc ii}$ regions with extended 3 mm emission that is dominated by free-free emission. The temperature structure and radial profiles ( $T \sim r^{-q}$ ) are determined by modeling molecular emission of CH $_{3}$ CN and CH $_{3}^{13}$ CN with \texttt{XCLASS} and by using the HCN-to-HNC intensity ratio as probes for the gas kinetic temperature. The density profiles ( $n \sim r^{-p}$ ) are estimated from the 3 mm continuum visibility profiles. The masses $M$ and H $_{2}$ column densities $N$ (H $_{2}$ ) are then calculated from the 3 mm dust continuum emission. We find a large spread in mass and peak H $_{2}$ column density in the detected sources ranging from 0.1 - 150 $M_\odot$ and 10 $^{23}$ - 10 $^{26}$ cm $^{-2}$ , respectively. Including the results of the CORE and CORE-extension studies  ([Gieser, Beuther and Semenov 2021](), [Gieser, Beuther and Semenov 2022]())  to increase the sample size, we find evolutionary trends on core scales for the temperature power-law index $q$ increasing from 0.1 to 0.7 from infrared dark clouds to UCH ${\sc ii}$ regions, while for the the density power-law index $p$ on core scales, we do not find strong evidence for an evolutionary trend. However, we find that on the larger clump scales throughout these evolutionary phases the density profile flattens from $p \approx 2.2$ to $p \approx 1.2$ . By characterizing a large statistical sample of individual fragmented cores, we find that the physical properties, such as the temperature on core scales and density profile on clump scales, evolve even during the earliest evolutionary phases in high-mass star-forming regions. These findings provide observational constraint for theoretical models describing the formation of massive stars. In follow-up studies we aim to further characterize the chemical properties of the regions by analyzing the large amount of molecular lines detected with ALMA in order to investigate how the chemical properties of the molecular gas evolve during the formation of massive stars.

</div>

<div id="div_fig1">

<img src="tmp_2304.07237/./Overview_CORNISH_IRDC_G1111.png" alt="Fig12.1" width="16%"/><img src="tmp_2304.07237/./Overview_AGAL_IRDC_G1111.png" alt="Fig12.2" width="16%"/><img src="tmp_2304.07237/./Overview_Herschel_SPIRE_IRDC_G1111.png" alt="Fig12.3" width="16%"/><img src="tmp_2304.07237/./Overview_Herschel_PACS_IRDC_G1111.png" alt="Fig12.4" width="16%"/><img src="tmp_2304.07237/./Overview_Spitzer_MIPS_IRDC_G1111.png" alt="Fig12.5" width="16%"/><img src="tmp_2304.07237/./Overview_Spitzer_IRAC_IRDC_G1111.png" alt="Fig12.6" width="16%"/>

**Figure 12. -** Overview of IRDC G11.11$-$4.Multi wavelength overview of IRDC G11.11$-$4. In color, CORNISH 6 cm, ATLASGAL 870$\upmu$m, _Herschel_ SPIRE 250 $\upmu$m, _Herschel_ PACS 70 $\upmu$m, _Spitzer_ MIPS 24 $\upmu$m, and _Spitzer_ IRAC 4.5 $\upmu$m data are presented as labeled. In all panels, the ALMA primary beam size is indicated by a grey circle. In the top right and left and bottom right panel, the ALMA 3 mm continuum data are shown by red contours. The dotted red contour marks the $-5\sigma_\mathrm{cont}$ level. The solid red contours start at $5\sigma_\mathrm{cont}$ and contour steps increase by a factor of 2 (e.g., 5, 10, 20, $40\sigma_\mathrm{cont}$). The ALMA synthesized beam size is shown in the bottom left corner. (*fig:overview_IRDC_G1111*)

</div>
<div id="div_fig2">

<img src="tmp_2304.07237/./Continuum_all.png" alt="Fig6" width="100%"/>

**Figure 6. -** ALMA 3 mm continuum images of the sample.ALMA 3 mm continuum. In each panel, the continuum data of the region is shown in color and black contours. The dotted black contour marks the $-5\sigma_\mathrm{cont}$ level. The solid black contours start at $5\sigma_\mathrm{cont}$ and contour steps increase by a factor of 2 (e.g., 5, 10, 20, $40\sigma_\mathrm{cont}$). The synthesized beam size is shown in the bottom left corner. The bar in the top left corner indicates a linear spatial scale of 0.1 pc. The continuum noise and synthesized beam size are listed in Table \ref{tab:ALMAcontinuumdataproducts}. The continuum fragments are classified into dust cores (red), dust+ff cores (orange), cometary UCH{\sc ii} regions (cyan), further explained in Sect. \ref{sec:ALMAfrag}. Fragments with $S$/$N < 15$ are not analyzed in this study and are labeled in grey. (*fig:ALMAcontinuum*)

</div>
<div id="div_fig3">

<img src="tmp_2304.07237/./visibility_fit_HMC_G0962_2.png" alt="Fig2" width="100%"/>

**Figure 2. -** Visibility profile of dust+ff core 2 in HMC G9.62$+$0.19. The profile of the non-core-subtracted and core-subtracted data is shown in grey and black, respectively (further explained in Sect. \ref{sec:ALMAsourcesub}). Two power-law profiles, tracing roughly the clump and core scales, are fitted to the core-subtracted data shown in red and green, respectively. The bottom axis shows the $uv$ distance in k$\lambda$ and the top axis is the corresponding spatial scale. The purple dashed line indicates the source diameter (Table \ref{tab:ALMApositions}). The figures for the remaining sources are shown in Fig. \ref{fig:ALMAvisibilityprofileapp}. (*fig:ALMAvisibilityprofile*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2304.07237"></div>

# Create HTML index

In [9]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

189  publications files modified in the last 7 days.


In [10]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

4  publications in the last 7 days.


In [11]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [12]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [13]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

2  publications in the last day.


In [14]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
