# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big: raise PIL's pixel-count limit to 1e9 pixels
# (presumably so that very large paper figures can still be opened -- TODO confirm against the Pillow docs)
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for affiliation-related issues."""

class AffiliationError(RuntimeError):
    """Error raised when a source does not pass the affiliation check."""

def validation(source: str):
    """Check the affiliation of a source before its TeX code is parsed.

    Delegates to :func:`mpia.affiliation_verifications`; any result other
    than ``True`` is treated as the reason of the failure.

    :param source: text handed over by the parser (presumably the raw TeX
                   source -- confirm against `latex.LatexDocument`)
    :raises AffiliationError: if the verification does not return ``True``
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # str() keeps the message readable (instead of failing with a
        # TypeError) should the check return something that is not a string.
        raise AffiliationError("mpia.affiliation_verifications: " + str(check))

        
# Show every AffiliationWarning, not only the first occurrence per location
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: Arxiv paper identifier (e.g. ``2407.05070``)
    :returns: markdown-compatible HTML: an ``<img>`` tag pointing to the
              QR-code service, wrapped in ``<div id="qrcode">``
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # The complete address (service URL + encoded target) goes inside ONE
    # quoted `src` attribute; quoting only the target put literal quote
    # characters in the middle of an unquoted attribute value.
    txt = f"""<img src="{url}https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA staff ("Mitarbeiter") list webpage from mpia.de to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Rejects IT, administration and technical staff: a name is discarded as
    soon as it contains (as a substring) one of the excluded family names.

    :param name: name to check
    :returns: True if no excluded name occurs in `name`, False otherwise
    """
    excluded = ('Wolf', 'Licht', 'Binroth', 'Witzel', 'Jordan',
                'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                'Dehen', 'Beckmann', 'Jager', 'Jäger')
    return not any(token in name for token in excluded)

def add_author_to_list(author_list: list) -> list:
    """ Make sure the manually maintained extra authors are in the list

    The given list is extended in place and also returned.

    :param author_list: list of authors
    :returns: the same list object, with the missing extra authors appended
    """
    extra_authors = ('T. Henning',)

    for name in extra_authors:
        if name in author_list:
            # already known, nothing to add
            continue
        author_list.append(name)
    return author_list

# Build the list of MPIA author names:
#  1. take field [1] of every entry of the MPIA website list and keep those
#     passing filter_non_scientists
#     (mpia.get_mpia_mitarbeiter_list() already does some filtering);
#  2. add the authors missing because their MPIA name and their author name
#     on papers are inconsistent.
mpia_authors = add_author_to_list(
    [entry[1]
     for entry in mpia.get_mpia_mitarbeiter_list()
     if filter_non_scientists(entry[1])]
)

In [4]:
# Papers to consider: the new arXiv papers plus manual references
new_papers = get_new_papers()
add_paper_refs = []  # identifiers of papers to add by hand
new_papers.extend([get_paper_from_identifier(ref) for ref in add_paper_refs])

candidates = []
for paperk in new_papers:
    original_authors = paperk['authors']
    # Match on the normalized (initials) form of the author names
    normed_author_list = [mpia.get_initials(name) for name in original_authors]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # an entry counts as a match when its highlighted form contains 'mark'
    matches = [pair for pair in zip(hl_authors, original_authors) if 'mark' in pair[0]]
    paperk['authors'] = hl_authors
    if len(matches) > 0:
        # keep the paper only if at least one author matched our list
        candidates.append(paperk)
print("Arxiv has {0:,d} new papers today".format(len(new_papers)))
print("          {0:,d} with possible author matches".format(len(candidates)))

T. Henning  ->  T. Henning  |  ['T. Henning']
G. Perotti  ->  G. Perotti  |  ['G. Perotti']
K. Schwarz  ->  K. Schwarz  |  ['K. Schwarz']
H. Beuther  ->  H. Beuther  |  ['H. Beuther']
S. Li  ->  S. Li  |  ['S. Li']


Arxiv has 87 new papers today
          3 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Process every candidate paper:
#   documents -- (paper_id, markdown text) of the papers that were fully processed
#   failed    -- (paper, reason) of the papers that were rejected or crashed
documents = []
failed = []
for paper in tqdm(candidates):
    # Normalize the identifier: lowercase, drop the 'arxiv:' prefix, trim whitespace.
    # NOTE(review): r'\n' is a raw string, so this replace removes the two
    # characters backslash + 'n', not a newline -- confirm this is intended.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        # Retrieve the source only if the folder is not already there
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` raises AffiliationError when the affiliation check fails
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is built but never used afterwards in this cell
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well:
        # when the number of parsed authors differs from the arXiv record,
        # fall back to the (already highlighted) arXiv author list
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # Fall back to the arXiv abstract when none was parsed from the source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # Comment line: arXiv badge + appearance date (+ arXiv comments, if any)
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations (best effort: on failure the error is printed and
        # the markdown is kept as it is)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other failure: warn, record the paper as failed and move on
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/3 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2407.05070
extracting tarball to tmp_2407.05070...

 done.


Found 71 bibliographic references in tmp_2407.05070/aanda.bbl.
syntax error in line 67: '=' expected
Retrieving document from  https://arxiv.org/e-print/2407.05770


extracting tarball to tmp_2407.05770...

 done.


Found 127 bibliographic references in tmp_2407.05770/aa49390_24corr.bbl.
Retrieving document from  https://arxiv.org/e-print/2407.05968
extracting tarball to tmp_2407.05968... done.


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# The logs folder may not exist yet (fresh build): create it before writing
os.makedirs(os.path.dirname(logfile), exist_ok=True)

# Paper records contain non-ASCII characters: write the log as UTF-8
# regardless of the platform default encoding
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers for which a document was generated
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # group identical failure reasons together
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are shown in green, any other failure in red
        color = 'green' if 'affiliation' in reason else 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        # one sub-section per distinct reason
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # every failure is displayed (and logged); restrict to color == 'red'
        # to show only the important errors here
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2407.05070-b31b1b.svg)](https://arxiv.org/abs/2407.05070) | **MINDS. The DR Tau disk II: probing the hot and cold H$_2$O reservoirs in the JWST-MIRI spectrum**  |
|| M. Temmink, et al. -- incl., <mark>T. Henning</mark>, <mark>G. Perotti</mark>, <mark>K. Schwarz</mark> |
|*Appeared on*| *2024-07-09*|
|*Comments*| *Accepted for publication in Astronomy & Astrophysics on 05/07/2024*|
|**Abstract**|            The MRS mode of the JWST-MIRI instrument gives insights into the chemical richness and complexity of the inner regions of planet-forming disks. Here, we analyse the H$_2$O-rich spectrum of the compact disk DR Tau. We probe the excitation conditions of the H$_2$O transitions observed in different wavelength regions across the entire spectrum using LTE slab models, probing both the rovibrational and rotational transitions. These regions suggest a radial temperature gradient, as the excitation temperature (emitting radius) decreases (increases) with increasing wavelength. To explain the derived emitting radii, we require a larger inclination for the inner disk (i~20-23 degrees) compared to the outer disk (i~5 degrees), agreeing with our previous analysis on CO. We also analyse the pure rotational spectrum (<10 micron) using a large, structured disk (CI Tau) as a template, confirming the presence of the radial gradient, and by fitting multiple components to further characterise the radial and vertical temperature gradients present in the spectrum. At least three temperature components (T~180-800 K) are required to reproduce the rotational spectrum of H$_2$O arising from the inner ~0.3-8 au. These components describe a radial temperature gradient that scales roughly as ~R$^{-0.5}$ in the emitting layers. As the H$_2$O is mainly optically thick, we derive a lower limit on the abundance ratio of H$_2$O/CO~0.17, suggesting a potential depletion of H$_2$O. Similarly to previous work, we detect a cold H$_2$O component (T~180 K) originating from near the snowline. We cannot conclude if an enhancement of the H$_2$O reservoir is observed following radial drift. A consistent analysis of a larger sample of compact disks is necessary to study the importance of drift in enhancing the H$_2$O abundances.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2407.05770-b31b1b.svg)](https://arxiv.org/abs/2407.05770) | **A global view on star formation: The GLOSTAR Galactic plane survey X. Galactic HII region catalog using radio recombination lines**  |
|| S. Khan, et al. -- incl., <mark>H. Beuther</mark> |
|*Appeared on*| *2024-07-09*|
|*Comments*| *Accepted for publication in A&A*|
|**Abstract**|            Studies of Galactic HII regions are of crucial importance for studying star formation and the evolution of the interstellar medium. Gaining an insight into their physical characteristics contributes to a more comprehensive understanding of these phenomena. The GLOSTAR project aims to provide a GLObal view on STAR formation in the Milky Way by performing an unbiased and sensitive survey. This is achieved by using the extremely wideband (4{-}8 GHz) C-band receiver of the Karl G. Jansky Very Large Array and the Effelsberg 100 m telescope. Using radio recombination lines observed in the GLOSTAR survey with the VLA in D-configuration with a typical line sensitivity of 1{\sigma} {\sim} 3.0 mJy beam{^-1} at {\sim} 5 km s{^-1} and an angular resolution of 25", we cataloged 244 individual Galactic HII regions and derived their physical properties. We examined the mid-infrared (MIR) morphology of these HII regions and find that a significant portion of them exhibit a bubble-like morphology in the GLIMPSE 8 {\mu}m emission. We also searched for associations with the dust continuum and sources of methanol maser emission, other tracers of young stellar objects, and find that 48\% and 14\% of our HII regions, respectively, are coextensive with those. We measured the electron temperature for a large sample of HII regions within Galactocentric distances spanning from 1.6 to 13.1 kpc and derived the Galactic electron temperature gradient as {\sim} 372 {\pm} 28 K kpc{^-1} with an intercept of 4248 {\pm} 161 K, which is consistent with previous studies.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2407.05968-b31b1b.svg)](https://arxiv.org/abs/2407.05968) | **Fermi-LAT discovery of the GeV emission of the superluminous supernovae SN 2017egm**  |
|| <mark>S. Li</mark>, et al. |
|*Appeared on*| *2024-07-09*|
|*Comments*| **|
|**Abstract**|            Superluminous supernovae (SLSNe) are a new class of transients with luminosities $\sim10 -100$ times larger than the usual core-collapse supernovae (SNe). Their origin is still unclear and one widely discussed scenario involves a millisecond magnetar central engine. The GeV-TeV emission of SLSNe has been predicted in the literature but has not been convincingly detected yet. Here we report the search for the $\gamma$-ray emission in the direction of SN 2017egm, one of the closest SLSNe detected so far, with the 15-year {\it Fermi}-LAT Pass 8 data. There is a transient $\gamma$-ray source appearing about 2 months after this event and lasting a few months. Both the peak time and the luminosity of the GeV emission are consistent with the magnetar model prediction, suggesting that such a GeV transient is the high-energy counterpart of SN 2017egm and the central engine of this SLSNe is a young magnetar.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    Figures referenced in the markdown (``[Fig...](path)`` links and
    ``<img src="path" .../>`` tags) are copied below `directory`, keeping
    their relative path; online figures and missing files are skipped.

    :param md: markdown text to export
    :param md_fname: name of the markdown file to write in `directory`
    :param directory: export directory (created, with parents, if missing)
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs: the export directory can be nested (e.g. '_build/html/')
        os.makedirs(directory, exist_ok=True)

    # Non-greedy patterns: with several links/parentheses on the same line
    # a greedy `.*` captures the wrong (or an empty) target.
    fig_fnames = (re.findall(r'\[Fig[^\]]*\]\(([^)]*)\)', md) +
                  re.findall(r'\<img src="([^>\s]*)"[^>]*/>', md))
    print("found figures", fig_fnames)
    exported = []
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.isfile(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
        exported.append(destfname)
    md_path = os.path.join(directory, md_fname)
    # the markdown contains non-ASCII characters: do not depend on the
    # platform default encoding
    with open(md_path, 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", md_path)
    # list only the figures that were actually copied
    for destfname in exported:
        print("    + " + destfname)

In [8]:
# Write each generated document (and copy its local figures) into the HTML build folder
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2407.05070/./Spectrum_Region1-NF.png', 'tmp_2407.05070/./Spectrum_Region2-NF.png', 'tmp_2407.05070/./Spectrum_Region3-NF.png', 'tmp_2407.05070/./Spectrum_Region4-NF.png', 'tmp_2407.05070/./Spectrum_Region5-NF.png', 'tmp_2407.05070/./Spectrum_Region6-NF.png', 'tmp_2407.05070/./DRTau-CITau-NF.png']
copying  tmp_2407.05070/./Spectrum_Region1-NF.png to _build/html/
copying  tmp_2407.05070/./Spectrum_Region2-NF.png to _build/html/
copying  tmp_2407.05070/./Spectrum_Region3-NF.png to _build/html/
copying  tmp_2407.05070/./Spectrum_Region4-NF.png to _build/html/
copying  tmp_2407.05070/./Spectrum_Region5-NF.png to _build/html/
copying  tmp_2407.05070/./Spectrum_Region6-NF.png to _build/html/
copying  tmp_2407.05070/./DRTau-CITau-NF.png to _build/html/
exported in  _build/html/2407.05070.md
    + _build/html/tmp_2407.05070/./Spectrum_Region1-NF.png
    + _build/html/tmp_2407.05070/./Spectrum_Region2-NF.png
    + _build/html/tmp_2407.05070/./Spectrum_Region3-NF.png
    + _bu

## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render every generated markdown document inline
for doc_entry in documents:
    display(Markdown(doc_entry[1]))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$</div>



<div id="title">

# MINDS. The DR Tau disk II: probing the hot and cold $\ce{H_2O}$ reservoirs in the JWST-MIRI spectrum

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2407.05070-b31b1b.svg)](https://arxiv.org/abs/2407.05070)<mark>Appeared on: 2024-07-09</mark> -  _Accepted for publication in Astronomy & Astrophysics on 05/07/2024_

</div>
<div id="authors">

M. Temmink, et al. -- incl., <mark>T. Henning</mark>, <mark>G. Perotti</mark>, <mark>K. Schwarz</mark>

</div>
<div id="abstract">

**Abstract:** The Medium Resolution Spectrometer (MRS) of the Mid-InfraRed Instrument (MIRI) on the James Webb Space Telescope (JWST) gives insights into the chemical richness and complexity of the inner regions of planet-forming disks. Several disks that are compact in the millimetre dust emission have been found by Spitzer to be particularly bright in $\ce{H_2O}$ , which is thought to be caused by the inward drift of icy pebbles. Here, we analyse the $\ce{H_2O}$ -rich spectrum of the compact disk DR Tau using high-quality JWST-MIRI observations. We infer the $\ce{H_2O}$ column densities (in cm $^{-2}$ ) using methods presented in previous works, as well as introducing a new method to fully characterise the pure rotational spectrum. We aim to further characterise the abundances of $\ce{H_2O}$ in the inner regions of this disk and its abundance relative to $\ce{CO}$ . We also search for emission of other molecular species, such as $\ce{CH_4}$ , $\ce{NH_3}$ , $\ce{CS}$ , $\ce{H_2}$ , $\ce{SO_2}$ , and larger hydrocarbons; commonly detected species, such as $\ce{CO}$ , $\ce{CO_2}$ , $\ce{HCN}$ , and $\ce{C_2H_2}$ , have been investigated in our previous paper. We first use 0D local thermodynamic equilibrium (LTE) slab models to investigate the excitation properties observed in different wavelength regions across the entire spectrum, probing both the ro-vibrational and rotational transitions. To further analyse the pure rotational spectrum ( $\geq$ 10 $\mathrm{\mu}$ m), we use the spectrum of a large, structured disk (CI Tau) as a template to search for differences with our compact disk. Finally, we fit multiple components to characterise the radial (and vertical) temperature gradient(s) present in the spectrum of DR Tau. The 0D slab models indicate a radial gradient in the disk, as the excitation temperature (emitting radius) decreases (increases) with increasing wavelength, which is confirmed by the analysis involving the large disk template. 
To explain the derived emitting radii, we need a larger inclination for the inner disk ( $i\sim$ 10-23◦ee), agreeing with our previous analysis on $\ce{CO}$ . From our multi-component fit, we find that at least three temperature components ( $T_1\sim$ 800 K, $T_2\sim$ 470 K, and $T_3\sim$ 180 K) are required to reproduce the observed rotational spectrum of $\ce{H_2O}$ arising from the inner $R_\textnormal{em}\sim$ 0.3-8 au. By comparing line ratios, we derived an upper limit on the column densities (in cm $^{-2}$ ) for the first two components of $\log_{10}(N)\leq$ 18.4 within $\sim$ 1.2 au. We note that the models with a pure temperature gradient provide as robust results as the more complex models, which include spatial line shielding. No robust detection of the isotopologue $\ce{H_2 ^{18}O}$ can be made and upper limits are provided for other molecular species. Our analysis confirms the presence of a pure radial temperature gradient present in the inner disk of DR Tau, which can be described by at least three components. This gradient scales roughly as $\sim R_\textnormal{em}^{-0.5}$ in the emitting layers, in the inner 2 au. As the observed $\ce{H_2O}$ is mainly optically thick, a lower limit on the abundance ratio of $\ce{H_2O}$ / $\ce{CO}$ $\sim$ 0.17 is derived, suggesting a potential depletion of $\ce{H_2O}$ . Similarly to previous work, we detect a cold $\ce{H_2O}$ component ( $T\sim$ 180 K) originating from near the snowline, now with a multi-component analysis. Yet, we cannot conclude whether an enhancement of the $\ce{H_2O}$ reservoir is observed following radial drift. A consistent analysis of a larger sample is necessary to study the importance of drift in enhancing the $\ce{H_2O}$ abundances.

</div>

<div id="div_fig1">

<img src="tmp_2407.05070/./Spectrum_Region1-NF.png" alt="Fig10.1" width="33%"/><img src="tmp_2407.05070/./Spectrum_Region2-NF.png" alt="Fig10.2" width="33%"/><img src="tmp_2407.05070/./Spectrum_Region3-NF.png" alt="Fig10.3" width="33%"/>

**Figure 10. -** Best fitting slab models (without line overlap) for the different regions. In each subfigure, the top panel displays the continuum subtracted JWST spectrum in a specific region, while the full model spectrum is shown in red. The bottom panels show the models for the individually detected molecules. In addition, we show the \ce{CO} model in pink from \citet{TemminkEA24}. The horizontal bar in each top panel indicate the line regions used in the $\chi^2_\textnormal{red}$-fits. (*fig:RegionSpectra*)

</div>
<div id="div_fig2">

<img src="tmp_2407.05070/./Spectrum_Region4-NF.png" alt="Fig11.1" width="33%"/><img src="tmp_2407.05070/./Spectrum_Region5-NF.png" alt="Fig11.2" width="33%"/><img src="tmp_2407.05070/./Spectrum_Region6-NF.png" alt="Fig11.3" width="33%"/>

**Figure 11. -** Continuation of Figure \ref{fig:RegionSpectra}. The best fit to the \ce{OH} emission is shown in magenta. In addition, we show the best fitting slab models, adopted from \citet{TemminkEA24}, for \ce{CO_2}(green), \ce{HCN}(orange), and \ce{C_2H_2}(yellow) in the wavelength region of 13.6-16.3 $\mathrm{\mu}$m.  (*fig:RegionSpectra*)

</div>
<div id="div_fig3">

<img src="tmp_2407.05070/./DRTau-CITau-NF.png" alt="Fig5" width="100%"/>

**Figure 5. -** The spectra (across 13.4-24.0 $\mathrm{\mu}$m) of DR Tau (grey) and CI Tau (black, scaled; see Section \ref{sec:LDT-CITau}) shown together with the residual spectrum (in red) of DR Tau after subtraction of the scaled spectrum of CI Tau. The best fitting \ce{H_2O} slab model ($T$=375 K) to the residuals is shown in blue. The black dashed box just shortward of $\sim$24.0 $\mathrm{\mu}$m indicates the pair of lines identified by \citet{BanzattiEA23Subm}, hinting at a third component ($\sim$170 K) needed to fully explain the observed \ce{H_2O} reservoir. (*fig:LDTemplate*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2407.05070"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$</div>



<div id="title">

# A global view on star formation: The GLOSTAR Galactic plane survey \ X. Galactic $\ion{H}{ii}$ region catalog using radio recombination lines$\thanks{Tables \ref{tab:catalog} and \ref{tab:phy_prop} are only available in electronic form at the CDS via anonymous ftp to \url{cdsarc.u-strasbg.fr} (130.79.128.5) or via \url{http://cdsweb.u-strasbg.fr/cgi-bin/qcat?J/A+A/}. }$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2407.05770-b31b1b.svg)](https://arxiv.org/abs/2407.05770)<mark>Appeared on: 2024-07-09</mark> -  _Accepted for publication in A&A_

</div>
<div id="authors">

S. Khan, et al. -- incl., <mark>H. Beuther</mark>

</div>
<div id="abstract">

**Abstract:** Studies of Galactic $\ion{H}{ii}$ regions are of crucial importance for studying star formation and the evolution of the interstellar medium. Gaining an insight into their physical characteristics contributes to a more comprehensive understanding of these phenomena. The GLOSTAR project aims to provide a GLObal view on STAR formation in the Milky Way by performing an unbiased and sensitive survey. This is achieved by using the extremely wideband (4 $-$ 8 GHz) _C_ -band receiver of the _Karl G. Jansky_ Very Large Array and the Effelsberg 100 m telescope. Using radio recombination lines observed in the GLOSTAR survey with the VLA in D-configuration with a typical line sensitivity of 1 $\sigma \sim \rm 3.0 mJy beam^{-1}$ at $\sim \rm 5 km s^{-1}$ and an angular resolution of 25 $\arcsec$ , we cataloged 244 individual Galactic $\ion{H}{ii}$ regions ( $-$ 2 $◦ \leq$ $\ell$ $\leq$ 60 $◦$ \& | _b_ | $\leq$ 1 $◦$ and 76 $◦ \leq$ $\ell$ $\leq$ 83 $◦$ \& $-$ 1 $◦ \leq$ _b_ $\leq$ 2 $◦$ ) and derived their physical properties. We examined the mid-infrared (MIR) morphology of these $\ion{H}{ii}$ regions and find that a significant portion of them exhibit a bubble-like morphology in the GLIMPSE 8 $\mu$ m emission. We also searched for associations with the dust continuum and sources of methanol maser emission, other tracers of young stellar objects, and ﬁnd that 48 \% and 14 \% of our $\ion{H}{ii}$ regions, respectively, are coextensive with those. We measured the electron temperature for a large sample of $\ion{H}{II}$ regions within Galactocentric distances spanning from 1.6 to 13.1 kpc and derived the Galactic electron temperature gradient  as $\sim$ 372 $\pm$ 28 K kpc $^{-1}$ with an intercept of 4248 $\pm$ 161 K, which is consistent with previous studies.

</div>

<div id="div_fig1">

<img src="tmp_2407.05770/./figures/hist_distribution2.png" alt="Fig15" width="100%"/>

**Figure 15. -** Distributions of various $\ion${H}{ii} region physical properties from top left to bottom right, continuum flux density, RRL amplitude, RRL FWHM, effective radius, electron temperature, electron density, EM, Lyman photon rate, and ionized gas mass. The electron temperature reported by [Quireza, et. al (2006)]() is indicated in red, while the one reported by [Wenger, et. al (2019)]() is shown in yellow, and these are compared to the GLOSTAR $\ion${H}{ii} region sample (aquamarine). On average, the electron temperature of GLOSTAR $\ion${H}{ii} regions (6707 K) is lower than that of [Quireza, et. al (2006)]()(8214 K) and [Wenger, et. al (2019)]()(8055 K). Electron density histograms are presented for both the GLOSTAR (aquamarine) and [Quireza, et. al (2006)]()(red) $\ion${H}{ii} region samples. On average, the GLOSTAR nebulae have higher electron density than the [Quireza, et. al (2006)]() sources. (*fig:phy_prop_dist*)

</div>
<div id="div_fig2">

<img src="tmp_2407.05770/./figures/velo_diff.png" alt="Fig11.1" width="50%"/><img src="tmp_2407.05770/./figures/distance_compare.png" alt="Fig11.2" width="50%"/>

**Figure 11. -** Top panel: Plot of the difference in source velocity ($\rm V_{lsr}$) and the velocity of tangent point ($\rm V_T$), versus the difference in the velocity of the first absorption minimum ($\rm V_A$) and the $\rm V_T$. The diagonal and horizontal shaded regions represent the expected location of the near and far distance sources, respectively, while the solid line shows the approximate boundaries  ([Kolpak, et. al 2003](), [Urquhart, Hoare and Lumsden 2012]()) . The dashed diagonal line divides the darker triangular region in the lower left quadrant of the plot into two regions: sources above the line are more likely to be at a near distance, while those below are more likely to be at a far distance. Bottom panel: Plot shows the relation between the distance determined using the Bayesian method presented by [Reid, et. al (2016)](), [Reid, Menten and Brunthaler (2019)]() and the near/far distance determined using the [Reid, Menten and Brunthaler (2014)]() rotation curve. The red circles and blue stars represent the distances taken from literature and obtained distance for 53 $\ion${H}{ii} regions, respectively. The solid black line shows the line of equality, and the gray shaded region represents $\pm$1 kpc.  (*fig:final_dist*)

</div>
<div id="div_fig3">

<img src="tmp_2407.05770/./figures/cdf_HII_te_assoc_ad.png" alt="Fig18.1" width="16%"/><img src="tmp_2407.05770/./figures/cdf_HII_ne_assoc.png" alt="Fig18.2" width="16%"/><img src="tmp_2407.05770/./figures/cdf_HII_em_assoc.png" alt="Fig18.3" width="16%"/><img src="tmp_2407.05770/./figures/cdf_HII_nycl_assoc.png" alt="Fig18.4" width="16%"/><img src="tmp_2407.05770/./figures/cdf_HII_mass_assoc.png" alt="Fig18.5" width="16%"/><img src="tmp_2407.05770/./figures/cdf_HII_dist_assoc.png" alt="Fig18.6" width="16%"/>

**Figure 18. -** Comparison between the properties of GLOSTAR $\ion${H}{ii} regions that are associated with ATLASGAL dust clumps (in red) and those that are not linked to ATLASGAL clumps (in blue). CDFs are presented for various properties of GLOSTAR $\ion${H}{ii} regions. The results of the Anderson-Darling (AD) tests, indicated above each plot, provide insights into the statistical significance of the observed differences. (*fig:dust_without_dust_hist*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2407.05770"></div>

# Create HTML index

In [10]:
# `datetime` is imported as a module (not the class) so that the other cells
# using `datetime.date` / `datetime.timedelta` keep working in any run order.
import datetime
from datetime import timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
# Timezone-aware "now": it is compared with timestamps converted to UTC, so
# both sides must be expressed in the same timezone.
now = datetime.datetime.now(timezone.utc)
res = []
for fk in files:
    # NOTE(review): st_ctime is the metadata-change time on Unix (creation
    # time on Windows); st_mtime may be what "modified" is meant to be.
    stat_result = os.stat(fk).st_ctime
    modified = datetime.datetime.fromtimestamp(stat_result, tz=timezone.utc)
    delta = now - modified
    if delta <= timedelta(days=days):
        # total_seconds(): `.seconds` is only the sub-day part of the delta
        res.append((delta.total_seconds(), fk))
# file names, in reverse alphabetical order
res = [k[1] for k in sorted(res, key=lambda x: x[1], reverse=True)]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];
# [ print('\t', k) for k in res ];

119  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: iterable of ``(filename, date)`` pairs; dates are compared as
                strings (assumes ISO ``YYYY-MM-DD`` strings, as produced by
                ``str(datetime.date)`` -- verify against the caller)
    :param days: number of days to look back from today
    :returns: generator over the file names dated within the last `days`
              days, most recent date first
    """
    # Local import: keeps the function independent of how the global name
    # `datetime` is bound in the kernel (module or class).
    from datetime import date as _date, timedelta as _timedelta

    # the cutoff does not change during the iteration: compute it once
    cutoff = str(_date.today() - _timedelta(days=days))
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date >= cutoff:
            yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date of each document

    Each file is read up to its first line containing ``Appeared on``; files
    without such a line do not contribute to the result.

    :param lst_file: iterable of paths of the files to scan
    :returns: list of ``(fname, date)`` tuples, in the order of `lst_file`
    """
    def get_date(line):
        # Keep the text between the last 'Appeared on:' and the next '</mark>'
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument rather than a module-level list so that the
    # function works on whatever the caller passes in.
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # only the first occurrence matters
                    break
    return dates

from glob import glob
# Read the appearance date of every rendered page once; the per-period
# selections are then derived from `dates`.
lst = glob('_build/html/*md')
dates = extract_appearance_dates(lst)

# Selection for the 7-day archive page
days = 7
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

13  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel made of `npub` slides

    :param npub: number of slides; they get the ids ``slide1`` .. ``slide<npub>``
    :returns: HTML text, one element per line
    """
    opening = """  <div class="carousel" """
    options = """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
    # One placeholder cell per publication
    cells = [
        f"""    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
        for k in range(1, npub + 1)
    ]
    return '\n'.join([opening, options, *cells, "  </div>"])

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid made of `npub` slides

    :param npub: number of items; they get the ids ``slide1`` .. ``slide<npub>``
    :returns: HTML text, one element per line
    """
    template = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    lines = ["""  <div class="grid"> """]
    # One placeholder item per publication
    lines.extend(template.format(k=index) for index in range(1, npub + 1))
    lines.append("  </div>")
    return '\n'.join(lines)

In [13]:
# Build the 7-day archive page: one carousel slide per selected document.
carousel = create_carousel(npub)
# Quoted, comma-separated lists: document base names and slide element ids
docs = ', '.join('"{0:s}"'.format(path.rsplit('/', 1)[-1]) for path in res)
slides = ', '.join('"slide{0}"'.format(k) for k in range(1, npub + 1))

# Placeholders of the template, substituted in this order
substitutions = {
    "{%-- carousel:s --%}": carousel,
    "{%-- suptitle:s --%}": "7-day archives",
    "{%-- docs:s --%}": docs,
    "{%-- slides:s --%}": slides,
}

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
for placeholder, value in substitutions.items():
    page = page.replace(placeholder, value)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# Same page as the 7-day archive, restricted to the documents of the last day
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(npub, " publications in the last day.")

# Quoted, comma-separated lists: document base names and slide element ids
carousel = create_carousel(npub)
docs = ', '.join('"{0:s}"'.format(name.split('/')[-1]) for name in res)
slides = ', '.join(f'"slide{k}"' for k in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    template = tpl.read()

# Fill the placeholders of the template
page = (
    template
    .replace("{%-- carousel:s --%}", carousel)
    .replace("{%-- suptitle:s --%}",  "Daily" )
    .replace("{%-- docs:s --%}", docs)
    .replace("{%-- slides:s --%}", slides)
)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

5  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

# Maximum number of papers shown in the grid
npub = 6
# Most recent first. reversed(sorted(...)) is kept on purpose: unlike
# sorted(..., reverse=True) it also reverses the relative order of papers
# sharing the same date, which decides who makes the cut.
newest_first = reversed(sorted(dates, key=lambda x: x[1]))
# The limit follows `npub` instead of repeating the number
res = [k[0] for k in islice(newest_first, npub)]
print(len(res), f" {npub} publications selected.")
# Fewer documents than requested may be available: size the grid on what was
# actually selected so that slides and documents always match one to one.
npub = len(res)

grid = create_grid(npub)
# Quoted, comma-separated lists: document base names and slide element ids
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    # Fill the placeholders of the template
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
