# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# get list from MPIA website
# it automatically filters identified non-scientists :func:`mpia.filter_non_scientists`
mpia_authors = mpia.get_mpia_mitarbeiter_list()
normed_mpia_authors = [k[1] for k in mpia_authors]   # initials + fullname
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, normed_mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

R. v. Boekel  ->  R. v. Boekel  |  ['R. v. Boekel']
M. Scheuck  ->  M. Scheuck  |  ['M. Scheuck']
S. Li  ->  S. Li  |  ['S. Li']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
S. Pfalzner  ->  S. Pfalzner  |  ['S. Pfalzner']
Z.-L. Xie  ->  Z.-L. Xie  |  ['Z.-L. Xie']
S. Belladitta  ->  S. Belladitta  |  ['S. Belladitta']
Arxiv has 96 new papers today
          5 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [4]:
documents = []
failed = []
for paper in tqdm(candidates):
    paper_id = paper['identifier'].lower().replace('arxiv:', '')
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                normed_mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/5 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2401.03437


extracting tarball to tmp_2401.03437...

 done.




✔ → 0:header
  ↳ 8579:\section{Introduction}


✔ → 8579:\section{Introduction}
  ↳ 21428:\section{Observations and data processing}


✘ → 21428:\section{Observations and data processing}
  ↳ 33014:\section{Model-based imaging}


✘ → 33014:\section{Model-based imaging}
  ↳ 46401:\section{Results}


✘ → 46401:\section{Results}
  ↳ 76464:\section{Discussion}


✘ → 76464:\section{Discussion}
  ↳ 110064:\section{Summary}


✔ → 110064:\section{Summary}
  ↳ 119300:\begin{appendix}
✔ → 119300:\begin{appendix}
  ↳ 119317:\section{VLTI data plots}
✔ → 119317:\section{VLTI data plots}
  ↳ 122366:\section{Additional notes on the MATISSE data processing}
✔ → 122366:\section{Additional notes on the MATISSE data processing}
  ↳ 124305:\section{Visibility calculation}


✘ → 124305:\section{Visibility calculation}
  ↳ 127481:\section{Opacity curves}
✔ → 127481:\section{Opacity curves}
  ↳ 129271:\section{Two-step optimization procedure}
✔ → 129271:\section{Two-step optimization procedure}
  ↳ 133401:\section{Degeneracy in the spectral decomposition}


✔ → 133401:\section{Degeneracy in the spectral decomposition}
  ↳ 136472:\section{Supplementary model plots}
✔ → 136472:\section{Supplementary model plots}
  ↳ 139515:end


Unable to locate Ghostscript on paths


Retrieving document from  https://arxiv.org/e-print/2401.03507


extracting tarball to tmp_2401.03507... done.
Retrieving document from  https://arxiv.org/e-print/2401.03549


extracting tarball to tmp_2401.03549...

 done.
Retrieving document from  https://arxiv.org/e-print/2401.03775



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


extracting tarball to tmp_2401.03775...

 done.
Retrieving document from  https://arxiv.org/e-print/2401.04009


extracting tarball to tmp_2401.04009...

 done.


E. Bañados  ->  E. Bañados  |  ['E. Bañados']
S. Belladitta  ->  S. Belladitta  |  ['S. Belladitta']
F. Davies  ->  F. Davies  |  ['F. Davies']


Found 51 bibliographic references in tmp_2401.04009/sample631.bbl.


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [5]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2401.04009-b31b1b.svg)](https://arxiv.org/abs/arXiv:2401.04009) | **Recognizing Blazars Using Radio Morphology from the VLA Sky Survey**  |
|| <mark>Z.-L. Xie</mark>, et al. -- incl., <mark>S. Belladitta</mark> |
|*Appeared on*| *2024-01-09*|
|*Comments*| *21 pages, 11 figures, accepted for publication in The Astrophysical Journal*|
|**Abstract**| Blazars are radio-loud Active Galactic Nuclei (AGN) whose jets have a very small angle to our line of sight. Observationally, the radio emission are mostly compact or a compact-core with a 1-sided jet. With 2.5$^{\prime\prime}$ resolution at 3 GHz, the Very Large Array Sky Survey (VLASS) enables us to resolve the structure of some blazar candidates in the sky north of Decl. $-40$ deg. We introduce an algorithm to classify radio sources as either blazar-like or non-blazar-like based on their morphology in the VLASS images. We apply our algorithm to three existing catalogs, including one of known blazars (Roma-BzCAT) and two of blazar candidates identified by WISE colors and radio emission (WIBRaLS, KDEBLLACS). We show that in all three catalogs, there are objects with morphology inconsistent with being blazars. Considering all the catalogs, more than 12% of the candidates are unlikely to be blazars, based on this analysis. Notably, we show that 3% of the Roma-BzCAT "confirmed" blazars could be a misclassification based on their VLASS morphology. The resulting table with all sources and their radio morphological classification is available online. |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2401.03507-b31b1b.svg)](https://arxiv.org/abs/arXiv:2401.03507) | **Detection of pairwise kSZ effect with DESI galaxy groups and Planck in  Fourier space**  |
|| <mark>S. Li</mark>, et al. |
|*Appeared on*| *2024-01-09*|
|*Comments*| *25 pages, 14 figures, ApJS accepted*|
|**Abstract**| We report a $\sim5.2\sigma$ detection of the kinetic Sunyaev-Zel'dovich (kSZ) effect in Fourier space, by combining the DESI galaxy groups and the Planck data. We use the density-weighted pairwise kSZ power spectrum as the summary statistic, and the detailed procedure of its measurement is presented in this paper. Meanwhile, we analyze the redshift space group density power spectrum to constrain its bias parameters and photo-z uncertainties. These best fitted parameters are substituted to a non-linear kSZ model, and we fit the measured kSZ power spectrum with this model to constrain the group optical depth $\bar{\tau}$. Selected by a varying lower mass threshold $M_{\rm th}$, the galaxy group catalogs with different median masses ($\tilde{M}$) are constructed from the DR9 data of the DESI Legacy Imaging Surveys. $\tilde{M}$ spans a wide range of $\sim10^{13}-10^{14}{\rm M}_\odot/h$ and the heaviest $\tilde{M}\sim10^{14} {\rm M}_\odot/h$ is larger than those of most other kSZ detections. When the aperture photometric filter radius $\theta_{\rm AP}$ is set to be $4.2$ arcmin, the $\tilde{M}=1.75\times10^{13}{\rm M}_\odot/h$ group sample at the median redshift $\tilde{z}=0.64$ has the highest kSZ detection ${\rm S/N}=5.2$. By fitting $\bar{\tau}$s from various samples against their $\tilde{M}$s, we obtain a linear $\log\bar{\tau}-\log \tilde{M}$ relation: $\log\bar{\tau} = \gamma(\log \tilde{M}-14)+\log\beta$, in which $\gamma=0.55\pm0.1$. We also vary the aperture photometric filter radius and measure the $\bar{\tau}$ profiles of group samples, whose constraints on the baryon distribution within and around dark matter halos will be discussed in a companion paper. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2401.03549-b31b1b.svg)](https://arxiv.org/abs/arXiv:2401.03549) | **The JCMT Transient Survey: Six-Year Summary of 450/850\,$μ$m  Protostellar Variability and Calibration Pipeline Version 2.0**  |
|| S. Mairs, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2024-01-09*|
|*Comments*| *Accepted for Publication in the The Astrophysical Journal. DOI link to data will become public after the proof stage is complete*|
|**Abstract**| The JCMT Transient Survey has been monitoring eight Gould Belt low-mass star-forming regions since December 2015 and six somewhat more distant intermediate-mass star-forming regions since February 2020 with SCUBA-2 on the JCMT at \ShortS and \LongS and with an approximately monthly cadence. We introduce our Pipeline v2 relative calibration procedures for image alignment and flux calibration across epochs, improving on our previous Pipeline v1 by decreasing measurement uncertainties and providing additional robustness. These new techniques work at both \LongS and \ShortNS, where v1 only allowed investigation of the \LongS data. Pipeline v2 achieves better than $0.5^{\prime\prime}$ relative image alignment, less than a tenth of the submillimeter beam widths. The v2 relative flux calibration is found to be 1\% at \LongS and $<5$\% at \ShortNS. The improvement in the calibration is demonstrated by comparing the two pipelines over the first four years of the survey and recovering additional robust variables with v2. Using the full six years of the Gould Belt survey the number of robust variables increases by 50\,\%, and at \ShortS we identify four robust variables, all of which are also robust at \LongNS. The multi-wavelength light curves for these sources are investigated and found to be consistent with the variability being due to dust heating within the envelope in response to accretion luminosity changes from the central source. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2401.03775-b31b1b.svg)](https://arxiv.org/abs/arXiv:2401.03775) | **Low-mass stars: Their Protoplanetary Disc Lifetime Distribution**  |
|| <mark>S. Pfalzner</mark>, F. Dincer |
|*Appeared on*| *2024-01-09*|
|*Comments*| *14 pages, 5 figures, accepted by ApJ*|
|**Abstract**| While most protoplanetary discs lose their gas within less than 10 Myr, individual disc lifetimes vary from < 1 Myr to >> 20 Myr, with some discs existing for > 40 Myr. Mean disc half lifetimes hide this diversity; only a so-far non-existing disc lifetime distribution could capture this fact. The benefit of a disc lifetime distribution would be twofold. First, it provides a stringent test on disc evolution theories. Second, it can function as input for planet formation models. Here, we derive such a disc lifetime distribution. We heuristically test different standard distribution forms for their ability to account for the observed disc fractions at certain ages. Here, we concentrate on the distribution for low-mass stars (spectral type M3.7 - M6, $M_s \approx $ 0.1 - 0.24 M$_{sun}$) because disc lifetimes depend on stellar mass. A Weibull-type distribution ($k$=1.78, $\lambda$=9.15) describes the observational data if all stars have a disc at a cluster age $t_c$=0. However, a better match exists for lower initial disc fractions. For f(t=0)= 0.65, a Weibull distribution (k=2.34, $\lambda$=11.22) and a Gauss distribution ($\sigma$=9.52, $\mu$=9.52) fit similarly well the data. All distributions have in common that they are wide, and most discs are dissipated at ages > 5 Myr. The next challenge is to quantitatively link the diversity of disc lifetimes to the diversity in planets. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2401.03437-b31b1b.svg)](https://arxiv.org/abs/arXiv:2401.03437) | **Mid-infrared evidence for iron-rich dust in the multi-ringed inner disk  of HD 144432**  |
|| J. Varga, et al. -- incl., <mark>R. v. Boekel</mark>, <mark>M. Scheuck</mark> |
|*Appeared on*| *2024-01-09*|
|*Comments*| *29 pages, 24 figures*|
|**Abstract**| Context. Rocky planets form by the concentration of solid particles in the inner few au regions of planet-forming disks. Their chemical composition reflects the materials in the disk available in the solid phase at the time the planets were forming. Aims. We aim to constrain the structure and dust composition of the inner disk of the young star HD 144432, using an extensive set of infrared interferometric data taken by the Very Large Telescope Interferometer (VLTI), combining PIONIER, GRAVITY, and MATISSE observations. Methods. We introduced a new physical disk model, TGMdust, to image the interferometric data, and to fit the disk structure and dust composition. We also performed equilibrium condensation calculations with GGchem. Results. Our best-fit model has three disk zones with ring-like structures at 0.15, 1.3, and 4.1 au. Assuming that the dark regions in the disk at ~0.9 au and at ~3 au are gaps opened by planets, we estimate the masses of the putative gap-opening planets to be around a Jupiter mass. We find evidence for an optically thin emission ($\tau<0.4$) from the inner two disk zones ($r<4$ au) at $\lambda>3\ \mu$m. Our silicate compositional fits confirm radial mineralogy gradients. To identify the dust component responsible for the infrared continuum emission, we explore two cases for the dust composition, one with a silicate+iron mixture and the other with a silicate+carbon one. We find that the iron-rich model provides a better fit to the spectral energy distribution. Conclusions. We propose that in the warm inner regions ($r<5$ au) of typical planet-forming disks, most if not all carbon is in the gas phase, while iron and iron sulfide grains are major constituents of the solid mixture along with forsterite and enstatite. Our analysis demonstrates the need for detailed studies of the dust in inner disks with new mid-infrared instruments such as MATISSE and JWST/MIRI. |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error Unable to locate Ghostscript on paths</p> |

## Export documents

We now write the .md files and export relevant images

In [6]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [7]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

exported in  _build/html/2401.04009.md
    + _build/html/tmp_2401.04009/./figures/morphology.png
    + _build/html/tmp_2401.04009/./figures/artifacts.png
    + _build/html/tmp_2401.04009/./figures/roma_2ss_1.png


## Display the papers

Not necessary but allows for a quick check.

In [8]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\vdag}{(v)^\dagger}$
$\newcommand$
$\newcommand$
$\newcommand{\angstrom}{\text{\normalfontÅ}}$
$\newcommand{\arraystretch}{0.8}$
$\newcommand{\arraystretch}{0.8}$
$\newcommand{\arraystretch}{0.8}$
$\newcommand{\arraystretch}{0.8}$</div>



<div id="title">

# Recognizing Blazars Using Radio Morphology from the VLA Sky Survey

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2401.04009-b31b1b.svg)](https://arxiv.org/abs/2401.04009)<mark>Appeared on: 2024-01-09</mark> -  _21 pages, 11 figures, accepted for publication in The Astrophysical Journal_

</div>
<div id="authors">

Z.-L. X. (谢彰亮), et al. -- incl., <mark>E. Bañados</mark>, <mark>S. Belladitta</mark>, <mark>F. Davies</mark>

</div>
<div id="abstract">

**Abstract:** Blazars are radio-loud Active Galactic Nuclei (AGN) whose jets have a very small angle to our line of sight. Observationally, the radio emission are mostly compact or a compact-core with a 1-sided jet. With 2.5 $^{\prime\prime}$ resolution at 3 GHz, the Very Large Array Sky Survey (VLASS) enables us to resolve the structure of some blazar candidates in the sky north of Decl. $-40$ deg. We introduce an algorithm to classify radio sources as either blazar-like or non-blazar-like based on their morphology in the VLASS images. We apply our algorithm to three existing catalogs, including one of known blazars (Roma-BzCAT) and two of blazar candidates identified by WISE colors and radio emission (WIBRaLS, KDEBLLACS). We show that in all three catalogs, there are objects with morphology inconsistent with being blazars. Considering all the catalogs, more than 12 \% of the candidates are unlikely to be blazars, based on this analysis. Notably, we show that 3 \% of the Roma-BzCAT "confirmed” blazars could be a misclassification based on their VLASS morphology. The resulting table with all sources and their radio morphological classification is available online.

</div>

<div id="div_fig1">

<img src="tmp_2401.04009/./figures/morphology.png" alt="Fig2" width="100%"/>

**Figure 2. -** Morphological classification of VLASS images using our automated algorithm. The images are categorized into six distinct morphological classes, grouped into two sets. Blazar-like morphologies include _COMPACT_, _OFFSET_, _1-SIDE EXTENDED_, and _1-SIDE SEPARATED_; while non-blazar-like morphologies comprise _2-SIDE EXTENDED_ and _2-SIDE SEPARATED_. In each morphological class, their corresponding 1D signals are shown in two directions, indicated by blue lines. Automatically identified peaks are marked by pink crosses, with morphological classification determined by the properties of peaks, including number, width and distance to the center position. Descriptions of each morphological class and their characteristics can be found in Sec. \ref{subsec:class}. (*fig:all_signal*)

</div>
<div id="div_fig2">

<img src="tmp_2401.04009/./figures/artifacts.png" alt="Fig3" width="100%"/>

**Figure 3. -** An illustration of two artifact types and a _NON-DETECTION_ source in VLASS images. The algorithm identifies cross pattern and dark pixel artifacts, assigning appropriate quality flag values (1 for cross pattern presence, 2 for dark pixel presence, and 3 for both artifacts in the image). Sources with low signal-to-noise ratios that are difficult to discern by visual inspection are classified as _NON-DETECTION_ sources. (*fig:artifacts*)

</div>
<div id="div_fig3">

<img src="tmp_2401.04009/./figures/roma_2ss_1.png" alt="Fig5" width="100%"/>

**Figure 5. -** All _2-SIDE SEPARATED_ sources with their VLASS2 image in Roma-BzCAT. Each source has undergone visual inspection, and any discrepancies found during this process are indicated by a visual flag in the resulting table. For each of these sources, our revised visual classification is provided. These revised sources are marked with a white star on their image. (*fig:2ss_1*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2401.04009"></div>

# Create HTML index

In [9]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

354  publications files modified in the last 7 days.


In [10]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

7  publications in the last 7 days.


In [11]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [12]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [13]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

3  publications in the last day.


In [14]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
