# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big: raise the pixel-count limit that PIL
# applies to images so that very large figures can still be handled.
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category dedicated to affiliation-related issues."""

class AffiliationError(RuntimeError):
    """Error raised when the affiliation check of a paper fails."""

def validation(source: str):
    """Check the affiliations of a TeX source before it gets parsed.

    Used as the ``validation`` callback of :class:`latex.LatexDocument`, so
    that a paper can be rejected before any parsing of its TeX code.

    :param source: TeX source to check
    :raises AffiliationError: when :func:`mpia.affiliation_verifications`
                              returns anything but ``True``; the returned
                              value is appended to the error message
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # `check` holds the reason of the failure. Formatting it (instead of
        # concatenating) keeps the message identical for strings and avoids a
        # TypeError hiding the affiliation failure for any other value.
        raise AffiliationError(f"mpia.affiliation_verifications: {check}")

        
# Always show AffiliationWarning, including repeated occurrences
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: Arxiv identifier of the paper (e.g., ``2312.12125``)
    :returns: markdown text (an HTML ``<img>`` wrapped in a ``qrcode`` div)
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    target = f"https://arxiv.org/abs/{paper_id}"
    # The complete address is the value of ONE quoted `src` attribute.
    # Quoting only the target (src=...&data="https://...") leaves the
    # attribute unquoted and makes the quotes part of the encoded data.
    txt = f'<img src="{url}{target}">'
    return '<div id="qrcode">' + txt + '</div>'

## get list of arxiv paper candidates

We use the MPIA mitarbeiter (staff) list webpage from mpia.de to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# Author names come from the MPIA website; the helper already filters the
# identified non-scientists (:func:`mpia.filter_non_scientists`)
mpia_authors = mpia.get_mpia_mitarbeiter_list()
normed_mpia_authors = [entry[1] for entry in mpia_authors]   # initials + fullname

# Today's listing, plus any manually requested identifier
new_papers = get_new_papers()
add_paper_refs = []
manual_papers = [get_paper_from_identifier(ref) for ref in add_paper_refs]
new_papers.extend(manual_papers)

candidates = []
for paperk in new_papers:
    original_authors = paperk['authors']
    # Compare the authors through their initials
    normed_author_list = [mpia.get_initials(name) for name in original_authors]
    hl_authors = highlight_authors_in_list(normed_author_list, normed_mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, original_authors) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if not matches:
        # no author matched our list: not a candidate
        continue
    candidates.append(paperk)

n_new, n_candidates = len(new_papers), len(candidates)
print("""Arxiv has {0:,d} new papers today""".format(n_new))
print("""          {0:,d} with possible author matches""".format(n_candidates))

Z. Kaczmarek  ->  Z. Kaczmarek  |  ['Z. Kaczmarek']
J. Li  ->  J. Li  |  ['J. Li']
K. Lee  ->  K. Lee  |  ['K. Lee']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
J. Isbell  ->  J. Isbell  |  ['J. Isbell']
A. d. Graaff  ->  A. D. Graaff  |  ['A. D. Graaff']
L. Xie  ->  Z.-L. Xie  |  ['L. Xie']
Arxiv has 61 new papers today
          7 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [4]:
# `documents` collects (paper_id, markdown text) for every exported paper;
# `failed` collects (paper, reason) for every paper that was rejected.
documents = []
failed = []
for paper in tqdm(candidates):
    # bare identifier: lower-cased, without the 'arxiv:' prefix
    paper_id = paper['identifier'].lower().replace('arxiv:', '')
    
    folder = f'tmp_{paper_id}'

    try:
        # the source is only retrieved when its folder is not on disk yet
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` raises AffiliationError when the affiliation check fails
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is built but never used afterwards
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            # rejected paper: move on to the next candidate
            continue
        
        # Hack because sometimes author parsing does not work well
        # (when the number of authors differs, the ArXiv list is used instead)
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                normed_mpia_authors, verbose=True)
        # fall back on the ArXiv abstract when none was extracted from the source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # comment line: badge, appearance date and (when present) the ArXiv comments
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        # (best effort: on failure the error is printed and the text is kept as is)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # any other problem: warn, record the paper as failed and keep going
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/7 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2312.11667


extracting tarball to tmp_2312.11667...

 done.
Retrieving document from  https://arxiv.org/e-print/2312.11900


extracting tarball to tmp_2312.11900...

 done.
Retrieving document from  https://arxiv.org/e-print/2312.11984


extracting tarball to tmp_2312.11984...

 done.
Retrieving document from  https://arxiv.org/e-print/2312.12015


extracting tarball to tmp_2312.12015...

 done.
Retrieving document from  https://arxiv.org/e-print/2312.12125


extracting tarball to tmp_2312.12125...

 done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)




✔ → 0:header
  ↳ 5277:\section{Introduction}


✔ → 5277:\section{Introduction}
  ↳ 15210:\section{Observations}\label{s:Obs}
✔ → 15210:\section{Observations}\label{s:Obs}
  ↳ 16180:\section{Model}\label{S:model}


✘ → 16180:\section{Model}\label{S:model}
  ↳ 45462:\section{Method}\label{s:Method}


✘ → 45462:\section{Method}\label{s:Method}
  ↳ 54161:\section{Results}\label{s:Results}


✔ → 54161:\section{Results}\label{s:Results}
  ↳ 64186:\section{Comparison of models, improvements, and future prospects}\label{s:Discussion}


✔ → 64186:\section{Comparison of models, improvements, and future prospects}\label{s:Discussion}
  ↳ 77140:\section{Conclusions}\label{s:Conclusions}
✔ → 77140:\section{Conclusions}\label{s:Conclusions}
  ↳ 81118:\begin{appendix}
✔ → 81118:\begin{appendix}
  ↳ 81135:\section{Band-by-band results - Additional}


✘ → 81135:\section{Band-by-band results - Additional}
  ↳ 91518:\section{Figures and Tables}


✘ → 91518:\section{Figures and Tables}
  ↳ 114722:end
J. Isbell  ->  J. Isbell  |  ['J. Isbell']


Found 51 bibliographic references in tmp_2312.12125/Main_Paper.bbl.
Retrieving document from  https://arxiv.org/e-print/2312.12207


extracting tarball to tmp_2312.12207...

 done.


A. d. Graaff  ->  A. D. Graaff  |  ['A. D. Graaff']


Found 194 bibliographic references in tmp_2312.12207/main.bbl.
Retrieving document from  https://arxiv.org/e-print/2312.12380


extracting tarball to tmp_2312.12380...

 done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [5]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# `open` does not create missing folders: make sure the log folder exists
os.makedirs(os.path.dirname(logfile), exist_ok=True)

# identifiers of the exported papers (first item of each `documents` entry)
success = {k[0] for k in documents}

# explicit encoding: titles and abstracts are not restricted to ASCII
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    # same heading in the log file and in the notebook output
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    for candid in candidates:
        # normalise the identifier the same way the `documents` ids were built
        if candid['identifier'].lower().replace('arxiv:', '') in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sorting by reason groups the papers sharing the same failure
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are shown in green, every other failure in red
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        # one sub-heading per distinct reason
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2312.12125-b31b1b.svg)](https://arxiv.org/abs/arXiv:2312.12125) | **Chromatically modelling the parsec scale dusty structure in the centre  of NGC1068**  |
|| J. Leftley, et al. -- incl., <mark>J. Isbell</mark> |
|*Appeared on*| *2023-12-20*|
|*Comments*| *15 pages, 17 appendix pages, submitted to A&A*|
|**Abstract**| Context: The Very Large Telescope Interferometer (VLTI) has been providing breakthrough images of the dust in the central parsecs of Active Galactic Nuclei (AGN), thought to be a key component of the AGN unification scheme and AGN host galaxy interaction. In single infrared bands, these images can have multiple interpretations some of which could challenge the unification scheme. This is the case for the archetypal type 2 AGN of NGC1068. The degeneracy is reduced by multi-band temperature maps which are hindered by ambiguity in alignment between different single band images. Aims: To solve this problem by creating a chromatic model capable of simultaneously explaining the VLTI GRAVITY+MATISSE $2\mu$m$-13\mu$m observations of the AGN hosted by NGC1068. Methods: We employ a simple disk and wind geometry populated with black body emitters and dust obscuration to create a versatile multi-wavelength modelling method for chromatic IR interferometric data of dusty objects. Results: This simple geometry is capable of reproducing the spectro-interferometric data of NGC1068 from K$-$N-band, explains the complex single band images with obscuration and inclination effects, and solves the alignment problem between bands. We find that the resulting inclination and position angle of the model is consistent with those inferred in previous larger scale studies of the narrow line region. Furthermore, the resulting model images visually resemble the multiple achromatic image reconstructions of the same data when evaluated at the same wavelengths. We conclude that the AGN of NGC1068 can indeed be explained by the clumpy disk+wind iteration of the AGN unification scheme. Within the scheme, we find that it is best explained as a type 2 AGN and the obscuring dust chemistry can be explained by a mix of olivine silicates and $16\pm1\%$ amorphous carbon. |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2312.12207-b31b1b.svg)](https://arxiv.org/abs/arXiv:2312.12207) | **To high redshift and low mass: exploring the emergence of quenched  galaxies and their environments at $3<z<6$ in the ultra-deep JADES MIRI F770W  parallel**  |
|| S. Alberts, et al. -- incl., <mark>A. d. Graaff</mark> |
|*Appeared on*| *2023-12-20*|
|*Comments*| *27 pages, 10 figures, 2 tables (not including appendices or references). Submitted to ApJ. Comments welcome!*|
|**Abstract**| We present the robust selection of quiescent (QG) and post-starburst (PSB) galaxies using ultra-deep NIRCam and MIRI imaging from the JWST Advanced Deep Extragalactic Survey (JADES). Key to this is MIRI 7.7$\mu$m imaging which breaks the degeneracy between old stellar populations and dust attenuation at $3<z<6$ by providing rest-frame $J$-band. Using this, we identify 23 passively evolving galaxies in UVJ color space in a mass-limited (log $M_{\star}/M_{\odot}\geq8.5$) sample over 8.8 arcmin$^2$. Evaluation of this selection with and without 7.7$\,\mu$m shows that dense wavelength coverage with NIRCam ($8-11$ bands including $1-4$ medium-bands) can compensate for lacking the $J-$band anchor, meaning that robust selection of high-redshift QGs is possible with NIRCam alone. Our sample is characterized by rapid quenching timescales ($\sim100-600$ Myr) with formation redshifts $z_{\rm f}\lesssim8.5$ and includes a potential record-holding massive QG at $z_{\rm phot}=5.33_{-0.17}^{+0.16}$ and two QGs with evidence for significant residual dust content ($A_{\rm V}\sim1-2$). In addition, we present a large sample of 12 log $M_{\star}/M_{\odot}=8.5-9.5$ PSBs, demonstrating that UVJ selection can be extended to low mass. Analysis of the environment of our sample reveals that the group known as the Cosmic Rose contains a massive QG and a dust-obscured star-forming galaxy (a so-called Jekyll and Hyde pair) plus three additional QGs within $\sim20$ kpc. Moreover, the Cosmic Rose is part of a larger overdensity at $z\sim3.7$ which contains 7/12 of our low-mass PSBs. Another 4 low-mass PSBs are members of an overdensity at $z\sim3.4$; this result strongly indicates low-mass PSBs are preferentially associated with overdense environments at $z>3$. |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2312.11667-b31b1b.svg)](https://arxiv.org/abs/arXiv:2312.11667) | **Spatially resolved microlensing timescale distributions across the  Galactic bulge with the VVV survey**  |
|| <mark>Z. Kaczmarek</mark>, et al. |
|*Appeared on*| *2023-12-20*|
|*Comments*| *13 pages, 11 figures, submitted to MNRAS*|
|**Abstract**| We analyze 1602 microlensing events found in the VISTA Variables in the Via Lactea (VVV) near-infrared (NIR) survey data. We obtain spatially-resolved, efficiency-corrected timescale distributions across the Galactic bulge ($|\ell|<10^\circ,$ $|b|<5^\circ$), using a Bayesian hierarchical model. Spatially-resolved peaks and means of the timescale distributions, along with their marginal distributions in strips of longitude and latitude, are in agreement at a 1$\sigma$ level with predictions based on the Besan\c{c}on model of the Galaxy. We find that the event timescales in the central bulge fields ($|\ell| < 5^\circ$) are on average shorter than the non-central ($|\ell| > 5^\circ$) fields, with the average peak of the lognormal timescale distribution at 23.6 $\pm$ 1.9 days for the central fields and 29.0 $\pm$ 3.0 days for the non-central fields. Our ability to probe the structure of the Bulge with this sample of NIR microlensing events is limited by the VVV survey's sparse cadence and relatively small number of detected microlensing events compared to dedicated optical surveys. Looking forward to future surveys, we investigate the capability of the Roman telescope to detect spatially-resolved asymmetries in the timescale distributions. We propose two pairs of Roman fields, centred on ($\ell = \pm 9,5^\circ$, $b=-0.125^\circ$) and ($\ell = -5^\circ$, $b=\pm 1.375^\circ$) as good targets to measure the asymmetry in longitude and latitude, respectively. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: '69117' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2312.11900-b31b1b.svg)](https://arxiv.org/abs/arXiv:2312.11900) | **MeV Astrophysical Spectroscopic Surveyor (MASS): A Compton Telescope  Mission Concept**  |
|| J. Zhu, et al. -- incl., <mark>J. Li</mark> |
|*Appeared on*| *2023-12-20*|
|*Comments*| *accepted for publication in Experimental Astronomy*|
|**Abstract**| We propose a future mission concept, the MeV Astrophysical Spectroscopic Surveyor (MASS), which is a large area Compton telescope using 3D position sensitive cadmium zinc telluride (CZT) detectors optimized for emission line detection. The payload consists of two layers of CZT detectors in a misaligned chessboard layout, with a total geometric area of 4096 cm$^2$ for on-axis observations. The detectors can be operated at room-temperature with an energy resolution of 0.6\% at 0.662 MeV. The in-orbit background is estimated with a mass model. At energies around 1 MeV, a line sensitivity of about $10^{-5}$ photons cm$^{-2}$ s$^{-1}$ can be obtained with a 1 Ms observation. The main science objectives of MASS include nucleosynthesis in astrophysics and high energy astrophysics related to compact objects and transient sources. The payload CZT detectors weigh roughly 40 kg, suggesting that it can be integrated into a micro- or mini-satellite. We have constructed a pathfinder, named as MASS-Cube, to have a direct test of the technique with 4 detector units in space in the near future. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2312.11984-b31b1b.svg)](https://arxiv.org/abs/arXiv:2312.11984) | **PSR B0943+10: Mode Switch, Polar Cap Geometry, and Orthogonally  Polarized Radiation**  |
|| S. Cao, et al. -- incl., <mark>K. Lee</mark> |
|*Appeared on*| *2023-12-20*|
|*Comments*| *27 pages, 28 figures, 2 tables, submitted to ApJ*|
|**Abstract**| As one of the paradigm examples to probe into pulsar magnetospheric dynamics, PSR B0943+10 (J0946+0951) manifests representatively, showing mode switch, orthogonal polarization and subpulse drifting. Both integrated and single pulses are studied with the Five-hundred-meter Aperture Spherical radio Telescope (FAST). The mode switch phenomenon of this pulsar is studied using an eigen-mode searching method, based on parameter estimation. A phase space evolution for the pulsar's mode switch shows a strange-attractor-like pattern. The radiative geometry is proposed by fitting polarization position angles with the rotating vector model. The pulsar pulse profile is then mapped to the sparking location on pulsar surface, and the differences between the main pulse's and the precursor component's radiative process may explain the X-ray's synchronization with radio mode switch. Detailed single pulse studies on B0943+10's orthogonally polarized radiation are presented, which may support for certain models of radiative transfer of polarized emission. B0943+10's B and Q modes evolve differently with frequency and with proportions of orthogonal modes, which indicates possible magnetospheric changes during mode switch. An extra component is found in B mode, and it shows distinct polarization and modulation properties compared with main part of B mode pulse component. For Q mode pulse profile, the precursor and the main pulse components are orthogonally polarized, showing that the precursor component radiated farther from the pulsar could be radiated in O-mode (X-mode) if the main pulse originates from low altitude in X-mode (O-mode). The findings could impact significantly on pulsar electrodynamics and the radiative mechanism related. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2312.12015-b31b1b.svg)](https://arxiv.org/abs/arXiv:2312.12015) | **ASASSN-18ap: A Dusty Tidal Disruption Event Candidate with an Early Bump  in the Light Curve or an extraodinary Type IIn supernovae**  |
|| Y. Wang, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2023-12-20*|
|*Comments*| *Submitted to ApJ on 2023-06-13 (Currently after the first revision)*|
|**Abstract**| We re-examined the classification of the optical transient ASASSN-18ap, which was initially identified as a supernova (SNe) upon its discovery. Based on newly emerged phenomena, such as a delayed luminous infrared outburst and the emergence of luminous coronal emission lines, we suggest that ASASSN-18ap is more likely a tidal disruption event (TDE) in a dusty environment, rather than a supernova. The total energy in the infrared outburst is $\rm 3.1\times10^{51}$ erg, which is an order of magnitude higher than the total energy in the optical-to-ultraviolet range, indicating a large dust extinction, an extra-EUV component, or anisotropic continuum emission. A bumpy feature appeared in the optical light curve at the start of brightening, which was reported in a couple of TDEs very recently. This early bump may have been overlooked in the past due to the lack of sufficient sampling of the light curves of most TDEs during their ascending phase, and it could provide insight into the origin of optical emission. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2312.12380-b31b1b.svg)](https://arxiv.org/abs/arXiv:2312.12380) | **The stellar mass function of quiescent galaxies in 2 < z < 2.5  protoclusters**  |
|| A. H. Edward, et al. -- incl., <mark>L. Xie</mark> |
|*Appeared on*| *2023-12-20*|
|*Comments*| *23 pages, 22 figures. Accepted for publication in MNRAS*|
|**Abstract**| We present an analysis of the galaxy stellar mass function (SMF) of 14 known protoclusters between $2.0 < z < 2.5$ in the COSMOS field, down to a mass limit of $10^{9.5}$ M$_{\odot}$. We use existing photometric redshifts with a statistical background subtraction, and consider star-forming and quiescent galaxies identified from $(NUV - r)$ and $(r - J)$ colours separately. Our fiducial sample includes galaxies within 1 Mpc of the cluster centres. The shape of the protocluster SMF of star-forming galaxies is indistinguishable from that of the general field at this redshift. Quiescent galaxies, however, show a flatter SMF than in the field, with an upturn at low mass, though this is only significant at $\sim 2\sigma$. There is no strong evidence for a dominant population of quiescent galaxies at any mass, with a fraction of $< 15\%$ at $1\sigma$ confidence for galaxies with log$M_{\ast}/M_{\odot} < 10.5$. We compare our results with a sample of galaxies groups at $1 < z < 1.5$, and demonstrate that a significant amount of environmental quenching must take place between these epochs, increasing the relative abundance of high-mass ($\rm M > 10^{10.5} M_{\odot}$) quiescent galaxies by a factor of $\gtrsim$ 2. However, we find that at lower masses ($\rm M < 10^{10.5} M_{\odot}$), no additional environmental quenching is required. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [6]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    The figures referenced in `md` (markdown ``[Fig...](path)`` links and
    ``<img src="path" .../>`` tags) are copied below `directory`, keeping
    their relative path; online figures are left alone.

    :param md: markdown text to export
    :param md_fname: name of the markdown file to write in `directory`
    :param directory: destination folder (created when missing, including
                      its parents)
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if os.path.exists(directory) and not os.path.isdir(directory):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if not os.path.exists(directory):
        print(f"creating directory {directory:s}")
        # makedirs (not mkdir): the destination may be nested, e.g. `_build/html/`
        os.makedirs(directory, exist_ok=True)

    fig_fnames = (re.findall(r'\[Fig.*\]\((.*)\)', md) +
                  re.findall(r'\<img src="([^>\s]*)"[^>]*/>', md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)

    md_path = os.path.join(directory, md_fname)
    # explicit encoding: the text is not restricted to ASCII
    with open(md_path, 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", md_path)
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [7]:
# one markdown file per successfully parsed paper
for paper_id, md in documents:
    md_fname = f"{paper_id:s}.md"
    export_markdown_summary(md, md_fname, '_build/html/')

exported in  _build/html/2312.12125.md
    + _build/html/tmp_2312.12125/./photocent.png
    + _build/html/tmp_2312.12125/./V2Poly.png
exported in  _build/html/2312.12207.md
    + _build/html/tmp_2312.12207/./uvj_comparison_w_inset.png


## Display the papers

Not necessary but allows for a quick check.

In [8]:
# render the markdown text of every exported paper
for _paper_id, rendered in documents:
    display(Markdown(rendered))

 and the image reconstructions from these works (bottom row). All images are colour scaled by a power of 0.6 to highlight faint structure. (*fig:polyimcomp*)

</div>
<div id="div_fig2">

<img src="tmp_2312.12125/./photocent.png" alt="Fig1" width="100%"/>

**Figure 1. -** The location of the brightest spot from the polychromatic model with wavelength, (0,0) is the model centre. (*fig:photocentre*)

</div>
<div id="div_fig3">

<img src="tmp_2312.12125/./V2Poly.png" alt="Fig2" width="100%"/>

**Figure 2. -** The V$^2$ of the polychromatic model. The line is the best polychromatic model by maximum likelihood evaluated at different PA. (*fig:polyv2*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2312.12125"></div>

 as a function of redshift.  Red stars (orange pluses) are our QG (PSB) candidates. The red open star is the close companion source to JADES 172799 (172799b; Section \ref{sec:rose}). Solid plus signs indicate our primary PSB sample, while open symbols are the secondary sample.  Only one PSB is a contaminant on the MS. White circles are galaxies in our parent sample not selected as QG or PSB. Purple x's are galaxies selected by the [ and Antwi-Danso (2023)]() NIRCam color selection (Section \ref{sec:ncselection}) that are not in our sample.  (*fig:ms*)

</div>
<div id="div_fig3">

<img src="tmp_2312.12207/./uvj_comparison_w_inset.png" alt="Fig7" width="100%"/>

**Figure 7. -** The rest-frame UVJ colors of log $\logM\geq8.5$ galaxies in the JADES MIRI parallel footprint at $z=3-4$(left) and $z=4-6$(right).  Closed symbols colored by stellar mass are colors derived from SED modeling including the F770W datapoint.  Open gray symbols are colors derived from fits excluding F770W. The connecting lines show where sources move in UVJ space when MIRI is added. The red open star is the close companion source to JADES 172799 (172799b; Section \ref{sec:rose}). The purple solid (dashed) shows the main (expanded) UVJ selection region for QGs from [ and Antwi-Danso (2023)](). The gray dotted line is the selection from B19, which extends past the standard $U-V$ boundary. The inset histogram shows that the color shifts are consistent within the measurement uncertainties (gray hatched region) for log $\logM=8.5-9.5$(blue), but show a small systematic shift redward at higher masses (orange). (*fig:uvj*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2312.12207"></div>

# Create HTML index

In [9]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
# One timezone-aware reference time for all files: subtracting a UTC
# timestamp from a naive local "now" shifts every age by the UTC offset.
now = datetime.now(timezone.utc)
max_age = timedelta(days=days)
res = []
for fk in files:
    # NOTE(review): st_ctime is the metadata-change time on Unix (creation
    # time on Windows); use st_mtime if the content modification is meant.
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc)
    delta = now - modified
    if delta <= max_age:
        # total_seconds(): `.seconds` alone drops the days part of the age
        res.append((delta.total_seconds(), fk))
# file names only, in reverse alphabetical order
res = [k[1] for k in sorted(res, key=lambda x: x[1], reverse=True)]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")

346  publications files modified in the last 7 days.


In [10]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: sequence of ``(filename, date)`` pairs, `date` being an ISO
                formatted string (``YYYY-MM-DD``) so that comparing the
                strings compares the dates
    :param days: number of days to look back from today
    :returns: generator of the filenames dated on or after the cutoff, most
              recent first
    """
    # Local import: this notebook binds the global name `datetime` to the
    # module in some cells and to the class in others.
    from datetime import date, timedelta

    # the cutoff does not depend on the item: compute it only once
    cutoff = str(date.today() - timedelta(days=days))
    for fname, appeared in sorted(lst, key=lambda x: x[1], reverse=True):
        if appeared >= cutoff:
            yield fname

def extract_appearance_dates(lst_file):
    """ Extract the appearance date recorded in each exported document

    Only the first line containing "Appeared on" is used for each file;
    files without such a line are left out of the result.

    :param lst_file: filenames of the documents to read
    :returns: list of ``(filename, date)`` pairs, `date` being the text found
              between ``Appeared on:`` and ``</mark>``
    """
    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # iterate over the argument (not over a global list of files)
    for fname in lst_file:
        # only a date is looked for: never fail on an undecodable character
        with open(fname, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # the first occurrence is enough: skip the rest of the file
                    break
    return dates

from glob import glob

# all exported documents, then only those that appeared within `days` days
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

4  publications in the last 7 days.


In [11]:
def create_carousel(npub=4):
    """ Build the HTML of a carousel made of `npub` slides

    The slides are numbered from 1 and each one holds a ``slide<k>`` div.

    :param npub: number of slides
    :returns: HTML code as a single string
    """
    cell_template = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    opening = [
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    ]
    cells = [cell_template.format(k=index) for index in range(1, npub + 1)]
    closing = ["  </div>"]
    return '\n'.join(opening + cells + closing)

def create_grid(npub=4):
    """ Build the HTML of a flat grid made of `npub` items

    The items are numbered from 1 and each one holds a ``slide<k>`` div.

    :param npub: number of items
    :returns: HTML code as a single string
    """
    item_template = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    items = [item_template.format(k=index) for index in range(1, npub + 1)]
    return '\n'.join(["""  <div class="grid"> """] + items + ["  </div>"])

In [12]:
# 7-day page: one slide per document selected above
carousel = create_carousel(npub)
docs = ', '.join('"{0:s}"'.format(path.split('/')[-1]) for path in res)
slides = ', '.join(f'"slide{idx}"' for idx in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# fill in the placeholders of the template, one after the other
for placeholder, value in (("{%-- carousel:s --%}", carousel),
                           ("{%-- suptitle:s --%}", "7-day archives"),
                           ("{%-- docs:s --%}", docs),
                           ("{%-- slides:s --%}", slides)):
    page = page.replace(placeholder, value)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [13]:
# Same page, restricted to the documents of the last day
days = 1
res = [fname for fname in get_last_n_days(dates, days)]
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join('"{0:s}"'.format(path.split('/')[-1]) for path in res)
slides = ', '.join(f'"slide{idx}"' for idx in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# fill in the placeholders of the template, one after the other
for placeholder, value in (("{%-- carousel:s --%}", carousel),
                           ("{%-- suptitle:s --%}", "Daily"),
                           ("{%-- docs:s --%}", docs),
                           ("{%-- slides:s --%}", slides)):
    page = page.replace(placeholder, value)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

0  publications in the last day.


In [14]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

# maximum number of papers shown in the grid
max_pub = 6
# most recent appearance dates first, keeping at most `max_pub` documents
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), max_pub)]
# Fewer than `max_pub` documents may be available: size the grid on what was
# actually selected so that the slide ids and the document list always match.
npub = len(res)
print(len(res), f" {max_pub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
