# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    pass

class AffiliationError(RuntimeError):
    pass

def validation(source: str):
    """Raises error paper during parsing of source file
    
    Allows checks before parsing TeX code.
    
    Raises AffiliationWarning
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        raise AffiliationError("mpia.affiliation_verifications: " + check)

        
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com
    
    :param paper: Arxiv paper
    :returns: markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    txt = f"""<img src={url}"https://arxiv.org/abs/{paper_id}">"""
    txt = '<div id="qrcode">' + txt + '</div>'
    return txt

## get list of arxiv paper candidates

We use the MPIA mitarbeiter list webpage from mpia.de to get author names
We then get all new papers from Arxiv and match authors

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    removing IT, administration, technical staff
    :param name: name
    :returns: False if name is not a scientist
    """
    remove_list = ['Wolf', 'Licht', 'Binroth', 'Witzel', 'Jordan',
                   'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                   'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                   'Dehen', 'Beckmann', 'Jager', 'Jäger'
                  ]

    for k in remove_list:
        if k in name:
            return False
    return True

def add_author_to_list(author_list: list) -> list:
    """ Add author to list if not already in list
    
    :param author: author name
    :param author_list: list of authors
    :returns: updated list of authors
    """
    add_list = ['T. Henning']

    for author in add_list:
        if author not in author_list:
            author_list.append(author)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
mpia_authors = [k[1] for k in mpia.get_mpia_mitarbeiter_list() if filter_non_scientists(k[1])]
# add some missing author because of inconsistencies in their MPIA name and author name on papers
mpia_authors = add_author_to_list(mpia_authors)

In [4]:
new_papers = get_new_papers()
# add manual references
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

candidates = []
for paperk in new_papers:
    # Check author list with their initials
    normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

P. Gaikwad  ->  P. Gaikwad  |  ['P. Gaikwad']
F. Nasir  ->  F. Nasir  |  ['F. Nasir']
E. Bañados  ->  E. Bañados  |  ['E. Bañados']
P. Gaikwad  ->  P. Gaikwad  |  ['P. Gaikwad']
F. Walter  ->  F. Walter  |  ['F. Walter']
J. Mueller  ->  J. Mueller-Horn  |  ['J. Mueller']
J. Liu  ->  J. Liu  |  ['J. Liu']
F. Walter  ->  F. Walter  |  ['F. Walter']


K. Schwarz  ->  K. Schwarz  |  ['K. Schwarz']
Arxiv has 66 new papers today
          6 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
documents = []
failed = []
for paper in tqdm(candidates):
    # debug crap
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/6 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2405.12273


extracting tarball to tmp_2405.12273... done.


P. Gaikwad  ->  P. Gaikwad  |  ['P. Gaikwad']


Found 31 bibliographic references in tmp_2405.12273/aanda.bbl.
Retrieving document from  https://arxiv.org/e-print/2405.12275


extracting tarball to tmp_2405.12275... done.


Found 75 bibliographic references in tmp_2405.12275/output.bbl.
Retrieving document from  https://arxiv.org/e-print/2405.12281
extracting tarball to tmp_2405.12281... done.
Retrieving document from  https://arxiv.org/e-print/2405.12302


extracting tarball to tmp_2405.12302... done.
Retrieving document from  https://arxiv.org/e-print/2405.12529


extracting tarball to tmp_2405.12529... done.
Retrieving document from  https://arxiv.org/e-print/2405.12829


not a gzip file


### Export the logs

Throughout, we also keep track of the logs per paper. see `logs-{today date}.md` 

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"


with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Sucessful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2405.12273-b31b1b.svg)](https://arxiv.org/abs/2405.12273) | **Damping wings in the Lyman-{\alpha} forest: a model-independent measurement of the neutral fraction at 5.4<z<6.1**  |
|| B. Spina, et al. -- incl., <mark>P. Gaikwad</mark> |
|*Appeared on*| *2024-05-22*|
|*Comments*| *7 pages, 4 figures, submitted to A&A Letters*|
|**Abstract**|            Recent observations have positioned the endpoint of the Epoch of Reionisation (EoR) at redshift $z \sim 5.3$. However, observations of the Lyman-$\alpha$ forest have not yet been able to discern whether reionisation occurred slowly and late, with substantial neutral hydrogen persisting at redshift $\sim 6$, or rapidly and earlier, with the apparent late end driven by the fluctuating UV background. Gunn-Peterson (GP) absorption troughs are solid indicators that reionisation is not complete until $z=5.3$, but whether they contain significantly neutral gas has not yet been proven. We aim to answer this question by directly measuring, for the first time, the neutral hydrogen fraction ($x_\mathrm{HI}$) at the end of the EoR ($5 \lesssim z \lesssim 6$) in high-redshift quasars spectra. For high neutral fractions $x_\mathrm{HI}\gtrsim0.1$, GP troughs exhibit damping wing (DW) absorption extending over $1000$ km s$^{-1}$ beyond the troughs. While conclusively detected in Lyman-$\alpha$ emission lines of quasars at $z\geq7$, DWs are challenging to observe in the general Lyman-$\alpha$ forest due to absorption complexities and small-scale stochastic transmission features. We report the first successful identification of the stochastic DW signal adjacent to GP troughs at redshifts $z=5.6$ through careful stacking of the dark gaps in Lyman-$\alpha$ forest. We use the signal to present a measurement of the corresponding global $x_\mathrm{HI}=0.19\pm0.07$ $(_{-0.16}^{+0.11})$ at $1\sigma$ $(2\sigma)$ at $z=5.6$ and a limit $x_\mathrm{HI}<0.44$ at $z=5.9$. The detection of this signal demonstrates the existence of substantially neutral islands near the conclusion of the EoR, unequivocally signaling a late-and-slow reionization scenario.         |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2405.12275-b31b1b.svg)](https://arxiv.org/abs/2405.12275) | **Damping Wing-Like Features in the Stacked Ly$\alpha$ Forest: Potential Neutral Hydrogen Islands at $z<6$**  |
|| Y. Zhu, et al. -- incl., <mark>F. Nasir</mark>, <mark>E. Bañados</mark>, <mark>P. Gaikwad</mark>, <mark>F. Walter</mark> |
|*Appeared on*| *2024-05-22*|
|*Comments*| *8 pages, 5 figures, 1 table; Submitted to MNRAS Letters*|
|**Abstract**|            Recent quasar absorption line observations suggest that reionization may end as late as $z \approx 5.3$. As a means to search for large neutral hydrogen islands at $z<6$, we revisit long dark gaps in the Ly$\beta$ forest in VLT/X-Shooter and Keck/ESI quasar spectra. We stack the Ly$\alpha$ forest corresponding to the edges of these Ly$\beta$ dark gaps and identify a damping wing-like extended absorption profile. The average redshift of the stacked forest is $z=5.8$. By comparing these observations with reionization simulations, we infer that such a damping wing-like feature can be naturally explained if these gaps are at least partially created by neutral islands. Conversely, simulated dark gaps lacking neutral hydrogen struggle to replicate the observed damping wing features. Furthermore, this damping wing-like profile implies that the volume-averaged neutral hydrogen fraction must be $\langle x_{\rm HI} \rangle \geq 6.1 \pm 3.9\%$ at $z = 5.8$. Our results offer robust evidence that reionization extends below $z=6$.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2405.12281-b31b1b.svg)](https://arxiv.org/abs/2405.12281) | **Detection of a 2.85 micrometer Feature on 5 Spinel-rich Asteroids from JWST**  |
|| J. G. Barrientos, et al. -- incl., <mark>J. Mueller</mark> |
|*Appeared on*| *2024-05-22*|
|*Comments*| *11 pages, 5 figures, and 2 tables. Published in ApjL*|
|**Abstract**|            Ground-based observations of `Barbarian' L-type asteroids at 1 to 2.5-$\mu$m indicate that their near-infrared spectra are dominated by the mineral spinel, which has been attributed to a high abundance of calcium-aluminum inclusions (CAIs) -- the first solids to condense out of the protoplanetary disk during the formation of the Solar System. However, the spectral properties of these asteroids from 2.5 to 5-$\mu$m, a wavelength region that covers signatures of hydrated minerals, water, and organics, have not yet been explored. Here, we present 2 to 5-$\mu$m reflectance spectra of five spinel-rich asteroids obtained with the NIRSpec instrument on the James Webb Space Telescope. All five targets exhibit a $\sim$ 2.85-$\mu$m absorption feature with a band depth of 3-6$\%$ that appears correlated in strength with that of the 2-$\mu$m spinel absorption feature. The shape and position of the 2.85-$\mu$m feature are not a good match to the 2.7-$\mu$m feature commonly seen in carbonaceous CM meteorites or C-type asteroids. The closest spectral matches are to the Moon and Vesta, suggesting commonalities in aqueous alteration across silicate bodies, infall of hydrated material, and/or space weathering by solar wind H implantation. Lab spectra of CO/CV chondrites, CAIs, as well as the minerals cronstedtite and spinel, also show a similar feature, providing clues into the origin of the 2.85-$\mu$m feature.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2405.12302-b31b1b.svg)](https://arxiv.org/abs/2405.12302) | **The Significance of Void Shape: Neutrino Mass from Voronoi Void-Halos?**  |
|| A. E. Bayer, <mark>J. Liu</mark>, C. D. Kreisch, A. Pisani |
|*Appeared on*| *2024-05-22*|
|*Comments*| *7 pages, 5 figures*|
|**Abstract**|            Massive neutrinos suppress the growth of cosmic structure on nonlinear scales, motivating the use of information beyond the power spectrum to tighten constraints on the neutrino mass, for example by considering cosmic voids. It was recently proposed that constraints on neutrino mass from the halo mass function (HMF) can be improved by considering only the halos that reside within voids -- the void-halo mass function (VHMF). We extend this analysis, which made spherical assumptions about the shape of voids, to take into account the non-spherical nature of voids as defined by the Voronoi-tessellation-based void finder, VIDE. In turn, after accounting for one spurious non-spherical void, we find no evidence that the VHMF contains information beyond the HMF. Given this finding, we then introduce a novel summary statistic by splitting halos according to the emptiness of their individual environments, defined by the Voronoi cell volume each halo resides in, and combining the mass functions from each split. We name the corresponding statistic the VorHMF and find that it could provide information regarding neutrino mass beyond the HMF. Our work thus motivates the importance of accounting for the full shape of voids in future analyses, both in terms of removing outliers to achieve robust results and as an additional source of cosmological information.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2405.12529-b31b1b.svg)](https://arxiv.org/abs/2405.12529) | **BSN: First Photometric Light Curve Analysis of Two W-type Contact Binary Systems OP Boo and V0511 Cam**  |
|| A. Poro, et al. -- incl., <mark>F. Walter</mark> |
|*Appeared on*| *2024-05-22*|
|*Comments*| *5 Tables, 4 Figures, Astrophysics Journal*|
|**Abstract**|            This study presented the first light curve analysis of the OP Boo and V0511 Cam binary stars, which was conducted in the frame of the Binary Systems of South and North (BSN) Project. Photometric ground-based observations were conducted with standard filters at two observatories in the Czech Republic. We computed a new ephemeris for each of the systems using our extracted times of minima, TESS data, and additional literature. Linear fits for O-C diagrams of both systems were considered using the Markov Chain Monte Carlo (MCMC) method. The light curves were analyzed using the Wilson-Devinney (WD) binary code combined with the Monte Carlo (MC) simulation. The light curve solutions of both target systems required a cold starspot. The absolute parameters of the systems were calculated by using a P-M parameter relationship. The positions of the systems were also depicted on the Hertzsprung-Russell (HR), P-L, logMtot-logJ0, and T-M diagrams. The second component in both systems is determined to be a more massive and hotter star. Therefore, it can be concluded that both systems are W-type contact binary systems.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2405.12829-b31b1b.svg)](https://arxiv.org/abs/2405.12829) | **Single Aperture Large Telescope for Universe Studies (SALTUS): Science Overview**  |
|| G. Chin, et al. -- incl., <mark>K. Schwarz</mark> |
|*Appeared on*| *2024-05-22*|
|*Comments*| *49 pages, 10 figures, 3 tables, submitted to SPIE JATIS*|
|**Abstract**|            The SALTUS Probe mission will provide a powerful far-infrared (far-IR) pointed space observatory to explore our cosmic origins and the possibility of life elsewhere. The observatory employs an innovative deployable 14-m aperture, with a sunshield that will radiatively cool the off-axis primary to <45K. This cooled primary reflector works in tandem with cryogenic coherent and incoherent instruments that span the 34 to 660 micron far-IR range at both high and moderate spectral resolutions.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error not a gzip file</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images"""
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        os.mkdir(directory)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        try:
            os.makedirs(destdir)
        except FileExistsError:
            pass
        shutil.copy(fname, destfname)
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    [print("    + " + os.path.join(directory,fk)) for fk in fig_fnames]

In [8]:
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

exported in  _build/html/2405.12273.md
    + _build/html/tmp_2405.12273/./plot2.png
    + _build/html/tmp_2405.12273/./plot1.png
    + _build/html/tmp_2405.12273/./fig3_2p_new.png
exported in  _build/html/2405.12275.md


## Display the papers

Not necessary but allows for a quick check.

In [9]:
[display(Markdown(k[1])) for k in documents];

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\PG}[1]{{\color{red} \bf  #1}}$
$\newcommand{\orcidauthorA}{0000-0003-1634-1283}$
$\newcommand{\orcidauthorB}{0000-0001-8582-7012}$
$\newcommand{\orcidauthorC}{0000-0003-0821-3644}$
$\newcommand{\orcidauthorD}{0000-0002-2423-7905}$
$\newcommand{\orcidauthorE}{0000-0003-3307-7525}$</div>



<div id="title">

# Damping wings in the Lyman-$\alpha$ forest

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2405.12273-b31b1b.svg)](https://arxiv.org/abs/2405.12273)<mark>Appeared on: 2024-05-22</mark> -  _7 pages, 4 figures, submitted to A&A Letters_

</div>
<div id="authors">

B. Spina, et al. -- incl., <mark>P. Gaikwad</mark>

</div>
<div id="abstract">

**Abstract:** Recent observations have positioned the endpoint of the Epoch of Reionisation (EoR) at redshift $z \sim 5.3$ . However, observations of the Lyman- $\alpha$ forest have not yet been able to discern whether reionisation occurred slowly and late, with substantial neutral hydrogen    persisting at redshift $\sim 6$ , or rapidly and earlier, with the apparent late end driven by the fluctuating UV background.    Gunn-Peterson (GP) absorption troughs    are solid indicators that reionisation is not complete until $z=5.3$ , but whether they contain significantly neutral gas has not yet been proven. We aim to answer this question by directly measuring, for the first time, the neutral hydrogen fraction ( $x_\mathrm{HI}$ ) at the end of the EoR ( $5 \lesssim z \lesssim 6$ ) in high-redshift quasars spectra. For high neutral fractions $x_\mathrm{HI}\gtrsim0.1$ , GP troughs exhibit damping wing (DW) absorption extending over $1000$ km s $^{-1}$ beyond the troughs. While conclusively detected in Lyman- $\alpha$ emission lines of quasars at $z\geq7$ , DWs are challenging to observe in the general Lyman- $\alpha$ forest due to absorption complexities and small-scale stochastic transmission features. We report the first successful identification of the stochastic DW signal adjacent to GP troughs at redshifts $z=5.6$ through careful stacking of the dark gaps in Lyman- $\alpha$ forest. We use the signal to present a measurement of the corresponding global $x_\mathrm{HI}=0.19\pm0.07$ $(_{-0.16}^{+0.11})$ at $1\sigma$ $(2\sigma)$ at $z=5.6$ and a limit $x_\mathrm{HI}<0.44$ at $z=5.9$ . The detection of this signal demonstrates the existence of substantially neutral islands near the conclusion of the EoR, unequivocally signaling a late-and-slow reionization scenario.

</div>

<div id="div_fig1">

<img src="tmp_2405.12273/./plot2.png" alt="Fig2" width="100%"/>

**Figure 2. -** Stacked spectrum around long and short gaps for two redshift bins (top: $z=5.6$, bottom: $z=5.9$). In each panel, the red and blue curves depict the stacked profiles of long and short gaps, respectively, with their widths being the uncertainties from bootstrap resampling. The continuous light-blue line and the dotted orange line indicate the best-fit transmission model fit to the stack around long gaps for the step-function model and the piece-wise model, respectively.  Long gaps are defined as those with velocity widths greater than $340$ km s$^{-1}$, while short gaps have lengths under $200$ km s$^{-1}$. The curves in the bottom panel show the models which are permitted at at $2\sigma$ level. (*plot2*)

</div>
<div id="div_fig2">

<img src="tmp_2405.12273/./plot1.png" alt="Fig1" width="100%"/>

**Figure 1. -** Example of the gap-finding procedure for the sight-line towards quasar ATLASJ029.9915-36.5658. The Ly$\alpha$(bottom) and Ly$\beta$(top) spectra are depicted along with identified gaps (colored bands). Contamination from metal systems are also displayed (blue vertical lines) and the corresponding gaps removed (gray-strip bands). In this example, a high-ionisation metal system potentially affecting the Ly$\alpha$ forest is identified, leading to the removal of $3$ joint gaps. (*plot1*)

</div>
<div id="div_fig3">

<img src="tmp_2405.12273/./fig3_2p_new.png" alt="Fig3" width="100%"/>

**Figure 3. -** Constraints on the neutral fraction across cosmic time. The coloured solid and dashed boxes indicate our $1\sigma$ and $2\sigma$ constraints, respectively. Right: the constraints obtained in this work, in the two redshift bins and with the two $x_\mathrm{HI}$-$L$ relations proposed. Left: existing constraints from the literature: [McGreer and Mesinger (2015)](), [Greig, et. al (2017)](), [Greig, Mesinger and Bañados (2019)](), [Davies, Hennawi and Bañados (2018)](), [Mason, Treu and Dijkstra (2018)](), [Mason, Fontana and Treu (2019)](),   [Wang, Davies and Yang (2020)](), [Yang, Wang and Fan (2020)](), [Yang, Wang and Fan (2020)](),  [Zhu, Becker and Bosman (2022)](), [Bosman, Davies and Becker (2022)](), [Jin, Yang and Fan (2023)](), [Zhu, Becker and Bosman (2024)](). Some of the literature points have been slightly shifted in redshift, and the constraints of [Greig, Mesinger and Bañados (2024)]() have been combined for improved clarity. (*plot3*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2405.12273"></div>

. Here, we include *all* dark gaps regardless of their individual neutral hydrogen fraction. We display the shape of the mock stacked profiles from $z=5.4$ and 6.2 snapshots just for illustration purpose because their mean forest transmission and dark gap length distribution are significantly different from those at $z=5.8$, and different flux thresholds for dark gap detection are used at these redshifts.
    **(b)**$v_{\rm ext}$ distribution from the patchy reionization simulation at $z=5.8$. The observed damping wing-like profile is consistent with the model prediction that has $\langle x_{\rm HI} \rangle = 7.4\%$.
     (*fig:cmp2*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2405.12275"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
now = datetime.today()
res = []
for fk in files:
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now.today() - modified
    if delta <= timedelta(days=days):
        res.append((delta.seconds, fk))
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

77  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days """
    sorted_lst = sorted(lst, key=lambda x: x[1], reverse=True)
    for fname, date in sorted_lst:
        if date >= str(datetime.date.today() - datetime.timedelta(days=days)):
            yield fname

def extract_appearance_dates(lst_file):
    dates = []

    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    for fname in lst:
        with open(fname, 'r') as f:
            found_date = False
            for line in f:
                if not found_date:
                    if "Appeared on" in line:
                        found_date = True
                        dates.append((fname, get_date(line)))
                else:
                    break
    return dates

from glob import glob
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

5  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides """
    carousel = ["""  <div class="carousel" """,
                """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
                ]
    
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        carousel.append(item_str.format(k=k))
    carousel.append("  </div>")
    return '\n'.join(carousel)

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides """
    grid = ["""  <div class="grid"> """,
                ]
    
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    for k in range(1, npub + 1):
        grid.append(item_str.format(k=k))
    grid.append("  </div>")
    return '\n'.join(grid)

In [13]:
carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "7-day archives" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("daily_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- carousel:s --%}", carousel)\
               .replace("{%-- suptitle:s --%}",  "Daily" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

4  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

npub = 6
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), 6))]
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
