# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Some paper figures are very large: set Pillow's pixel-count limit to 10^9
Image.MAX_IMAGE_PIXELS = 1_000_000_000

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category for affiliation-related issues."""

class AffiliationError(RuntimeError):
    """Error raised when the affiliation check of a paper does not pass."""

def validation(source: str):
    """Check the affiliations of a paper before its TeX source is parsed.

    Allows checks before parsing TeX code: the check itself is delegated to
    :func:`mpia.affiliation_verifications`.

    :param source: TeX source content to verify
    :raises AffiliationError: when the verification returns anything other
        than ``True``; the returned value is appended to the error message
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # str(): keeps the message construction safe should the helper return
        # a non-string value (e.g. False) instead of a textual reason.
        raise AffiliationError("mpia.affiliation_verifications: " + str(check))

        
# Never de-duplicate affiliation warnings: show every occurrence
warnings.simplefilter(action='always', category=AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """Generate a QR code pointing to the arXiv page, using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g. ``2310.12340``)
    :returns: HTML snippet (a ``div`` with id ``qrcode`` wrapping the image)
              to append to the markdown text
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # The quotes must wrap the whole `src` value: placed around the arXiv link
    # only, they leave the attribute unquoted and become part of the data
    # encoded in the QR code.
    txt = f"""<img src="{url}https://arxiv.org/abs/{paper_id}">"""
    return '<div id="qrcode">' + txt + '</div>'

## get list of arxiv paper candidates

We use the MPIA staff (Mitarbeiter) list from mpia.de to get the author names.
We then fetch all new papers from arXiv and match their authors against that list.

In [3]:
# Staff list from the MPIA website
# (identified non-scientists are filtered automatically, see :func:`mpia.filter_non_scientists`)
mpia_authors = mpia.get_mpia_mitarbeiter_list()
normed_mpia_authors = [entry[1] for entry in mpia_authors]   # initials + fullname

# New arXiv papers, completed with manually added references
new_papers = get_new_papers()
add_paper_refs = []
manual_papers = [get_paper_from_identifier(ref) for ref in add_paper_refs]
new_papers.extend(manual_papers)

# Keep only the papers in which at least one author matches our list
candidates = []
for paperk in new_papers:
    # Author names are compared through their initials
    normed_author_list = list(map(mpia.get_initials, paperk['authors']))
    hl_authors = highlight_authors_in_list(normed_author_list, normed_mpia_authors, verbose=True)
    # a matched author is one whose highlighted name contains 'mark'
    matches = [pair for pair in zip(hl_authors, paperk['authors']) if 'mark' in pair[0]]
    paperk['authors'] = hl_authors
    if not matches:
        continue
    candidates.append(paperk)

print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))
print("""          {0:,d} with possible author matches""".format(len(candidates)))

J. Li  ->  J. Li  |  ['J. Li']
A. d. Graaff  ->  A. D. Graaff  |  ['A. D. Graaff']
H.-W. Rix  ->  H.-W. Rix  |  ['H.-W. Rix']


M. Fouesneau  ->  M. Fouesneau  |  ['M. Fouesneau']
P. Gaikwad  ->  P. Gaikwad  |  ['P. Gaikwad']
H. Beuther  ->  H. Beuther  |  ['H. Beuther']
Arxiv has 67 new papers today
          5 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [4]:
# Parse every candidate paper.
# * `documents` collects (paper_id, markdown text) for the papers that went through
# * `failed` collects (paper, reason) for rejected or broken papers (reported in the logs)
documents = []
failed = []
for paper in tqdm(candidates):
    paper_id = paper['identifier'].lower().replace('arxiv:', '')

    folder = f'tmp_{paper_id}'

    try:
        # Only fetch the source when no folder from a previous run is present
        if not os.path.isdir(folder):
            folder = retrieve_document_source(paper_id, folder)

        try:
            doc = latex.LatexDocument(folder, validation=validation)
        except AffiliationError as affilerror:
            # Affiliation check not passed: keep the reason and go to the next paper
            failed.append((paper, "affiliation error: " + str(affilerror)))
            continue

        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [mpia.get_initials(k) for k in doc.authors],
                normed_mpia_authors, verbose=True)
        # Fall back on the arXiv abstract when none came out of the TeX source
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']

        doc.comment = (get_markdown_badge(paper_id) +
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"

        full_md = doc.generate_markdown_text()

        full_md += get_markdown_qrcode(paper_id)

        # replace citations (best effort: the document is kept with its raw
        # citations if this step fails)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            # say which paper and which step failed, not only the bare message
            print(f"{paper_id:s}: could not replace citations: {e}")

        documents.append((paper_id, full_md))
    except Exception as e:
        # Any other failure must not stop the loop: warn and record the paper as failed
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/5 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2310.12208


extracting tarball to tmp_2310.12208...

 done.
Retrieving document from  https://arxiv.org/e-print/2310.12340


extracting tarball to tmp_2310.12340...

 done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


Found 143 bibliographic references in tmp_2310.12340/dr2_deepfield.bbl.
Retrieving document from  https://arxiv.org/e-print/2310.12528


extracting tarball to tmp_2310.12528... done.



  exec(code_obj, self.user_global_ns, self.user_ns)

  exec(code_obj, self.user_global_ns, self.user_ns)


M. Fouesneau  ->  M. Fouesneau  |  ['M. Fouesneau']


list index out of range


Retrieving document from  https://arxiv.org/e-print/2310.12720


extracting tarball to tmp_2310.12720...

 done.




✔ → 0:header
  ↳ 4548:\section{Introduction}
✔ → 4548:\section{Introduction}
  ↳ 9676:\section{Simulations \& Method}


✔ → 9676:\section{Simulations \& Method}
  ↳ 17196:\section{Results}


✔ → 17196:\section{Results}
  ↳ 28764:\section{Discussion}


✔ → 28764:\section{Discussion}
  ↳ 41143:\section{Conclusions}
✔ → 41143:\section{Conclusions}
  ↳ 44161:end


Found 52 bibliographic references in tmp_2310.12720/paper2_jcap.bbl.
Retrieving document from  https://arxiv.org/e-print/2310.12867


 item = \bibitem{Rauch_1998}M.~Rauch, \emph{The lyman alpha forest in the spectra of quasistellar objects},  \href{http://dx.doi.org/10.1146/annurev.astro.36.1.267}{\emph{Annual Review  of Astronomy and Astrophysics} {\bf 36} (1998) 267--316},  [\href{http://arxiv.org/abs/https://doi.org/10.1146/annurev.astro.36.1.267}{{\tt  https://doi.org/10.1146/annurev.astro.36.1.267}}].
 regex = 
        \\bibitem(\[[^\[\]]*?\]){(?P<bibkey>[a-zA-Z0-9\-\+\.\S]+?)}(?P<authors>|([\D]*?))(?P<year>[12][0-9]{3}).*?href(.*?{(?P<url>http[\S]*)})(?P<rest>.*)
        
 item = \bibitem{Weinberg_2003}D.~H. Weinberg, \emph{The lyman-$\alpha$ forest as a cosmological tool},  in  \emph{{AIP} Conference Proceedings}, {AIP}, 2003.\newblock \href{http://dx.doi.org/10.1063/1.1581786}{DOI}.
 regex = 
        \\bibitem(\[[^\[\]]*?\]){(?P<bibkey>[a-zA-Z0-9\-\+\.\S]+?)}(?P<authors>|([\D]*?))(?P<year>[12][0-9]{3}).*?href(.*?{(?P<url>http[\S]*)})(?P<rest>.*)
        
 item = \bibitem{Meiksin_2009}A.~A. Meiksin, \emph{The 

extracting tarball to tmp_2310.12867...

 done.


bad escape \l at position 10


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [5]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# The logs folder may not exist yet: create it (with parents) before writing
os.makedirs(os.path.dirname(logfile), exist_ok=True)

# utf-8: titles and abstracts contain non-ASCII characters
with open(logfile, 'w', encoding='utf-8') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers for which a document was generated
    success = [k[0] for k in documents]
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sorted by reason so that identical reasons end up under a single heading
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # green: rejected by the affiliation check; red: any other error
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() +
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')

        # every failure is displayed as well as logged
        # (restricting the display to the 'red' ones is currently disabled)
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2310.12340-b31b1b.svg)](https://arxiv.org/abs/arXiv:2310.12340) | **The JADES Origins Field: A New JWST Deep Field in the JADES Second  NIRCam Data Release**  |
|| D. J. Eisenstein, et al. -- incl., <mark>A. d. Graaff</mark>, <mark>H.-W. Rix</mark> |
|*Appeared on*| *2023-10-20*|
|*Comments*| *Submitted to ApJ Supplement. Images and catalogs are available at this https URL . A FITSmap portal to view the images is at this https URL*|
|**Abstract**| We summarize the properties and initial data release of the JADES Origins Field (JOF), which will soon be the deepest imaging field yet observed with the James Webb Space Telescope (JWST). This field falls within the GOODS-S region about 8' south-west of the Hubble Ultra Deep Field (HUDF), where it was formed initially in Cycle 1 as a parallel field of HUDF spectroscopic observations within the JWST Advanced Deep Extragalactic Survey (JADES). This imaging will be greatly extended in Cycle 2 program 3215, which will observe the JOF for 5 days in six medium-band filters, seeking robust candidates for z>15 galaxies. This program will also include ultra-deep parallel NIRSpec spectroscopy (up to 104 hours on-source, summing over the dispersion modes) on the HUDF. Cycle 3 observations from program 4540 will add 20 hours of NIRCam slitless spectroscopy to the JOF. With these three campaigns, the JOF will be observed for 380 open-shutter hours with NIRCam using 15 imaging filters and 2 grism bandpasses. Further, parts of the JOF have deep 43 hr MIRI observations in F770W. Taken together, the JOF will soon be one of the most compelling deep fields available with JWST and a powerful window into the early Universe. This paper presents the second data release from JADES, featuring the imaging and catalogs from the year 1 JOF observations. |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2310.12720-b31b1b.svg)](https://arxiv.org/abs/arXiv:2310.12720) | **A modified lognormal approximation of the Lyman-$α$ forest:  comparison with full hydrodynamic simulations at $2\leq z\leq 2.7$**  |
|| B. Arya, T. R. Choudhury, A. Paranjape, <mark>P. Gaikwad</mark> |
|*Appeared on*| *2023-10-20*|
|*Comments*| *17 pages, 8 figures*|
|**Abstract**| Observations of the Lyman-$\alpha$ forest in distant quasar spectra with upcoming surveys are expected to provide significantly larger and higher-quality datasets. To interpret these datasets, it is imperative to develop efficient simulations. One such approach is based on the assumption that baryonic densities in the intergalactic medium (IGM) follow a lognormal distribution. We extend our earlier work to assess the robustness of the lognormal model of the Lyman-$\alpha$ forest in recovering the parameters characterizing IGM state, namely, the mean-density IGM temperature ($T_0$), the slope of the temperature-density relation ($\gamma$), and the hydrogen photoionization rate ($\Gamma_{12}$), by comparing with high-resolution Sherwood SPH simulations across the redshift range $2 \leq z \leq 2.7$. These parameters are estimated through a Markov Chain Monte Carlo technique, using the mean and power spectrum of the transmitted flux. We find that the usual lognormal distribution of IGM densities cannot recover the parameters of the SPH simulations. This limitation arises from the fact that the SPH baryonic density distribution cannot be described by a simple lognormal form. To address this, we extend the model by scaling the linear density contrast by a parameter $\nu$. While the resulting baryonic density is still lognormal, the additional parameter gives us extra freedom in setting the variance of density fluctuations. With this extension, values of $T_0$ and $\gamma$ implied in the SPH simulations are recovered at $\sim 1-\sigma$ ($\lesssim$ 10%) of the median (best-fit) values for most redshifts bins. However, this extended lognormal model cannot recover $\Gamma_{12}$ reliably, with the best-fit value discrepant by $\gtrsim 3-\sigma$ for $z > 2.2$. Despite this limitation in the recovery of $\Gamma_{12}$, we argue that the model remains useful for constraining cosmological parameters. |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2310.12208-b31b1b.svg)](https://arxiv.org/abs/arXiv:2310.12208) | **Dynamical Friction Models for Black-Hole Binary Formation in AGN Disks**  |
|| K. Qian, <mark>J. Li</mark>, D. Lai |
|*Appeared on*| *2023-10-20*|
|*Comments*| *17 pages, 13 figures, submitted to ApJ*|
|**Abstract**| Stellar-mass black holes (sBH) embedded in gaseous disks of active galactic nuclei (AGN) can be important sources of detectable gravitational radiation for LIGO/Virgo when they form binaries and coalesce due to orbital decay. In this paper, we study the effect of gas dynamical friction (DF) on the formation of BH binaries in AGN disks using $N$-body simulations. We employ two simplified models of DF, with the force on the BH depending on $\Delta {\bf v}$, the velocity of the sBH relative to the background Keplerian gas. We integrate the motion of two sBH initially on circular orbits around the central supermassive black hole (SMBH), and evaluate the probability of binary formation under various conditions. We find that both models of DF (with different dependence of the frictional coefficient on $|\Delta{\bf v}|$) can foster the formation of binaries when the effective friction timescale $\tau$ satisfies $\Omega_{\rm K}\tau\lesssim 20-30$ (where $\Omega_{\rm K}$ is the Keplerian frequency around the SMBH): prograde binaries are formed when the DF is stronger (smaller $\tau$), while retrograde binaries dominate when the DF is weaker (larger $\tau$). We determine the distribution of both prograde and retrograde binaries as a function of initial orbital separation and the DF strength. Using our models of DF, we show that for a given sBH number density in the AGN disk, the formation rate of sBH binaries increases with decreasing $\tau$ and can reach a moderate value with a sufficiently strong DF. |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2310.12867-b31b1b.svg)](https://arxiv.org/abs/arXiv:2310.12867) | **Far-Infrared Luminosity Bursts Trace Mass Accretion onto Protostars**  |
|| W. J. Fischer, et al. -- incl., <mark>H. Beuther</mark> |
|*Appeared on*| *2023-10-20*|
|*Comments*| *To be submitted to AAS Journals*|
|**Abstract**| Evidence abounds that young stellar objects undergo luminous bursts of intense accretion that are short compared to the time it takes to form a star. It remains unclear how much these events contribute to the main-sequence masses of the stars. We demonstrate the power of time-series far-infrared (far-IR) photometry to answer this question compared to similar observations at shorter and longer wavelengths. We start with model spectral energy distributions that have been fit to 86 Class 0 protostars in the Orion molecular clouds. The protostars sample a broad range of envelope densities, cavity geometries, and viewing angles. We then increase the luminosity of each model by factors of 10, 50, and 100 and assess how these luminosity increases manifest in the form of flux increases over wavelength ranges of interest. We find that the fractional change in the far-IR luminosity during a burst more closely traces the change in the accretion rate than photometric diagnostics at mid-infrared and submillimeter wavelengths. We also show that observations at far-IR and longer wavelengths reliably track accretion changes without confusion from large, variable circumstellar and interstellar extinction that plague studies at shorter wavelengths. We close by discussing the ability of a proposed far-IR surveyor for the 2030s to enable improvements in our understanding of the role of accretion bursts in mass assembly. |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error bad escape \l at position 10</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-arXiv:2310.12528-b31b1b.svg)](https://arxiv.org/abs/arXiv:2310.12528) | **Constructing Impactful Machine Learning Research for Astronomy: Best  Practices for Researchers and Reviewers**  |
|| D. Huppenkothen, et al. -- incl., <mark>M. Fouesneau</mark> |
|*Appeared on*| *2023-10-20*|
|*Comments*| *14 pages, 3 figures; submitted to the Bulletin of the American Astronomical Society*|
|**Abstract**| Machine learning has rapidly become a tool of choice for the astronomical community. It is being applied across a wide range of wavelengths and problems, from the classification of transients to neural network emulators of cosmological simulations, and is shifting paradigms about how we generate and report scientific results. At the same time, this class of method comes with its own set of best practices, challenges, and drawbacks, which, at present, are often reported on incompletely in the astrophysical literature. With this paper, we aim to provide a primer to the astronomical community, including authors, reviewers, and editors, on how to implement machine learning models and report their results in a way that ensures the accuracy of the results, reproducibility of the findings, and usefulness of the method. |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error list index out of range</p> |

## Export documents

We now write the .md files and export relevant images

In [6]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    Figures are searched in ``md`` as markdown links whose text starts with
    ``Fig`` and as ``<img src="..." .../>`` tags. Local figures are copied
    below ``directory`` at the relative path used in the markdown; online
    (http/https) figures are not copied.

    :param md: markdown content to export
    :param md_fname: name of the markdown file written in ``directory``
    :param directory: output directory, created with its parents when missing
    :raises RuntimeError: if ``directory`` exists and is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs (not mkdir): parent folders may be missing as well
        os.makedirs(directory, exist_ok=True)

    # The link pattern stops at the first closing bracket/parenthesis so that
    # other parentheses on the same line are not swallowed into the file name.
    fig_fnames = (re.findall(r'\[Fig[^\]]*\]\(([^)]*)\)', md) +
                  re.findall(r'\<img src="([^>\s]*)"[^>]*/>', md))
    exported = []
    for fname in fig_fnames:
        if not fname:
            # empty link target: nothing to copy
            continue
        if fname.lower().startswith(('http://', 'https://')):
            # No need to copy online figures
            continue
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)
        exported.append(destfname)
    # utf-8: the documents contain non-ASCII characters
    with open(os.path.join(directory, md_fname), 'w', encoding='utf-8') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    # list only the figures that were actually copied
    for destfname in exported:
        print("    + " + destfname)

In [7]:
# One markdown file per successfully parsed paper
for paper_id, md in documents:
    md_fname = f"{paper_id:s}.md"
    export_markdown_summary(md, md_fname, directory='_build/html/')

exported in  _build/html/2310.12340.md
    + _build/html/tmp_2310.12340/./jades-redshift-frontier-composite-v3.png
    + _build/html/tmp_2310.12340/./real_pz_snr.png
    + _build/html/tmp_2310.12340/./z_F444W_3215.png
exported in  _build/html/2310.12720.md
    + _build/html/tmp_2310.12720/./figures_jcap/param_redshift_evol_mcmc.png
    + _build/html/tmp_2310.12720/./figures_jcap/xJ_nu_chi2_stat_redshift_KS15_fbar_gaikwad.png
    + _build/html/tmp_2310.12720/./figures_jcap/xJ_nu_chi2_redshift_KS15_pk_fbar_gaikwad.png


## Display the papers

Not necessary but allows for a quick check.

In [8]:
# Render each generated document; a plain loop avoids building a throwaway
# list only for the side effect of displaying.
for doc_entry in documents:
    display(Markdown(doc_entry[1]))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\lya}{\ensuremath{{\rm Ly}\alpha}}$
$\newcommand{\kms}{\ensuremath{{\rm\;km\;s^{-1}}}}$
$\newcommand{\Mpc}{\ensuremath{{\rm\;Mpc}}}$
$\newcommand{\Myr}{\ensuremath{{\rm\;Myr}}}$
$\newcommand{\Msun}{\ensuremath{{\rm\;M_\odot}}}$
$\newcommand{\yr}{\ensuremath{{\rm\;yr}}}$
$\newcommand{\cm}{\ensuremath{{\rm\;cm}}}$
$\newcommand{\ergscms}{\ensuremath{{\rm\;ergs\;cm^{-2}\;s^{-1}}}}$
$\newcommand{\ergss}{\ensuremath{{\rm\;ergs\;s^{-1}}}}$
$\newcommand{\mic}{\ensuremath{\mu\rm m}}$
$\newcommand{\todo}[1]{{\color{blue} \tt #1}}$
$\newcommand{\tbc}[1]{#1 ({\color{red} \tt TBC})}$
$\newcommand{\tbd}{({\color{red} \tt TBD})}$
$\newcommand{\outline}[1]{{\color{black}\it #1}}$
$\newcommand{\BRC}[1]{{\color{red!55!black} BR: #1}}$
$\newcommand{\CWC}[1]{{\color{purple!55!black} CW: #1}}$
$\newcommand{\RM}[1]{{\color{green!25!black} RM: #1}}$
$\newcommand{\DJE}[1]{{\color{blue!25!black} DE: #1}}$
$\newcommand{\um}{\ensuremath{\mu{\rm m}}}$
$\newcommand{\nod}{---}$</div>



<div id="title">

# The JADES Origins Field: A New JWST Deep Field in the JADES Second NIRCam Data Release

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2310.12340-b31b1b.svg)](https://arxiv.org/abs/2310.12340)<mark>Appeared on: 2023-10-20</mark> -  _Submitted to ApJ Supplement. Images and catalogs are available at this https URL . A FITSmap portal to view the images is at this https URL_

</div>
<div id="authors">

D. J. Eisenstein, et al. -- incl., <mark>A. d. Graaff</mark>, <mark>H.-W. Rix</mark>

</div>
<div id="abstract">

**Abstract:** We summarize the properties and initial data release of the JADES Origins Field (JOF),which will soon be the deepest imaging field yet observed with the James Webb Space Telescope (JWST).This field falls within the GOODS-S region about 8' south-west of the Hubble Ultra Deep Field (HUDF), where it was formed initially in Cycle 1 as a parallel field of HUDF spectroscopic observations within the JWST AdvancedDeep Extragalactic Survey (JADES).This imaging will be greatly extended in Cycle 2 program 3215,which will observe the JOF for 5 days in six medium-bandfilters, seeking robust candidates for $z>15$ galaxies.This program will also include ultra-deep parallel NIRSpec spectroscopy (up to 104 hours on-source, summing over the dispersion modes) on the HUDF.Cycle 3 observations from program 4540 will add 20 hours of NIRCam slitless spectroscopy to the JOF.With these three campaigns, the JOF will be observed for 380 open-shutter hours with NIRCam using 15 imaging filters and 2 grism bandpasses.  Further, parts of the JOF have deep 43 hr MIRI observations in F770W.Taken together, the JOF will soon be one of the most compelling deep fields availablewith JWST and a powerful window into the early Universe.  This paper presents the second data release from JADES, featuring the imaging and catalogs from the year 1 JOF observations.

</div>

<div id="div_fig1">

<img src="tmp_2310.12340/./jades-redshift-frontier-composite-v3.png" alt="Fig1" width="100%"/>

**Figure 1. -** 
The layout of data sets in the GOODS-S field most immediate to this paper, showing the context of observations most germane to this deep field.  The grey-scale shows the F277W exposure map, as rendered from the Cycle 1 program 1180 \& 1210 APT files.  The parallel imaging in 1210 is the deepest portion; program 3215 is extending this with 6 medium-bands. The forthcoming JADES 1286 Observation 5 NIRCam footprint is also shown.
The footprints of the HUDF ACS field  ([Beckwith, Stiavelli and Koekemoer 2006]()) , deep MUSE spectroscopy  ([Bacon, Brinchmann and Conseil 2023]()) , FRESCO grism \citep[][program 1895;]{Oesch23}, JEMS medium-band
\citep[program 1963;][]{Williams2023}, and the
NGDEEP NIRISS field \citep[program 2079;][]{Bagley23} are shown, as these are immediately supportive of the target selection for the 3215 NIRSpec observations.  There are many other powerful data sets in this region, not shown for brevity! (*fig:layout*)

</div>
<div id="div_fig2">

<img src="tmp_2310.12340/./real_pz_snr.png" alt="Fig2" width="100%"/>

**Figure 2. -** 
We now take the high-redshift fit and shift it in redshift, holding F277W $S/N=7$, and fit with Prospector both for the case of only the Cycle 1 wide-bands and for the case with the Cycle 2 imaging.  We report the $\Delta\chi^2$ that a true high-$z$ galaxy would be mistaken at low-$z$.  The combined imaging allows the two solutions to be robustly separated, with $\Delta\chi^2>15$.  One also sees how the wide-bands alone fail to separate these cases, with the confusion increasing badly at $z>15$, where the dropout shifts from F150W to F200W.
 (*fig:mb2*)

</div>
<div id="div_fig3">

<img src="tmp_2310.12340/./z_F444W_3215.png" alt="Fig3" width="100%"/>

**Figure 3. -** 
F444W magnitude versus photometric redshift for the galaxies for which shutters were allocated for NIRSpec observations in program 3215. The color coding indicates the exposure time with the prism for each target.
 (*fig:ns_targets*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2310.12340"></div>

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\keywords}[1]$
$\newcommand{\p}{\ensuremath{\partial}}$
$\newcommand{\Msun}{\ensuremath{M_{\odot}}}$
$\newcommand{\Mh}{\ensuremath{h^{-1}M_{\odot}}}$
$\newcommand{\Mhsq}{\ensuremath{h^{-2}M_{\odot}}}$
$\newcommand{\Mpch}{\ensuremath{h^{-1}{\rm Mpc}}}$
$\newcommand{\kpch}{\ensuremath{h^{-1}{\rm kpc}}}$
$\newcommand{\avg}[1]{\ensuremath{\left\langle #1  \right\rangle}}$
$\newcommand{\e}[1]{\ensuremath{{\rm e}^{#1}}}$
$\newcommand{\der}{\ensuremath{{\rm d}}}$
$\newcommand{\Der}{\ensuremath{{\rm D}}}$
$\newcommand{\dir}{\ensuremath{\delta_{\rm D}}}$
$\newcommand{\erfc}[1]{\ensuremath{{\rm erfc}\left(#1\right)}}$
$\newcommand{\erf}[1]{\ensuremath{{\rm erf}\left(#1\right)}}$
$\newcommand{\eqn}[1]{equation~\eqref{#1}}$
$\newcommand{\eqns}[1]{equations~\eqref{#1}}$
$\newcommand{\ph}[1]{\phantom{#1}}$
$\newcommand{\be}{\begin{equation}}$
$\newcommand{\ee}{\end{equation}}$
$\newcommand{\Cal}[1]{\ensuremath{\mathcal{#1}}}$
$\newcommand{\AP}[1]{\emph{\color{blue}[AP: #1]}}$
$\newcommand{\TRC}[1]{{\color{Green}[{\bf TRC:} #1]}}$
$\newcommand{\BA}[1]{\emph{\color{red}[BA: #1]}}$
$\newcommand{\red}[1]{\textcolor{red}{#1}}$
$\newcommand{\PG}[1]{{\color{magenta}{\bf #1}}}$
$\title{\boldmath A modified lognormal approximation of the Lyman-\alpha forest: comparison with full hydrodynamic simulations at 2\leq z\leq 2.7}$
$\author[a,1]{B. Arya,\note{Corresponding author.}}$
$\author[b]{T. Roy Choudhury,}$
$\author[a]{A. Paranjape.}$
$\author[c]{and P. Gaikwad}$
$\affiliation[a]{Inter-University Centre for Astronomy \& Astrophysics,\Ganeshkhind, Post Bag 4, Pune 411007, India}$
$\affiliation[b]{National Centre for Radio Astrophysics, TIFR,\\Post Bag 3, Ganeshkhind, Pune 411007, India}$
$\affiliation[c]{Max-Planck-Institut für Astronomie,\Königstuhl 17, D-69117 Heidelberg, Germany}$
$\emailAdd{bharya@iucaa.in}$
$\emailAdd{tirth@ncra.tifr.res.in}$
$\emailAdd{aseem@iucaa.in}$
$\emailAdd{gaikwad@mpia-hd.mpg.de}$
$\abstract{Observations of the Lyman-\alpha forest in distant quasar spectra with upcoming surveys are expected to new significantly larger and higher-quality datasets. To interpret these datasets, it is imperative to develop efficient simulations. One such approach is based on the assumption that baryonic densities in the intergalactic medium (IGM) follow a lognormal distribution.$
$We extend our earlier work to assess the robustness of the lognormal model of the Lyman-\alpha forest in recovering the parameters characterizing IGM state, namely, the mean-density IGM temperature (T_0), the slope of the temperature-density relation (\gamma), and the hydrogen photoionization rate (\Gamma_{12}), by comparing with high-resolution Sherwood SPH simulations across the redshift range 2 \leq z \leq 2.7. These parameters are estimated through a Markov Chain Monte Carlo (MCMC) technique, using the mean and power spectrum of the transmitted flux.$
$We find that the usual lognormal distribution of IGM densities cannot recover the parameters of the SPH simulations. This limitation arises from the fact that the SPH baryonic density distribution cannot be described by a simple lognormal form. To address this, we extend the model by scaling the linear density contrast by a parameter \nu. While the resulting baryonic density is still lognormal, the additional parameter gives us extra freedom in setting the variance of density fluctuations. With this extension, values of T_0 and \gamma implied in the SPH simulations are recovered at \sim 1-\sigma (\lesssim 10\%) of the median (best-fit) values for most redshifts bins. However, this extended lognormal model cannot recover \Gamma_{12} reliably, with the best-fit value discrepant by \gtrsim 3-\sigma for z > 2.2. Despite this limitation in the recovery of \Gamma_{12}, whose origins we explain, we argue that the model remains useful for constraining cosmological parameters.}$
$\keywords{intergalactic media, Lyman-\alpha forest, power spectrum}$
$\begin{document}$
$\label{firstpage}$
$\maketitle$
$\flushbottom$
$\n\end{document}\end{equation}}}}}$
$\newcommand{\e}[1]{\ensuremath{{\rm e}^{#1}}}$
$\newcommand{\der}{\ensuremath{{\rm d}}}$
$\newcommand{\Der}{\ensuremath{{\rm D}}}$
$\newcommand{\dir}{\ensuremath{\delta_{\rm D}}}$
$\newcommand{\erfc}[1]{\ensuremath{{\rm erfc}\left(#1\right)}}$
$\newcommand{\erf}[1]{\ensuremath{{\rm erf}\left(#1\right)}}$
$\newcommand{\eqn}[1]{equation~\eqref{#1}}$
$\newcommand{\eqns}[1]{equations~\eqref{#1}}$
$\newcommand{\ph}[1]{\phantom{#1}}$
$\newcommand{\be}{\begin{equation}}$
$\newcommand{\ee}{\end{equation}}$
$\newcommand{\Cal}[1]{\ensuremath{\mathcal{#1}}}$
$\newcommand{\AP}[1]{\emph{\color{blue}[AP: #1]}}$
$\newcommand{\TRC}[1]{{\color{Green}[{\bf TRC:} #1]}}$
$\newcommand{\BA}[1]{\emph{\color{red}[BA: #1]}}$
$\newcommand{\red}[1]{\textcolor{red}{#1}}$
$\newcommand{\PG}[1]{{\color{magenta}{\bf #1}}}$
$\title{\boldmath A modified lognormal approximation of the Lyman-\alpha forest: comparison with full hydrodynamic simulations at 2\leq z\leq 2.7}$
$\author[a,1]{B. Arya,\note{Corresponding author.}}$
$\author[b]{T. Roy Choudhury,}$
$\author[a]{A. Paranjape.}$
$\author[c]{and P. Gaikwad}$
$\affiliation[a]{Inter-University Centre for Astronomy \& Astrophysics,\Ganeshkhind, Post Bag 4, Pune 411007, India}$
$\affiliation[b]{National Centre for Radio Astrophysics, TIFR,\\Post Bag 3, Ganeshkhind, Pune 411007, India}$
$\affiliation[c]{Max-Planck-Institut für Astronomie,\Königstuhl 17, D-69117 Heidelberg, Germany}$
$\emailAdd{bharya@iucaa.in}$
$\emailAdd{tirth@ncra.tifr.res.in}$
$\emailAdd{aseem@iucaa.in}$
$\emailAdd{gaikwad@mpia-hd.mpg.de}$
$\abstract{Observations of the Lyman-\alpha forest in distant quasar spectra with upcoming surveys are expected to provide significantly larger and higher-quality datasets. To interpret these datasets, it is imperative to develop efficient simulations. One such approach is based on the assumption that baryonic densities in the intergalactic medium (IGM) follow a lognormal distribution.$
$We extend our earlier work to assess the robustness of the lognormal model of the Lyman-\alpha forest in recovering the parameters characterizing IGM state, namely, the mean-density IGM temperature (T_0), the slope of the temperature-density relation (\gamma), and the hydrogen photoionization rate (\Gamma_{12}), by comparing with high-resolution Sherwood SPH simulations across the redshift range 2 \leq z \leq 2.7. These parameters are estimated through a Markov Chain Monte Carlo (MCMC) technique, using the mean and power spectrum of the transmitted flux.$
$We find that the usual lognormal distribution of IGM densities cannot recover the parameters of the SPH simulations. This limitation arises from the fact that the SPH baryonic density distribution cannot be described by a simple lognormal form. To address this, we extend the model by scaling the linear density contrast by a parameter \nu. While the resulting baryonic density is still lognormal, the additional parameter gives us extra freedom in setting the variance of density fluctuations. With this extension, values of T_0 and \gamma implied in the SPH simulations are recovered at \sim 1-\sigma (\lesssim 10\%) of the median (best-fit) values for most redshifts bins. However, this extended lognormal model cannot recover \Gamma_{12} reliably, with the best-fit value discrepant by \gtrsim 3-\sigma for z > 2.2. Despite this limitation in the recovery of \Gamma_{12}, whose origins we explain, we argue that the model remains useful for constraining cosmological parameters.}$
$\keywords{intergalactic media, Lyman-\alpha forest, power spectrum}$
$\begin{document}$
$\label{firstpage}$
$\maketitle$
$\flushbottom$
$\n\end{document}\end{equation}}}}$
$\newcommand{\erf}[1]{\ensuremath{{\rm erf}\left(#1\right)}}$
$\newcommand{\eqn}[1]{equation~\eqref{#1}}$
$\newcommand{\eqns}[1]{equations~\eqref{#1}}$
$\newcommand{\ph}[1]{\phantom{#1}}$
$\newcommand{\be}{\begin{equation}}$
$\newcommand{\ee}{\end{equation}}$
$\newcommand{\Cal}[1]{\ensuremath{\mathcal{#1}}}$
$\newcommand{\AP}[1]{\emph{\color{blue}[AP: #1]}}$
$\newcommand{\TRC}[1]{{\color{Green}[{\bf TRC:} #1]}}$
$\newcommand{\BA}[1]{\emph{\color{red}[BA: #1]}}$
$\newcommand{\red}[1]{\textcolor{red}{#1}}$
$\newcommand{\PG}[1]{{\color{magenta}{\bf #1}}}$
$\title{\boldmath A modified lognormal approximation of the Lyman-\alpha forest: comparison with full hydrodynamic simulations at 2\leq z\leq 2.7}$
$\author[a,1]{B. Arya,\note{Corresponding author.}}$
$\author[b]{T. Roy Choudhury,}$
$\author[a]{A. Paranjape.}$
$\author[c]{and P. Gaikwad}$
$\affiliation[a]{Inter-University Centre for Astronomy \& Astrophysics,\Ganeshkhind, Post Bag 4, Pune 411007, India}$
$\affiliation[b]{National Centre for Radio Astrophysics, TIFR,\\Post Bag 3, Ganeshkhind, Pune 411007, India}$
$\affiliation[c]{Max-Planck-Institut für Astronomie,\Königstuhl 17, D-69117 Heidelberg, Germany}$
$\emailAdd{bharya@iucaa.in}$
$\emailAdd{tirth@ncra.tifr.res.in}$
$\emailAdd{aseem@iucaa.in}$
$\emailAdd{gaikwad@mpia-hd.mpg.de}$
$\abstract{Observations of the Lyman-\alpha forest in distant quasar spectra with upcoming surveys are expected to provide significantly larger and higher-quality datasets. To interpret these datasets, it is imperative to develop efficient simulations. One such approach is based on the assumption that baryonic densities in the intergalactic medium (IGM) follow a lognormal distribution.$
$We extend our earlier work to assess the robustness of the lognormal model of the Lyman-\alpha forest in recovering the parameters characterizing IGM state, namely, the mean-density IGM temperature (T_0), the slope of the temperature-density relation (\gamma), and the hydrogen photoionization rate (\Gamma_{12}), by comparing with high-resolution Sherwood SPH simulations across the redshift range 2 \leq z \leq 2.7. These parameters are estimated through a Markov Chain Monte Carlo (MCMC) technique, using the mean and power spectrum of the transmitted flux.$
$We find that the usual lognormal distribution of IGM densities cannot recover the parameters of the SPH simulations. This limitation arises from the fact that the SPH baryonic density distribution cannot be described by a simple lognormal form. To address this, we extend the model by scaling the linear density contrast by a parameter \nu. While the resulting baryonic density is still lognormal, the additional parameter gives us extra freedom in setting the variance of density fluctuations. With this extension, values of T_0 and \gamma implied in the SPH simulations are recovered at \sim 1-\sigma (\lesssim 10\%) of the median (best-fit) values for most redshifts bins. However, this extended lognormal model cannot recover \Gamma_{12} reliably, with the best-fit value discrepant by \gtrsim 3-\sigma for z > 2.2. Despite this limitation in the recovery of \Gamma_{12}, whose origins we explain, we argue that the model remains useful for constraining cosmological parameters.}$
$\keywords{intergalactic media, Lyman-\alpha forest, power spectrum}$
$\begin{document}$
$\label{firstpage}$
$\maketitle$
$\flushbottom$
$\n\end{document}\end{equation}}}$
$\newcommand{\eqn}[1]{equation~\eqref{#1}}$
$\newcommand{\eqns}[1]{equations~\eqref{#1}}$
$\newcommand{\ph}[1]{\phantom{#1}}$
$\newcommand{\be}{\begin{equation}}$
$\newcommand{\ee}{\end{equation}}$
$\newcommand{\Cal}[1]{\ensuremath{\mathcal{#1}}}$
$\newcommand{\AP}[1]{\emph{\color{blue}[AP: #1]}}$
$\newcommand{\TRC}[1]{{\color{Green}[{\bf TRC:} #1]}}$
$\newcommand{\BA}[1]{\emph{\color{red}[BA: #1]}}$
$\newcommand{\red}[1]{\textcolor{red}{#1}}$
$\newcommand{\PG}[1]{{\color{magenta}{\bf #1}}}$
$\title{\boldmath A modified lognormal approximation of the Lyman-\alpha forest: comparison with full hydrodynamic simulations at 2\leq z\leq 2.7}$
$\author[a,1]{B. Arya,\note{Corresponding author.}}$
$\author[b]{T. Roy Choudhury,}$
$\author[a]{A. Paranjape.}$
$\author[c]{and P. Gaikwad}$
$\affiliation[a]{Inter-University Centre for Astronomy \& Astrophysics,\Ganeshkhind, Post Bag 4, Pune 411007, India}$
$\affiliation[b]{National Centre for Radio Astrophysics, TIFR,\\Post Bag 3, Ganeshkhind, Pune 411007, India}$
$\affiliation[c]{Max-Planck-Institut für Astronomie,\Königstuhl 17, D-69117 Heidelberg, Germany}$
$\emailAdd{bharya@iucaa.in}$
$\emailAdd{tirth@ncra.tifr.res.in}$
$\emailAdd{aseem@iucaa.in}$
$\emailAdd{gaikwad@mpia-hd.mpg.de}$
$\abstract{Observations of the Lyman-\alpha forest in distant quasar spectra with upcoming surveys are expected to provide significantly larger and higher-quality datasets. To interpret these datasets, it is imperative to develop efficient simulations. One such approach is based on the assumption that baryonic densities in the intergalactic medium (IGM) follow a lognormal distribution.$
$We extend our earlier work to assess the robustness of the lognormal model of the Lyman-\alpha forest in recovering the parameters characterizing IGM state, namely, the mean-density IGM temperature (T_0), the slope of the temperature-density relation (\gamma), and the hydrogen photoionization rate (\Gamma_{12}), by comparing with high-resolution Sherwood SPH simulations across the redshift range 2 \leq z \leq 2.7. These parameters are estimated through a Markov Chain Monte Carlo (MCMC) technique, using the mean and power spectrum of the transmitted flux.$
$We find that the usual lognormal distribution of IGM densities cannot recover the parameters of the SPH simulations. This limitation arises from the fact that the SPH baryonic density distribution cannot be described by a simple lognormal form. To address this, we extend the model by scaling the linear density contrast by a parameter \nu. While the resulting baryonic density is still lognormal, the additional parameter gives us extra freedom in setting the variance of density fluctuations. With this extension, values of T_0 and \gamma implied in the SPH simulations are recovered at \sim 1-\sigma (\lesssim 10\%) of the median (best-fit) values for most redshifts bins. However, this extended lognormal model cannot recover \Gamma_{12} reliably, with the best-fit value discrepant by \gtrsim 3-\sigma for z > 2.2. Despite this limitation in the recovery of \Gamma_{12}, whose origins we explain, we argue that the model remains useful for constraining cosmological parameters.}$
$\keywords{intergalactic media, Lyman-\alpha forest, power spectrum}$
$\begin{document}$
$\label{firstpage}$
$\maketitle$
$\flushbottom$
$\n\end{document}\end{equation}}$
$\newcommand{\ee}{\end{equation}}$
$\newcommand{\Cal}[1]{\ensuremath{\mathcal{#1}}}$
$\newcommand{\AP}[1]{\emph{\color{blue}[AP: #1]}}$
$\newcommand{\TRC}[1]{{\color{Green}[{\bf TRC:} #1]}}$
$\newcommand{\BA}[1]{\emph{\color{red}[BA: #1]}}$
$\newcommand{\red}[1]{\textcolor{red}{#1}}$
$\newcommand{\PG}[1]{{\color{magenta}{\bf #1}}}$</div>



<div id="title">

# $\boldmath$ A modified lognormal approximation of the Lyman-$\alpha$ forest: comparison with full hydrodynamic simulations at $2\leq z\leq 2.7$

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2310.12720-b31b1b.svg)](https://arxiv.org/abs/2310.12720)<mark>Appeared on: 2023-10-20</mark> -  _17 pages, 8 figures_

</div>
<div id="authors">

B. Arya, T. Roy Choudhury, A. Paranjape, P. Gaikwad

</div>
<div id="abstract">

**Abstract:** Observations of the Lyman- $\alpha$ forest in distant quasar spectra with upcoming surveys are expected to provide significantly larger and higher-quality datasets. To interpret these datasets, it is imperative to develop efficient simulations. One such approach is based on the assumption that baryonic densities in the intergalactic medium (IGM) follow a lognormal distribution. We extend our earlier work to assess the robustness of the lognormal model of the Lyman- $\alpha$ forest in recovering the parameters characterizing IGM state, namely, the mean-density IGM temperature ( $T_0$ ), the slope of the temperature-density relation ( $\gamma$ ), and the hydrogen photoionization rate ( $\Gamma_{12}$ ), by comparing with high-resolution Sherwood SPH simulations across the redshift range $2 \leq z \leq 2.7$ . These parameters are estimated through a Markov Chain Monte Carlo (MCMC) technique, using the mean and power spectrum of the transmitted flux. We find that the usual lognormal distribution of IGM densities cannot recover the parameters of the SPH simulations. This limitation arises from the fact that the SPH baryonic density distribution cannot be described by a simple lognormal form. To address this, we extend the model by scaling the linear density contrast by a parameter $\nu$ . While the resulting baryonic density is still lognormal, the additional parameter gives us extra freedom in setting the variance of density fluctuations. With this extension, values of $T_0$ and $\gamma$ implied in the SPH simulations are recovered at $\sim 1-\sigma$ ( $\lesssim$ 10 \% ) of the median (best-fit) values for most redshifts bins. However, this extended lognormal model cannot recover $\Gamma_{12}$ reliably, with the best-fit value discrepant by $\gtrsim 3-\sigma$ for $z > 2.2$ . Despite this limitation in the recovery of $\Gamma_{12}$ , whose origins we explain, we argue that the model remains useful for constraining cosmological parameters.

</div>

<div id="div_fig1">

<img src="tmp_2310.12720/./figures_jcap/param_redshift_evol_mcmc.png" alt="Fig7" width="100%"/>

**Figure 7. -** Redshift evolution of parameters and reduced $\chi^2$ shown with black circles. Gray shaded regions 16 and 84 percentiles from MCMC chains. Red triangles are true values of parameters in SPH. (*fig:param_evol*)

</div>
<div id="div_fig2">

<img src="tmp_2310.12720/./figures_jcap/xJ_nu_chi2_stat_redshift_KS15_fbar_gaikwad.png" alt="Fig2" width="100%"/>

**Figure 2. -** Flux statistics for SPH data and best-fit parameters obtained from 2D $\chi^2$ analysis. Solid curves are best-fit lognormal and dashed curves are SPH. (*fig:2d_chi2_stat*)

</div>
<div id="div_fig3">

<img src="tmp_2310.12720/./figures_jcap/xJ_nu_chi2_redshift_KS15_pk_fbar_gaikwad.png" alt="Fig1" width="100%"/>

**Figure 1. -** $\chi^2$ colormap on log $x_{\textrm{J}}$ - $\nu$ grid with \{$T_0, \gamma, \Gamma_{12}$\} fixed to their true values for all 8 redshift bins. We get acceptable fits for $z \leq 2.5$. Black contours show 1 and 2-$\sigma$ confidence levels and gold stars show position of best-fit \{$x_{\mathrm{J}}$, $\nu$\}. (*fig:2d_chi2*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2310.12720"></div>

# Create HTML index

In [9]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# Collect the exported markdown files whose on-disk timestamp is at most
# `days` days old, and report how many there are.
files = glob('_build/html/*.md')
days = 7
# Reference instant as a naive datetime expressed in UTC, so it is directly
# comparable with `modified` below (also naive UTC). Taken once, so every file
# is measured against the same instant.
now = datetime.now(timezone.utc).replace(tzinfo=None)
max_age = timedelta(days=days)
res = []
for fk in files:
    # NOTE(review): st_ctime is the metadata-change time on Unix and the
    # creation time on Windows; st_mtime may be what "modified" intends.
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc).replace(tzinfo=None)
    delta = now - modified
    if delta <= max_age:
        # total_seconds() is the full age; `.seconds` would drop whole days
        res.append((delta.total_seconds(), fk))
# Keep only the file names, in reverse lexicographic order of the name
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

304  publications files modified in the last 7 days.


In [10]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: iterable of ``(filename, date)`` pairs; ``date`` is compared
                as a string against ``str(datetime.date)``, i.e. it is
                expected to be formatted as ``YYYY-MM-DD``
    :param days: size of the look-back window in days, counted from today
    :returns: generator over the filenames whose date is inside the window,
              most recent date first
    """
    # Function-scope import: another cell of this notebook binds the bare name
    # `datetime` to the class (``from datetime import datetime``), which would
    # break a module-style ``datetime.date`` lookup here.
    from datetime import date, timedelta

    # The cutoff is the same for every entry, so compute it once
    cutoff = str(date.today() - timedelta(days=days))
    for fname, appeared in sorted(lst, key=lambda x: x[1], reverse=True):
        if appeared >= cutoff:
            yield fname

def extract_appearance_dates(lst_file):
    """ Extract the "Appeared on" date of each exported document

    Each file is read line by line and only the first line containing
    "Appeared on" is used; reading stops there.

    :param lst_file: iterable of paths of the files to scan
    :returns: list of ``(filename, date_text)`` tuples in input order; files
              without an "Appeared on" line are left out
    """
    def get_date(line):
        # text between 'Appeared on:' and the closing '</mark>' tag
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # Iterate over the argument itself, not over a same-purpose global list
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    break
    return dates

from glob import glob
# Select the documents that appeared during the last `days` days, based on the
# "Appeared on" date stored inside each exported markdown file.
# '*.md' (with the dot) matches the pattern used when listing the same folder
# for the timestamp-based count, and avoids picking up names that merely end in "md".
lst = glob('_build/html/*.md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

6  publications in the last 7 days.


In [11]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel containing `npub` placeholder slides

    :param npub: number of slides; slide ids run from ``slide1`` to ``slide<npub>``
    :returns: HTML text, one element per line
    """
    # opening tag of the container, split over two lines
    opening = (
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    )
    # one cell per slide, each with a placeholder body "Content <n>"
    cells = [
        f"""    <div class="carousel-cell"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    return '\n'.join([*opening, *cells, "  </div>"])

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid containing `npub` placeholder slides

    :param npub: number of grid items; slide ids run from ``slide1`` to ``slide<npub>``
    :returns: HTML text, one element per line
    """
    template = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    lines = ["""  <div class="grid"> """]
    # one grid item per slide, numbered from 1
    lines.extend(template.format(k=number) for number in range(1, npub + 1))
    lines.append("  </div>")
    return '\n'.join(lines)

In [12]:
# Fill the daily template with the current selection (`res`, `npub`) and write
# it out as the 7-day archive page.
carousel = create_carousel(npub)
# Only the file name goes into the page; os.path.basename handles whatever
# path separator glob returned on the current platform.
docs = ', '.join(['"{0:s}"'.format(os.path.basename(k)) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

# Read the template and release the file before doing the substitutions
with open("daily_template.html", "r") as tpl:
    page = tpl.read()

page = page.replace("{%-- carousel:s --%}", carousel)\
           .replace("{%-- suptitle:s --%}",  "7-day archives" )\
           .replace("{%-- docs:s --%}", docs)\
           .replace("{%-- slides:s --%}", slides)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [13]:
# redo for today: same page as the 7-day archive, restricted to a 1-day window
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), " publications in the last day.")

carousel = create_carousel(npub)
# Only the file name goes into the page; os.path.basename handles whatever
# path separator glob returned on the current platform.
docs = ', '.join(['"{0:s}"'.format(os.path.basename(k)) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

# Read the template and release the file before doing the substitutions
with open("daily_template.html", "r") as tpl:
    page = tpl.read()

page = page.replace("{%-- carousel:s --%}", carousel)\
           .replace("{%-- suptitle:s --%}",  "Daily" )\
           .replace("{%-- docs:s --%}", docs)\
           .replace("{%-- slides:s --%}", slides)

# print(carousel, docs, slides)
# print(page)
with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

2  publications in the last day.


In [14]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

# Number of papers requested for the grid
npub = 6
# Latest appearance dates first; the selection is bounded by `npub` itself
# rather than by a second copy of the literal.
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), npub))]
print(len(res), f" {npub} publications selected.")
# Fewer than the requested number may be available: size the grid, the slide
# ids and the title from what was actually selected so they match `docs`.
npub = len(res)

grid = create_grid(npub)
# Only the file name goes into the page; os.path.basename handles whatever
# path separator glob returned on the current platform.
docs = ', '.join(['"{0:s}"'.format(os.path.basename(k)) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

# Read the template and release the file before doing the substitutions
with open("grid_template.html", "r") as tpl:
    page = tpl.read()

page = page.replace("{%-- grid-content:s --%}", grid)\
           .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
           .replace("{%-- docs:s --%}", docs)\
           .replace("{%-- slides:s --%}", slides)

# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
