# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category reserved for affiliation-related notices."""

class AffiliationError(RuntimeError):
    """Error raised by `validation` when the affiliation check does not pass."""

def validation(source: str):
    """Check the affiliations of a source before its TeX code gets parsed.

    Meant to be handed over as the ``validation`` hook of
    ``latex.LatexDocument``.

    :param source: content passed on to ``mpia.affiliation_verifications``
    :raises AffiliationError: when the verification returns anything other
        than ``True``; the returned value is appended to the error message
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # str() keeps the error usable even if the check hands back a
        # non-string value (e.g. False or None) instead of a message.
        raise AffiliationError("mpia.affiliation_verifications: " + str(check))

        
# Always show AffiliationWarning, even when the same warning is triggered repeatedly.
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: Arxiv paper identifier (e.g. ``2506.08101``)
    :returns: HTML snippet (usable inside markdown) embedding the QR-code image
    """
    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    target = f"https://arxiv.org/abs/{paper_id}"
    # Quote the complete `src` value. Quoting only the arXiv URL leaves the
    # attribute unquoted and makes the quote characters part of the encoded data.
    img = f'<img src="{url}{target}">'
    return '<div id="qrcode">' + img + '</div>'


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Strip LaTeX wrappers around non-western encoded characters

    Only the ``\\begin{CJK}{UTF8}{gbsn} ... \\end{CJK}`` wrapper is handled so
    far (its content is kept); the list of patterns may need to grow.

    :param text: the text to clean
    :return: the cleaned text
    """
    cjk_wrapper = re.compile(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})")
    # keep only what sits between the \begin and \end commands
    return cjk_wrapper.sub(lambda found: found.group(2), text)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last one is reduced to its initial(s); hyphenated
    tokens keep their hyphen (``Anna-Beth`` -> ``A.-B.``). The last token is
    kept as is. A parenthesised part (often a name in non-western script) is
    moved to the end of the result.

    :param name: full name
    :returns: initials followed by the last token; an empty string when the
        name is blank
    """
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.search(r"\((.*?)\)", name)
        # an unbalanced "(" has nothing to extract: leave the name untouched
        if found is not None:
            suffix = found.group(1)
            name = name.replace(f"({suffix})", '')

    tokens = name.split()
    if not tokens:
        # blank name, or nothing left besides the parenthesised part
        return f"({suffix})" if suffix else ''

    initials = []
    for token in tokens[:-1]:
        # ignore the empty pieces produced by leading/trailing/double hyphens
        pieces = [piece[0] + '.' for piece in token.split('-') if piece]
        if pieces:
            initials.append('-'.join(pieces))
    initials.append(tokens[-1].strip())
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff (Mitarbeiter) list from the mpia.de website to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loosely decide whether a name may belong to a paper author

    Names containing one of the listed fragments (IT, administration,
    technical staff) are rejected; the match is a plain substring test.

    :param name: name
    :returns: False if name is not a scientist
    """
    excluded = ('Licht', 'Binroth', 'Witzel', 'Jordan',
                'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
                'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
                'Dehen', 'Beckmann', 'Jager', 'Jäger')
    return not any(fragment in name for fragment in excluded)

def add_author_to_list(author_list: list) -> list:
    """ Make sure the hard-coded extra authors are part of the list

    The given list is extended in place and also returned.

    :param author_list: list of authors
    :returns: updated list of authors
    """
    extras = ['T. Henning']

    for extra in extras:
        if extra in author_list:
            continue
        author_list.append(extra)
    return author_list

# get list from MPIA website
# filter for non-scientists (mpia.get_mpia_mitarbeiter_list() does some filtering)
mpia_authors = add_author_to_list(
    # second field of every staff entry, minus the known non-scientists
    [staff[1] for staff in mpia.get_mpia_mitarbeiter_list()
     if filter_non_scientists(staff[1])]
    # the call also adds authors whose MPIA name differs from the one
    # they use on papers
)

In [4]:
# identifiers of papers to add by hand on top of the new listing
add_paper_refs = []
new_papers = get_new_papers()
new_papers.extend([get_paper_from_identifier(ref) for ref in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    """Apply `fn` to `value`; fall back to `value` itself whenever `fn` raises.

    Extra positional and keyword arguments are forwarded to `fn`.
    """
    try:
        outcome = fn(value, *args, **kwargs)
    except Exception:
        outcome = value
    return outcome

candidates = []
for paperk in new_papers:
    # Reduce every author to initials (the raw name is kept when that fails),
    # then let the highlighter flag those found in the MPIA list.
    normed_author_list = [robust_call(mpia.get_initials, author)
                          for author in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    matches = [pair for pair in zip(hl_authors, paperk['authors'])
               if 'mark' in pair[0]]
    paperk['authors'] = hl_authors
    if not matches:
        # no author matched our list: not a candidate
        continue
    candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))
print("""          {0:,d} with possible author matches""".format(len(candidates)))

Y. Wang  ->  Y. Wang  |  ['Y. Wang']
J. Liu  ->  J. Liu  |  ['J. Liu']
N. Wang  ->  N. Wang  |  ['N. Wang']
Y. Wang  ->  Y. Wang  |  ['Y. Wang']
S. Kraus  ->  S. Kraus  |  ['S. Kraus']
Y. Wang  ->  Y. Wang  |  ['Y. Wang']
J. Liu  ->  J. Liu  |  ['J. Liu']
J. Liu  ->  J. Liu  |  ['J. Liu']
Y. Wang  ->  Y. Wang  |  ['Y. Wang']


X. Zhang  ->  X. Zhang  |  ['X. Zhang']
J. Li  ->  J. Li  |  ['J. Li']
J. Liu  ->  J. Liu  |  ['J. Liu']
K. Jahnke  ->  K. Jahnke  |  ['K. Jahnke']
M. Schirmer  ->  M. Schirmer  |  ['M. Schirmer']
Y. Wang  ->  Y. Wang  |  ['Y. Wang']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']
X. Zhang  ->  X. Zhang  |  ['X. Zhang']


Arxiv has 82 new papers today
          8 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Process every candidate paper: fetch its source, check the affiliations,
# parse the TeX and build the markdown summary.
#   documents: (paper_id, markdown text) of the papers that went through
#   failed:    (paper, reason) of the papers that did not
documents = []
failed = []
for paper in tqdm(candidates):
    # normalise the identifier: lower case, no 'arxiv:' prefix, no literal
    # backslash-n sequence (the pattern is a raw string), no surrounding blanks
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']
    
    folder = f'tmp_{paper_id}'

    try:
        # only retrieve the source when the working folder is not there yet
        if not os.path.isdir(folder):
            folder = retrieve_document_source(f"{paper_id}", f'tmp_{paper_id}')
        
        try:
            # `validation` raises AffiliationError when the affiliation check fails
            doc = latex.LatexDocument(folder, validation=validation)    
        except AffiliationError as affilerror:
            # NOTE(review): `msg` is built but never used afterwards
            msg = f"ArXiv:{paper_id:s} is not an MPIA paper... " + str(affilerror)
            failed.append((paper, "affiliation error: " + str(affilerror) ))
            continue
        
        # Hack because sometimes author parsing does not work well
        # (when the counts differ, the author list of the arXiv record is used)
        if (len(doc.authors) != len(paper['authors'])):
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors], 
                mpia_authors, verbose=True)
        # fall back to the abstract of the arXiv record when none was parsed
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']
            
        # The "Appeared on: <date></mark>" marker written here is what
        # `extract_appearance_dates` looks for in the exported files.
        doc.comment = (get_markdown_badge(paper_id) + 
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"
        
        full_md = doc.generate_markdown_text()
        
        full_md += get_markdown_qrcode(paper_id)
        
        # replace citations
        # (best effort: a failure is printed and the text is kept as it is)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)
        
        documents.append((paper_id, full_md))
    except Exception as e:
        # any other failure (retrieval, parsing, ...) is reported as a warning
        # and recorded with a "latex error" reason
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/8 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2506.08101
Retrieving document from  https://arxiv.org/e-print/2506.08105


HTTP Error 404: Not Found


HTTP Error 404: Not Found


Retrieving document from  https://arxiv.org/e-print/2506.08241


HTTP Error 404: Not Found


Retrieving document from  https://arxiv.org/e-print/2506.08367


HTTP Error 404: Not Found
HTTP Error 404: Not Found


Retrieving document from  https://arxiv.org/e-print/2506.08369
Retrieving document from  https://arxiv.org/e-print/2506.08378


HTTP Error 404: Not Found
HTTP Error 404: Not Found


Retrieving document from  https://arxiv.org/e-print/2506.08389
Retrieving document from  https://arxiv.org/e-print/2506.08863


HTTP Error 404: Not Found


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
import os

today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"
# make sure the destination folder exists, otherwise open() fails on a fresh tree
os.makedirs(os.path.dirname(logfile), exist_ok=True)


with open(logfile, 'w') as logs:
    # Summary and successful papers
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    success = [k[0] for k in documents]
    for candid in candidates:
        # identifiers are compared without any "prefix:" part
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    # Failed papers, grouped by failure reason
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are shown in green, everything else in red
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() + 
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        # `failed` is sorted by reason: write a heading whenever the reason changes
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')
        
        # every failure is displayed as well as logged
        display(data)

## Successful papers

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.08101-b31b1b.svg)](https://arxiv.org/abs/2506.08101) | **The enhanced X-ray Timing and Polarimetry mission -- eXTP for launch in 2030**  |
|| S.-N. Zhang, et al. -- incl., <mark>Y. Wang</mark>, <mark>J. Liu</mark>, <mark>N. Wang</mark> |
|*Appeared on*| *2025-06-11*|
|*Comments*| *submitted to the SCIENCE CHINA Physics, Mechanics & Astronomy. arXiv admin note: text overlap with arXiv:1812.04020*|
|**Abstract**|            In this paper we present the current status of the enhanced X-ray Timing and Polarimetry mission, which has been fully approved for launch in 2030. eXTP is a space science mission designed to study fundamental physics under extreme conditions of matter density, gravity, and magnetism. The mission aims at determining the equation of state of matter at supra-nuclear density, measuring effects of QED, and understanding the dynamics of matter in strong-field gravity. In addition to investigating fundamental physics, the eXTP mission is poised to become a leading observatory for time-domain and multi-messenger astronomy in the 2030s, as well as providing observations of unprecedented quality on a variety of galactic and extragalactic objects. After briefly introducing the history and a summary of the scientific objectives of the eXTP mission, this paper presents a comprehensive overview of: 1) the cutting-edge technology, technical specifications, and anticipated performance of the mission's scientific instruments; 2) the full mission profile, encompassing spacecraft design, operational capabilities, and ground segment infrastructure.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error HTTP Error 404: Not Found</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.08105-b31b1b.svg)](https://arxiv.org/abs/2506.08105) | **Probing the Strong Gravity Region of Black Holes with eXTP**  |
|| Q. Bu, et al. -- incl., <mark>Y. Wang</mark> |
|*Appeared on*| *2025-06-11*|
|*Comments*| *submitted to the SCIENCE CHINA Physics, Mechanics & Astronomy*|
|**Abstract**|            We present the novel capabilities of the enhanced X-ray Timing and Polarimetry (eXTP) mission to study the strong gravity region around stellar-mass black holes in X-ray binary systems and supermassive black holes in active galactic nuclei. eXTP can combine X-ray spectral, timing, and polarimetric techniques to study the accretion process near black holes, measure black hole masses and spins, and test Einstein's theory of General Relativity in the strong field regime. We show how eXTP can improve the current measurements of black holes of existing X-ray missions and we discuss the scientific questions that can be addressed.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error HTTP Error 404: Not Found</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.08241-b31b1b.svg)](https://arxiv.org/abs/2506.08241) | **Precise Age For The Binary HD 21278 In The Young Alpha Persei Cluster**  |
|| C. A. Danner, et al. -- incl., <mark>S. Kraus</mark> |
|*Appeared on*| *2025-06-11*|
|*Comments*| *31 pages, 19 figures, Accepted for publication in the Astrophysical Journal*|
|**Abstract**|            We present a study of the double-lined spectroscopic binary HD 21278 that contains one of the brightest main sequence stars in the young $\alpha$ Persei open cluster. We analyzed new spectra and reanalyzed archived spectra to measure precise new radial velocity curves for the binary. We also obtained interferometric data using the CHARA Array at Mount Wilson to measure the sky positions of the two stars and the inclination of the $\sim$ 2 milliarcsecond orbit. We determine that the two stars have masses of $5.381 \pm 0.084 M_{\odot}$ and $3.353 \pm 0.064 M_{\odot}$. From isochrone fits, we find the cluster's age to be $49 \pm 7$ Myr (using PARSEC models) or $49.5 \pm 6$ Myr (MIST models). Finally, we revisit the massive white dwarfs that are candidate escapees from the $\alpha$ Persei cluster to try to better characterize the massive end of the white dwarf initial-final mass relation. The implied progenitor masses challenge the idea that Chandrasekhar-mass white dwarfs are made by single stars with masses near $8 \msun$.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error HTTP Error 404: Not Found</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.08367-b31b1b.svg)](https://arxiv.org/abs/2506.08367) | **Observatory Science with eXTP**  |
|| P. Zhou, et al. -- incl., <mark>Y. Wang</mark>, <mark>J. Liu</mark>, <mark>J. Liu</mark>, <mark>Y. Wang</mark>, <mark>X. Zhang</mark> |
|*Appeared on*| *2025-06-11*|
|*Comments*| *Submitted to the SCIENCE CHINA Physics, Mechanics & Astronomy*|
|**Abstract**|            Scheduled for launch in 2030, the enhanced X-ray Timing and Polarization (eXTP) telescope is a Chinese space-based mission aimed at studying extreme conditions and phenomena in astrophysics. eXTP will feature three main payloads: Spectroscopy Focusing Arrays (SFAs), Polarimetry Focusing Arrays (PFAs), and a Wide-field Camera (W2C). This white paper outlines observatory science, incorporating key scientific advances and instrumental changes since the publication of the previous white paper [1]. We will discuss perspectives of eXTP on the research domains of flare stars, supernova remnants, pulsar wind nebulae, cataclysmic variables, X-ray binaries, ultraluminous X-ray sources, AGN, and pulsar-based positioning and timekeeping.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error HTTP Error 404: Not Found</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.08369-b31b1b.svg)](https://arxiv.org/abs/2506.08369) | **Physics of Strong Magnetism with eXTP**  |
|| M. Ge, et al. -- incl., <mark>J. Li</mark>, <mark>J. Liu</mark> |
|*Appeared on*| *2025-06-11*|
|*Comments*| *Submitted to the SCIENCE CHINA Physics, Mechanics & Astronomy*|
|**Abstract**|            In this paper we present the science potential of the enhanced X-ray Timing and Polarimetry (eXTP) mission, in its new configuration, for studies of strongly magnetized compact objects. We discuss the scientific potential of eXTP for QED studies, especially leveraging on the recent observations made with the NASA IXPE mission. Given eXTP's unique combination of timing, spectroscopy, and polarimetry, we focus on the perspectives for physics and astrophysics studies of strongly magnetized compact objects, such as magnetars and accreting X-ray pulsars. Developed by an international Consortium led by the Institute of High Energy Physics of the Chinese Academy of Sciences, the eXTP mission is expected to launch in early 2030.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error HTTP Error 404: Not Found</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.08378-b31b1b.svg)](https://arxiv.org/abs/2506.08378) | **Euclid preparation: The NISP spectroscopy channel, on ground performance and calibration**  |
|| E. Collaboration, et al. -- incl., <mark>K. Jahnke</mark>, <mark>M. Schirmer</mark>, <mark>Y. Wang</mark> |
|*Appeared on*| *2025-06-11*|
|*Comments*| *18 pages 15 figures with additional 8 pages of annexes. Submitted to A&A*|
|**Abstract**|            ESA's Euclid cosmology mission relies on the very sensitive and accurately calibrated spectroscopy channel of the Near-Infrared Spectrometer and Photometer (NISP). With three operational grisms in two wavelength intervals, NISP provides diffraction-limited slitless spectroscopy over a field of $0.57$ deg$^2$. A blue grism $\text{BG}_\text{E}$ covers the wavelength range $926$--$1366$\,nm at a spectral resolution $R=440$--$900$ for a $0.5''$ diameter source with a dispersion of $1.24$ nm px$^{-1}$. Two red grisms $\text{RG}_\text{E}$ span $1206$ to $1892$\,nm at $R=550$--$740$ and a dispersion of $1.37$ nm px$^{-1}$. We describe the construction of the grisms as well as the ground testing of the flight model of the NISP instrument where these properties were established.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error HTTP Error 404: Not Found</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.08389-b31b1b.svg)](https://arxiv.org/abs/2506.08389) | **Submillimeter and Mid-Infrared Variability of Young Stellar Objects in the M17 HII Region**  |
|| Z. Chen, et al. -- incl., <mark>X. Zhang</mark> |
|*Appeared on*| *2025-06-11*|
|*Comments*| *Accepted for publication in The Astronomical Journal*|
|**Abstract**|            We conducted a comprehensive analysis of young stellar object (YSO) variability at submillimeter and mid-infrared (mid-IR) wavelengths for the M\,17 \ion{H}{2} region, using 3.5 years monitoring data from the JCMT Transient Survey at $450$ and $850\,\mu$m and 9 years mid-IR monitoring data from the NEOWISE mission. Our study encompasses observations of 198 and 164 bright submillimeter peaks identified within the deep JCMT coadded maps at 450 and $850\,\mu$m, and 66 YSOs seen by NEOWISE W2 that were previously identified in mid-IR observations. We find one robust linear submillimeter variable, an intermediate mass protostar, with a $4\%$ peak flux change in 3.5 years of JCMT monitoring that sets a lower limit of $16\%$ luminosity increase for the source. At mid-IR wavelengths, our analysis reveals secular and stochastic variability in 22 YSOs, with the highest fraction of secular variability occurring at the earliest evolutionary stage. This mid-IR fractional variability as a function of evolutionary stage result is similar to what has previously been found for YSO variability within the Gould Belt and the intermediate-mass star formation region M17\,SWex, though overall less variability is detected in M\,17 in submillimeter and mid-IR. We suspect that this lower detection of YSO variability is due to both the greater distance to M\,17 and the strong feedback from the \ion{H}{2} region. Our findings showcase the utility of multiwavelength observations to better capture the complex variability phenomena inherent to star formation processes and demonstrate the importance of years-long monitoring of a diverse selection of star-forming environments.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error HTTP Error 404: Not Found</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2506.08863-b31b1b.svg)](https://arxiv.org/abs/2506.08863) | **Responses of a Coronal Hole to a Fast Flare-Driven Coronal Wave**  |
|| <mark>X. Zhang</mark>, et al. |
|*Appeared on*| *2025-06-11*|
|*Comments*| *10 pages, 5 figures, Accepted in ApJL*|
|**Abstract**|            Coronal waves, significant solar phenomena, act as diagnostic tools for scientists studying solar atmosphere properties. Here, we present a novel observation detailing how a coronal wave event, associated with an X5.0 class flare, influenced the properties of an adjacent coronal hole through interaction. The coronal wave was observed in both extreme ultraviolet observations from the Atmospheric Imaging Assembly aboard the Solar Dynamics Observatory and Lyman-alpha observations from the Solar Disk Imager aboard the Advanced Space-based Solar Observatory. Utilizing the method of differential emission measure, we found that as the coronal wave passed through, the adjacent coronal hole experienced an increase in temperature from 1.31 to 1.43 MK and a rise in density from $\sim$1.62$\times10^{8}$ to 1.76$\times10^{8}$ cm$^{-3}$ within the rising period of $\sim$7 minutes. Subsequently, after the wave passed, the entire coronal hole transitioned to a new state with a slight temperature increase and a 14$\%$ decrease in density, with more pronounced changes observed at the coronal hole's boundary. Taking into account the impacts of radiative loss and heat conduction, the coronal wave was estimated to provide an average energy of 2.2$\times10^{8}$ erg cm$^{-2}$ to the coronal hole during the short rising period. This study highlights the identification of the coronal wave in both extreme ultraviolet and Lyman-alpha observations, shedding light on the significant energy input, particularly within the coronal hole. These findings provide new insights into better understanding kinematics of fast coronal waves, energy transfer processes open versus closed magnetic topologies, and the possible acceleration of solar winds.         |
|<p style="color:red"> **ERROR** </p>| <p style="color:red">latex error HTTP Error 404: Not Found</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    The markdown text is written to ``directory/md_fname``. Local figures
    referenced in the text (``[Fig...](path)`` links and ``<img src="path" .../>``
    tags) are copied below `directory`, keeping their relative folder;
    references containing ``http`` and missing files are skipped.

    :param md: markdown text to export
    :param md_fname: name of the markdown file to write
    :param directory: destination folder, created (with its parents) if needed
    :raises RuntimeError: if `directory` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs (not mkdir): the target may be nested, e.g. '_build/html/'
        os.makedirs(directory, exist_ok=True)

    fig_fnames = (re.compile(r'\[Fig.*\]\((.*)\)').findall(md) + 
                  re.compile(r'\<img src="([^>\s]*)"[^>]*/>').findall(md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # keep the figure's relative folder below the destination
        destdir = os.path.join(directory, os.path.dirname(fname))
        destfname = os.path.join(destdir, os.path.basename(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, destfname)

    md_path = os.path.join(directory, md_fname)
    with open(md_path, 'w') as fout:
        fout.write(md)
    print("exported in ", md_path)
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# Write one `<paper_id>.md` file (plus the local figures it references) into
# `_build/html/` for every successfully processed paper.
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

## Display the papers

Not necessary but allows for a quick check.

In [9]:
# render the markdown text of every exported document
for _entry in documents:
    display(Markdown(_entry[1]))

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

files = glob('_build/html/*.md')
days = 7
# Use timezone-aware UTC values on both sides of the subtraction. Comparing a
# naive local "now" with a naive UTC timestamp shifts every age by the UTC offset.
now = datetime.now(tz=timezone.utc)
res = []
for fk in files:
    # NOTE: st_ctime is the metadata-change time on Unix and the creation time
    # on Windows, not the content-modification time (that would be st_mtime).
    stat_result = os.stat(fk).st_ctime
    modified = datetime.fromtimestamp(stat_result, tz=timezone.utc)
    delta = now - modified
    if delta <= timedelta(days=days):
        # total_seconds(): `.seconds` alone leaves out the whole days
        res.append((delta.total_seconds(), fk))
# entries are ordered by file name (descending), not by age
res = [k[1] for k in reversed(sorted(res, key=lambda x:x[1]))]
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

517  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    Dates are compared as strings against ``str(datetime.date)`` values, so
    they are expected in ISO ``YYYY-MM-DD`` form.

    :param lst: iterable of ``(fname, date)`` pairs
    :param days: how many days back a document may date from
    :returns: generator over the matching file names, most recent date first
    """
    # computed once, so the threshold stays the same for the whole iteration
    cutoff = str(datetime.date.today() - datetime.timedelta(days=days))
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date >= cutoff:
            yield fname

def extract_appearance_dates(lst_file):
    """ Read the appearance date of every given markdown file

    Only the first line containing "Appeared on" is used in each file; files
    without such a line are left out of the result.

    :param lst_file: iterable of file names to scan
    :returns: list of ``(fname, date)`` pairs, `date` being the text found
        between "Appeared on:" and "</mark>"
    """
    def get_date(line):
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # iterate over the argument (not over a module-level list)
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    # the first occurrence is enough
                    break
    return dates

from glob import glob

# appearance dates of all exported documents, then those of the last week
days = 7
lst = glob('_build/html/*md')
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(npub, f" publications in the last {days:d} days.")

10  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Build the HTML markup of a carousel holding `npub` numbered slides

    Slides are numbered from 1; each one is a ``slide<k>`` div with a
    placeholder content.
    """
    opening = [
        """  <div class="carousel" """,
        """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">""",
    ]
    cells = [
        f"""    <div class="carousel-cell"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    return '\n'.join(opening + cells + ["  </div>"])

def create_grid(npub=4):
    """ Build the HTML markup of a flat grid holding `npub` numbered slides

    Slides are numbered from 1; each one is a ``slide<k>`` div with a
    placeholder content.
    """
    items = [
        f"""    <div class="grid-item"> <div id="slide{idx}" class="md_view">Content {idx}</div> </div>"""
        for idx in range(1, npub + 1)
    ]
    return '\n'.join(["""  <div class="grid"> """] + items + ["  </div>"])

In [13]:
# Fill the daily template with the documents of the last 7 days.
carousel = create_carousel(npub)
docs = ', '.join(f'"{path.split("/")[-1]:s}"' for path in res)
slides = ', '.join(f'"slide{num}"' for num in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# placeholders are substituted one after the other, in this order
for placeholder, content in (("{%-- carousel:s --%}", carousel),
                             ("{%-- suptitle:s --%}", "7-day archives"),
                             ("{%-- docs:s --%}", docs),
                             ("{%-- slides:s --%}", slides)):
    page = page.replace(placeholder, content)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today
# Same page as the 7-day archive, restricted to the last day.
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(npub, " publications in the last day.")

carousel = create_carousel(npub)
docs = ', '.join(f'"{path.split("/")[-1]:s}"' for path in res)
slides = ', '.join(f'"slide{num}"' for num in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# placeholders are substituted one after the other, in this order
for placeholder, content in (("{%-- carousel:s --%}", carousel),
                             ("{%-- suptitle:s --%}", "Daily"),
                             ("{%-- docs:s --%}", docs),
                             ("{%-- slides:s --%}", slides)):
    page = page.replace(placeholder, content)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

3  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

# upper bound on the number of papers shown in the grid
max_pub = 6
# most recent appearance dates first, cut at `max_pub` entries
res = [k[0] for k in islice(reversed(sorted(dates, key=lambda x: x[1])), max_pub)]
# size the grid on what was actually found, so that no empty slide is created
# when fewer than `max_pub` documents exist
npub = len(res)
print(f"{npub} of at most {max_pub} publications selected.")

grid = create_grid(npub)
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
