# MPIA Arxiv on Deck 2

Contains the steps to produce the paper extractions.

In [1]:
# Imports
import os
from IPython.display import Markdown, display
from tqdm.notebook import tqdm
import warnings
from PIL import Image 
import re

# requires arxiv_on_deck_2

from arxiv_on_deck_2.arxiv2 import (get_new_papers, 
                                    get_paper_from_identifier,
                                    retrieve_document_source, 
                                    get_markdown_badge)
from arxiv_on_deck_2 import (latex,
                             latex_bib,
                             mpia,
                             highlight_authors_in_list)

# Sometimes images are really big: raise Pillow's pixel-count limit to 1e9 pixels.
# NOTE(review): presumably this avoids Pillow's "decompression bomb" protection
# rejecting large paper figures -- confirm against the Pillow documentation.
Image.MAX_IMAGE_PIXELS = 1000000000 

In [2]:
# Some useful definitions.

class AffiliationWarning(UserWarning):
    """Warning category used to report affiliation-related issues."""

class AffiliationError(RuntimeError):
    """Error raised when a paper does not pass the affiliation check."""

def validation(source: str):
    """Check the affiliations of a paper before its TeX code is parsed.

    Delegates to ``mpia.affiliation_verifications`` and turns any result other
    than ``True`` into an exception, so that the caller can stop early.

    :param source: source content handed over to ``mpia.affiliation_verifications``
    :raises AffiliationError: if the verification does not return ``True``;
                              the returned value is included in the message
    """
    check = mpia.affiliation_verifications(source, verbose=True)
    if check is not True:
        # Format instead of concatenating: a non-string result (e.g. False or None)
        # must still produce an AffiliationError rather than a TypeError.
        raise AffiliationError(f"mpia.affiliation_verifications: {check}")

        
# Report every AffiliationWarning each time it is issued (filter action 'always').
warnings.simplefilter('always', AffiliationWarning)


def get_markdown_qrcode(paper_id: str):
    """ Generate a qrcode to the arxiv page using qrserver.com

    :param paper_id: arXiv identifier of the paper (e.g. ``2411.13122``)
    :returns: HTML snippet (usable in markdown) with the QR code image
    """
    from urllib.parse import quote

    url = r"https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="
    # The encoded payload is the abstract page; it is percent-encoded because it
    # travels as a query-string value of the QR service URL.
    target = quote(f"https://arxiv.org/abs/{paper_id}", safe='')
    # Quote the whole `src` attribute: quoting only the payload makes the quote
    # characters part of the data encoded in the QR code.
    return f'<div id="qrcode"><img src="{url}{target}"></div>'


def clean_non_western_encoded_characters_commands(text: str) -> str:
    """ Strip the LaTeX CJK environment wrappers from a string

    Only the ``\\begin{CJK}{UTF8}{gbsn} ... \\end{CJK}`` wrapper is handled for
    now (the list may need to grow); the wrapped content itself is kept.

    :param text: the text to clean
    :return: the cleaned text
    """
    cjk_wrapper = re.compile(r"(\\begin{CJK}{UTF8}{gbsn})(.*?)(\\end{CJK})")
    # keep only what sits between the begin/end commands (second group)
    return cjk_wrapper.sub(lambda found: found.group(2), text)


def get_initials(name: str) -> str:
    """ Get the short name, e.g., A.-B. FamName

    Every token but the last one is reduced to its initial(s); hyphenated
    tokens keep their hyphen (``Hans-Walter`` -> ``H.-W.``). A parenthesised
    part (often a non-western spelling of the name) is moved to the end.
    Empty names, unmatched parentheses and dangling hyphens are tolerated
    instead of raising ``IndexError``.

    :param name: full name
    :returns: initials followed by the last token (and the parenthesised
              suffix, if any); an empty string for an empty name
    """
    suffix = ''
    # account for non western names often in ()
    if '(' in name:
        name = clean_non_western_encoded_characters_commands(name)
        found = re.findall(r"\((.*?)\)", name)
        if found:
            # an unmatched "(" yields no match: the name is then left as it is
            suffix = found[0]
            name = name.replace(f"({suffix})", '')

    tokens = name.split()
    if not tokens:
        # nothing but whitespace and/or a parenthesised suffix
        return f"({suffix})" if suffix else ''

    initials = []
    for token in tokens[:-1]:
        # drop empty pieces produced by leading/trailing/double hyphens
        pieces = [piece for piece in token.split('-') if piece]
        if pieces:
            initials.append('-'.join(piece[0] + '.' for piece in pieces))
    initials.append(tokens[-1])
    if suffix:
        initials.append(f"({suffix})")
    return ' '.join(initials)

## get list of arxiv paper candidates

We use the MPIA staff ("Mitarbeiter") list web page from mpia.de to get the author names.
We then get all new papers from arXiv and match their authors against that list.

In [3]:
# deal with the author list and edge cases of people that cannot be consistent on their name  

def filter_non_scientists(name: str) -> bool:
    """ Loose filter on expected authorships

    Rejects IT, administration and technical staff through a hard-coded list
    of name fragments (plain substring match).

    :param name: name
    :returns: False if name is not a scientist, True otherwise
    """
    excluded_fragments = (
        'Licht', 'Binroth', 'Witzel', 'Jordan',
        'Zähringer', 'Scheerer', 'Hoffmann', 'Düe',
        'Hellmich', 'Enkler-Scharpegge', 'Witte-Nguy',
        'Dehen', 'Beckmann', 'Jager', 'Jäger',
    )
    return not any(fragment in name for fragment in excluded_fragments)

def add_author_to_list(author_list: list) -> list:
    """ Make sure a fixed set of extra authors is part of the list

    The extra names are hard-coded below. The list is updated in place and
    also returned.

    :param author_list: list of authors
    :returns: updated list of authors (same object as ``author_list``)
    """
    always_included = ['T. Henning']

    for extra in always_included:
        if extra in author_list:
            continue
        author_list.append(extra)
    return author_list

# Author names come from the MPIA website (mpia.get_mpia_mitarbeiter_list()
# already does some filtering); the name is the second field of each entry.
# Non-scientists are removed, then authors whose MPIA name and paper name are
# inconsistent are added back by hand.
mpia_authors = add_author_to_list(
    [entry[1]
     for entry in mpia.get_mpia_mitarbeiter_list()
     if filter_non_scientists(entry[1])]
)

In [4]:
# Fetch the new papers, then append any paper requested by hand.
new_papers = get_new_papers()
# add manual references: identifiers listed here are fetched one by one
# with get_paper_from_identifier and appended to the new papers
add_paper_refs = []
new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])

def robust_call(fn, value, *args, **kwargs):
    """Apply ``fn`` to ``value``, falling back to ``value`` itself on failure.

    :param fn: callable invoked as ``fn(value, *args, **kwargs)``
    :param value: first argument of ``fn``, also the fallback result
    :returns: the result of ``fn``, or ``value`` unchanged if ``fn`` raised
              an ``Exception``
    """
    try:
        outcome = fn(value, *args, **kwargs)
    except Exception:
        outcome = value
    return outcome

# Keep only the papers for which at least one author matches the MPIA list.
candidates = []
for paperk in new_papers:
    # Check author list with their initials
    # (names that mpia.get_initials cannot handle are kept unchanged by robust_call)
    normed_author_list = [robust_call(mpia.get_initials, k) for k in paperk['authors']]
    hl_authors = highlight_authors_in_list(normed_author_list, mpia_authors, verbose=True)
    # a highlighted author is detected by the 'mark' substring in its entry
    # NOTE(review): presumably matches are wrapped in <mark>...</mark> -- confirm in highlight_authors_in_list
    matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]
    # the paper now carries the normalised/highlighted names (must come after the zip above)
    paperk['authors'] = hl_authors
    if matches:
        # only select paper if an author matched our list
        candidates.append(paperk)
print("""Arxiv has {0:,d} new papers today""".format(len(new_papers)))        
print("""          {0:,d} with possible author matches""".format(len(candidates)))

J. Liu  ->  J. Liu  |  ['J. Liu']
J. Shi  ->  J. Shi  |  ['J. Shi']
A. Winter  ->  A. Winter  |  ['A. Winter']
R. Zhang  ->  R. Zhang  |  ['R. Zhang']
H.-W. Rix  ->  H.-W. Rix  |  ['H.-W. Rix']
Arxiv has 56 new papers today
          4 with possible author matches


# Parse sources and generate relevant outputs

From the candidates, we do the following steps:
* get their tarball from ArXiv (and extract data)
* find the main .tex file: find one with \documentclass{...} (sometimes it's non trivial)
* Check affiliations with :func:`validation`, which uses :func:`mpia.affiliation_verifications`
* If passing the affiliations: we parse the .tex source
   * inject sub-documents into the main (flatten the main document)
   * parse structure, extract information (title, abstract, authors, figures...)
   * handles `\graphicspath` if provided
* Generate the .md document.

In [5]:
# Process every candidate: fetch its source, check the affiliations, parse the
# TeX and build the markdown summary.
documents = []   # (paper_id, markdown text) for each paper that went through
failed = []      # (paper, reason) for each paper rejected or that crashed
for paper in tqdm(candidates):
    # Normalise the identifier: lower case, no "arxiv:" prefix, no literal
    # backslash-n sequence (raw string: not the newline character) and no
    # surrounding whitespace.
    paper['identifier'] = paper['identifier'].lower().replace('arxiv:', '').replace(r'\n', '').strip()
    paper_id = paper['identifier']

    folder = f'tmp_{paper_id}'

    try:
        # reuse an already extracted source folder, download it otherwise
        if not os.path.isdir(folder):
            folder = retrieve_document_source(paper_id, folder)

        try:
            doc = latex.LatexDocument(folder, validation=validation)
        except AffiliationError as affilerror:
            # not an MPIA paper according to the affiliation check: log and skip
            failed.append((paper, "affiliation error: " + str(affilerror)))
            continue

        # Hack because sometimes author parsing does not work well
        if (len(doc.authors) != len(paper['authors'])):
            # author counts disagree: trust the list attached to the arXiv entry
            doc._authors = paper['authors']
        else:
            # highlight authors (FIXME: doc.highlight_authors)
            # done on arxiv paper already
            doc._authors = highlight_authors_in_list(
                [get_initials(k) for k in doc.authors],
                mpia_authors, verbose=True)
        # fall back on the arXiv abstract when none was extracted from the TeX
        if (doc.abstract) in (None, ''):
            doc._abstract = paper['abstract']

        doc.comment = (get_markdown_badge(paper_id) +
                       "<mark>Appeared on: " + paper['date'] + "</mark> - ")
        if paper['comments']:
            doc.comment += " _" + paper['comments'] + "_"

        full_md = doc.generate_markdown_text()

        full_md += get_markdown_qrcode(paper_id)

        # replace citations (best effort: the summary is kept even if this fails)
        try:
            bibdata = latex_bib.LatexBib.from_doc(doc)
            full_md = latex_bib.replace_citations(full_md, bibdata)
        except Exception as e:
            print("Issues with the citations")
            print(e)

        documents.append((paper_id, full_md))
    except Exception as e:
        # any other failure is reported as a warning and recorded, so that one
        # broken paper does not stop the whole run
        warnings.warn(latex.LatexWarning(f"{paper_id:s} did not run properly\n" +
                                         str(e)
                                        ))
        failed.append((paper, "latex error " + str(e)))

  0%|          | 0/4 [00:00<?, ?it/s]

Retrieving document from  https://arxiv.org/e-print/2411.12994


extracting tarball to tmp_2411.12994... done.
Retrieving document from  https://arxiv.org/e-print/2411.13011


extracting tarball to tmp_2411.13011...

 done.
Retrieving document from  https://arxiv.org/e-print/2411.13051


extracting tarball to tmp_2411.13051...

 done.
Retrieving document from  https://arxiv.org/e-print/2411.13122


extracting tarball to tmp_2411.13122...

 done.


R. Zhang  ->  R. Zhang  |  ['R. Zhang']
H.-W. Rix  ->  H.-W. Rix  |  ['H.-W. Rix']


Found 107 bibliographic references in tmp_2411.13122/sample631.bbl.


### Export the logs

Throughout, we also keep track of the logs per paper; see `_build/html/logs/log-{today's date}.md`.

In [6]:
import datetime
today = str(datetime.date.today())
logfile = f"_build/html/logs/log-{today}.md"

# The log folder may not exist yet (e.g. fresh build tree): create it first,
# otherwise opening the log file for writing fails.
os.makedirs(os.path.dirname(logfile), exist_ok=True)

with open(logfile, 'w') as logs:
    # Success
    logs.write(f'# Arxiv on Deck 2: Logs - {today}\n\n')
    logs.write("""* Arxiv had {0:,d} new papers\n""".format(len(new_papers)))
    logs.write("""    * {0:,d} with possible author matches\n\n""".format(len(candidates)))
    logs.write("## Successful papers\n\n")
    display(Markdown("## Successful papers"))
    # identifiers of the papers that produced a document
    success = {k[0] for k in documents}
    for candid in candidates:
        if candid['identifier'].split(':')[-1] in success:
            display(candid)
            logs.write(candid.generate_markdown_text() + '\n\n')

    ## failed
    logs.write("## Failed papers\n\n")
    display(Markdown("## Failed papers"))
    # sort by reason so that identical reasons share a single sub-heading
    failed = sorted(failed, key=lambda x: x[1])
    current_reason = ""
    for paper, reason in failed:
        # affiliation rejections are expected (green); anything else is a real error (red)
        if 'affiliation' in reason:
            color = 'green'
        else:
            color = 'red'
        data = Markdown(
                paper.generate_markdown_text() +
                f'\n|<p style="color:{color:s}"> **ERROR** </p>| <p style="color:{color:s}">{reason:s}</p> |'
               )
        if reason != current_reason:
            logs.write(f'### {reason:s} \n\n')
            current_reason = reason
        logs.write(data.data + '\n\n')

        # only display here the important errors (all in logs)
        # if color in ('red',):
        display(data)

## Successful papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2411.13122-b31b1b.svg)](https://arxiv.org/abs/2411.13122) | **Identifying the Galactic Substructures in 5D Space Using All-sky RR Lyrae Stars in Gaia DR3**  |
|| S. Sun, et al. -- incl., <mark>R. Zhang</mark>, <mark>H.-W. Rix</mark> |
|*Appeared on*| *2024-11-21*|
|*Comments*| *23 pages, 19 figures, 4 tables, accepted for publication in ApJ, version before language edition*|
|**Abstract**|            Motivated by the vast gap between photometric and spectroscopic data volumes, there is great potential in using 5D kinematic information to identify and study substructures of the Milky Way. We identify substructures in the Galactic halo using 46,575 RR Lyrae stars (RRLs) from Gaia DR3 with the photometric metallicities and distances newly estimated by Li et al. (2023). Assuming a Gaussian prior distribution of radial velocity, we calculate the orbital distribution characterized by the integrals of motion for each RRL based on its 3D positions, proper motions and corresponding errors, and then apply the friends-of-friends algorithm to identify groups moving along similar orbits. We have identified several known substructures, including Sagittarius (Sgr) Stream, Hercules-Aquila Cloud (HAC), Virgo Overdensity (VOD), Gaia-Enceladus-Sausage (GES), Orphan-Chenab stream, Cetus-Palca, Helmi Streams, Sequoia, Wukong and Large Magellanic Cloud (LMC) leading arm, along with 18 unknown groups. Our findings indicate that HAC and VOD have kinematic and chemical properties remarkably similar to GES, with most HAC and VOD members exhibiting eccentricity as high as GES, suggesting that they may share a common origin with GES. The ability to identify the low mass and spatially dispersed substructures further demonstrates the potential of our method, which breaks the limit of spectroscopic survey and is competent to probe the substructures in the whole Galaxy. Finally, we have also identified 18 unknown groups with good spatial clustering and proper motion consistency, suggesting more excavation of Milky Way substructures in the future with only 5D data.         |

## Failed papers


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2411.12994-b31b1b.svg)](https://arxiv.org/abs/2411.12994) | **Revisiting the activity-rotation relation for evolved stars**  |
|| H. Han, et al. -- incl., <mark>J. Liu</mark> |
|*Appeared on*| *2024-11-21*|
|*Comments*| *APJ accepted*|
|**Abstract**|            The magnetic dynamo mechanism of giant stars remains an open question, which can be explored by investigating their activity-rotation relations with multiple proxies. By using the data from the LAMOST and \emph{GALEX} surveys, we carried out a comprehensive study of activity-rotation relations of evolved stars based on \cahk lines, $\rm{H\alpha}$ lines and near ultraviolet (NUV) emissions. Our results show that evolved stars and dwarfs obey a similar power-law in the unsaturated region of the activity-rotation relation, indicating a common dynamo mechanism in both giant and dwarfs. There is no clear difference in the activity levels between red giant branch stars and red clump stars, nor between single giants and those in binaries. Additionally, our results show that the NUV activity levels of giants are comparable to those of G- and K-type dwarfs and are higher than those of M dwarfs.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2411.13011-b31b1b.svg)](https://arxiv.org/abs/2411.13011) | **The temporal and spatial variations of lithium abundance in the Galactic disc**  |
|| T. Sun, et al. -- incl., <mark>J. Shi</mark> |
|*Appeared on*| *2024-11-21*|
|*Comments*| *Accepted for publication in MNRAS. 9 pages, 10 figures*|
|**Abstract**|            This study investigates the temporal and spatial variations in lithium abundance within the Milky Way using a sample of 22,034 main-sequence turn-off (MSTO) stars and subgiants, characterised by precise stellar ages, 3D NLTE (non-local thermodynamic equilibrium) lithium abundances, and birth radii. Our results reveal a complex variation in lithium abundance with stellar age: a gradual increase from 14 Gyr to 6 Gyr, followed by a decline between 6 Gyr and 4.5 Gyr, and a rapid increase thereafter. We find that young Li-rich stars (ages $<$ 4 Gyr, A(Li) $>$ 2.7 dex) predominantly originate from the outer disc. By binning the sample according to guiding center radius and z$_{\rm max}$, we observe that these young Li-rich stars migrate radially to the local and inner discs. In addition, the stars originating from the inner disc experienced a rapid Li enrichment process between 8 Gyr and 6 Gyr. Our analysis suggests that the age range of Li-dip stars is 4-5 Gyr, encompassing evolution stages from MSTO stars to subgiants. The Galactic radial profile of A(Li) (with respect to birth radius), as a function of age, reveals three distinct periods: 14-6 Gyr ago, 6-4 Gyr ago, and 4-1 Gyr ago. Initially, the lithium abundance gradient is positive, indicating increasing Li abundance with birth radius. During the second period, it transitions to a negative and broken gradient, mainly affected by Li-dip stars. In the final period, the gradient reverts to a positive trend.         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Heidelberg' keyword not found.</p> |


|||
|---:|:---|
| [![arXiv](https://img.shields.io/badge/arXiv-2411.13051-b31b1b.svg)](https://arxiv.org/abs/2411.13051) | **EWOCS-III: JWST observations of the supermassive star cluster Westerlund 1**  |
|| M. G. Guarcello, et al. -- incl., <mark>A. Winter</mark> |
|*Appeared on*| *2024-11-21*|
|*Comments*| *Accepted for publication in Astronomy & Astrophysics*|
|**Abstract**|            The typically large distances, extinction, and crowding of Galactic supermassive star clusters have so far hampered the identification of their very low mass members, required to extend our understanding of star and planet formation, and early stellar evolution, to starburst. This situation has now evolved thanks to the James Webb Space Telescope (JWST), and its unmatched resolution and sensitivity in the infrared. In this paper, the third of the series of the Extended Westerlund 1 and 2 Open Clusters Survey (EWOCS), we present JWST/NIRCam and JWST/MIRI observations of the supermassive star cluster Westerlund 1. These observations are specifically designed to unveil the cluster members down to the BD mass regime, and to allow us to select and study the protoplanetary disks and to study the mutual feedback between the cluster members and the surrounding environment. Westerlund 1 was observed as part of JWST GO-1905 for 23.6 hours. The data have been reduced using the JWST calibration pipeline, together with specific tools necessary to remove artifacts. Source identification and photometry were performed with DOLPHOT. The MIRI images show a plethora of different features. Diffuse nebular emission is observed around the cluster, which is typically composed of myriads of droplet-like features pointing toward the cluster center or the group of massive stars surrounding the WR star W72/A. A long pillar is also observed in the NW. The MIRI images also show resolved shells and outflows surrounding the M-type RSG W20, W26, W75, and W237, the sgB[e] star W9 and the YHG W4. The color-magnitude diagrams built using the NIRCam photometry show a clear cluster sequence, which is marked in its upper part by the 1828 NIRCam stars with X-ray counterparts. NIRCam observations using the F115W filter have reached the 23.8 mag limit with 50\% completeness (roughly corresponding to a 0.06 Msol brown dwarf).         |
|<p style="color:green"> **ERROR** </p>| <p style="color:green">affiliation error: mpia.affiliation_verifications: 'Planck' keyword not found.</p> |

## Export documents

We now write the .md files and export relevant images

In [7]:
def export_markdown_summary(md: str, md_fname:str, directory: str):
    """Export MD document and associated relevant images

    The figures referenced in the markdown text, either as ``[Fig...](path)``
    links or as ``<img src="path" .../>`` tags, are copied below ``directory``
    keeping their relative path; the markdown itself is written to
    ``directory/md_fname``. Online figures (path containing ``http``) and
    missing files are skipped.

    :param md: markdown text to export
    :param md_fname: name of the markdown file to create in ``directory``
    :param directory: output directory (created, with its parents, if missing)
    :raises RuntimeError: if ``directory`` exists but is not a directory
    """
    import os
    import shutil
    import re

    if (os.path.exists(directory) and not os.path.isdir(directory)):
        raise RuntimeError(f"a non-directory file exists with name {directory:s}")

    if (not os.path.exists(directory)):
        print(f"creating directory {directory:s}")
        # makedirs: also creates the missing parents of a nested output path
        os.makedirs(directory, exist_ok=True)

    # Non-greedy matching so that several figure links on the same line are
    # all found (a greedy pattern only keeps the last one).
    fig_fnames = (re.findall(r'\[Fig.*?\]\((.*?)\)', md) +
                  re.findall(r'\<img src="([^>\s]*)"[^>]*/>', md))
    print("found figures", fig_fnames)
    for fname in fig_fnames:
        if 'http' in fname:
            # No need to copy online figures
            continue
        if not os.path.exists(fname):
            print("file not found", fname)
            continue
        print("copying ", fname, "to", directory)
        # keep the relative location of the figure below the output directory
        destdir = os.path.join(directory, os.path.dirname(fname))
        os.makedirs(destdir, exist_ok=True)
        shutil.copy(fname, os.path.join(destdir, os.path.basename(fname)))
    with open(os.path.join(directory, md_fname), 'w') as fout:
        fout.write(md)
    print("exported in ", os.path.join(directory, md_fname))
    for fk in fig_fnames:
        print("    + " + os.path.join(directory, fk))

In [8]:
# Export every generated document, as <paper_id>.md, into the HTML build folder.
for paper_id, md in documents:
    export_markdown_summary(md, f"{paper_id:s}.md", '_build/html/')

found figures ['tmp_2411.13122/./Fig_FeH_Helmi_Sequoia_Wukong.png', 'tmp_2411.13122/./Fig_allknown_lbXZ_overlap.png', 'tmp_2411.13122/./Fig_d_FeH_HAC_VOD_GES.png']
copying  tmp_2411.13122/./Fig_FeH_Helmi_Sequoia_Wukong.png to _build/html/
copying  tmp_2411.13122/./Fig_allknown_lbXZ_overlap.png to _build/html/
copying  tmp_2411.13122/./Fig_d_FeH_HAC_VOD_GES.png to _build/html/
exported in  _build/html/2411.13122.md
    + _build/html/tmp_2411.13122/./Fig_FeH_Helmi_Sequoia_Wukong.png
    + _build/html/tmp_2411.13122/./Fig_allknown_lbXZ_overlap.png
    + _build/html/tmp_2411.13122/./Fig_d_FeH_HAC_VOD_GES.png


## Display the papers

Not necessary but allows for a quick check.

In [9]:
# Render the markdown text (second field) of every generated document.
for entry in documents:
    display(Markdown(entry[1]))

<div class="macros" style="visibility:hidden;">
$\newcommand{\ensuremath}{}$
$\newcommand{\xspace}{}$
$\newcommand{\object}[1]{\texttt{#1}}$
$\newcommand{\farcs}{{.}''}$
$\newcommand{\farcm}{{.}'}$
$\newcommand{\arcsec}{''}$
$\newcommand{\arcmin}{'}$
$\newcommand{\ion}[2]{#1#2}$
$\newcommand{\textsc}[1]{\textrm{#1}}$
$\newcommand{\hl}[1]{\textrm{#1}}$
$\newcommand{\footnote}[1]{}$
$\newcommand{\dt}[1]{{\tt #1}}$
$\newcommand{\vdag}{(v)^\dagger}$
$\newcommand$
$\newcommand$
$\newcommand{\KIAA}{\affiliation{Kavli Institute for Astronomy and$
$Astrophysics, Peking University, Beijing 100871, China}}$
$\newcommand{\DoA}{\affiliation{Department of Astronomy, School of Physics,$
$Peking University, Beijing 100871, China}}$
$\newcommand{\UCAS}{\affiliation{School of Astronomy and Space Science, University of Chinese Academy of Sciences, Beijing 100049, China}}$
$\newcommand{\NAOC}{\affiliation{CAS Key Laboratory of Optical Astronomy, National Astronomical Observatories, Chinese Academy of Sciences, Beijing 100101, China}}$
$\newcommand{\IFAA}{\affiliation{Institute for Frontiers in Astronomy and Astrophysics, Beijing Normal University, Beijing 102206, China}}$
$\newcommand{\MPI}{\affiliation{Max-Planck-Institute for Astronomy Königstuhl 17, D-69117, Heidelberg, Germany}}$
$\newcommand{\QNU}{\affiliation{College of Physics and Electronic Engineering, Qilu Normal University, Jinan 250200, China}}$
$\newcommand{\TGU}{\affiliation{Center for Astronomy and Space Sciences, China Three Gorges University, Yichang 443002, China}}$
$\newcommand{\SHAO}{\affiliation{Shanghai Astronomical Observatory, 80 Nandan Road, Shanghai 200030, China}}$
$\newcommand{\THU}{\affiliation{Department of Astronomy, Tsinghua University, Beijing 100084, China}}$</div>



<div id="title">

# Identifying the Galactic Substructures in 5D Space Using All-sky RR Lyrae Stars in Gaia DR3

</div>
<div id="comments">

[![arXiv](https://img.shields.io/badge/arXiv-2411.13122-b31b1b.svg)](https://arxiv.org/abs/2411.13122)<mark>Appeared on: 2024-11-21</mark> -  _23 pages, 19 figures, 4 tables, accepted for publication in ApJ, version before language edition_

</div>
<div id="authors">

S. Sun, et al. -- incl., <mark>R. Zhang</mark>, <mark>H.-W. Rix</mark>

</div>
<div id="abstract">

**Abstract:** Motivated by the vast gap between photometric and spectroscopic data volumes, there is great potential in using 5D kinematic information to identify and study substructures of the Milky Way. We identify substructures in the Galactic halo using 46,575 RR Lyrae stars (RRLs) from Gaia DR3 with the photometric metallicities and distances newly estimated by [Li, et. al (2023)]() . Assuming a Gaussian prior distribution of radial velocity, we calculate the orbital distribution characterized by the integrals of motion for each RRL based on its 3D positions, proper motions and corresponding errors, and then apply the friends-of-friends algorithm to identify groups moving along similar orbits. We have identified several known substructures, including Sagittarius (Sgr) Stream, Hercules-Aquila Cloud (HAC), Virgo Overdensity (VOD), Gaia-Enceladus-Sausage (GES), Orphan-Chenab stream, Cetus-Palca, Helmi Streams, Sequoia, Wukong and Large Magellanic Cloud (LMC) leading arm, along with 18 unknown groups. Our findings indicate that HAC and VOD have kinematic and chemical properties remarkably similar to GES, with most HAC and VOD members exhibiting eccentricity as high as GES, suggesting that they may share a common origin with GES. The ability to identify the low mass and spatially dispersed substructures further demonstrates the potential of our method, which breaks the limit of spectroscopic survey and is competent to probe the substructures in the whole Galaxy. Finally, we have also identified 18 unknown groups with good spatial clustering and proper motion consistency, suggesting more excavation of Milky Way substructures in the future with only 5D data.

</div>

<div id="div_fig1">

<img src="tmp_2411.13122/./Fig_FeH_Helmi_Sequoia_Wukong.png" alt="Fig5" width="100%"/>

**Figure 5. -** The metallicity distributions of the Helmi streams (top panel), Sequoia (middle panel) and Wukong (bottom panel), shown with magenta, lime and blue lines, respectively. The black lines in all panels represent the total sample. The vertical dashed line in the bottom panel represents [Fe/H]=$-$1.45. (*fig:Helmi_Sequoia_Wukong_FeH*)

</div>
<div id="div_fig2">

<img src="tmp_2411.13122/./Fig_allknown_lbXZ_overlap.png" alt="Fig8" width="100%"/>

**Figure 8. -** The spatial distributions of the all known substructures identified by our RRL sample in the $(l,b)$ and $(X,Z)$ spaces. These substructures are shown in two columns of panels for clarity. The numbers in parentheses are the number of candidates for each substructure. The black dashed areas in the upper left panel represent the regions of HAC and VOD in Table \ref{tab:diffuse_sub}. The black stars and black dots in the bottom two panels present the position of the Sun and the Galactic center, respectively. The black triangle in the left two panels represent the position of LMC center. The overlap between several pairs of substructures in Table \ref{tab:overlap} is listed in the legend and can be directly observed. (*fig:all_known_lbXZ*)

</div>
<div id="div_fig3">

<img src="tmp_2411.13122/./Fig_d_FeH_HAC_VOD_GES.png" alt="Fig13" width="100%"/>

**Figure 13. -** The heliocentric distance and metallicity distributions of the total sample and the members of HAC, VOD, and GES. (*fig:d_FeH_HAC_VOD_GES*)

</div><div id="qrcode"><img src=https://api.qrserver.com/v1/create-qr-code/?size=100x100&data="https://arxiv.org/abs/2411.13122"></div>

# Create HTML index

In [10]:
from datetime import datetime, timedelta, timezone
from glob import glob
import os

# Count the exported markdown files whose timestamp falls within the last `days` days.
files = glob('_build/html/*.md')
days = 7
# Timezone-aware "now" in UTC: the file timestamps below are converted to UTC as
# well, so both sides of the subtraction refer to the same clock.
now = datetime.now(timezone.utc)
max_age = timedelta(days=days)
recent = []
for fk in files:
    # NOTE(review): st_ctime is the metadata-change time on Unix and the creation
    # time on Windows; st_mtime may be what "modified" is meant to be -- confirm.
    changed = datetime.fromtimestamp(os.stat(fk).st_ctime, tz=timezone.utc)
    if now - changed <= max_age:
        recent.append(fk)
# file names in reverse alphabetical order
res = sorted(recent, reverse=True)
npub = len(res)
print(len(res), f" publications files modified in the last {days:d} days.")
# [ print('\t', k) for k in res ];

252  publications files modified in the last 7 days.


In [11]:
import datetime
from glob import glob

def get_last_n_days(lst, days=1):
    """ Get the documents from the last n days

    :param lst: sequence of ``(file name, date)`` pairs, with dates given as
                ISO ``YYYY-MM-DD`` strings (they are compared as text)
    :param days: how many days to look back from today
    :returns: generator over the file names dated on or after the cutoff day,
              most recent first
    """
    # Computed once: the cutoff is the same for every entry (and stays
    # consistent even if the loop runs across midnight).
    cutoff = str(datetime.date.today() - datetime.timedelta(days=days))
    for fname, date in sorted(lst, key=lambda x: x[1], reverse=True):
        if date >= cutoff:
            yield fname

def extract_appearance_dates(lst_file):
    """ Read the appearance date of each exported markdown file

    Only the first line containing ``Appeared on`` is used in each file; files
    without such a line are left out of the result.

    :param lst_file: iterable of markdown file names to read
    :returns: list of ``(file name, date text)`` pairs
    """
    def get_date(line):
        # text between "Appeared on:" and the closing </mark> tag
        return line\
            .split('Appeared on:')[-1]\
            .split('</mark>')[0].strip()

    dates = []
    # iterate over the argument (not over a module-level list)
    for fname in lst_file:
        with open(fname, 'r') as f:
            for line in f:
                if "Appeared on" in line:
                    dates.append((fname, get_date(line)))
                    break
    return dates

from glob import glob
# Collect the appearance date of every exported file and keep those of the
# last `days` days; `dates`, `res` and `npub` are reused by the following cells.
lst = glob('_build/html/*md')
days = 7
dates = extract_appearance_dates(lst)
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last {days:d} days.")

7  publications in the last 7 days.


In [12]:
def create_carousel(npub=4):
    """ Generate the HTML code for a carousel with `npub` slides

    :param npub: number of slides; they are numbered from 1 to `npub` and get
                 the ids ``slide1`` ... ``slide<npub>``
    :returns: HTML text, one element per line
    """
    item_str = """    <div class="carousel-cell"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    opening = ["""  <div class="carousel" """,
               """       data-flickity='{ "autoPlay": 10000, "adaptiveHeight": true, "resize": true, "wrapAround": true, "pauseAutoPlayOnHover": true, "groupCells": 1 }' id="asyncTypeset">"""
               ]
    cells = [item_str.format(k=k) for k in range(1, npub + 1)]
    return '\n'.join(opening + cells + ["  </div>"])

def create_grid(npub=4):
    """ Generate the HTML code for a flat grid with `npub` slides

    :param npub: number of grid items; they are numbered from 1 to `npub` and
                 get the ids ``slide1`` ... ``slide<npub>``
    :returns: HTML text, one element per line
    """
    item_str = """    <div class="grid-item"> <div id="slide{k}" class="md_view">Content {k}</div> </div>"""
    items = [item_str.format(k=k) for k in range(1, npub + 1)]
    return '\n'.join(["""  <div class="grid"> """] + items + ["  </div>"])

In [13]:
# Build the 7-day archive page: one carousel slide per selected document.
carousel = create_carousel(npub)
# quoted, comma-separated file names (no folder) and matching slide ids
docs = ', '.join('"{0:s}"'.format(k.split('/')[-1]) for k in res)
slides = ', '.join(f'"slide{k}"' for k in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# fill in the template placeholders, in this order
for placeholder, content in (("{%-- carousel:s --%}", carousel),
                             ("{%-- suptitle:s --%}", "7-day archives"),
                             ("{%-- docs:s --%}", docs),
                             ("{%-- slides:s --%}", slides)):
    page = page.replace(placeholder, content)

with open("_build/html/index_7days.html", 'w') as fout:
    fout.write(page)

In [14]:
# redo for today: same page as the 7-day archive, restricted to the last day
days = 1
res = list(get_last_n_days(dates, days))
npub = len(res)
print(len(res), f" publications in the last day.")

carousel = create_carousel(npub)
# quoted, comma-separated file names (no folder) and matching slide ids
docs = ', '.join('"{0:s}"'.format(k.split('/')[-1]) for k in res)
slides = ', '.join(f'"slide{k}"' for k in range(1, npub + 1))

with open("daily_template.html", "r") as tpl:
    page = tpl.read()

# fill in the template placeholders, in this order
for placeholder, content in (("{%-- carousel:s --%}", carousel),
                             ("{%-- suptitle:s --%}", "Daily"),
                             ("{%-- docs:s --%}", docs),
                             ("{%-- slides:s --%}", slides)):
    page = page.replace(placeholder, content)

with open("_build/html/index_daily.html", 'w') as fout:
    fout.write(page)

5  publications in the last day.


In [15]:
# Create the flat grid of the last N papers (fixed number regardless of dates)
from itertools import islice 

max_pub = 6  # upper limit on the number of papers shown in the grid
# most recent appearance dates first, limited to `max_pub` entries
res = [k[0] for k in (islice(reversed(sorted(dates, key=lambda x: x[1])), max_pub))]
# Use the number of papers actually selected: with fewer than `max_pub`
# documents available, a fixed count would create slides without any document.
npub = len(res)
print(len(res), f" {npub} publications selected.")

grid = create_grid(npub)
# quoted, comma-separated file names (no folder) and matching slide ids
docs = ', '.join(['"{0:s}"'.format(k.split('/')[-1]) for k in res])
slides = ', '.join([f'"slide{k}"' for k in range(1, npub + 1)])

with open("grid_template.html", "r") as tpl:
    page = tpl.read()
    page = page.replace("{%-- grid-content:s --%}", grid)\
               .replace("{%-- suptitle:s --%}",  f"Last {npub:,d} papers" )\
               .replace("{%-- docs:s --%}", docs)\
               .replace("{%-- slides:s --%}", slides)
    
# print(grid, docs, slides)
# print(page)
with open("_build/html/index_npub_grid.html", 'w') as fout:
    fout.write(page)

6  6 publications selected.
