Skip to content

Commit

Permalink
Refactor bibfile parsing to parse all bib files with same parser. (#262)
Browse files Browse the repository at this point in the history
  • Loading branch information
mcmtroffaes committed Aug 21, 2021
1 parent e6a5b7f commit a887422
Show file tree
Hide file tree
Showing 17 changed files with 154 additions and 84 deletions.
9 changes: 8 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
2.3.1 (in development)
2.4.0 (in development)
----------------------

* Allow specific warnings to be suppressed (see issue #255, contributed by
Expand All @@ -13,6 +13,13 @@
* Document how to use a backslash escaped space to suppress space before
footnotes (see issue #256, reported by hagenw).

* Parse all bib files together, so macros specified in one file can be used in
another file (see issue #216, reported by mforbes).
As a consequence, duplicate citation keys across bib files will
now also result in proper warnings.
The ``parse_bibfile`` and ``process_bibfile`` functions have been
replaced by ``parse_bibdata`` and ``process_bibdata`` in the API.

2.3.0 (1 June 2021)
-------------------

Expand Down
1 change: 1 addition & 0 deletions doc/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -839,6 +839,7 @@ you can use:
The complete list of warning subtypes that can be suppressed is::

bibtex.bibfile_data_error
bibtex.bibfile_error
bibtex.duplicate_citation
bibtex.duplicate_label
Expand Down
4 changes: 2 additions & 2 deletions src/sphinxcontrib/bibtex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def setup(app: Sphinx) -> Dict[str, Any]:
app.add_role("footcite", FootCiteRole())

return {
'version': '2.3.1a0',
'env_version': 7,
'version': '2.4.0a0',
'env_version': 8,
'parallel_read_safe': True,
'parallel_write_safe': True,
}
123 changes: 68 additions & 55 deletions src/sphinxcontrib/bibtex/bibfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,26 @@
.. autoclass:: BibFile
:members:
.. autoclass:: BibData
:members:
.. autofunction:: normpath_filename
.. autofunction:: parse_bibfile
.. autofunction:: parse_bibdata
.. autofunction:: process_bibfile
.. autofunction:: is_bibdata_outdated
.. autofunction:: get_bibliography_entry
.. autofunction:: process_bibdata
"""

import math
import os.path
from typing import TYPE_CHECKING, Dict, Optional, NamedTuple
from typing import TYPE_CHECKING, Dict, NamedTuple, List

from pybtex.database.input.bibtex import Parser
from pybtex.database import BibliographyData, BibliographyDataError
from sphinx.util.logging import getLogger

if TYPE_CHECKING:
from pybtex.database import BibliographyData, Entry
from sphinx.environment import BuildEnvironment


Expand All @@ -29,64 +32,74 @@

class BibFile(NamedTuple):
"""Contains information about a parsed bib file."""
mtime: float #: Modification time of file when last parsed.
data: "BibliographyData" #: Parsed data from pybtex.
mtime: float #: Modification time of file when last parsed.
keys: Dict[str, None] #: Set of keys for this bib file as ordered dict.


class BibData(NamedTuple):
    """Aggregated information about a collection of bib files."""
    #: Encoding of all bib files.
    encoding: str
    #: Maps bib filename to information about it.
    bibfiles: Dict[str, BibFile]
    #: Data parsed from all bib files.
    data: BibliographyData


def normpath_filename(env: "BuildEnvironment", filename: str) -> str:
    """Return normalised path to *filename* for the given environment *env*.

    Leading and trailing whitespace is stripped from *filename* before it is
    passed to ``env.relfn2path``. The second element of that call's result
    is then normalised with :func:`os.path.normpath`.
    """
    stripped = filename.strip()
    resolved = env.relfn2path(stripped)
    return os.path.normpath(resolved[1])


def parse_bibfile(bibfilename: str, encoding: str) -> "BibliographyData":
"""Parse *bibfilename* with given *encoding*, and return parsed data."""
parser = Parser(encoding)
logger.info("parsing bibtex file {0}... ".format(bibfilename), nonl=True)
parser.parse_file(bibfilename)
logger.info("parsed {0} entries"
.format(len(parser.data.entries)))
return parser.data


def process_bibfile(bibfiles: Dict[str, BibFile],
bibfilename: str, encoding: str) -> None:
"""Check if *bibfiles* is still up to date. If not, parse
*bibfilename* and store parsed data in *bibfiles*.
"""
def get_mtime(bibfilename: str) -> float:
try:
mtime = os.path.getmtime(bibfilename)
return os.path.getmtime(bibfilename)
except OSError:
logger.warning(
"could not open bibtex file {0}.".format(bibfilename),
type="bibtex", subtype="bibfile_error")
return
# get cache and check if it is still up to date
# if it is not up to date, parse the bibtex file
# and store it in the cache
logger.info("checking for {0} in bibtex cache... ".format(bibfilename),
nonl=True)
try:
bibfile = bibfiles[bibfilename]
except KeyError:
logger.info("not found")
bibfiles[bibfilename] = BibFile(
mtime=mtime, data=parse_bibfile(bibfilename, encoding))
else:
if mtime != bibfile.mtime:
logger.info("out of date")
bibfiles[bibfilename] = BibFile(
mtime=mtime, data=parse_bibfile(bibfilename, encoding))
else:
logger.info('up to date')
return -math.inf


def get_bibliography_entry(
bibfiles: Dict[str, BibFile], key: str) -> Optional["Entry"]:
"""Return bibliography entry from *bibfiles* for the given *key*."""
for bibfile in bibfiles.values():
try:
return bibfile.data.entries[key]
except KeyError:
pass
def parse_bibdata(bibfilenames: List[str], encoding: str) -> BibData:
    """Parse *bibfilenames* with given *encoding*, and return parsed data.

    A single parser instance is fed every file in turn, so all files
    contribute to the same parsed data. For each file, the keys recorded are
    those entries that were not yet present before that file was parsed.

    A path that is not a regular file triggers a ``bibfile_error`` warning
    and is recorded with an empty key set. A
    :class:`~pybtex.database.BibliographyDataError` raised while parsing
    triggers a ``bibfile_data_error`` warning, after which processing
    continues with whatever the parser holds at that point.
    """
    shared_parser = Parser(encoding)
    per_file: Dict[str, BibFile] = {}
    seen: Dict[str, None] = {}
    for path in bibfilenames:
        logger.info("parsing bibtex file {0}... ".format(path), nonl=True)
        added: Dict[str, None] = {}
        if os.path.isfile(path):
            try:
                shared_parser.parse_file(path)
            except BibliographyDataError as err:
                logger.warning(
                    "bibliography data error in {0}: {1}".format(path, err),
                    type="bibtex", subtype="bibfile_data_error")
            # Snapshot every key known so far, in parser order.
            current = dict.fromkeys(shared_parser.data.entries.keys())
            # The shared parser only accumulates; no earlier key may vanish.
            assert all(key in current for key in seen)
            for key in current:
                if key not in seen:
                    added[key] = None
            seen = current
            logger.info("parsed {0} entries".format(len(added)))
        else:
            logger.warning(
                "could not open bibtex file {0}.".format(path),
                type="bibtex", subtype="bibfile_error")
        per_file[path] = BibFile(mtime=get_mtime(path), keys=added)
    return BibData(
        encoding=encoding, bibfiles=per_file, data=shared_parser.data)


def is_bibdata_outdated(bibdata: BibData,
                        bibfilenames: List[str], encoding: str) -> bool:
    """Return whether *bibdata* must be re-parsed.

    The data is outdated when its encoding differs from *encoding*, when its
    filenames (in order) differ from *bibfilenames*, or when the recorded
    modification time of any file differs from the value currently reported
    by ``get_mtime``.
    """
    if bibdata.encoding != encoding:
        return True
    if list(bibdata.bibfiles) != bibfilenames:
        return True
    for filename, bibfile in bibdata.bibfiles.items():
        if bibfile.mtime != get_mtime(filename):
            return True
    return False


def process_bibdata(bibdata: BibData,
                    bibfilenames: List[str], encoding: str) -> BibData:
    """Return up-to-date parsed data for *bibfilenames*.

    If the cached *bibdata* is outdated according to ``is_bibdata_outdated``,
    all of *bibfilenames* are parsed again with *encoding* and the fresh
    result is returned. Otherwise the cached *bibdata* is returned as is.
    The cache is never modified in place, so callers must store the return
    value.
    """
    logger.info("checking bibtex cache... ", nonl=True)
    if is_bibdata_outdated(bibdata, bibfilenames, encoding):
        logger.info("out of date")
        return parse_bibdata(bibfilenames, encoding)
    # Cache hit: hand back the existing data rather than None, so the caller
    # can unconditionally assign the result.
    logger.info("up to date")
    return bibdata
4 changes: 2 additions & 2 deletions src/sphinxcontrib/bibtex/directives.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def run(self):
bibfiles = []
for bibfile in self.arguments[0].split():
normbibfile = normpath_filename(env, bibfile)
if normbibfile not in domain.bibfiles:
if normbibfile not in domain.bibdata.bibfiles:
logger.warning(
"{0} not found or not configured"
" in bibtex_bibfiles".format(bibfile),
Expand All @@ -144,7 +144,7 @@ def run(self):
else:
bibfiles.append(normbibfile)
else:
bibfiles = list(domain.bibfiles.keys())
bibfiles = list(domain.bibdata.bibfiles.keys())
for bibfile in bibfiles:
env.note_dependency(bibfile)
# generate nodes and ids
Expand Down
30 changes: 16 additions & 14 deletions src/sphinxcontrib/bibtex/domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from sphinx.util.nodes import make_refnode

from .roles import CiteRole
from .bibfile import BibFile, normpath_filename, process_bibfile
from .bibfile import normpath_filename, process_bibdata, BibData
from .nodes import raw_latex
from .style.referencing import (
BaseReferenceText, BaseReferenceStyle, format_references
Expand Down Expand Up @@ -283,9 +283,12 @@ class BibtexDomain(Domain):

name = 'cite'
label = 'BibTeX Citations'
data_version = 3
data_version = 4
initial_data = dict(
bibfiles={},
bibdata=BibData(
encoding='',
bibfiles={},
data=pybtex.database.BibliographyData()),
bibliography_header=docutils.nodes.paragraph(),
bibliographies={},
citations=[],
Expand All @@ -295,11 +298,9 @@ class BibtexDomain(Domain):
reference_style: BaseReferenceStyle

@property
def bibfiles(self) -> Dict[str, BibFile]:
"""Map each bib filename to some information about the file (including
the parsed data).
"""
return self.data['bibfiles']
def bibdata(self) -> BibData:
"""Information about the bibliography files."""
return self.data['bibdata']

@property
def bibliography_header(self) -> docutils.nodes.Element:
Expand Down Expand Up @@ -341,10 +342,11 @@ def __init__(self, env: "BuildEnvironment"):
raise ExtensionError(
"You must configure the bibtex_bibfiles setting")
# update bib file information in the cache
for bibfile in env.app.config.bibtex_bibfiles:
process_bibfile(
self.bibfiles, normpath_filename(env, "/" + bibfile),
env.app.config.bibtex_encoding)
bibfiles = [
normpath_filename(env, "/" + bibfile)
for bibfile in env.app.config.bibtex_bibfiles]
self.data['bibdata'] = process_bibdata(
self.bibdata, bibfiles, env.app.config.bibtex_encoding)
# parse bibliography header
header = getattr(env.app.config, "bibtex_bibliography_header")
if header:
Expand Down Expand Up @@ -479,8 +481,8 @@ def get_entries(
in order of appearance in the bib files.
"""
for bibfile in bibfiles:
for entry in self.bibfiles[bibfile].data.entries.values():
yield entry
for key in self.bibdata.bibfiles[bibfile].keys:
yield self.bibdata.data.entries[key]

def get_filtered_entries(
self, bibliography_key: "BibliographyKey"
Expand Down
2 changes: 1 addition & 1 deletion src/sphinxcontrib/bibtex/foot_directives.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ def run(self):
env.temp_data["bibtex_foot_bibliography"],
foot_domain.bibliography_header.deepcopy())
domain = cast("BibtexDomain", env.get_domain('cite'))
for bibfile in domain.bibfiles:
for bibfile in domain.bibdata.bibfiles:
env.note_dependency(bibfile)
return [foot_bibliography]
3 changes: 1 addition & 2 deletions src/sphinxcontrib/bibtex/foot_roles.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from sphinx.roles import XRefRole
from sphinx.util.logging import getLogger

from .bibfile import get_bibliography_entry
from .richtext import BaseReferenceText
from .style.referencing import format_references
from .transforms import node_text_transform
Expand Down Expand Up @@ -95,7 +94,7 @@ def result_nodes(self, document: "docutils.nodes.document",
references = []
domain = cast("BibtexDomain", self.env.get_domain('cite'))
for key in keys:
entry = get_bibliography_entry(domain.bibfiles, key)
entry = domain.bibdata.data.entries.get(key)
if entry is not None:
formatted_entry = style.format_entry(label='', entry=entry)
references.append(
Expand Down
3 changes: 3 additions & 0 deletions test/roots/test-bibfiles_multiple_keys/conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
extensions = ['sphinxcontrib.bibtex']
exclude_patterns = ['_build']
bibtex_bibfiles = ['test1.bib', 'test2.bib']
3 changes: 3 additions & 0 deletions test/roots/test-bibfiles_multiple_keys/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.. bibliography::
:style: plain
:all:
4 changes: 4 additions & 0 deletions test/roots/test-bibfiles_multiple_keys/test1.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@Misc{test,
author = {Mr. A},
title = {Test one},
}
4 changes: 4 additions & 0 deletions test/roots/test-bibfiles_multiple_keys/test2.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@Misc{test,
author = {Mr. B},
title = {Test two},
}
3 changes: 3 additions & 0 deletions test/roots/test-bibfiles_multiple_macros/conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
extensions = ['sphinxcontrib.bibtex']
exclude_patterns = ['_build']
bibtex_bibfiles = ['macros.bib', 'test.bib']
3 changes: 3 additions & 0 deletions test/roots/test-bibfiles_multiple_macros/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.. bibliography::
:style: plain
:all:
1 change: 1 addition & 0 deletions test/roots/test-bibfiles_multiple_macros/macros.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
@string{rmp = "Rev. Mod. Phys."}
12 changes: 12 additions & 0 deletions test/roots/test-bibfiles_multiple_macros/test.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
@article{Toussaint:2011,
author = {von Toussaint, Udo},
doi = {10.1103/RevModPhys.83.943},
issue = 3,
journal = rmp,
month = sep,
pages = {943-999},
publisher = {American Physical Society},
title = {Bayesian inference in physics},
volume = {83},
year = {2011}
}

0 comments on commit a887422

Please sign in to comment.