Skip to content

Commit

Permalink
🐛 customization.getnames: respect protected names (#334)
Browse files Browse the repository at this point in the history
  • Loading branch information
tdegeus committed Oct 12, 2022
1 parent b47405b commit a5eeffd
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 8 deletions.
79 changes: 78 additions & 1 deletion bibtexparser/customization.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,61 @@ def splitname(name, strict_mode=True):
return parts


def find_matching(
    text: str,
    opening: str,
    closing: str,
    ignore_escaped: bool = True,
) -> dict:
    r"""
    Find matching 'brackets'.

    Both ``opening`` and ``closing`` are matched literally, so regex
    metacharacters such as "(" or "[" can be passed as-is.

    :param text: The string to consider.
    :param opening: The opening bracket (e.g. "(", "[", "{").
    :param closing: The closing bracket (e.g. ")", "]", "}").
    :param ignore_escaped: Ignore escaped bracket (e.g. "\(", "\[", "\{", "\)", "\]", "\}").
    :return: Dictionary with ``{index_opening: index_closing}``
    :raises IndexError: If the brackets found in ``text`` are not balanced.
    """

    # A bracket directly preceded by a backslash counts as escaped.
    prefix = r"(?<!\\)" if ignore_escaped else ""
    # re.escape: "(" or "[" on their own are invalid regular expressions.
    opening_pattern = prefix + re.escape(opening)
    closing_pattern = prefix + re.escape(closing)

    opened = [match.start() for match in re.finditer(opening_pattern, text)]
    closed = [match.start() for match in re.finditer(closing_pattern, text)]

    if not opened and not closed:
        return {}

    if len(opened) != len(closed):
        raise IndexError(f"Unmatching {opening}...{closing} found")

    # Tag every position explicitly instead of encoding closers as negative
    # numbers: index 0 has no negative counterpart (-0 == 0).
    # The sort is stable and openers are listed first, so an opener is
    # processed before a closer reported at the same position.
    events = [(index, True) for index in opened]
    events += [(index, False) for index in closed]
    events.sort(key=lambda event: event[0])

    ret = {}
    stack = []

    for index, is_opening in events:
        if is_opening:
            stack.append(index)
        elif not stack:
            raise IndexError(
                f"No opening {opening} for closing {closing} at {index:d}"
            )
        else:
            ret[stack.pop()] = index

    # No leftover check needed: the counts are equal and every closer was
    # paired with an opener, so the stack is necessarily empty here.
    return ret


def getnames(names):
"""Convert people names as surname, firstnames
or surname, initials.
Expand All @@ -322,7 +377,29 @@ def getnames(names):
last = namesplit[0].strip()
firsts = [i.strip() for i in namesplit[1].split()]
else:
namesplit = namestring.split()
if "{" in namestring and "}" in namestring:
try:
brackets = find_matching(namestring, "{", "}")
except IndexError:
tidynames.append(namestring)
continue
namesplit = []
start = 0
i = 0
while True:
if i in brackets:
i = brackets[i]
else:
i += 1
if i >= len(namestring):
break
if namestring[i] == " ":
namesplit.append(namestring[start:i])
start = i + 1
elif i == len(namestring) - 1:
namesplit.append(namestring[start:])
else:
namesplit = namestring.split()
last = namesplit.pop()
firsts = [i.replace('.', '. ').strip() for i in namesplit]
if last in ['jnr', 'jr', 'junior']:
Expand Down
25 changes: 18 additions & 7 deletions bibtexparser/tests/test_customization.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@ def test_getnames(self):
'Jean la Tour',
'Jean le Tour',
'Mike ben Akar',
'A. {Delgado de Molina}',
r'M. Vign{\'e}',
'Tom {de Geus}',
'Tom {de \{Geus}',
'Tom \{de Geus\}',
'Tom de {G\{eus}',
'Foo B{\'a}r',
r'{G{\'{e}}rard} {Ben Arous}',
'Incorrect {{name}',
#'Jean de la Tour',
#'Johannes Diderik van der Waals',
]
Expand All @@ -35,18 +44,20 @@ def test_getnames(self):
'la Tour, Jean',
'le Tour, Jean',
'ben Akar, Mike',
'{Delgado de Molina}, A.',
r'Vign{\'e}, M.',
'{de Geus}, Tom',
'{de \{Geus}, Tom',
'Geus\}, Tom \{de',
'de {G\{eus}, Tom',
'B{\'a}r, Foo',
r'{Ben Arous}, {G{\'{e}}rard}',
'Incorrect {{name}',
#'de la Tour, Jean',
#'van der Waals, Johannes Diderik',
]
self.assertEqual(result, expected)

@unittest.skip('Bug #9')
def test_getnames_braces(self):
    # Skipped (see 'Bug #9'): feeds two names containing braces to getnames
    # and compares the output with brace-free "Last, First" strings.
    expected = ['Delgado de Molina, A.', 'Vigné, M.']
    self.assertEqual(
        getnames(['A. {Delgado de Molina}', r'M. Vign{\'e}']),
        expected,
    )

###########
# page_double_hyphen
###########
Expand Down

0 comments on commit a5eeffd

Please sign in to comment.