From 54f6c212395f244fef6519c864da610bdb2533d9 Mon Sep 17 00:00:00 2001 From: Petr Bodnar Date: Sat, 9 Dec 2023 20:23:02 +0100 Subject: [PATCH] feat: allow just 1 dash in Table delimiter cells (#131) In or around [#371](github/markup#371), GFM relaxed constraints on the minimal number of required dashes in the table delimiter row. So we relax the constraints as well. To "compensate" for this change, we check the format of the delimiter row more thoroughly (although we still do *not* check the number of cells in the delimiter row). --- mistletoe/block_token.py | 22 +++++++++++++--------- test/test_block_token.py | 21 ++++++++++++++------- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/mistletoe/block_token.py b/mistletoe/block_token.py index a86ac51..8be1f1f 100644 --- a/mistletoe/block_token.py +++ b/mistletoe/block_token.py @@ -676,7 +676,7 @@ def read(cls, lines, prev_marker=None): class Table(BlockToken): """ - Table token. + Table token. See its GFM definition at <https://github.github.com/gfm/#tables-extension->. This is a container block token. Its children are TableRow tokens. Class attributes: @@ -690,20 +690,24 @@ class Table(BlockToken): repr_attributes = BlockToken.repr_attributes + ("column_align",) interrupt_paragraph = True + _column_align = r':?-+:?' 
+ column_align_pattern = re.compile(_column_align) + delimiter_row_pattern = re.compile(r'\s*\|?\s*' + _column_align + '\s*(\|\s*' + _column_align + '\s*)*\|?\s*') + def __init__(self, match): lines, start_line = match - if '---' in lines[1]: + # note: the following condition is currently always true, because read() guarantees the presence of the delimiter row + if '-' in lines[1]: self.column_align = [self.parse_align(column) for column in self.split_delimiter(lines[1])] self.header = TableRow(lines[0], self.column_align, start_line) self.children = [TableRow(line, self.column_align, start_line + offset) for offset, line in enumerate(lines[2:], start=2)] else: - # note: not reachable, because read() guarantees the presence of three dashes self.column_align = [None] self.children = [TableRow(line, line_number=start_line + offset) for offset, line in enumerate(lines)] - @staticmethod - def split_delimiter(delimiter): + @classmethod + def split_delimiter(cls, delimiter_row): """ Helper function; returns a list of align options. @@ -713,7 +717,7 @@ def split_delimiter(delimiter): Returns: a list of align options (None, 0 or 1). 
""" - return re.findall(r':?---+:?', delimiter) + return cls.column_align_pattern.findall(delimiter_row) @staticmethod def parse_align(column): @@ -740,14 +744,14 @@ def check_interrupts_paragraph(cls, lines): lines.set_pos(anchor) return result - @staticmethod - def read(lines): + @classmethod + def read(cls, lines): anchor = lines.get_pos() line_buffer = [next(lines)] start_line = lines.line_number() while lines.peek() is not None and '|' in lines.peek(): line_buffer.append(next(lines)) - if len(line_buffer) < 2 or '---' not in line_buffer[1]: + if len(line_buffer) < 2 or not cls.delimiter_row_pattern.fullmatch(line_buffer[1]): lines.set_pos(anchor) return None return line_buffer, start_line diff --git a/test/test_block_token.py b/test/test_block_token.py index 639ddfd..3758881 100644 --- a/test/test_block_token.py +++ b/test/test_block_token.py @@ -1,6 +1,8 @@ import unittest from unittest.mock import call, patch +from parameterized import parameterized + from mistletoe import block_token, block_tokenizer, span_token @@ -375,17 +377,22 @@ def test_parse_align(self): self.assertEqual(test_func('------:'), 1) def test_parse_delimiter(self): - test_func = block_token.Table.split_delimiter - self.assertEqual(list(test_func('| :--- | :---: | ---:|\n')), - [':---', ':---:', '---:']) - - def test_match(self): + test_func = lambda s : block_token.Table.split_delimiter(s) + self.assertEqual(list(test_func('|-| :--- | :---: | ---:|\n')), + ['-', ':---', ':---:', '---:']) + + @parameterized.expand([ + ('| --- | --- | --- |\n'), + ('| - | - | - |\n'), + ('|-|-|-- \n'), + ]) + def test_match(self, delimiter_line): lines = ['| header 1 | header 2 | header 3 |\n', - '| --- | --- | --- |\n', + delimiter_line, '| cell 1 | cell 2 | cell 3 |\n', '| more 1 | more 2 | more 3 |\n'] with patch('mistletoe.block_token.TableRow') as mock: - token = next(iter(block_token.tokenize(lines))) + token, = block_token.tokenize(lines) self.assertIsInstance(token, block_token.Table) 
self.assertTrue(hasattr(token, 'header')) self.assertEqual(token.column_align, [None, None, None])