In [89]:
# Fixture: a single entry containing one sample.
# location: the sample is the first item under the entry header;
# timestamp: none;
# note: follows the sample quote, starting on the same line;
# lines: the sample takes 1 line, the note wraps onto 2;
# song info: normal (one "-"-prefixed line of ";"-separated fields)
start_source = """
1. Blade Runner [738 points] (87 groups, 116 songs, 221 samples)
  "Move on, move on." (Note: Police robot addressing crowd gathering after
   a shootout)
    - Age of Chance; This is Crush Collision; One Thousand Years of Trouble
"""

# Fixture: a single entry containing two samples.
# location: one sample at the start of the entry + one after it
#   (the original shorthand read "star + after start"; presumably "start");
# timestamp: a single time on the first sample + several on the second;
# note: none on the first sample; on the second sample there is one after
#   the timestamps, one on its own lines, and one after the song-info line;
# lines: the second sample spans 2 lines and every note spans 2 lines;
#   the second sample also contains inner single quotes;
# song info: normal
second_source = """
1. Blade Runner [738 points] (87 groups, 116 songs, 221 samples)
  "Move on, move on." @ 3:05
    - Age of Chance; This is Crush Collision; One Thousand Years of Trouble

  "'Move on, '
   'move on.'" @ 1:39, 2:10, 3:57, 5:12 (Note: Police robot addressing crowd gathering after
   a shootout)
  (Note: Police robot addressing crowd gathering after
   a shootout)
    - Age of Chance; This is Crush Collision; One Thousand Years of Trouble
    (Note: Police robot addressing crowd gathering after
    a shootout)
"""

# Fixture: two numbered entries in one listing. Entry 1 has the same body as
# start_source; entry 2 has the same body as second_source, renumbered "2.".
source_variation = """
1. Blade Runner [738 points] (87 groups, 116 songs, 221 samples)
  "Move on, move on." (Note: Police robot addressing crowd gathering after
   a shootout)
    - Age of Chance; This is Crush Collision; One Thousand Years of Trouble

2. Blade Runner [738 points] (87 groups, 116 songs, 221 samples)
  "Move on, move on." @ 3:05
    - Age of Chance; This is Crush Collision; One Thousand Years of Trouble

  "'Move on, '
   'move on.'" @ 1:39, 2:10, 3:57, 5:12 (Note: Police robot addressing crowd gathering after
   a shootout)
  (Note: Police robot addressing crowd gathering after
   a shootout)
    - Age of Chance; This is Crush Collision; One Thousand Years of Trouble
    (Note: Police robot addressing crowd gathering after
    a shootout)
"""

from lark import Lark, Transformer
from rich import print as rprint

# Lark grammar for a single delimited string: the whole input is either quoted
# text or text wrapped in (), {} or [].
# - `quoted` is one regex; the \1 back-reference requires the closing quote to
#   be the same character as the opening one, so the other quote character
#   (or further copies of the same one) may appear in between.
# - Each STR_* terminal accepts STRING characters plus the two *other* bracket
#   kinds, e.g. STR_BRACK allows "(" and ")" but not "[" or "]".
# - STRING is a single character: a word character, a space, or one of
#   . , ! ? : -   Quote characters are not in that class, so they are not
#   accepted inside bracketed text.
grammar = r"""
start: contained_string

contained_string: quoted | parenthesis | braces | brackets

quoted      : /([\"\'])(.*)(\1)/
parenthesis : "(" STR_PAREN ")"
braces      : "{" STR_BRACE "}"
brackets    : "[" STR_BRACK "]"

STR_PAREN: (STRING | "[" | "]" | "{" | "}")+
STR_BRACE: (STRING | "[" | "]" | "(" | ")")+
STR_BRACK: (STRING | "(" | ")" | "{" | "}")+
STRING: /[\w.,!?:\- ]/

"""
# CONT_STRING: LETTER+
# %import common.LETTER

# Each case pairs a delimited input with the text expected once its outer
# delimiters have been removed. Keeping the two side by side makes it obvious
# which expectation belongs to which input.
_cases = (
    ('"sample"', "sample"),
    ("'sa'mple'", "sa'mple"),
    ("""'samp"le'""", """samp"le"""),
    ("[sample]", "sample"),
    ("(sample)", "sample"),
    ("{sample}", "sample"),
    ("[sa(mple]", "sa(mple"),
    ("[sam(pl)e]", "sam(pl)e"),
)

# Inputs handed to the parser, in case order.
samples = [raw for raw, _ in _cases]
# Expected transformer results, index-aligned with `samples`.
expected = [inner for _, inner in _cases]

# Build the parser from the grammar string; no options are passed, so Lark's
# default parser and lexer settings apply.
parser = Lark(grammar=grammar)


class CSTransformer(Transformer):
    """Reduce a ``contained_string`` parse tree to the text inside its delimiters.

    The transformer works bottom-up: the delimiter-specific rule yields the
    inner text as a plain string, and ``contained_string`` / ``start`` simply
    pass that string upwards, so ``transform()`` returns a ``str``.
    """

    def __default__(self, data, children, meta):
        """Handle rules without a dedicated method (parenthesis, braces, brackets).

        The literal delimiters in those rules are anonymous tokens, which Lark
        filters out of the tree by default, so the only child left is the
        STR_* token holding the inner text.
        """
        return children[0].value

    def quoted(self, nodes):
        """Return the quoted text without its single pair of enclosing quotes.

        The ``quoted`` regex matches the delimiters together with the content,
        so the token always starts and ends with the same quote character.
        Exactly one character is dropped from each end; using ``str.strip``
        here would also eat quote characters that belong to the content
        (e.g. ``'"x"'`` must yield ``"x"``, not ``x``).
        """
        return nodes[0][1:-1]

    def contained_string(self, nodes):
        """Pass through the already-unwrapped text from the matched alternative."""
        return nodes[0]

    def start(self, nodes):
        """Pass through the result of ``contained_string`` as the final value."""
        return nodes[0]


transformer = CSTransformer()


# Run every sample through the parser and the transformer and report two kinds
# of problems: inputs that fail to parse/transform, and results that differ
# from the expected text.
for ind, (sample, expect) in enumerate(zip(samples, expected)):
    try:
        parsed = parser.parse(sample)
        transformed = transformer.transform(parsed)
        par_val = transformed
    except Exception as e:
        # Deliberately broad: this is a diagnostic harness that should report
        # any failure for a sample and carry on with the next one.
        print(f"-----FAILED parsing {ind}------")
        print(str(e))
        # There is no result for this sample, so skip the comparison instead
        # of checking an undefined or stale value from a previous iteration.
        continue
    # Explicit comparison rather than `assert`, which is stripped under -O.
    if par_val != expect:
        print(f"-----FAILED expectation {ind}------")
        print(par_val)


In [85]:
# Parse the first sample on its own and print both the raw parse tree and the
# value the transformer reduces it to.
parsed = parser.parse(samples[0])
print(parsed)
transformed = transformer.transform(parsed)
print(transformed)


Tree(Token('RULE', 'start'), [Tree(Token('RULE', 'contained_string'), [Tree(Token('RULE', 'quoted'), [Token('__ANON_0', '"sample"')])])])
sample
