In [2]:
from pprint import pformat
import re

Regex to match a line that consists only of the words "hello", "world", and "abc", separated by commas (",").
good examples:
hello,world,abc
abc,world,hello,hello
abc

bad examples:
hello world,abc
helloworld
xyz


In [9]:
# Check which sample lines are a comma-separated list made up solely of the
# words "hello", "world" and "abc" (one or more items).
test_lines = [
    "hello,world,abc",
    "abc,world,hello,hello",
    "abc",
    "hello world,abc",
    "helloworld",
    "xyz",
]

# Grammar: one allowed word, then zero or more ",word" items.
# Commas are required *between* items only.  Treating them as optional
# decoration around each word -- e.g. (\b|,)word(\b|,) -- would also accept
# malformed input such as ",hello", "hello," and "hello,,world".
WORD_LIST_RE = re.compile(r"(?:hello|world|abc)(?:,(?:hello|world|abc))*")

for line in test_lines:
    # fullmatch anchors both ends of the string; unlike match() with a
    # trailing "$" it does not tolerate a final newline.
    if m := WORD_LIST_RE.fullmatch(line):
        print(f"Matched: {line} m.group={pformat(m.group())}")
    else:
        print(f"No match: {line}")

Matched: hello,world,abc m.group='hello,world,abc'
Matched: abc,world,hello,hello m.group='abc,world,hello,hello'
Matched: abc m.group='abc'
No match: hello world,abc
No match: helloworld
No match: xyz


In [33]:
# Validate "print=..." and "debug[_before|_after]=..." locators whose value
# is a comma-separated list of known keys.
lines = [
    'debug=url',
    'debug_after=url,title',
    'debug_before=bad',
    'debug_after=url,title2',
    'print=element,domstack,iframestack',
]

# Keys that may appear in the comma-separated value.
DEBUG_KEY = r"(?:url|title|timeouts|waits|tag|xpath|domstack|iframestack|element)"

# group 1: the directive ("print", "debug", "debug_before" or "debug_after")
# group 2: the whole comma-separated key list
# The suffix is optional ("?"), not repeatable: "*" would also accept stacked
# suffixes such as "debug_before_after".  The value is spelled out as
# key(,key)* so that a leading comma ("debug=,url") is rejected.
DEBUG_LOCATOR_RE = re.compile(
    rf"(print|debug(?:_before|_after)?)=({DEBUG_KEY}(?:,{DEBUG_KEY})*)"
)

for locator in lines:
    # fullmatch anchors both ends; unlike match() with a trailing "$" it
    # does not tolerate a final newline.
    if m := DEBUG_LOCATOR_RE.fullmatch(locator):
        print(f"Matched: {locator}")
        for e in m.groups():
            print(f"m.group={pformat(e)}")
    else:
        print(f"No match: {locator}")
    print()

Matched: debug=url
m.group='debug'
m.group='url'

Matched: debug_after=url,title
m.group='debug_after'
m.group='url,title'

No match: debug_before=bad

No match: debug_after=url,title2

Matched: print=element,domstack,iframestack
m.group='print'
m.group='element,domstack,iframestack'



In [12]:
import shlex
from pprint import pformat

# Feed a few locator-style lines through shlex.split to see how it tokenizes
# them: a bare token, a mix of unquoted/double-quoted/single-quoted tokens,
# and lines containing "#" text.
lines = [
    '''debug=url''',
    '''debug_after=url,title "css=#foo" 'xpath=//div[@id="foo"]' ''',
    '''xpath=//div[@id="foo"] # this is a comment''',
    '''# this is a comment''',
]

for line in lines:
    steps = shlex.split(line)
    # A single print emits the report line plus the blank separator line.
    print(f"line={line}, steps={pformat(steps)}", end="\n\n")


line=debug=url, steps=['debug=url']

line=debug_after=url,title "css=#foo" 'xpath=//div[@id="foo"]' , steps=['debug_after=url,title', 'css=#foo', 'xpath=//div[@id="foo"]']

line=xpath=//div[@id="foo"] # this is a comment, steps=['xpath=//div[@id=foo]', '#', 'this', 'is', 'a', 'comment']

line=# this is a comment, steps=['#', 'this', 'is', 'a', 'comment']



In [18]:
# Parentheses let a single expression continue across several physical lines.
a = (
    'abc'
    + 'def'
)
print(f"a={a}")

a=abcdef


In [21]:
# Try the "dump" locator pattern: an optional "_<scope>" suffix, an optional
# "-clean" suffix, then "=" followed by a non-empty value.
lines = [
    'dump=/a/b',
    'dump_element=/a/b',
    'dump_element-clean=/a/b',
    'dump-clean=/a/b',
]

for locator in lines:
    m = re.match(r"dump(?:_(element|shadow|iframe|page|all))?(?:-(clean))?=(.+)", locator)
    if m is None:
        print(f"No match: {locator}")
    else:
        print(f"Matched: {locator}")
        # Groups in order: scope (or None), "clean" (or None), value after "=".
        for captured in m.groups():
            print(f"m.group={pformat(captured)}")
    print()

Matched: dump=/a/b
m.group=None
m.group=None
m.group='/a/b'

Matched: dump_element=/a/b
m.group='element'
m.group=None
m.group='/a/b'

Matched: dump_element-clean=/a/b
m.group='element'
m.group='clean'
m.group='/a/b'

Matched: dump-clean=/a/b
m.group=None
m.group='clean'
m.group='/a/b'



In [1]:
# Truncation check: slicing to six characters shortens long strings and
# leaves strings that are already shorter unchanged.
lines = ['0123', '0123456789', '0123456789abcdef']

for line in lines:
    # A single print emits the truncated text plus the blank separator line.
    print(f"line={line[:6]}", end="\n\n")

line=0123

line=012345

line=012345

