# Regex
https://docs.python.org/3/howto/regex.html  
https://docs.python.org/3/library/re.html

## Tutorial

In [20]:
import re

### Create a Regex Pattern

In [21]:
p = re.compile('[a-z]+')
p

re.compile(r'[a-z]+', re.UNICODE)

### Match

In [22]:
print(p.match(""))

None


In [23]:
m = p.match('tempo')
m

<re.Match object; span=(0, 5), match='tempo'>

In [25]:
print(m.group())

tempo


In [26]:
m.start(), m.end()

(0, 5)

In [27]:
m.span()

(0, 5)

### Search string

In [28]:
m = p.search('::: message')
print(m)

<re.Match object; span=(4, 11), match='message'>


In [29]:
m.group()

'message'

In [30]:
m.span()

(4, 11)

### Common Pattern

In [36]:
# Typical usage: compile once, attempt the match, then branch on the result.
# match() returns None when the pattern does not match at the start of the string.
p = re.compile(r'\d+')
m = p.match('123')
if m is None:
    print('No Match')
else:
    print('Match found:', m.group())

Match found: 123


### Find all

In [37]:
p = re.compile(r'\d+')
p.findall('12 drummers drumming, 11 pipers piping, 10 lords a-leaping')

['12', '11', '10']

In [38]:
# finditer() returns an iterator of match objects, one per non-overlapping match;
# span() gives the (start, end) indices of each match in the searched string.
iterator = p.finditer('12 drummers drumming, 11 ... 10 ...')
for match in iterator:
    print(match.span())

(0, 2)
(22, 24)
(29, 31)


### Shortcuts

In [41]:
re.match(r'From\s\w+', 'From amk Thu May 14 19:12:10 1998').group()

'From amk'

## Advanced

### Extracting attributes

In [None]:
# Sample text holding three `...config({ ... }),` entries, each with a `name`
# attribute ('a', 'b', 'c'); it is the string searched by the extraction regex.
code = """
...config({
    name: 'a',
    other: '123',
}),
...config({
    name: 'b',
    array: [],
    other: '123',
}),
...config({
    tag: ['letter'],
    name: 'c',
    array: [
        '1',
        '2',
    ],
}),
"""

In [49]:
def regex_for(name):
    return re.compile(r"""\.{3}config\(\{\n
      (?P<attrs>
          (\s+.+\n)*
          \s+name:\s\'""" + name + r"""\',\n
          (\s+.+\n)*
      )
    \}\),""", re.X | re.M)

# Look up the entry whose name is 'b' and print its captured attribute lines.
p = regex_for('b')
m = p.search(code)
print(m.group('attrs'))

    name: 'b',
    array: [],
    other: '123',



### Lookahead >

In [50]:
s = "hello hello word"

In [51]:
re.findall(r"hello", s)

['hello', 'hello']

In [52]:
# Positive lookahead: (?=...) requires the following text to match, without consuming it.
re.findall(r"hello(?= hello)", s) # find "hello" only when it is followed by " hello"
# Only the first "hello" in s qualifies, so the result holds a single element.

['hello']

In [54]:
# Negative lookahead: (?!...) requires that the following text does NOT match.
re.findall(r"hello(?! hello)", s) # find "hello" only when it is not followed by " hello"
# Only the second "hello" in s qualifies, so the result holds a single element.

['hello']

### Lookbehind <

In [56]:
m = re.search('(?<=abc)def', 'abcdef')
m.group(0)

'def'

In [57]:
m = re.search('(?<=abc)def', 'aaadef')
print(m)

None
