In [2]:
import re

# Capturing Groups

In [11]:
# A quantified capturing group only remembers its final repetition:
# the group matches '1-a' and then '2-b', and groups() reports the latter.
match = re.compile(r'(\d-\w){2,3}').match('1-a2-b')
match.groups()

('2-b',)

In [12]:
# '(ab)+' repeats twice before the 'c'; only the last 'ab' is kept in the group.
match = re.compile(r'(ab)+c').search('ababc')
match.groups()

('ab',)

In [14]:
# 'abbc' never contains 'ab' directly followed by 'c', so there is no match.
re.compile(r'(ab)+c').search('abbc') is None

True

Find `Espana` or `Espanol`:

In [15]:
# Pitfall: '|' splits the whole pattern, so this means 'Espana' OR 'ol'.
# On 'Espanol' only the trailing 'ol' is matched.
re.compile(r'Espana|ol').search('Espanol')

<_sre.SRE_Match object; span=(5, 7), match='ol'>

In [16]:
# The left alternative 'Espana' matches the whole subject.
re.compile(r'Espana|ol').search('Espana')

<_sre.SRE_Match object; span=(0, 6), match='Espana'>

In [17]:
# The right alternative is just 'ol', so a bare 'ol' is accepted too.
re.compile(r'Espana|ol').search('ol')

<_sre.SRE_Match object; span=(0, 2), match='ol'>

In [18]:
# A character class matches exactly ONE character out of a, o, l,
# so the unwanted 'Espano' is accepted.
re.search(pattern=r'Espan[aol]', string='Espano')

<_sre.SRE_Match object; span=(0, 6), match='Espano'>

In [19]:
# Inside [...] the '|' is a literal character, not alternation:
# the class is {a, |, o, l}, so 'Espano' still matches.
re.search(pattern=r'Espan[a|ol]', string='Espano')

<_sre.SRE_Match object; span=(0, 6), match='Espano'>

In [21]:
# Parentheses confine the alternation to the suffix: 'Espan' + ('a' or 'ol').
# 'Espano' has neither suffix, so the search fails.
re.compile(r'Espan(a|ol)').search('Espano') is None

True

In [22]:
# Suffix 'a' is one of the grouped alternatives.
re.search(pattern=r'Espan(a|ol)', string='Espana')

<_sre.SRE_Match object; span=(0, 6), match='Espana'>

In [23]:
# Suffix 'ol' is the other grouped alternative.
re.search(pattern=r'Espan(a|ol)', string='Espanol')

<_sre.SRE_Match object; span=(0, 7), match='Espanol'>

In [25]:
# The group is mandatory, so the bare stem 'Espan' does not match.
re.compile(r'Espan(a|ol)').search('Espan') is None

True

Extract country codes:

In [27]:
# Group 1 captures the digits before the dash; finditer yields one match
# per line of the subject, lazily.
pattern = re.compile(r'(\d+)-\w+')
it = re.finditer(pattern, '1-a\n20-baer\n34-afrc')
# Pull the first match and show its captured code.
match = next(it)
match.group(1)

'1'

In [28]:
# Advance the iterator created by the earlier finditer() cell to its next match
# and show the digits captured by group 1.
match = next(it)
match.group(1)

'20'

In [29]:
# Advance the same iterator once more; this is the last match in the subject,
# so a further next(it) would raise StopIteration.
match = next(it)
match.group(1)

'34'

# Backreferences

Find duplicated words:

In [34]:
# Find a word that is immediately repeated after a single space.
# \1 is a backreference: it must match the exact text captured by group 1.
# The \b anchors restrict the match to whole words; without them the pattern
# also fires inside longer words (e.g. it would report 'is is' in 'this is').
pattern = re.compile(r'\b(\w+) \1\b')
match = pattern.search('hello hello world')
match.groups()

('hello',)

Products and country codes:

In [43]:
# Group 1 = numeric country code, group 2 = product id.
pattern = re.compile(r'(\d+)-(\w+)')
# Swap the two captured parts on every line via numbered backreferences.
re.sub(pattern, r'\2-\1', '1-a\n20-baer\n34-afcr')

'a-1\nbaer-20\nafcr-34'

# Named Groups

In [44]:
# Two unnamed groups, addressed by position (1-based).
pattern = re.compile(r'(\w+) (\w+)')
match = re.search(pattern, 'Hello world')
match.group(1)

'Hello'

In [45]:
# Second positional group of the match produced in the previous cell.
match.group(2)

'world'

Named groups:

In [4]:
# (?P<name>...) gives each group a name that can be used instead of its index.
pattern = re.compile(r'(?P<first>\w+) (?P<second>\w+)')
match = re.search(pattern, 'Hello world')
match.group('first')

'Hello'

In [5]:
# Look up the other named group on the match produced in the previous cell.
match.group('second')

'world'

Backreferences:

In [6]:
# Same swap as with \2-\1, but the groups are referenced by name.
pattern = re.compile(r'(?P<country>\d+)-(?P<id>\w+)')
# In a replacement string a named group is written \g<name>.
re.sub(pattern, r'\g<id>-\g<country>', '1-a\n20-baer\n34-afcr')

'a-1\nbaer-20\nafcr-34'

Named groups inside pattern:

In [7]:
# Find a word that is immediately repeated after a single space.
# (?P=word) is a backreference to the named group inside the pattern itself.
# The \b anchors restrict the match to whole words; without them the pattern
# also fires inside longer words (e.g. it would report 'is is' in 'this is').
pattern = re.compile(r'\b(?P<word>\w+) (?P=word)\b')
match = pattern.search('hello hello world')
match.groups()

('hello',)

# Non-capturing Groups

In [8]:
# (?:...) groups the alternation without creating a capture.
re.compile(r'Espan(?:a|ol)').search('Espanol')

<_sre.SRE_Match object; span=(0, 7), match='Espanol'>

In [9]:
# The match succeeds, but no group was captured, so groups() is empty.
re.compile(r'Espan(?:a|ol)').search('Espanol').groups()

()

# Atomic Groups

In [10]:
import regex

In [12]:
# The subject contains no '-', so the pattern cannot match.
# With an ordinary group, \w+ first consumes the whole string and the engine then
# backtracks through shorter candidates before it finally gives up.
data = 'aaaaabbbbbaaaaccccccdddddaaa'
regex.match(r'(\w+)-\d', data) is None

True

Once it fails, it stops instead of trying again (no backtracking into the group):

In [13]:
# (?>...) is an atomic group (supported by the third-party `regex` module used here):
# once \w+ has matched, the engine does not backtrack into it, so the
# failure is reported without retrying shorter candidates. The result is the same (None).
regex.match(r'(?>\w+)-\d', data) is None

True

# Special Cases with Groups

## Flags per Group

In [15]:
# (?u) is the inline spelling of the re.U / re.UNICODE flag.
# For str patterns in Python 3 Unicode matching is already the default.
re.compile(r'(?u)\w+').findall('н')

['н']

In [16]:
# Same flag, passed as an argument instead of inline in the pattern.
re.findall(pattern=r'\w+', string='н', flags=re.U)

['н']

## yes-pattern|no-pattern

Matches the yes-pattern only if the referenced group matched earlier; otherwise the optional no-pattern is tried:

In [18]:
# (?(1)...) is a conditional: the '-dd' suffix is required only when the
# optional 'dd-' prefix (group 1) took part in the match.
pattern = re.compile(r'(\d\d-)?(\w{3,4})(?(1)(-\d\d))')
re.match(pattern, '34-erte-22')

<_sre.SRE_Match object; span=(0, 10), match='34-erte-22'>

In [19]:
# No numeric prefix, so group 1 did not participate and no suffix is required.
pattern.match('erte')

<_sre.SRE_Match object; span=(0, 4), match='erte'>

In [21]:
# The prefix matched (group 1 is set), so the '-dd' suffix becomes mandatory;
# it is absent here and the match fails.
pattern.match('34-erte') is None

True

In [22]:
# Conditional with both branches: after the dash, expect two digits if the
# numeric prefix (group 1) matched, otherwise 3-4 lowercase letters.
# The trailing $ forces the whole subject to be consumed.
pattern = re.compile(r'(\d\d-)?(\w{3,4})-(?(1)(\d\d)|[a-z]{3,4})$')
re.match(pattern, '34-erte-22')

<_sre.SRE_Match object; span=(0, 10), match='34-erte-22'>

In [23]:
# Prefix present but nothing after the second dash position: the yes-branch
# (two digits) cannot match, so the result is None.
pattern.match('34-erte') is None

True

In [24]:
# No numeric prefix, so the no-branch applies: 3-4 lowercase letters after the dash.
pattern.match('erte-abcd')

<_sre.SRE_Match object; span=(0, 9), match='erte-abcd'>

## Overlapping Groups

In [25]:
# With one capturing group, findall returns the group's text, not the whole match.
# The group is repeated, so only its last iteration is kept for each match
# ('aba' -> 'a', then the final 'a' -> 'a').
re.compile(r'(a|b)+').findall('abaca')

['a', 'a']

In [26]:
# Capture the whole run instead: the repetition is inside a non-capturing
# group and the outer group wraps the complete sequence.
re.compile(r'((?:a|b)+)').findall('abbaca')

['abba', 'a']

In [27]:
# Without a quantifier every single 'a' or 'b' is its own match.
re.compile(r'(a|b)').findall('abaca')

['a', 'b', 'a', 'a']