In [1]:

import re

In [2]:

# we want to find all word occurrences appearing in the search string,
# so compile a pattern matching one or more consecutive word characters (\w+)
pattern = re.compile(r'\w+')

In [3]:

# findall scans the whole string and collects every non-overlapping occurrence of the pattern
# (here the \w+ pattern compiled in the previous cell)
# it returns the matched text as a list of plain strings rather than match objects
pattern.findall('hello there wacky world!')

['hello', 'there', 'wacky', 'world']

In [4]:

# with findall, even empty matches can be part of the result:
# \w* is allowed to match zero characters, so an empty string is reported
# wherever no word character is available (see the '' entries in the output)
pattern = re.compile(r'\w*')
pattern.findall('hello there wacky world!')

['hello', '', 'there', '', 'wacky', '', 'world', '', '']

In [5]:

# the empty matches happen because of the * quantifier, which allows 0 or more repetitions of the preceding regex
# similar (but not equivalent) behaviour can be seen with the ? quantifier, which allows 0 or 1 repetition
# note: a bare ? is greedy - it consumes a character whenever one is available, hence the
# single-letter matches in the output; the non-greedy form is ??
pattern = re.compile(r'\w?')
pattern.findall('hello there wacky world!')

['h',
 'e',
 'l',
 'l',
 'o',
 '',
 't',
 'h',
 'e',
 'r',
 'e',
 '',
 'w',
 'a',
 'c',
 'k',
 'y',
 '',
 'w',
 'o',
 'r',
 'l',
 'd',
 '',
 '']

In [6]:

# when the pattern contains more than one capturing group, findall returns a list of tuples
# this pattern matches two words separated by a single space and captures each word in its own group
# so every match yields one tuple holding the two captured words
pattern = re.compile(r'(\w+) (\w+)')
pattern.findall('hello there wacky world!')

[('hello', 'there'), ('wacky', 'world')]