Regular expressions inside Python are made available through the re module:

In [5]:
import re

In [17]:
# Sample text: one title + surname per line.
my_string = """
Mr Simpson
Mrs Simpson
Mr. Brown
Ms Smith
Mr. T
"""

# Run two searches over the same text:
#   1. only "Mr", optionally followed by a period
#   2. any of "Mr", "Ms" or "Mrs" (alternatives inside a group)
for index, regex in enumerate((r'Mr\.?\s\w+', r'(Mr|Ms|Mrs)\.?\s\w+')):
    if index:
        print()  # blank line between the two result lists
    pattern = re.compile(regex)
    matches = pattern.finditer(my_string)
    for match in matches:
        print(match)

<re.Match object; span=(1, 11), match='Mr Simpson'>
<re.Match object; span=(24, 33), match='Mr. Brown'>
<re.Match object; span=(43, 48), match='Mr. T'>

<re.Match object; span=(1, 11), match='Mr Simpson'>
<re.Match object; span=(12, 23), match='Mrs Simpson'>
<re.Match object; span=(24, 33), match='Mr. Brown'>
<re.Match object; span=(34, 42), match='Ms Smith'>
<re.Match object; span=(43, 48), match='Mr. T'>


### Grouping
Parentheses `( )` group part of a pattern. Each group captures the substring it matched, which can be read back with `match.group(n)`.

### Modifying strings
- split(): Split the string into a list, splitting it wherever the RE matches
- sub(): Find all substrings where the RE matches, and replace them with a different string

In [19]:
# split(): cut the string at every occurrence of the literal "123"
# and return the pieces in between as a list.
pattern = re.compile(r'123')
my_string = 'abc123ABCDEF123abc'
matches = pattern.split(my_string)
print(matches)

# sub(): replace every occurrence of "world" with "planet".
pattern = re.compile(r'world')
my_string = "hello world, you are the best world"
subbed_string = pattern.sub(r'planet', my_string)
print(subbed_string)

['abc', 'ABCDEF', 'abc']
hello planet, you are the best planet


In [20]:
urls = """
http://python-engineer.com
https://www.python-engineer.org
http://www.pyeng.net
"""
# Groups: 1 = optional "www.", 2 = domain name, 3 = top-level domain.
# The domain name is matched with a character class inside ONE group.
# A repeated group such as (\w|-)+ would keep only its last repetition
# (a single character), so group 2 could not be used as a back reference.
pattern = re.compile(r'https?://(www\.)?([a-zA-Z-]+)(\.\w+)')
matches = pattern.finditer(urls)
for match in matches:
    print(match.group())   # group 0: the entire match
    print(match.group(3))  # the top-level domain, e.g. ".com"

# Substitute using back references: replace each URL by
# its domain name (group 2) + top-level domain (group 3).
subbed_urls = pattern.sub(r'\2\3', urls)
print(subbed_urls)

http://python-engineer.com
.com
https://www.python-engineer.org
.org
http://www.pyeng.net
.net

python-engineer.com
python-engineer.org
pyeng.net



In [21]:
# re.IGNORECASE (alias re.I): letters match regardless of case, so the
# lowercase pattern finds "World". Without the flag there is no match.
my_string = "Hello World"
pattern = re.compile(r'world', flags=re.I)
matches = pattern.finditer(my_string)
for match in matches:
    print(match)

# re.MULTILINE (alias re.M): '^' also matches right after each newline,
# i.e. at the start of every line. Without the flag there is no match
# here, because the string itself starts with a newline.
my_string = '''
hello
cool
Hello
'''
pattern = re.compile(r'^[a-z]', flags=re.M)
matches = pattern.finditer(my_string)
for match in matches:
    print(match)

<re.Match object; span=(6, 11), match='World'>
<re.Match object; span=(1, 2), match='h'>
<re.Match object; span=(7, 8), match='c'>


### Further reading
- https://docs.python.org/3/howto/regex.html
- https://docs.python.org/3/library/re.html
- https://developers.google.com/edu/python/regular-expressions