# re — Regular expression operations

https://docs.python.org/3/library/re.html

In [1]:
import re

In [21]:
# Sample address used by the case-insensitive match example.
s = 'USER@example.com'

In [22]:
# Compile a case-insensitive pattern; re.DEBUG dumps the parsed pattern
# at compile time. The match is then attempted at the start of `s`.
prog = re.compile(r'^User', flags=re.IGNORECASE | re.DEBUG)
prog.match(s)

AT AT_BEGINNING
LITERAL 85
LITERAL 115
LITERAL 101
LITERAL 114


<_sre.SRE_Match object; span=(0, 4), match='USER'>

In [35]:
# Print each regex flag together with the binary form of its integer value.
for flag in re.RegexFlag:
    print(flag, bin(flag.value))

RegexFlag.ASCII 0b100000000
RegexFlag.IGNORECASE 0b10
RegexFlag.LOCALE 0b100
RegexFlag.UNICODE 0b100000
RegexFlag.MULTILINE 0b1000
RegexFlag.DOTALL 0b10000
RegexFlag.VERBOSE 0b1000000
RegexFlag.TEMPLATE 0b1
RegexFlag.DEBUG 0b10000000


In [36]:
# Sample text shared by the search / match / findall / sub examples.
text = 'foo bar foo bar foo'

In [38]:
# find the first occurrence of the pattern anywhere in the string
re.search('bar', text)

<_sre.SRE_Match object; span=(4, 7), match='bar'>

In [40]:
# check the beginning of the string
re.match('foo', text)

<_sre.SRE_Match object; span=(0, 3), match='foo'>

In [50]:
# full-string check: the pattern has to span the entire string
# (raw string, so \s reaches the regex engine instead of being an
# invalid str escape sequence)
re.fullmatch(r'[fbaro\s]*', text)

<_sre.SRE_Match object; span=(0, 19), match='foo bar foo bar foo'>

In [52]:
# Split on runs of non-word characters; the trailing '.' leaves an empty
# string as the last item.
re.split(r'\W+', 'Words, words, words.')

['Words', 'words', 'words', '']

In [53]:
# With a capturing group in the pattern, the separators are kept in the
# result list as well.
re.split(r'(\W+)', 'Words, words, words.')

['Words', ', ', 'words', ', ', 'words', '.', '']

In [54]:
# Limit the number of splits to one; the remainder of the string is
# returned unsplit as the final item. maxsplit is passed by keyword
# because passing it positionally is deprecated since Python 3.13.
re.split(r'\W+', 'Words, words, words.', maxsplit=1)

['Words', 'words, words.']

In [55]:
# flags=re.IGNORECASE lets [a-f] also split on the upper-case 'B'.
re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE)

['0', '3', '9']

In [58]:
# Return all matches of the pattern as a list of strings.
re.findall(r'foo', text)

['foo', 'foo', 'foo']

In [59]:
# finditer produces match objects; list() materialises them so that each
# match's span is displayed.
list(re.finditer(r'foo', text))

[<_sre.SRE_Match object; span=(0, 3), match='foo'>,
 <_sre.SRE_Match object; span=(8, 11), match='foo'>,
 <_sre.SRE_Match object; span=(16, 19), match='foo'>]

In [60]:
# replace every match of the pattern with a replacement template
# inside the template, \1 - \99 (or \g<1>, \g<name>) insert captured groups;
# \g<0> inserts the whole match (\0 is NOT a group reference)
re.sub(r'def\s+([a-zA-Z_][a-zA-Z_0-9]*)\s*\(\s*\):',
       r'static PyObject*\npy_\1(void)\n{',
       'def myfunc():')

'static PyObject*\npy_myfunc(void)\n{'

In [71]:
# Wrap each captured word in "my_block(...);" followed by a newline and
# print the result. The spaces between the words are left in place, so
# every line after the first starts with a space.
print(re.sub(r'(foo|bar)', r'my_block(\1);\n', text))

my_block(foo);
 my_block(bar);
 my_block(foo);
 my_block(bar);
 my_block(foo);



In [72]:
# subn performs the same substitution but returns a tuple of
# (new_string, number_of_substitutions_made).
re.subn(r'(foo|bar)', r'my_block(\1);\n', text)

('my_block(foo);\n my_block(bar);\n my_block(foo);\n my_block(bar);\n my_block(foo);\n',
 5)

In [73]:
# escape() backslash-escapes regex metacharacters such as '.' so the
# text can be used as a literal inside a pattern.
print(re.escape('python.exe'))

python\.exe


In [75]:
import string
# Build a "one or more" character class out of every legal character,
# escaped so that none of them is treated as a regex metacharacter.
legal_chars = ''.join((string.ascii_lowercase, string.digits, "!#$%&'*+-.^_`|~:"))
print(f'[{re.escape(legal_chars)}]+')

[abcdefghijklmnopqrstuvwxyz0123456789\!\#\$\%\&\'\*\+\-\.\^_\`\|\~\:]+


In [76]:
# Replace every run of digits in the sample line with the literal text of
# the pattern itself.
digits_re = r'\d+'
sample = '/usr/sbin/sendmail - 0 errors, 12 warnings'
print(
    re.sub(
        pattern=digits_re,
        # Backslashes are doubled so the replacement template produces a
        # literal backslash rather than being read as an escape.
        repl=digits_re.replace('\\', '\\\\'),
        string=sample,
    )
)

