Functions in the re module

In [53]:
import re

# Sample sentence holding a single signed floating point literal.
text = "This text contains a floating point -1982.456 value"

# Optional sign, one or more digits, optional '.', then any trailing digits.
p = r"[-+]?\d+\.?\d*"

In [54]:
# Locate the first place in text where pattern p matches (None if it never does).
m = re.compile(p).search(text)

In [55]:
# With no argument, group() returns the whole matched substring (group 0).
m.group()


'-1982.456'

In [5]:
# (start, end) offsets of the match inside the searched string; end is exclusive.
m.span()

(36, 45)

In [6]:
# Slice the source string with the match's own offsets instead of hard-coded
# indices, so the slice always reproduces m.group() (leading sign included).
text[m.start():m.end()]

'-1982.456'

In [7]:
# Index of the first character of the match.
m.start()

36

In [8]:
# Index just past the last character of the match.
m.end()


45

In [9]:
# Replace every match of p in text with the literal "3.14159".
t = re.compile(p).sub("3.14159", text)

In [10]:
# Display the string produced by the substitution.
t

'This text contains a floating point 3.14159 value'

In [11]:
# Two capturing groups: (1) digits before the optional '.', (2) digits after it.
p = r"(\d+)\.?(\d*)"

In [12]:
# Re-run the search with the current (grouped) pattern p.
m = re.compile(p).search(text)

In [13]:
# Whole match (group 0).
m.group()

'1982.456'

In [14]:
# Text captured by the first parenthesised group.
m.group(1)

'1982'

In [15]:
# Text captured by the second parenthesised group.
m.group(2)

'456'

In [16]:
# Tuple of all captured groups, in order (group 0 is not included).
m.groups()

('1982', '456')

In [17]:
# Same two groups as before, now named via (?P<name>...) so they can be looked up by name.
p = r"(?P<decimal>\d+)\.?(?P<fraction>\d*)"

In [18]:
# Re-run the search with the current (named-group) pattern p.
m = re.compile(p).search(text)

In [19]:
# Whole match (group 0); naming the groups does not change it.
m.group()

'1982.456'

In [20]:
# Named groups still appear positionally in groups().
m.groups()

('1982', '456')

In [21]:
# Mapping of group name -> captured text for every named group.
m.groupdict()

{'decimal': '1982', 'fraction': '456'}

Experimenting with metacharacters

In [34]:
import re

# Sample strings; the final entry 'abcabc' contains two candidate 'a.c' hits.
data = [
    'ab ', 'abc', 'a5e', 'a6f', '123 a6c anc', 'a5b', 'a55b', 'a555b',
    'a5555b', 'a55555b', 'a555555b', 'a5xb', '1/4', '3+2=5', 'def ghi',
    'abcabc',
]

# 'a.c': an 'a', any single character, then 'c'.
# search() reports only the first hit in each string.
for item in data:
    m = re.search(r'a.c', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

# findall() returns every non-overlapping hit rather than just the first one.
re.findall(r'a.c', '123 a6c anc')

abc matched in 'abc'
a6c matched in '123 a6c anc'
abc matched in 'abcabc'


['a6c', 'anc']

In [25]:
import re

# Sample strings used to probe the pattern below.
data = [
    'ab ', 'abc', 'a5e', 'a6f', '123 a6c anc', 'a5b', 'a55b', 'a555b',
    'a5555b', 'a55555b', 'a555555b', 'a5xb', '1/4', '3+2=5', 'def ghi',
    'abc ab',
]

# 'a.[abc]': an 'a', any single character, then one of 'a', 'b' or 'c'.
for item in data:
    m = re.search(r'a.[abc]', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

abc matched in 'abc'
a6c matched in '123 a6c anc'
a5b matched in 'a5b'
abc matched in 'abc ab'


In [26]:
import re

# Sample strings used to probe the pattern below.
data = [
    'ab ', 'abc', 'a5e', 'a6f', '123 a6c anc', 'a5b', 'a55b', 'a555b',
    'a5555b', 'a55555b', 'a555555b', 'a5xb', '1/4', '3+2=5', 'def ghi',
    'abc ab',
]

# 'a.[abd-z]': an 'a', any single character, then 'a', 'b' or a letter in d-z
# (the range deliberately skips 'c').
for item in data:
    m = re.search(r'a.[abd-z]', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

a5e matched in 'a5e'
a6f matched in 'a6f'
a5b matched in 'a5b'
a5x matched in 'a5xb'


In [27]:
import re

# Sample strings used to probe the pattern below.
data = [
    'ab ', 'abc', 'a5e', 'a6f', '123 a6c anc', 'a5b', 'a55b', 'a555b',
    'a5555b', 'a55555b', 'a555555b', 'a5xb', '1/4', '3+2=5', 'def ghi',
    'abc ab',
]

# 'a[^0-9][^0-9]': an 'a' followed by two characters that are not digits
# ('^' as the first character of a class negates it).
for item in data:
    m = re.search(r'a[^0-9][^0-9]', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

ab  matched in 'ab '
abc matched in 'abc'
anc matched in '123 a6c anc'
abc matched in 'abc ab'


In [29]:
import re

# Sample strings used to probe the pattern below.
data = [
    'ab ', 'abc', 'a5e', 'a6f', '123 a6c anc', 'a5b', 'a55b', 'a555b',
    'a5555b', 'a55555b', 'a555555b', 'a5xb', '1/4', '3+2=5', 'def ghi',
    'abc ab',
]

# '[0-9]\+[0-9]': digit, literal '+', digit. The backslash strips '+' of its
# quantifier meaning.
for item in data:
    m = re.search(r'[0-9]\+[0-9]', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

3+2 matched in '3+2=5'


In [31]:
import re

# Sample strings used to probe the pattern below.
data = [
    'ab ', 'abc', 'a5e', 'a6f', '123 a6c anc', 'a5b', 'a55b', 'a555b',
    'a5555b', 'a55555b', 'a555555b', 'a5xb', '1/4', '3+2=5', 'def ghi',
    'abc ab',
]

# 'bc|a6': alternation - either the literal 'bc' or the literal 'a6'.
for item in data:
    m = re.search(r'bc|a6', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

bc matched in 'abc'
a6 matched in 'a6f'
a6 matched in '123 a6c anc'
bc matched in 'abc ab'


In [35]:

# 'a\d\D': an 'a', one digit, then one non-digit character.
# NOTE(review): `data` is not defined in this cell; it is whatever list an
# earlier cell left in the kernel.
for item in data:
    m = re.search(r'a\d\D', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

a5e matched in 'a5e'
a6f matched in 'a6f'
a6c matched in '123 a6c anc'
a5b matched in 'a5b'
a5x matched in 'a5xb'


In [36]:
# '...\s.': any three characters, one whitespace character, then any character.
# NOTE(review): `data` comes from an earlier cell. The recorded output fits the
# variant whose last item is 'abcabc'; with 'abc ab' as the last item,
# 'abc a' would be reported as well.
for item in data:
    m = re.search(r'...\s.', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

123 a matched in '123 a6c anc'
def g matched in 'def ghi'


In [37]:
# 'a6': the plain literal, found anywhere in the string.
# `data` is taken from an earlier cell.
for item in data:
    m = re.search(r'a6', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

a6 matched in 'a6f'
a6 matched in '123 a6c anc'


In [38]:
# '^a6': the literal 'a6', but only at the very start of the string.
# `data` is taken from an earlier cell.
for item in data:
    m = re.search(r'^a6', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

a6 matched in 'a6f'


In [40]:
# Sample strings used to probe the pattern below.
data = [
    'ab ', 'abc', 'a5e', 'a6f', '123 a6c anc', 'a5b', 'a55b', 'a555b',
    'a5555b', 'a55555b', 'a555555b', 'a5xb', '1/4', '3+2=5', 'def ghi',
    'abc ab',
]

# '..c': any two characters followed by 'c', anywhere in the string.
for item in data:
    m = re.search(r'..c', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

abc matched in 'abc'
a6c matched in '123 a6c anc'
abc matched in 'abc ab'


In [41]:
# Sample strings used to probe the pattern below.
data = [
    'ab ', 'abc', 'a5e', 'a6f', '123 a6c anc', 'a5b', 'a55b', 'a555b',
    'a5555b', 'a55555b', 'a555555b', 'a5xb', '1/4', '3+2=5', 'def ghi',
    'abc ab',
]

# '..c$': any two characters followed by 'c', anchored to the end of the string.
for item in data:
    m = re.search(r'..c$', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

abc matched in 'abc'
anc matched in '123 a6c anc'


In [42]:
# 'ab\b': 'ab' immediately followed by a word boundary (end of a word).
# `data` is taken from an earlier cell.
for item in data:
    m = re.search(r'ab\b', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

ab matched in 'ab '
ab matched in 'abc ab'


In [43]:
# 'ab\B': 'ab' NOT followed by a word boundary, i.e. the word continues after it.
# `data` is taken from an earlier cell.
for item in data:
    m = re.search(r'ab\B', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

ab matched in 'abc'
ab matched in 'abc ab'


In [44]:
# 'a5555*': 'a555' followed by zero or more further '5' characters
# ('*' applies only to the last '5').
# `data` is taken from an earlier cell.
for item in data:
    m = re.search(r'a5555*', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

a555 matched in 'a555b'
a5555 matched in 'a5555b'
a55555 matched in 'a55555b'
a555555 matched in 'a555555b'


In [45]:
# 'a5555+': 'a555' followed by one or more further '5' characters
# ('+' applies only to the last '5').
# `data` is taken from an earlier cell.
for item in data:
    m = re.search(r'a5555+', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

a5555 matched in 'a5555b'
a55555 matched in 'a55555b'
a555555 matched in 'a555555b'


In [46]:
# 'a5?\D': an 'a', an optional single '5', then one non-digit character.
# `data` is taken from an earlier cell.
for item in data:
    m = re.search(r'a5?\D', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

ab matched in 'ab '
ab matched in 'abc'
a5e matched in 'a5e'
an matched in '123 a6c anc'
a5b matched in 'a5b'
a5x matched in 'a5xb'
ab matched in 'abc ab'


In [47]:
# 'a5{3}\D': an 'a', exactly three '5' characters, then one non-digit character.
# `data` is taken from an earlier cell.
for item in data:
    m = re.search(r'a5{3}\D', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

a555b matched in 'a555b'


In [49]:
# 'a5{3,5}\D': an 'a', between three and five '5' characters, then one non-digit.
# `data` is taken from an earlier cell.
for item in data:
    m = re.search(r'a5{3,5}\D', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

a555b matched in 'a555b'
a5555b matched in 'a5555b'
a55555b matched in 'a55555b'


In [50]:
# 'a5{3,}\D': an 'a', three or more '5' characters (no upper bound), then one non-digit.
# `data` is taken from an earlier cell.
for item in data:
    m = re.search(r'a5{3,}\D', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

a555b matched in 'a555b'
a5555b matched in 'a5555b'
a55555b matched in 'a55555b'
a555555b matched in 'a555555b'


In [51]:
# 'a5{,5}\D': an 'a', zero to five '5' characters (omitted lower bound means 0),
# then one non-digit.
# `data` is taken from an earlier cell.
for item in data:
    m = re.search(r'a5{,5}\D', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

ab matched in 'ab '
ab matched in 'abc'
a5e matched in 'a5e'
an matched in '123 a6c anc'
a5b matched in 'a5b'
a55b matched in 'a55b'
a555b matched in 'a555b'
a5555b matched in 'a5555b'
a55555b matched in 'a55555b'
a5x matched in 'a5xb'
ab matched in 'abc ab'


In [52]:
# 'a(55){2}\D': an 'a', the group '55' repeated exactly twice, then one non-digit.
# The quantifier applies to the whole parenthesised group.
# `data` is taken from an earlier cell.
for item in data:
    m = re.search(r'a(55){2}\D', item)
    if m is None:
        continue
    print(m.group() + ' matched in ' + '\'' + item + '\'')

a5555b matched in 'a5555b'
