In [1]:
# Phone number without regex.

def isPhoneNumber(text):
    """Return True if text has the form ###-###-####, otherwise False.

    The text must be exactly 12 characters long, with a hyphen at indexes
    3 and 7 and a decimal character (per str.isdecimal) everywhere else.
    """
    if len(text) != 12:
        return False
    for position, char in enumerate(text):
        if position in (3, 7):
            # Separator slots must hold a literal hyphen.
            if char != '-':
                return False
        elif not char.isdecimal():
            # Every other slot must hold a decimal character.
            return False
    return True

message = 'Call me at 415-555-1011 tomorrow. 415-555-9999 is my office.'

# Slide a window across the message: for each start index i, take the slice
# message[i:i+12] (at most 12 characters) and test it with isPhoneNumber.
# Slices taken near the end of the message are shorter than 12 characters,
# so they can never pass isPhoneNumber's length check.
for i in range(len(message)):
    chunk = message[i:i+12]
    print(chunk)  # show every window that is examined
    if isPhoneNumber(chunk):
        print('Phone number found: ' + chunk)
        
print('Done')
# Direct checks: one string in ###-###-#### form and one that is not.
print('Is 415-555-4242 a phone number?')
print(isPhoneNumber('415-555-4242'))
print('Is Moshi moshi a phone number?')
print(isPhoneNumber('Moshi moshi'))

Call me at 4
all me at 41
ll me at 415
l me at 415-
 me at 415-5
me at 415-55
e at 415-555
 at 415-555-
at 415-555-1
t 415-555-10
 415-555-101
415-555-1011
Phone number found: 415-555-1011
15-555-1011 
5-555-1011 t
-555-1011 to
555-1011 tom
55-1011 tomo
5-1011 tomor
-1011 tomorr
1011 tomorro
011 tomorrow
11 tomorrow.
1 tomorrow. 
 tomorrow. 4
tomorrow. 41
omorrow. 415
morrow. 415-
orrow. 415-5
rrow. 415-55
row. 415-555
ow. 415-555-
w. 415-555-9
. 415-555-99
 415-555-999
415-555-9999
Phone number found: 415-555-9999
15-555-9999 
5-555-9999 i
-555-9999 is
555-9999 is 
55-9999 is m
5-9999 is my
-9999 is my 
9999 is my o
999 is my of
99 is my off
9 is my offi
 is my offic
is my office
s my office.
 my office.
my office.
y office.
 office.
office.
ffice.
fice.
ice.
ce.
e.
.
Done
Is 415-555-4242 a phone number?
True
Is Moshi moshi a phone number?
False


In [2]:
# Finding patterns of text with regular expressions
# A phone number may also carry an extension, e.g. '415-555-4242 x99'; the
# fixed 12-character isPhoneNumber check above cannot validate that form.

import re

# re.compile() returns a regex object for the pattern ###-###-####
phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')

# search() returns a match object (conventionally named mo), or None when the
# pattern does not occur in the string
mo = phoneNumRegex.search('My Number is 415-555-4242')

# The string above is known to contain a match, so mo is a match object rather
# than None and .group() can be called to get the matched text
print('Phone number found: ' + mo.group())


Phone number found: 415-555-4242


In [3]:
# Grouping with parentheses
# Goal: separate the area code from the rest of the phone number.
# Adding parentheses creates groups in the regex: (\d\d\d)-(\d\d\d-\d\d\d\d)
# The match object's group() method then returns the text matched by just one group.

import re

# Group 1 captures the three-digit area code; group 2 captures the rest (###-####).
# Both groups, joined by a hyphen, must be present for the pattern to match.
phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
    
mo = phoneNumRegex.search('My number is 415-555-4242')

# group(1) returns the text matched by the first set of parentheses
mo.group(1) # '415'

# group(2) returns the text matched by the second set of parentheses
mo.group(2) # '555-4242'

# group(0) and group() with no argument both return the entire match
mo.group(0) # '415-555-4242'
mo.group() # '415-555-4242'

# groups() returns all groups at once as a tuple
mo.groups()

# Unpack that tuple into one variable per group
area, mainNumber = mo.groups()
print(area, mainNumber)

415 555-4242


In [4]:
# Parentheses normally create groups in a regex, so matching literal
# parentheses in the text requires escaping them with a backslash: \( and \)

# Group 1 is the area code including its literal parentheses; group 2 is ###-####.
phoneNumRegex = re.compile(r'(\(\d\d\d\)) (\d\d\d-\d\d\d\d)')
mo = phoneNumRegex.search('My phone number is (415) 555-4242.')
mo.group(1) # '(415)'
mo.group(2) # '555-4242'


'555-4242'

In [6]:
# Forgetting to close a group makes re.compile() raise re.error.
# The error is the point of this demonstration, so catch it and print the
# message instead of letting the exception stop the notebook.
try:
    re.compile(r'(\(Parentheses\)')  # the outer group is never closed
except re.error as err:
    print('Invalid regex:', err)

# Corrected pattern: the outer group is closed after the escaped parentheses.
parenRegex = re.compile(r'(\(Parentheses\))', re.UNICODE)
parenRegex

In [11]:
# Matching multiple groups with the pipe
# | is the pipe character; use it to match one of several alternative expressions

heroRegex = re.compile(r'Batman|Tina Fey')


# When both alternatives occur in the searched text, the one that occurs
# first is returned. Matching is case-sensitive, so the text must contain
# 'Tina Fey' with exactly that capitalization.
mo1 = heroRegex.search('Batman and Tina Fey')
mo1.group() # 'Batman'

mo2 = heroRegex.search('Tina Fey and Batman')
mo2.group() # 'Tina Fey'

# Match one of several patterns that share a common prefix

# Bat(man|mobile|copter|bat) matches Batman / Batmobile / Batcopter / Batbat
batRegex = re.compile(r'Bat(man|mobile|copter|bat)')
mo = batRegex.search('Batmobile lost a wheel')
mo.group() #'Batmobile'

# group(1) returns only the text matched inside the parentheses
mo.group(1) #'mobile'

# Combining the pipe with grouping parentheses lets you specify several
# alternative patterns for one part of the regex.
# To match a literal pipe character, escape it with a backslash: \|

'mobile'

In [12]:
# Optional matching with the question mark
# Sometimes part of a pattern should be matched only optionally: the regex
# should find a match whether or not that bit of text is present.

import re

# (wo)? makes the group 'wo' optional: it may appear zero times or once
batRegex = re.compile(r'Bat(wo)?man')

mo1 = batRegex.search('The Adventures of Batman')

mo1.group() # 'Batman'

mo2 = batRegex.search('The adventures of Batwoman')
mo2.group() # 'Batwoman'


'Batwoman'