# RegEx in Python

In [1]:
import re

# Show the version string exposed by the regular-expression module.
print(f're: {re.__version__}')

re: 2.2.1


### search: find the first location in the string where the pattern matches

In [2]:
# Sample log lines for the search example. Only the last three entries
# (the "at widget.List.<method>(ListView.java:<line>)" frames) contain the
# "(ListView.java:" text that the pattern in the next cell looks for.
log = [
    'W/dalvikvm( 1553): threadid=1: uncaught exception',
    'E/( 1553): FATAL EXCEPTION: main',
    'E/( 1553): java.lang.StringIndexOutOfBoundsException',
    'E/( 1553):   at widget.List.makeView(ListView.java:1727)',
    'E/( 1553):   at widget.List.fillDown(ListView.java:652)',
    'E/( 1553):   at widget.List.fillFrom(ListView.java:709)'
]

In [3]:
# Capture the method name (group 1) and the line number (group 2) from
# entries of the form "<method>(ListView.java:<line>".
# The pattern is a raw string so that the regex backslashes (\w, \(, \., \d)
# are not interpreted as (invalid) Python string escapes.
p = re.compile(r'(\w+)\(ListView\.java:(\d+)')

for line in log:
    # search() scans the whole line and returns None when nothing matches.
    m = p.search(line)
    if m is not None:
        print(line)
        print(m[1] + ': ' + m[2] + '\n')

E/( 1553):   at widget.List.makeView(ListView.java:1727)
makeView: 1727

E/( 1553):   at widget.List.fillDown(ListView.java:652)
fillDown: 652

E/( 1553):   at widget.List.fillFrom(ListView.java:709)
fillFrom: 709



In [4]:
# Phone-number strings in assorted formats; the last entry has only two
# digits inside the parentheses.
numbers = [
    '415-555-1234',
    '650-555-2345',
    '(416)555-3456',
    '202 555 4567',
    '4035555678',
    '1 416 555 9292',
    '(41)555-3456',
]

In [5]:
# Groups 2, 4 and 6 capture the three digit runs (3, 3 and 4 digits).
# The rest of the pattern allows an optional leading digit, an optional
# whitespace character or "(" (group 1), and "-", " ", ")" or nothing as
# separators (groups 3 and 5).
# Raw string: the backslashes belong to the regex, not to Python.
p = re.compile(r'\d?(\s|\()?(\d{3})(-| |\)|)(\d{3})(-| |)(\d{4})')

numbers_new = []

for number in numbers:
    m = p.search(number)
    if m is not None:
        # Rebuild every recognised number as NNN-NNN-NNNN (built once, used twice).
        normalized = m[2] + '-' + m[4] + '-' + m[6]
        print(number)  # the input string; m.group() would give only the matched text
        print(normalized + '\n')
        numbers_new.append(normalized)

print(numbers_new)

415-555-1234
415-555-1234

650-555-2345
650-555-2345

(416)555-3456
416-555-3456

202 555 4567
202-555-4567

4035555678
403-555-5678

1 416 555 9292
416-555-9292

['415-555-1234', '650-555-2345', '416-555-3456', '202-555-4567', '403-555-5678', '416-555-9292']


### match: match only at the start of the string

In [6]:
# Pattern: a digit, one or more arbitrary characters, "-", three digits, "-".
# Raw string so that \d reaches the regex engine instead of being treated as
# an (invalid) Python string escape.
p = re.compile(r'\d.+-\d{3}-')
for number in numbers:
    # match() only tries the pattern at the beginning of the string.
    m = p.match(number)
    if m is not None:
        print(m.start())
        print(m.end())
        print(m.span())
        print(m.group())
        print(number + '\n')  # the full input, for comparison with m.group()

0
8
(0, 8)
415-555-
415-555-1234

0
8
(0, 8)
650-555-
650-555-2345



In [7]:
# Free text with several phone numbers embedded in it. Adjacent string
# literals are concatenated by Python into one single-line string.
numbers_string = (
    'Either call me on 415-555-1234 or (416)555-3456. '
    'If my wife takes the call please call me on 202 555 4567. '
    'Btw. this is not a phone number: (41)555-3456'
)

### findall: find all matches in a string

In [8]:
# Raw string: the backslashes belong to the regex, not to Python.
p = re.compile(r'\d?(\s|\()?(\d{3})(-| |\)|)(\d{3})(-| |)(\d{4})')
# Because the pattern has several groups, findall() returns one tuple of
# group strings per match; tuple indices 1, 3 and 5 are groups 2, 4 and 6
# (the three digit runs).
m = p.findall(numbers_string)

numbers_in_string = [groups[1] + '-' + groups[3] + '-' + groups[5] for groups in m]

numbers_in_string

['415-555-1234', '416-555-3456', '202-555-4567']

### finditer: iterate over all matches as match objects

In [9]:
# Raw string: the backslashes belong to the regex, not to Python.
p = re.compile(r'\d?(\s|\()?(\d{3})(-| |\)|)(\d{3})(-| |)(\d{4})')
# finditer() yields one match object per non-overlapping match, in order.
iterator = p.finditer(numbers_string)

for match in iterator:
    # group() with no arguments is the whole matched text, including any
    # leading whitespace or "(" consumed by the pattern.
    print(match.group())

 415-555-1234
(416)555-3456
 202 555 4567
