In [1]:
import re

In [2]:
# Pattern for a phone number written as 3 digits, 3 digits, 4 digits, dash-separated.
phone_num_regex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
# search() yields a match object for the first occurrence found in the text.
mo = phone_num_regex.search('my number is 415-555-4242.')
# group() with no argument returns the full matched text.
print(f'Phone number found: {mo.group()}')

Phone number found: 415-555-4242


In [3]:
# Two capture groups: the area code, and the rest of the number.
phone_num_regex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
mo = phone_num_regex.search('my number is 415-555-4242.')
# group(1) and group(2) return the individual captures; group(0) and group()
# both return the whole match. One value is printed per line.
print(
    mo.group(1),
    mo.group(2),
    mo.group(0),
    mo.group(),
    sep='\n',
)

415
555-4242
415-555-4242
415-555-4242


In [4]:
# groups() returns every captured group at once as a tuple.
# Relies on `mo` left in the kernel by the previous cell.
mo.groups()

('415', '555-4242')

In [6]:
# \( and \) are escaped so they match literal parentheses; the first capture
# group therefore includes the parentheses around the area code.
phone_num_regex = re.compile(r'(\(\d\d\d\)) (\d\d\d-\d\d\d\d)')
mo = phone_num_regex.search('my number is (415) 555-4242.')
print(mo.groups())

('(415)', '555-4242')


In [7]:
# The pipe matches either alternative; search() returns whichever one
# occurs first in the searched text.
asdf = re.compile(r'as|df')
mo1 = asdf.search('i was doing some stuff in python with dfs')
mo2 = asdf.search('hey oldfart, wassup?')
# One matched alternative per line, in the same order as the searches above.
print(mo1.group(), mo2.group(), sep='\n')

as
df


In [8]:
# The pattern has no capture groups, so findall() returns a list of the
# full matched strings.
phone_num_regex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
strings_found = phone_num_regex.findall('Cell: 415-555-9999 Work: 212-555-0000')
print(strings_found)

['415-555-9999', '212-555-0000']


In [9]:
# With capture groups in the pattern, findall() returns one tuple per match,
# holding the text of each group, instead of the full matched string.
phone_num_regex = re.compile(r'(\d\d\d)-(\d\d\d)-(\d\d\d\d)')
tuples_found = phone_num_regex.findall('Cell: 415-555-9999 Work: 212-555-0000')
print(tuples_found)

[('415', '555', '9999'), ('212', '555', '0000')]


In [10]:
# The ? after .* makes it non-greedy: the match ends at the first '>' it can,
# not the last one in the string.
nongreedy_regex = re.compile(r'<.*?>')
mo = nongreedy_regex.search('<qw er> asdf>')
mo.group()

'<qw er>'

In [11]:
# re.DOTALL makes '.' match newline characters as well, so .* covers the
# entire multi-line string rather than stopping at the first line break.
newline_regex = re.compile(r'.*', re.DOTALL)
newline_regex.search('blah blah blah\nsomething something something\n').group()

'blah blah blah\nsomething something something\n'

In [12]:
# re.I (short for re.IGNORECASE) makes matching case-insensitive; group()
# still returns the text exactly as it is cased in the searched string.
robocop = re.compile(r'robocop', re.I)
robocop.search('RoboCop is part man, part machine, all cop.').group()

'RoboCop'

In [13]:
# sub() returns a copy of the string with every match of the pattern
# replaced by the first argument.
names_regex = re.compile(r'Agent \w+')
names_regex.sub('CENSORED', 'Agent Alice gave the secret documents to Agent Bob.')

'CENSORED gave the secret documents to CENSORED.'

In [15]:
# Only the first letter of the name is captured; \1 in the replacement
# re-inserts that letter, followed by literal asterisks.
names_regex = re.compile(r'Agent (\w)\w*')
names_regex.sub(r'\1***', 'Agent Alice gave the secret documents to Agent Bob.')

'A*** gave the secret documents to B***.'

In [17]:
# re.VERBOSE ignores whitespace and '#' comments inside the pattern, so the
# regex can be laid out one piece per line with an explanation beside each.
# The outer parentheses capture the whole phone number as group 1.
phone_num_regex = re.compile(r'''(
    \d{3}   # area code
    -       # separator
    \d{3}   # first three digits
    -       # separator
    \d{4}   # last four digits
)''', re.VERBOSE)

In [18]:
# Runs the external script phoneandemail.py (its source is not part of this
# notebook); the lines below are what it printed.
# NOTE(review): the "copied to clipboard" line suggests the script uses the
# system clipboard, so the output may depend on clipboard contents - confirm.
%run phoneandemail.py

copied to clipboard
800-420-7240
415-863-9900
415-863-9950
info@nostarch.com
media@nostarch.com
academic@nostarch.com
conferences@nostarch.com
info@nostarch.com


## practice questions

1. re.compile()
2. they contain escape characters
3. the first instance in a string matched by the regex object
4. with group() or groups()
5. the entire phone number, the area code, the rest
6. `\( \) \.`
7. whether any capture groups are specified in the regex
8. or
9. optional and nongreedy
10. '+' for 1 or more, '*' for 0 or more
11. exactly 3 times vs 3 to 5 times
12. '\d' for digits, '\w' for word characters, '\s' for whitespace characters
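13. the negation of the above: '\D' for non-digits, '\W' for non-word characters, '\S' for non-whitespace characters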
14. '.*?' is the nongreedy variant
15. \w
16. re.I or re.IGNORECASE
17. any, also newlines
18. 'X drummers, X pipers, five rings, X hens'
19. allows multi-line regex specifications
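20. `r'^\d{1,3}(,\d{3})*$'` (anchored with `^` and `$` so that a partial match inside a malformed number such as '1234' or '12,34,567' is rejected)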
21. `r'[A-Z]\w* Watanabe'` but what about names like 3Jane?
22. `r'(Alice|Bob|Carol) (eats|pets|throws) (apples|cats|baseballs)\.', re.I`

In [27]:
# Runs the external script datedetection.py (source not shown here); it prints
# its test text followed by the list of dates it extracted from that text.
%run datedetection.py

test text - 'valid: 22/12/1414, invalid day: 32/12/1414, invalid month: 01/14/1414, invalid year: 01/01/3000, valid leap: 29/02/2000, invalid leap: 29/02/2001'
['22/12/1414', '29/02/2000']


In [28]:
# Runs the external script strongpw.py (source not shown here); it prints a
# passed/failed result for each sample password.
%run strongpw.py

password: password  result: failed
password: hunter2  result: failed
password: 123Abc  result: failed
password: qweR2zxcvs  result: passed


In [29]:
# Runs the external script restrip.py (source not shown here); judging by the
# output, it prints each sample string followed by its stripped form.
%run restrip.py

  as d f    
as d f
-_-_-qw--_-__-_er---__-
qw--_-__-_er
