In [1]:
import re

In [2]:
names_file = open("names.txt", encoding="utf-8")

In [3]:
data = names_file.read()

In [4]:
names_file.close()

In [5]:
print(re.match(r'Love', data))

In [6]:
print(re.search(r'Kenneth', data))

In [7]:
print(re.findall(r'\(?\d{3}\)?-?\s?\d{3}-\d{4}', data))

['(555) 555-5555', '(555) 555-5554', '(555) 555-5543', '555-555-5552', '555 555-5551', '(555) 555-5553', '(555) 555-4444']


In [8]:
print(re.findall(r'\w*, \w+', data))

['Love, Kenneth', 'Teacher, Treehouse', 'McFarland, Dave', 'Teacher, Treehouse', 'Arthur, King', 'King, Camelot', 'Österberg, Sven', 'Governor, Norrbotten', ', Tim', 'Enchanter, Killer', 'Carson, Ryan', 'CEO, Treehouse', 'Doctor, The', 'Lord, Gallifrey', 'Exampleson, Example', 'Example, Example', 'Obama, Barack', 'President, United', 'Chalkley, Andrew', 'Teacher, Treehouse', 'Vader, Darth', 'Lord, Galactic', 'Sanz, María', 'Minister, Spanish']


In [9]:
print(re.findall(r'[-\w\d+.]+@[-\w\d.]+', data))

['kenneth@teamtreehouse.com', 'dave@teamtreehouse.com', 'king_arthur@camelot.co.uk', 'governor@norrbotten.co.se', 'tim@killerrabbit.com', 'ryan@teamtreehouse.com', 'doctor+companion@tardis.co.uk', 'me@example.com', 'president.44@us.gov', 'andrew@teamtreehouse.com', 'darth-vader@empire.gov', 'mtfvs@spain.gov']


In [10]:
print(re.findall(r'\b[trehous]{9}\b', data, re.I))

['Treehouse', 'Treehouse', 'Treehouse', 'Treehouse']


In [11]:
print(re.findall(r'''
    \b@[-\w\d.]* #First a word boundary, an @, and then any number of characters
    [^gov\t]+ #Ignore 1+ instances of the letters 'g', 'o', or 'v' and a tab.
    \b #Match another word boundary
''', data, re.VERBOSE|re.I))

['@teamtreehouse.com', '@teamtreehouse.com', '@camelot.co.uk', '@norrbotten.co.se', '@killerrabbit.com', '@teamtreehouse.com', '@tardis.co.uk', '@example.com', '@us.', '@teamtreehouse.com', '@empire.', '@spain.']


In [12]:
print(re.findall(r"""
    \b[-\w]*, #Find a word boundary, 1+ hyphens or word characters, and a comma
    \s #find 1 whitespace
    [-\w ]+ #1+ hyphes and characters and explicit space
    [^\t\n] #Ignore tabs and newlines
""", data, re.X))

['Love, Kenneth', 'Teacher, Treehouse', 'McFarland, Dave', 'Teacher, Treehouse', 'Arthur, King', 'King, Camelot', 'Österberg, Sven-Erik', 'Governor, Norrbotten', 'Enchanter, Killer Rabbit Cave', 'Carson, Ryan', 'CEO, Treehouse', 'Doctor, The', 'Lord, Gallifrey', 'Exampleson, Example', 'Example, Example Co.', 'Obama, Barack', 'President, United States of America', 'Chalkley, Andrew', 'Teacher, Treehouse', 'Vader, Darth', 'Lord, Galactic Empire', 'Sanz, María Teresa', 'Minister, Spanish Govt.']


In [13]:
line = re.compile(r'''
    ^(?P<name>(?P<last>[-\w ]*),\s(?P<first>[-\w ]+))\t #last and first names
    (?P<email>[-\w\d+.]+@[-\w\d.]+)\t #email
    (?P<phone>\(?\d{3}\)?-?\s?\d{3}-\d{4})?\t #phone numbers
    (?P<job>[\w\s]+,\s[\w\s.]+)\t? #Job, Company
    (?P<twitter>@[\w\d]+)?$ #twitter
''', re.X|re.M)

In [14]:
print(re.search(line, data).groupdict())

{'name': 'Love, Kenneth', 'last': 'Love', 'first': 'Kenneth', 'email': 'kenneth@teamtreehouse.com', 'phone': '(555) 555-5555', 'job': 'Teacher, Treehouse\t', 'twitter': '@kennethlove'}


In [15]:
print(line.search(data).groupdict())

{'name': 'Love, Kenneth', 'last': 'Love', 'first': 'Kenneth', 'email': 'kenneth@teamtreehouse.com', 'phone': '(555) 555-5555', 'job': 'Teacher, Treehouse\t', 'twitter': '@kennethlove'}


In [16]:
for match in line.finditer(data):
    print(match.group('name'))

Love, Kenneth
McFarland, Dave
Arthur, King
Österberg, Sven-Erik
, Tim
Carson, Ryan
Doctor, The
Exampleson, Example
Obama, Barack
Chalkley, Andrew
Vader, Darth
Fernández de la Vega Sanz, María Teresa


In [17]:
for match in line.finditer(data):
    print('{first} {last} <{email}>'.format(**match.groupdict()))

Kenneth Love <kenneth@teamtreehouse.com>
Dave McFarland <dave@teamtreehouse.com>
King Arthur <king_arthur@camelot.co.uk>
Sven-Erik Österberg <governor@norrbotten.co.se>
Tim  <tim@killerrabbit.com>
Ryan Carson <ryan@teamtreehouse.com>
The Doctor <doctor+companion@tardis.co.uk>
Example Exampleson <me@example.com>
Barack Obama <president.44@us.gov>
Andrew Chalkley <andrew@teamtreehouse.com>
Darth Vader <darth-vader@empire.gov>
María Teresa Fernández de la Vega Sanz <mtfvs@spain.gov>
