# Project: Text patterns matching with Regular Expression


# 9. The Syntax of Regular Expressions

# 9.1 Finding Text Patterns with Regular Expressions

In [13]:
import re
pattern = re.compile(r'\d{3}-\d{3}-\d{3}')
match = pattern.search('My number is 415-555-424')
match.group()

'415-555-424'

# 9.2.1. Grouping with Parentheses

In [21]:
import re
pattern = re.compile (r'(\d{5})\s(\d\d)\s(\d{3})')
match = pattern.search ("My phone number is 98493 92 897")
print('First part of the phone number %s' %(match.group(1)))
print('Second part of the phone number %s' % (match.group(2)))
print(match.group(3))

First part of the phone number 98493
Second part of the phone number 92
897


In [95]:
import re
pattern = re.compile (r'(\d{5})\s\d\d\s\d{3}')
match = pattern.findall ("My phone number is 98493 92 897")
print(match)

['98493']


# 9.2.2. Using Escape Characters

In [1]:
import re
pattern = re.compile (r'(\(\d{5}\))\s(\(\d\d\))\s(\(\d{3}\))')
match = pattern.search ("My phone number is (98493) (92) (897)")
print('First part of the phone number %s' %(match.group(1)))
print('Second part of the phone number %s' % (match.group(2)))
print(match.group(3))

First part of the phone number (98493)
Second part of the phone number (92)
(897)


# 9.2.3. Matching Characters from Alternate Groups

In [15]:
import re
pattern = re.compile (r'(Bejing|New York|Paris|Moscow|London)')
match = pattern.search('I am going to Paris tomorrow. Tom is arriving from London')
print(match.group(1))

Paris


# 9.2.3. Returning All Matches

In [11]:
import re
pattern = re.compile (r'(Bejing|New York|Paris|Moscow|London)')
match = pattern.findall('Xin is flyig from Bejing. I am going to Paris tomorrow. Tom is arriving from London. Pavel is from Moscow')
for city in match:
    print (city)

Bejing
Paris
London
Moscow


# 10. Qualifier Syntax: What Characters to Match

# 10.1. Using Character Classes and Negative Character Classes

In [17]:
import re
vowel_pattern = re.compile(r'[aeiouAEIOU]')
match = vowel_pattern.findall('Humpty Dumpty sat on a wall. Humpty Dumpty had a great fall. All the king\'s horses and all the king\'s men')
# match = vowel_pattern.findall('XXbgt')
if (len(match) == 0):
    print ("No vowels found")
else:
    print (print(match))

['u', 'u', 'a', 'o', 'a', 'a', 'u', 'u', 'a', 'a', 'e', 'a', 'a', 'A', 'e', 'i', 'o', 'e', 'a', 'a', 'e', 'i', 'e']
None


In [29]:
vowel_pattern = re.compile(r'[^aeiouAEIOU ]') # consonant + space pattern
# match = vowel_pattern.findall('Humpty Dumpty sat on a wall. Humpty Dumpty had a great fall. All the king\'s horses and all the king\'s men')
match = vowel_pattern.findall('I LovE YOu')
if (len(match) == 0):
    print ("No consonant found")
else:
    print (print(match))

['L', 'v', 'Y']
None


# 10.2. Using Shorthand Character Classes

In [72]:
pattern = re.compile(r'\w+\s+(\d+)')
match = pattern.findall('Miller  56, Arthur 66, Lewis 88, John#33')
print("Age values are: %s" % match)

Age values are: ['56', '66', '88']


In [57]:
pattern = re.compile (r'First Name: (\w+)')
match = pattern.findall ('First Name: Sabyasachi Age: 48')
print(match)

['Sabyasachi']


In [None]:
pattern = re.compile (r'Last Name: (\w+\s\D+)')
match = pattern.findall('Last Name: Sinéad O’Connor')
print(match)
pattern = re.compile (r'Last Name: (\S+\s\D+)')
match = pattern.findall('Last Name: Jean-Paul Sartre')
print(match)

['Sinéad O’Connor']
['Jean-Paul Sartre']


# 10.3. Matching Everything with the Dot Character

In [55]:
at_re = re.compile(r'.at')
match = at_re.findall('cat on the hat sat on the mat')
print(match)


['cat', 'hat', 'sat', 'mat']
