# Advanced Regular Expressions Lab

Complete the following set of exercises to solidify your knowledge of regular expressions.

In [1]:
import re

### 1. Use a regular expression to find and extract all vowels in the following text.

In [2]:
# Sample sentence for exercise 1; the next cell searches it for vowels.
text = "This is going to be a sentence with a good number of vowels in it."

In [3]:
# Character class listing the vowels in both cases, so a vowel that starts
# a sentence or a proper noun is not silently skipped.
pattern_vowels = r'[aeiouAEIOU]'

# findall returns every non-overlapping match in order of appearance.
vowels = re.findall(pattern_vowels, text)
vowels

['i',
 'i',
 'o',
 'i',
 'o',
 'e',
 'a',
 'e',
 'e',
 'e',
 'i',
 'a',
 'o',
 'o',
 'u',
 'e',
 'o',
 'o',
 'e',
 'i',
 'i']

### 2. Use a regular expression to find and extract all occurrences and forms (singular and plural) of the word "puppy" in the text below.

In [5]:
# Sample text for exercise 2; contains "puppy" twice and "puppies" once.
text_p = "The puppy saw all the rest of the puppies playing and wanted to join them. I saw this and wanted a puppy of my own!"

In [6]:
# Match only the two real forms of the noun: "puppy" and "puppies".
# The word boundaries keep the match to a whole word, and the explicit
# alternation avoids over-matching such as "pupil" or "pup is", which a
# lazy wildcard after "pup" would accept.
pattern_pup = r'\bpupp(?:y|ies)\b'

# The group is non-capturing, so findall returns the full matched words.
pup = re.findall(pattern_pup, text_p)
pup

['puppy', 'puppies', 'puppy']

### 3. Use a regular expression to find and extract all tenses (present and past) of the word "run" in the text below.

In [7]:
# Sample sentence shared by exercises 3 and 4 (both cells search text_r).
text_r = "I ran the relay race the only way I knew how to run it."

In [8]:
# Present ("run") and past ("ran") differ only in the vowel, so restrict the
# middle character to "a" or "u". The word boundaries stop the pattern from
# firing inside longer words such as "orange" or "brunch".
pattern_run = r'\br[au]n\b'

run = re.findall(pattern_run, text_r)
run

['ran', 'run']

### 4. Use a regular expression to find and extract all words that begin with the letter "r" from the previous text.

In [9]:
# Whole words that start with a lowercase "r": boundary, "r", one or more
# lowercase letters, boundary.
pattern_r = r'\br[a-z]+\b'

# The pattern has no groups, so each match's group() is exactly the string
# findall would have produced for it.
r_words = [match.group() for match in re.finditer(pattern_r, text_r)]
r_words

['ran', 'relay', 'race', 'run']

### 5. Use a regular expression to find and substitute the letter "i" for the exclamation marks in the text below.

In [10]:
# Sample sentence for exercise 5; every "i" has been replaced by "!".
text_i = "Th!s !s a sentence w!th spec!al characters !n !t."

In [11]:
# Replace every exclamation mark in text_i with the letter "i", using the
# compiled-pattern form of sub.
substitute_i = re.compile(r'[!]').sub("i", text_i)
substitute_i

'This is a sentence with special characters in it.'

### 6. Use a regular expression to find and extract words longer than 4 characters in the text below.

In [12]:
# Sample sentence for exercise 6; mixes words shorter and longer than 4 characters.
text_4 = "This sentence has words of varying lengths."

In [13]:
# Runs of five or more word characters, i.e. words longer than 4 characters.
pattern_4 = r'\w{5,}'

# Compile first, then gather all non-overlapping matches in order.
character_4 = re.compile(pattern_4).findall(text_4)
character_4

['sentence', 'words', 'varying', 'lengths']

### 7. Use a regular expression to find and extract all occurrences of the letter "b", some letter(s), and then the letter "t" in the sentence below.

In [14]:
# Sample sentence for exercise 7; contains several "b...t" letter sequences.
text_bt = "I bet the robot couldn't beat the other bot with a bat, but instead it bit me."

In [15]:
# "b", then one or more letters (lazy), then a "t" that ends a word.
# Restricting the middle to letters keeps a match inside a single word; a
# wildcard there could run across spaces and punctuation (for example
# "bar that" would be returned as one hit).
# There is deliberately no leading boundary, so the "bot" inside "robot"
# still counts as an occurrence.
pattern_bt = r'b[A-Za-z]+?t\b'

words_bt = re.findall(pattern_bt, text_bt)
words_bt

['bet', 'bot', 'beat', 'bot', 'bat', 'but', 'bit']

### 8. Use a regular expression to find and extract all words that contain either "ea" or "eo" in them.

In [16]:
# Sample text for exercise 8; includes words containing "ea" and "eo".
text_e = "During many of the peaks and troughs of history, the people living it didn't fully realize what was unfolding. But we all know we're navigating breathtaking history: Nearly every day could be — maybe will be — a book."

In [28]:
# A whole word that contains "ea" or "eo" anywhere: optional letters, the
# digraph, optional letters, all between word boundaries.
# The letters on both sides are optional so that words which start with the
# digraph ("eat"), end with it ("sea"), or have a single letter after it
# ("beat") are matched too.
pattern_e = r'\b[A-Za-z]*(?:ea|eo)[A-Za-z]*\b'

# The group is non-capturing, so findall returns the complete words.
words_e = re.findall(pattern_e, text_e)
words_e

['peaks', 'people', 'realize', 'breathtaking', 'Nearly']

### 9. Use a regular expression to find and extract all the capitalized words in the text below individually.

In [17]:
# Sample sentence shared by exercises 9 and 10; contains two two-word capitalized names.
text_capitalized = "Teddy Roosevelt and Abraham Lincoln walk into a bar."

In [18]:
# One uppercase letter followed by one or more lowercase letters.
pattern_capitalized = r'[A-Z][a-z]+'

# No groups in the pattern, so group() of each match equals the findall item.
capitalized_words = [
    match.group() for match in re.finditer(pattern_capitalized, text_capitalized)
]
capitalized_words

['Teddy', 'Roosevelt', 'Abraham', 'Lincoln']

### 10. Use a regular expression to find and extract all the sets of consecutive capitalized words in the text above.

In [19]:
# A capitalized word followed by one or more further capitalized words, each
# separated by a single whitespace character.
# The repeated group already requires a second capitalized word, so no
# lookahead is needed; and with no capturing group findall returns the
# whole match directly.
pattern_capitalized2 = r'[A-Z][a-z]+(?:\s[A-Z][a-z]+)+'

capitalized_words2 = re.findall(pattern_capitalized2, text_capitalized)
capitalized_words2

['Teddy Roosevelt', 'Abraham Lincoln']

### 11. Use a regular expression to find and extract all the quotes from the text below.

*Hint: This one is a little more complex than the single quote example in the lesson because there are multiple quotes in the text.*

In [20]:
# Sample text for exercise 11; single-quoted so the double-quoted passages need no escaping.
text_quotes = 'Roosevelt says to Lincoln, "I will bet you $50 I can get the bartender to give me a free drink." Lincoln says, "I am in!"'

In [21]:
# An opening double quote, one or more characters that are not a double
# quote, then the closing double quote.
# The negated class stops at the first closing quote without relying on lazy
# matching, lets a quotation begin with any character (for example "$50!"),
# accepts one-character quotations, and can span line breaks.
pattern_quotes = r'"[^"]+"'

quotes = re.findall(pattern_quotes, text_quotes)
quotes

['"I will bet you $50 I can get the bartender to give me a free drink."',
 '"I am in!"']

### 12. Use a regular expression to find and extract all the numbers from the text below.

In [22]:
# Sample text for exercise 12; contains five whole numbers.
text_numbers = "There were 30 students in the class. Of the 30 students, 14 were male and 16 were female. Only 10 students got A's on the exam."

In [23]:
# One or more digits that end at a word boundary.
pattern_numbers = r'\d+\b'

# Compile first, then gather all non-overlapping matches in order.
numbers = re.compile(pattern_numbers).findall(text_numbers)
numbers

['30', '30', '14', '16', '10']

### 13. Use a regular expression to find and extract all the social security numbers from the text below.

In [24]:
# Sample text shared by exercises 13-15. Each line holds one social security
# number (ddd-dd-dddd) and one phone number ((ddd)ddd-dddd). The string
# begins and ends with a newline because of the triple-quote layout.
text_SSN = """
Henry's social security number is 876-93-2289 and his phone number is (847)789-0984.
Darlene's social security number is 098-32-5295 and her phone number is (987)222-0901.
"""

In [25]:
# Social security number layout: exactly 3 digits, 2 digits, 4 digits,
# separated by single hyphens and bounded on both sides.
# Fixed counts keep other hyphenated digit strings (for example a
# 555-123-4567 style phone number, or digits joined by repeated hyphens)
# from being reported as SSNs.
pattern_SSN = r'\b\d{3}-\d{2}-\d{4}\b'

SSN = re.findall(pattern_SSN, text_SSN)
SSN

['876-93-2289', '098-32-5295']

### 14. Use a regular expression to find and extract all the phone numbers from the text above (`text_SSN`).

In [26]:
# Phone number layout used in text_SSN: a 3-digit area code in parentheses,
# 3 digits, a hyphen, then 4 digits ending at a word boundary.
# Fixed digit counts are required; allowing zero digits in each group would
# let degenerate strings such as "()-" count as a phone number.
pattern_phone = r'\(\d{3}\)\d{3}-\d{4}\b'

phone_numbers = re.findall(pattern_phone, text_SSN)
phone_numbers

['(847)789-0984', '(987)222-0901']

### 15. Use a regular expression to find and extract all the formatted numbers (both social security and phone) from the text above (`text_SSN`).

In [27]:
# Either of the two formats, spelled out explicitly:
#   (ddd)ddd-dddd   phone number
#   ddd-dd-dddd     social security number
# Naming each layout avoids accepting arbitrary hyphenated digit runs
# (for example "2020-05"), which a pattern built from optional parentheses
# and unbounded digit groups would match.
pattern_SSN_phone = r'\(\d{3}\)\d{3}-\d{4}\b|\b\d{3}-\d{2}-\d{4}\b'

# There are no capturing groups, so findall returns the full matches in the
# order they occur in the text.
SSN_phone = re.findall(pattern_SSN_phone, text_SSN)
SSN_phone

['876-93-2289', '(847)789-0984', '098-32-5295', '(987)222-0901']