# Advanced Regular Expressions Lab

Complete the following set of exercises to solidify your knowledge of regular expressions.

In [2]:
import re

### 1. Use a regular expression to find and extract all vowels in the following text.

In [3]:
# Sample sentence for exercise 1 (vowel extraction).
text = "This is going to be a sentence with a good number of vowels in it."

In [4]:
# Option 1: re.findall() returns every match of the character class as a list.
re.findall(pattern="[aeiou]", string=text)

['i',
 'i',
 'o',
 'i',
 'o',
 'e',
 'a',
 'e',
 'e',
 'e',
 'i',
 'a',
 'o',
 'o',
 'u',
 'e',
 'o',
 'o',
 'e',
 'i',
 'i']

In [5]:
# Option 2: re.search() yields a match object (or None) whose text is read
# with .group(); here it is applied to each character of the text in turn.
vowels = [re.search("[aeiou]", char) for char in text]
# Characters that are not vowels produced None, so skip them (identity check).
[match.group() for match in vowels if match is not None]

['i',
 'i',
 'o',
 'i',
 'o',
 'e',
 'a',
 'e',
 'e',
 'e',
 'i',
 'a',
 'o',
 'o',
 'u',
 'e',
 'o',
 'o',
 'e',
 'i',
 'i']

In [6]:
# Option 3: compile the pattern once, then hand the compiled object to findall.
vocales = re.compile('[aeiou]')
re.findall(vocales, text)

['i',
 'i',
 'o',
 'i',
 'o',
 'e',
 'a',
 'e',
 'e',
 'e',
 'i',
 'a',
 'o',
 'o',
 'u',
 'e',
 'o',
 'o',
 'e',
 'i',
 'i']

### 2. Use a regular expression to find and extract all occurrences and forms (singular and plural) of the word "puppy" in the text below.

In [7]:
# Sample text for exercise 2; it contains both "puppy" and "puppies".
text = "The puppy saw all the rest of the puppies playing and wanted to join them. I saw this and wanted a puppy of my own!"

In [8]:
# Option 1: list both spellings explicitly as alternatives.
re.findall(pattern='puppy|puppies', string=text)

['puppy', 'puppies', 'puppy']

In [9]:
# Option 2: match the common stem "pupp" followed by one or more word
# characters. The raw string keeps \w from being read as a string escape.
re.findall(r'pupp\w+', text)

['puppy', 'puppies', 'puppy']

### 3. Use a regular expression to find and extract all tenses (present and past) of the word "run" in the text below.

In [10]:
# Sample sentence shared by exercises 3 and 4.
text = "I ran the relay race the only way I knew how to run it."

In [11]:
# Option 1: spell out both tenses as alternatives and collect each match.
[found.group() for found in re.finditer('run|ran', text)]

['ran', 'run']

In [12]:
# Option 2: keep the shared letters "r" and "n" fixed and let the middle
# letter be either "u" or "a".
re.compile('r[ua]n').findall(text)

['ran', 'run']

### 4. Use a regular expression to find and extract all words that begin with the letter "r" from the previous text.

In [13]:
# \b anchors the match at the start of a word, so a word beginning with "r" is
# found even when it opens the text; no capture group is needed to drop a space.
re.findall(r'\br\w*', text)

['ran', 'relay', 'race', 'run']

### 5. Use a regular expression to find and substitute the letter "i" for the exclamation marks in the text below.

In [14]:
# Sample sentence for exercise 5: every "i" has been replaced by "!".
text = "Th!s !s a sentence w!th spec!al characters !n !t."

In [15]:
# Replace every exclamation mark with the letter "i".
re.sub(pattern="[!]", repl="i", string=text)

'This is a sentence with special characters in it.'

### 6. Use a regular expression to find and extract words longer than 4 characters in the text below.

In [16]:
# Sample sentence for exercise 6 (words longer than four characters).
text = "This sentence has words of varying lengths."

In [17]:
# The {5,} quantifier asks for at least five consecutive word characters.
# The raw string keeps \w from being read as a string escape.
re.findall(r'\w{5,}', text)

['sentence', 'words', 'varying', 'lengths']

In [18]:
# Alternative: anchor the match on word boundaries (\b) so only whole words of
# five or more characters are returned; no surrounding whitespace or trailing
# punctuation is captured, so nothing has to be stripped afterwards.
for word in re.findall(r'\b\w{5,}\b', text):
    print(word)

sentence
words
varying
lengths


### 7. Use a regular expression to find and extract all occurrences of the letter "b", some letter(s), and then the letter "t" in the sentence below.

In [19]:
# Sample sentence for exercise 7; note "robot" (contains "bot") and "bat,".
text = "I bet the robot couldn't beat the other bot with a bat, but instead it bit me."

In [162]:
# "b", one or more word characters, then "t". Without word boundaries this also
# returns the "bot" inside "robot".
re.findall(r"b\w+t", text)

['bet', 'bot', 'beat', 'bot', 'bat', 'but', 'bit']

In [21]:
# Word boundaries (\b) keep the match to whole words: the "bot" inside "robot"
# is skipped, while "bat" is still found even though a comma follows it.
re.findall(r"\bb\w+t\b", text)

['bet', 'beat', 'bot', 'bat', 'but', 'bit']

### 8. Use a regular expression to find and extract all words that contain either "ea" or "eo" in them.

In [22]:
# Sample paragraph for exercise 8 (words containing "ea" or "eo").
text = "During many of the peaks and troughs of history, the people living it didn't fully realize what was unfolding. But we all know we're navigating breathtaking history: Nearly every day could be — maybe will be — a book."


In [23]:
# Any run of word characters that contains "e" followed by "a" or "o"; the
# character class [ao] replaces two otherwise identical alternatives.
re.findall(r"\w*e[ao]\w*", text)

['peaks', 'people', 'realize', 'breathtaking', 'Nearly']

### 9. Use a regular expression to find and extract all the capitalized words in the text below individually.

In [24]:
# Sample sentence shared by exercises 9 and 10 (capitalized words).
text = "Teddy Roosevelt and Abraham Lincoln walk into a bar."

In [25]:
# \b requires the capital letter to start the word (a capital in the middle of
# a word does not count); \w* also admits one-letter capitalized words.
re.findall(r"\b[A-Z]\w*", text)

['Teddy', 'Roosevelt', 'Abraham', 'Lincoln']

### 10. Use a regular expression to find and extract all the sets of consecutive capitalized words in the text above.

In [26]:
# A capitalized word followed by one or more further capitalized words, each
# separated by a single whitespace character, so runs of any length match whole.
# The group is non-capturing so findall() returns the full match.
re.findall(r"\b[A-Z]\w*(?:\s[A-Z]\w*)+", text)

['Teddy Roosevelt', 'Abraham Lincoln']

### 11. Use a regular expression to find and extract all the quotes from the text below.

*Hint: This one is a little more complex than the single quote example in the lesson because there are multiple quotes in the text.*

In [27]:
# Sample text for exercise 11; it contains two double-quoted quotations.
text = 'Roosevelt says to Lincoln, "I will bet you $50 I can get the bartender to give me a free drink." Lincoln says, "I am in!"'


In [28]:
# The lazy .*? stops at the nearest closing quote, so each quotation is
# matched separately instead of as one long span.
re.findall(pattern='".*?"', string=text)

['"I will bet you $50 I can get the bartender to give me a free drink."',
 '"I am in!"']

### 12. Use a regular expression to find and extract all the numbers from the text below.

In [29]:
# Sample text for exercise 12 (extracting numbers).
text = "There were 30 students in the class. Of the 30 students, 14 were male and 16 were female. Only 10 students got A's on the exam."


In [30]:
# One or more digits, so multi-digit numbers come back as a single match.
# The raw string keeps \d from being read as a string escape.
re.findall(r'\d+', text)

['30', '30', '14', '16', '10']

### 13. Use a regular expression to find and extract all the social security numbers from the text below.

In [31]:
# Sample records shared by exercises 13-15: each line holds a social security
# number and a phone number.
text = """
Henry's social security number is 876-93-2289 and his phone number is (847)789-0984.
Darlene's social security number is 098-32-5295 and her phone number is (987)222-0901.
"""

In [32]:
# Pattern: 3 digits, "-", 2 digits, "-", 4 digits.
# The raw string keeps \d from being read as a string escape.
re.findall(r'\d{3}-\d{2}-\d{4}', text)

['876-93-2289', '098-32-5295']

### 14. Use a regular expression to find and extract all the phone numbers from the text above.

In [33]:
# Pattern: 3 digits inside literal parentheses (escaped with a backslash),
# then 3 digits, "-", 4 digits. The raw string passes the backslashes to the
# regex engine unchanged.
re.findall(r'\(\d{3}\)\d{3}-\d{4}', text)

['(847)789-0984', '(987)222-0901']

### 15. Use a regular expression to find and extract all the formatted numbers (both social security and phone) from the text above.

In [34]:
# Either format: a social security number (###-##-####) or a phone number
# ((###)###-####). The raw string passes the backslashes to the regex engine
# unchanged.
re.findall(r'\d{3}-\d{2}-\d{4}|\(\d{3}\)\d{3}-\d{4}', text)

['876-93-2289', '(847)789-0984', '098-32-5295', '(987)222-0901']