## 9.1. What is Regex

In [1]:
import re

# Anchored pattern: the string must start with "p" and end with "y".
pattern = r'^p.*y$'
string_list = ["pathology", "biology", "geography", "psychology", "mathematics"]

# The loop variable is `word`, not `str`, so the built-in str type is not
# overwritten in the kernel namespace.
for word in string_list:
    result = re.match(pattern, word)
    if result:
        print(word)


pathology
psychology


## 9.2. Specifying Patterns Using Meta Characters

### Square Brackets []

In [2]:
import re

# Character class [ab]: the string must contain an "a" or a "b" somewhere.
pattern = r'.*[ab].*'
string_list = ["pathology", "nic", "jos", "biology", "geography", "psychology", "mathematics"]

# The loop variable is `word`, not `str`, so the built-in str type is not
# overwritten in the kernel namespace.
for word in string_list:
    result = re.match(pattern, word)
    if result:
        print(word)

pathology
biology
geography
mathematics


### Period (.)

In [3]:
import re

# Each period matches any single character, so five periods match the first
# five characters of any string that is at least five characters long.
pattern = r'.....'
string_list = ["pathology", "nic", "jos", "biology", "geography", "psychology", "mathematics"]

# The loop variable is `word`, not `str`, so the built-in str type is not
# overwritten in the kernel namespace.
for word in string_list:
    result = re.match(pattern, word)
    if result:
        print(word)

pathology
biology
geography
psychology
mathematics


### Caret (^) and Dollar ($)

In [4]:
import re

# ^ anchors the match at the start of the string, $ at the end:
# starts with "p", ends with "y".
pattern = r'^p.*y$'
string_list = ["pathology", "biology", "geography", "psychology", "mathematics"]

# The loop variable is `word`, not `str`, so the built-in str type is not
# overwritten in the kernel namespace.
for word in string_list:
    result = re.match(pattern, word)
    if result:
        print(word)


pathology
psychology


### Plus (+) and Question Mark (?)

In [5]:
import re

# g+ means one or more "g": matches strings containing "o", then at least
# one "g", then "y".
pattern = r'.*og+y'
string_list = ["pathology", "biology", "geography", "psychology", "mathematics"]

# The loop variable is `word`, not `str`, so the built-in str type is not
# overwritten in the kernel namespace.
for word in string_list:
    result = re.match(pattern, word)
    if result:
        print(word)

pathology
biology
psychology


In [6]:
import re

# t? means zero or one "t": matches strings containing "ah" or "ath".
pattern = r'.*at?h'
string_list = ["pathology", "biology", "geography", "psychology", "mathematics"]

# The loop variable is `word`, not `str`, so the built-in str type is not
# overwritten in the kernel namespace.
for word in string_list:
    result = re.match(pattern, word)
    if result:
        print(word)

pathology
mathematics


### Alternation (|) and Grouping ()

In [7]:
import re

# Two grouped alternatives separated by |: the string starts with "p",
# or it ends with "s".
pattern = r'(^p)|(.*s$)'
string_list = ["pathology", "biology", "geography", "psychology", "mathematics"]

# The loop variable is `word`, not `str`, so the built-in str type is not
# overwritten in the kernel namespace.
for word in string_list:
    result = re.match(pattern, word)
    if result:
        print(word)

pathology
psychology
mathematics


### Backslash

In [8]:
import re

# The backslash escapes the period so it matches a literal "." rather than
# any character. The raw string (r'...') hands the backslash to the regex
# engine untouched; in a normal string "\." is an invalid escape sequence
# that triggers a warning on recent Python versions.
pattern = r'25\.+.*'
# The original list had `"" "geography"`, an implicit string concatenation
# that evaluates to "geography"; it is written out directly here.
string_list = ["pathology", "biology", "25.245", "geography", "psychology", "mathematics"]

# The loop variable is `word`, not `str`, so the built-in str type is not
# overwritten in the kernel namespace.
for word in string_list:
    result = re.match(pattern, word)
    if result:
        print(word)

25.245


### Special Sequences

In [9]:
import re

# \A matches only at the very start of the string. The raw string keeps
# the backslash intact; "\A" in a normal string is an invalid escape
# sequence that triggers a warning on recent Python versions.
pattern = r'\Apat'
# The original list had `"" "geography"`, an implicit string concatenation
# that evaluates to "geography"; it is written out directly here.
string_list = ["pathology", "biology", "25.245", "geography", "psychology", "mathematics"]

# The loop variable is `word`, not `str`, so the built-in str type is not
# overwritten in the kernel namespace.
for word in string_list:
    result = re.match(pattern, word)
    if result:
        print(word)

pathology


In [10]:
import re

# \d matches a digit; \d+ matches a run of one or more digits.
# Raw string: "\d" in a normal string is an invalid escape sequence.
pattern = r'\d+'
# Named `text` rather than `str` so the built-in str type stays usable.
text = "This is 10, he is 20"
result = re.findall(pattern, text)
print(result)

['10', '20']


In [11]:
import re

# \D matches any non-digit character; \D+ matches a run of non-digits.
# Raw string: "\D" in a normal string is an invalid escape sequence.
pattern = r'\D+'
# Named `text` rather than `str` so the built-in str type stays usable.
text = "This is 10, he is 20"
result = re.findall(pattern, text)
print(result)

['This is ', ', he is ']


In [12]:
import re

# \w matches a word character (letter, digit or underscore); \w+ matches
# a run of them.
# Raw string: "\w" in a normal string is an invalid escape sequence.
pattern = r'\w+'
# Named `text` rather than `str` so the built-in str type stays usable.
text = "This is % 10 # he is 20"
result = re.findall(pattern, text)
print(result)

['This', 'is', '10', 'he', 'is', '20']


In [13]:
import re

# \W matches any non-word character; \W+ matches a run of them
# (here: spaces and the % and # symbols).
# Raw string: "\W" in a normal string is an invalid escape sequence.
pattern = r'\W+'
# Named `text` rather than `str` so the built-in str type stays usable.
text = "This is % 10 # he is 20"
result = re.findall(pattern, text)
print(result)

[' ', ' % ', ' # ', ' ', ' ']


## 9.3 Regular Expression Functions in Python

### 9.3.1. findall() 

In [14]:
import re

# findall() returns a list of every non-overlapping match of the pattern.
# Raw string: "\d" in a normal string is an invalid escape sequence.
pattern = r'\d+'
# Named `text` rather than `str` so the built-in str type stays usable.
text = "This is 10 he is 20 and the gate is 80"
result = re.findall(pattern, text)
print(result)

['10', '20', '80']


### 9.3.2. split() 

In [15]:
import re

# split() cuts the string at every match of the pattern and returns the
# pieces between the matches.
# Raw string: "\d" in a normal string is an invalid escape sequence.
pattern = r'\d+'
# Named `text` rather than `str` so the built-in str type stays usable.
text = "This is 10 he is 20 and the gate is 80."
result = re.split(pattern, text)
print(result)

['This is ', ' he is ', ' and the gate is ', '.']


### 9.3.3. sub() 

In [16]:
import re

# sub() returns a copy of the string with every match of the pattern
# replaced by the replacement text.
# Raw string: "\d" in a normal string is an invalid escape sequence.
pattern = r'\d+'
# Named `text` rather than `str` so the built-in str type stays usable.
text = "This is 10 he is 20 and the gate is 80."
new_str = re.sub(pattern, 'XX', text)
print(new_str)

This is XX he is XX and the gate is XX.


In [17]:
import re

# subn() works like sub() but returns a tuple:
# (new string, number of replacements made).
# Raw string: "\d" in a normal string is an invalid escape sequence.
pattern = r'\d+'
# Named `text` rather than `str` so the built-in str type stays usable.
text = "This is 10 he is 20 and the gate is 80."
# Despite its name, new_str holds the whole (string, count) tuple.
new_str = re.subn(pattern, 'XX', text)
print(new_str)

('This is XX he is XX and the gate is XX.', 3)


### 9.3.4. search()

In [18]:
import re

# search() scans the string and returns a Match object for the first
# location where the pattern matches (or None when there is no match).
# Raw string: "\d" in a normal string is an invalid escape sequence.
pattern = r'\d+'
# Named `text` rather than `str` so the built-in str type stays usable.
text = "This is 10 he is 20 and the gate is 80."
result = re.search(pattern, text)
print(result)

<re.Match object; span=(8, 10), match='10'>


## Exercise 9.1

**Question 1:**

The pattern '(\Ag)|(\d+)' matches

A. All the digits

B. The strings with characters `Ag`

C. Strings that start with `g` or which are digits

D. All of the above

**Answer: C**
    

**Question 2:**

In Python's `re` module, the function used to replace text that matches a pattern with another string is:

A. replace()

B. sub()

C. substitute()

D. rep()

**Answer: B**
    
    
**Question 3:**

The escape character used to escape metacharacters within a regex pattern is:

A. Forwardslash

B. Backslash

C. Alternation

D. Escape
    
**Answer: B**

## Exercise 9.2

Write a regular expression that returns all words from a list that contain a caret (^) symbol, a dollar sign ($), or a plus sign (+).

**Solution:**

In [19]:
import re

# Three alternatives, each matching a string that contains one escaped
# metacharacter: a literal "^", a literal "$", or a literal "+".
# Raw string: "\^", "\$" and "\+" in a normal string are invalid escape
# sequences that trigger a warning on recent Python versions.
pattern = r'(.*\^.*)|(.*\$.*)|(.*\+.*)'
# The original list had `"" "geogr^aphy"`, an implicit string concatenation
# that evaluates to "geogr^aphy"; it is written out directly here.
string_list = ["path+ology", "biolo$gy", "25", "geogr^aphy", "psychology", "mathematics"]

# The loop variable is `word`, not `str`, so the built-in str type is not
# overwritten in the kernel namespace.
for word in string_list:
    result = re.match(pattern, word)
    if result:
        print(word)

path+ology
biolo$gy
geogr^aphy
