# Regular Expressions

In [1]:
import re

## re.search

In [2]:
# re.search scans the text for the first place the pattern occurs and returns
# a Match object, or None when the pattern is not found anywhere.
result_search = re.search(pattern="pattern", string=r"string containing pattern")
print(result_search)

<re.Match object; span=(18, 25), match='pattern'>


## re.sub

In [None]:
# re.sub returns a copy of the text in which every match of the pattern has
# been replaced; here the short spelling "sara" becomes "sarah".
string = r"sara was able to help me find the items i needed quickly"
new_string = re.sub(pattern=r"sara", repl=r"sarah", string=string)

print(new_string)

sarah was able to help me find the items i needed quickly


## Regex Syntax

In [7]:
# Sample customer reviews that the regex examples in this notebook search through.
customer_reviews = [
    "sam was a great help to me in the store",
    "the cashier was very rude to me, I think her name was eleanor",
    "amazing work from sadeen!",
    "sarah was able to help me find the items i needed quickly",
    "lucy is such a great addition to the team",
    "great service from sara she found me what i wanted",
]

In [None]:
# Find only sarah's reviews but account for the spelling of sara

# The ? applies to the character right before it, so `h?` makes the final "h"
# optional and the pattern matches both "sarah" and "sara".
# The \b word boundaries on each side stop it from also matching longer words
# that merely contain those letters (e.g. "sarandon").
pattern_to_find = r"\bsarah?\b"

# A comprehension keeps the filter in a single expression and does not leave
# a loop variable behind in the notebook namespace.
sarahs_reviews = [
    review for review in customer_reviews if re.search(pattern_to_find, review)
]

print(sarahs_reviews)

['sarah was able to help me find the items i needed quickly', 'great service from sara she found me what i wanted']


In [15]:
# Find reviews that start with the letter a

# The ^ operator anchors the match to the start of the string.
pattern_to_find = r"^a"

# Keep every review in which the pattern is found; the comprehension does not
# leave a loop variable behind in the notebook namespace.
a_reviews = [
    review for review in customer_reviews if re.search(pattern_to_find, review)
]

print(a_reviews)

['amazing work from sadeen!']


In [16]:
# Find reviews that end with the letter y

# The $ operator anchors the match to the end of the string
# (or just before a single trailing newline).
pattern_to_find = r"y$"

# Keep every review in which the pattern is found; the comprehension does not
# leave a loop variable behind in the notebook namespace.
y_reviews = [
    review for review in customer_reviews if re.search(pattern_to_find, review)
]

print(y_reviews)

['sarah was able to help me find the items i needed quickly']


In [17]:
# Find reviews that contain the words needed or wanted

# The pipe operator | means OR: the group matches either "need" or "want",
# and the "ed" after the group is required in both cases.
pattern_to_find = r"(need|want)ed"

# Keep every review in which the pattern is found; the comprehension does not
# leave a loop variable behind in the notebook namespace.
needwant_reviews = [
    review for review in customer_reviews if re.search(pattern_to_find, review)
]

print(needwant_reviews)

['sarah was able to help me find the items i needed quickly', 'great service from sara she found me what i wanted']


In [19]:
# Remove anything from the review that isn't a word character or whitespace (i.e. remove punctuation)

# [^ ] is a negated character class ("anything except these"), \w is a word
# character (letters, digits and the underscore) and \s is whitespace, so the
# pattern matches any single character that is neither.
# Note that underscores count as word characters and are therefore kept.
pattern_to_find = r"[^\w\s]"

# Replace every match with the empty string, one review at a time.
no_punct_reviews = [
    re.sub(pattern_to_find, "", review) for review in customer_reviews
]

print(no_punct_reviews)

['sam was a great help to me in the store', 'the cashier was very rude to me I think her name was eleanor', 'amazing work from sadeen', 'sarah was able to help me find the items i needed quickly', 'lucy is such a great addition to the team', 'great service from sara she found me what i wanted']
