# Pattern Matching with Regular Expressions (REGEX)

In [1]:
import re

#REGEX is tricky.  There are several guides and tools online.

In [29]:
#Collect every word in the sentence that has exactly five characters
text = "The quick brown fox jumps over the lazy dog"
pattern = r"\b\w{5}\b"  #Five word characters enclosed by word boundaries
#The pattern has no groups, so each match's full text is the word itself
result = [match.group(0) for match in re.finditer(pattern, text)]
print(result)

['quick', 'brown', 'jumps']


In [31]:
#Five-character word search again, this time through a compiled pattern object
text = "The quick brown fox jumps over the lazy dog"
pattern = r"\b\w{5}\b"  #\w{5} = exactly five word characters; \b anchors both ends
result = re.compile(pattern).findall(text)
print(result)

['quick', 'brown', 'jumps']


In [33]:
#List the words beginning with "br", ignoring letter case
text = "Brent broke the broom while sweeping in a breeze"
pattern = r"\bbr\w*\b"  #Word boundary, "br", then any run of word characters
#IGNORECASE lets the lowercase "br" in the pattern also match "Br"
br_regex_flags = re.IGNORECASE
words_starting_with_br = re.compile(pattern, br_regex_flags).findall(text)
print(words_starting_with_br)

['Brent', 'broke', 'broom', 'breeze']


In [35]:
#Substitute every occurrence of a pattern with a replacement string
text = "Hello, world!"
pattern = r"world"
replacement = "Python"
#Pattern.sub replaces all non-overlapping matches, like re.sub
result = re.compile(pattern).sub(replacement, text)
print(result)

Hello, Python!


In [37]:
#Pattern validation
def is_valid_email(email: str) -> bool:
    """Return True when the whole string looks like an email address.

    The check is a simple shape test, not full RFC validation: a local part
    of letters, digits and ``._%+-``, a single ``@``, a domain of letters,
    digits, dots and hyphens, and a final dot followed by at least two
    letters.

    Args:
        email: The candidate string; it must match in its entirety.

    Returns:
        True if ``email`` matches the pattern, False otherwise.
    """
    #Inside a character class "|" is a literal pipe, not alternation, so the
    #top-level domain class is [A-Za-z]; [A-Z|a-z] would also accept "|".
    pattern = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"
    #fullmatch requires the pattern to cover the string from start to end
    return re.fullmatch(pattern, email) is not None

#Two sample addresses: a well-formed one and one with a doubled "@"
email1 = "example@example.com"
email2 = "example@@example.com"

#Report the validation verdict for each sample, in order
for candidate in (email1, email2):
    verdict = "Valid email" if is_valid_email(candidate) else "Invalid email"
    print(verdict)

Valid email
Invalid email


In [39]:
#Pull every email address out of a sentence
text = "Contact us at info@example.com or support@example.org"

#Inside [...] the "|" is a literal character, not alternation, so the
#top-level domain class is [A-Za-z]; [A-Z|a-z] would also accept a pipe there.
email_pattern = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"

#The pattern has no capture groups, so findall returns each full match
email_addresses = re.findall(email_pattern, text)

for email_address in email_addresses:
    print(email_address)

info@example.com
support@example.org


In [41]:
#Sample contact list: one "Name: number" entry per line
text = """
John: 123-456-7890
Jane: (987) 654-3210
Alice: 555-1234
Bob: 888-9999
"""

#Optional three-digit area code, then three digits and four digits; each of
#the first two parts may be followed by "-" or ".". Parentheses are not part
#of the pattern, so for "(987) 654-3210" only "654-3210" is matched.
phone_pattern = r"\b(?:\d{3}[-.]?)?\d{3}[-.]?\d{4}\b"

#The only group is non-capturing, so each match's full text is the number
phone_numbers = [found.group() for found in re.finditer(phone_pattern, text)]

for phone_number in phone_numbers:
    print(phone_number)

123-456-7890
654-3210
555-1234
888-9999


In [43]:
#Searching a text file for important information
#Regular expressions for email, phone number, credit card number, and social security number
#Email: the top-level domain class is [A-Za-z]. Inside [...] a "|" is a literal
#pipe, not alternation, so [A-Z|a-z] would also accept "|" in that position.
email_pattern = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"
#Phone: optional 3-digit area code, then 3 and 4 digits, "-" or "." separators optional
phone_pattern = r"\b(?:\d{3}[-.]?)?\d{3}[-.]?\d{4}\b"
#Credit card: 13 to 16 digits, each optionally followed by spaces or hyphens
credit_card_pattern = r"\b(?:\d[ -]*?){13,16}\b"
#Social security number: digits grouped 3-2-4 and separated by hyphens
ssn_pattern = r"\b(?:\d{3}-\d{2}-\d{4})\b"

#Open the text file for reading
with open("regex.txt", "r") as file:
    text = file.read()

#Search for patterns in the text; set() drops repeated matches
email_addresses = set(re.findall(email_pattern, text))
phone_numbers = set(re.findall(phone_pattern, text))
credit_card_numbers = set(re.findall(credit_card_pattern, text))
social_security_numbers = set(re.findall(ssn_pattern, text))

#Print the results
print("Email addresses:", email_addresses)
print("Phone numbers:", phone_numbers)
print("Credit card numbers:", credit_card_numbers)
print("Social security numbers:", social_security_numbers)

Email addresses: {'tim@apple.com', 'elon@x.com', 'robertwrobel49@webster.edu'}
Phone numbers: {'123-456-7890', '654-3210', '555-1234', '888-9999'}
Credit card numbers: {'1234 5678 9101 1121'}
Social security numbers: {'492-91-1234'}
