In [2]:
def isPhoneNumber(text):
    if len(text) != 12:
        return False
    for i in range(0, 3):
        if not text[i].isdecimal():
            return False
    if text[3] != '-':
        return False
    for i in range(4,7):
        if not text[i].isdecimal():
            return False
    if text[7] != '-':
        return False
    for i in range(8, 12):
        if not text[i].isdecimal():
            return False
    return True

print('Is 415-555-4242 a phone number?')
print(isPhoneNumber('415-555-4242'))
print('Is Moshi moshi a phone number?')
print(isPhoneNumber('Moshi moshi'))

Is 415-555-4242 a phone number?
True
Is Moshi moshi a phone number?
False


In [3]:
message = 'Call me at 415-555-1011 tomorrow. 415-555-9999 is my office.'
for i in range(len(message)):
    chunk = message[i:i+12]
    if isPhoneNumber(chunk):
        print('Phone number found: ' + chunk)
print('Done')

Phone number found: 415-555-1011
Phone number found: 415-555-9999
Done


In [4]:
import re
phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-d\d\d\d')

In [7]:
import re

phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d') 
mo = phoneNumRegex.search('My number is 415-555-4242.')

if mo:
    print('Phone number found: ' + mo.group())
else:
    print('No phone number found.')


Phone number found: 415-555-4242


In [8]:
import re

phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
mo = phoneNumRegex.search('Call me at 212-999-1234 tomorrow.')

if mo:
    print('Phone number found: ' + mo.group())
else:
    print('No phone number found.')


Phone number found: 212-999-1234


In [9]:
mo = phoneNumRegex.search('My number is (212) 999-1234.')

if mo:
    print('Phone number found: ' + mo.group())
else:
    print('No phone number found.')


No phone number found.


In [10]:
mo = phoneNumRegex.search('Numbers: 415-555-1010, 212-999-1234')

if mo:
    print('Phone number found: ' + mo.group())
else:
    print('No phone number found.')

# .search() returns only the first match. Even though there are two valid phone numbers, 
# only the first (415-555-1010) is captured.

Phone number found: 415-555-1010


In [14]:
import re

# Updated regex to match both formats: 415-555-4242 and (415) 555-4242
phoneNumRegex = re.compile(r'\(?\d{3}\)?[-\s]?\d{3}-\d{4}')

text = '''Call me at 415-555-1010 or (212) 999-1234. Office: 
646 888-9999 is not valid for this pattern.'''

matches = phoneNumRegex.findall(text)

if matches:
    print('Phone numbers found:')
    for match in matches:
        print(match)
else:
    print('None phone numbers found')

Phone numbers found:
415-555-1010
(212) 999-1234
646 888-9999


In [17]:
import re

# Flexible regex to match various phone number formats
phoneNumRegex = re.compile(
    r'''(
        (\+?\d{1,2}[-.\s]?)?         # optional country code, e.g., +1-
        (\(?\d{3}\)?[-.\s]?)         # area code with or without parentheses
        \d{3}[-.\s]?                 # first 3 digits
        \d{4}                        # last 4 digits
    )''', re.VERBOSE)

text = '''
Here are some numbers:
415-555-4242
(212) 555 1212
+1-646-555-9999
+14155551234
123.456.7890
Not a number: 555-abc-1234
'''

matches = phoneNumRegex.findall(text)

if matches:
    print('Phone numbers found:')
    for match in matches:
        print(match[0]) # match[0] is the full match string
else:
    print('No phone numbers found')

Phone numbers found:
415-555-4242
(212) 555 1212
+1-646-555-9999
+14155551234
123.456.7890


In [1]:
import re

# Regex pattern to match different phone number formats
phoneNumRegex = re.compile(
    r'''(
        (\+?\d{1,2}[-.\s]?)?      # optional country code
        (\(?\d{3}\)?[-.\s]?)      # area code
        \d{3}[-.\s]?
        \d{4}
    )''', re.VERBOSE)

text = '''
Here are some numbers:
415-555-4242
(212) 555 1212
+1-646-555-9999
+14155551234
123.456.7890
Not a number: 555-abc-1234
'''

matches = phoneNumRegex.findall(text)

if matches:
    print('Phone numbers found')
    for match in matches:
        raw_number = match[0] # full matched string
        digits_only = re.sub(r'\D', '', raw_number) # remove non-digits
        print(f'{raw_number} -> {digits_only}')
else:
    print('No phone numbers found')
    

Phone numbers found
415-555-4242 -> 4155554242
(212) 555 1212 -> 2125551212
+1-646-555-9999 -> 16465559999
+14155551234 -> 14155551234
123.456.7890 -> 1234567890


In [4]:
import re

def extract_phone_numbers(text):
    phoneNumRegex = re.compile(
        r'''(
            (\+?\d{1,2}[-.\s]?)?          # Country code
            (\(?\d{3}\)?|\b\d{3})[-.\s]?  # Area code with word boundary
            \d{3}[-.\s]?
            \d{4}
            \b                            # Word boundary
        )''', re.VERBOSE)
    
    matches = phoneNumRegex.findall(text)
    return [re.sub(r'\D', '', match[0]) for match in matches if match[0].strip()]
    
text = '''
Call me at 415-555-4242 or (212) 555 1212.
My backup is +1-646-555-9999 or +14155551234.
'''

numbers = extract_phone_numbers(text)

for number in numbers:
    print(number)

4155554242
2125551212
16465559999
14155551234


In [5]:
import re

def extract_valid_phone_numbers(text):
    phone_regex = re.compile(r'''
        \b  # Word boundary
        (
            (?:\+?\d{1,4}[-.\s]?)?      # Optional country code
            (?:\(?\d{2,5}\)?[-.\s]?)    # Area code
            \d{3}[-.\s]?\d{4}           # Standard 3+4 local number
            |                           # OR
            \d{4}[-.\s]?\d{3}[-.\s]?\d{3}  # UK-style 4+3+3
        )
        \b  # Word boundary
    ''', re.VERBOSE)

    matches = phone_regex.findall(text)
    valid_numbers = []
    
    for number in matches:
        digits = re.sub(r'\D', '', number)
        # Basic validation
        if 10 <= len(digits) <= 15:  # Reasonable length check
            valid_numbers.append(digits)
    
    return valid_numbers

text = '''
Call me at 415-555-4242 or (212) 999-8888.
My UK number is +44 20 7946 0958.
Office: 213.555.6789.
Invalid: 123-456-789, 5551234567.
'''

valid_numbers = extract_valid_phone_numbers(text)
for number in valid_numbers:
    print(number)

4155554242
2129998888
2135556789
5551234567


In [34]:
import re

# Sample text with different phone number formats
text = '''
Call me at 415-555-4242 tomorrow.
Or at (212) 999-8888 on Friday.
My UK number is +44 20 7946 0958.
Office line: 213.555.6789.
'''

# Corrected Verbose regex pattern for flexible phone number formats
phone_regex = re.compile(r'''
    (
        (\+?\d{1,4}[-.\s]?)?      # Optional country code e.g. +44 or 1
        (\(?\d{2,5}\)?[-.\s]?)    # Area code: 2 to 5 digits
        \d{3,4}[-.\s]?\d{3,4}     # Local number with 3 to 4 digits in both parts
    )
''', re.VERBOSE)

# Find all matches
matches = phone_regex.findall(text)

# Extract just the full matched phone number string (first item in each tuple)
raw_phone_numbers = [match[0] for match in matches]

# Strip non-digit characters
digit_only_numbers = [re.sub(r'\D', '', number) for number in raw_phone_numbers]

# Output results
print("Raw Matches:")
for number in raw_phone_numbers:
    print(f"- {number}")

print("\nDigit-Only Matches:")
for number in digit_only_numbers:
    print(f"- {number}")


Raw Matches:
- 415-555-4242
- (212) 999-8888
- +44 20 7946 0958
- 213.555.6789

Digit-Only Matches:
- 4155554242
- 2129998888
- 442079460958
- 2135556789


In [6]:
text1 = "My German number is +49 30 901820 and my French number is +33 1 23 45 67 89"

# Corrected Verbose regex pattern for flexible phone number formats
phone_regex = re.compile(r'''
    (
        (\+?\d{1,4}[-.\s]?)?            # Optional country code
        (\(?\d{1,5}\)?[-.\s]?)          # Area code
        (\d{2,4}[-.\s]?){2,5}           # 2 to 5 groups of 2–4 digits
    )
''', re.VERBOSE)

# Find all matches
matches = phone_regex.findall(text1)

# Extract just the full matched phone number string (first item in each tuple)
raw_phone_numbers = [match[0] for match in matches]

# Strip non-digit characters
digit_only_numbers = [re.sub(r'\D', '', number) for number in raw_phone_numbers]

# Output results
print("Raw Matches:")
for number in raw_phone_numbers:
    print(f"- {number}")

print("\nDigit-Only Matches:")
for number in digit_only_numbers:
    print(f"- {number}")

Raw Matches:
- +49 30 901820 
- +33 1 23 45 67 89

Digit-Only Matches:
- 4930901820
- 33123456789


In [8]:
import re

# List of 5 example strings with phone numbers in different formats
texts = [
    "Call me at +1 (415) 555-2671",
    "Office: 020 7946 0958",
    "My old number was +91-22-1234-5678",
    "Reach me at 0033 1 45 67 89 10",
    "Support: (800)555.1234 or (800) 555 5678"
]

# Regex pattern for flexible international phone number formats
phone_regex = re.compile(r'''
    (
        (\+?\d{1,4}[-.\s]?)?   # Optional country code
        (\(?\d{1,5}\)?[-.\s]?) # Area code with optional parenthesis
        (\d{2,4}[-.\s]?){2,5}  # 2 to 5 groups of 2-4 digits
    )
''', re.VERBOSE)

# Process each example string
for i, text in enumerate(texts, start = 1):
    matches = phone_regex.findall(text)
    raw_phone_numbers = [match[0] for match in matches]
    digit_only_numbers = [re.sub(r'\D', '', number) for number in raw_phone_numbers]

    print(f"\nExample {i}:{text}")
    print("Raw Matches:")
    for number in raw_phone_numbers:
        print(f"- {number}")
    print("Digit-Only Matches:")
    for number in digit_only_numbers:
        print(f"- {number}")


Example 1:Call me at +1 (415) 555-2671
Raw Matches:
- +1 (415) 555-2671
Digit-Only Matches:
- 14155552671

Example 2:Office: 020 7946 0958
Raw Matches:
- 020 7946 0958
Digit-Only Matches:
- 02079460958

Example 3:My old number was +91-22-1234-5678
Raw Matches:
- +91-22-1234-5678
Digit-Only Matches:
- 912212345678

Example 4:Reach me at 0033 1 45 67 89 10
Raw Matches:
- 0033 1 45 67 89 10
Digit-Only Matches:
- 0033145678910

Example 5:Support: (800)555.1234 or (800) 555 5678
Raw Matches:
- (800)555.1234 
- (800) 555 5678
Digit-Only Matches:
- 8005551234
- 8005555678


In [11]:
import re

texts = [
    "Emergency hotline: +61 3 9123 4567",
    "Client number: 0044 20 7946 1234",
    "Text me at 555-678-9012 (US)",
    "Main line: (02) 9876 5432",
    "International: +81-3-1234-5678 ext. 99"
]

phone_regex = re.compile(r'''
    (
    (\+?\d{1,4}[-.\s]?)?    # Optional country code
    (\(?\d{1,5}\)?[-.\s]?)  # Area code
    (\d{2,4}[-.\s]?){2,5}   # 2 to 5 groups of 2–4 digits
    )
''', re.VERBOSE)

for i, text in enumerate(texts, start = 1):
    matches = phone_regex.findall(text)
    raw_phone_numbers = [match[0] for match in matches]
    digit_only_numbers = [re.sub(r'\D','', number) for number in raw_phone_numbers]

    print(f"\nExample {i}:{text}")
    print("Raw matches:")
    for number in raw_phone_numbers:
        print(f"- {number}")
    print("Digit-Only Matches:")
    for number in digit_only_numbers:
        print(f"- {number}")
        


Example 1:Emergency hotline: +61 3 9123 4567
Raw matches:
- +61 3 9123 4567
Digit-Only Matches:
- 61391234567

Example 2:Client number: 0044 20 7946 1234
Raw matches:
- 0044 20 7946 1234
Digit-Only Matches:
- 00442079461234

Example 3:Text me at 555-678-9012 (US)
Raw matches:
- 555-678-9012 
Digit-Only Matches:
- 5556789012

Example 4:Main line: (02) 9876 5432
Raw matches:
- (02) 9876 5432
Digit-Only Matches:
- 0298765432

Example 5:International: +81-3-1234-5678 ext. 99
Raw matches:
- +81-3-1234-5678 
Digit-Only Matches:
- 81312345678


In [13]:
texts = [
    "Call center (India): +91-22-1234-5678",
    "Helpdesk: (021) 555 1234 ext. 204",
    "Office line: +1 (415) 555.2671 x99"
]

phone_regex = re.compile(r'''
    (
        (\+?\d{1,4}[-.\s]?)?    # (1) Optional country code, e.g. +91
        (\(?\d{1,5}\)?[-.\s]?)  # (2) Area code, e.g. (415)
        (\d{2,4}[-.\s]?){2,5}   # (3) 2-5 digit groups, e.g. 1234-5678
    )
''', re.VERBOSE)

for i, text in enumerate(texts, start=1):
    matches = phone_regex.findall(text)
    raw_phone_numbers = [match[0] for match in matches]
    digit_only_numbers = [re.sub(r'\D', '', number) for number in raw_phone_numbers]

    print(f"\nExample {i}: {text}")
    print("Raw Matches:")
    for number in raw_phone_numbers:
        print(f"- {number}")
    print("Digit-Only Matches:")
    for number in digit_only_numbers:
        print(f"- {number}")


Example 1: Call center (India): +91-22-1234-5678
Raw Matches:
- +91-22-1234-5678
Digit-Only Matches:
- 912212345678

Example 2: Helpdesk: (021) 555 1234 ext. 204
Raw Matches:
- (021) 555 1234 
Digit-Only Matches:
- 0215551234

Example 3: Office line: +1 (415) 555.2671 x99
Raw Matches:
- +1 (415) 555.2671 
Digit-Only Matches:
- 14155552671


In [38]:
phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
mo = phoneNumRegex.search('My number is 415-555-4242.')
print('Phone number found: ' + mo.group())

Phone number found: 415-555-4242


In [40]:
import re

# Step 1: create Regex object
phone_regex = re.compile(r'\d{3}-\d{3}-\d{4}') # Matches 123-456-7890

# Step 2: Search a string
text = "Call me at 415-555-4242 or 212-999-8888."
match = phone_regex.search(text) # Returns first match

# Step 3: Extract matched text
print(match.group())

415-555-4242


In [41]:
import re

# Step 1: Regex for emails
email_regex = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b')

# Step 2: Search a string
text = "Contact: user@example.com or support@company.co.uk"
match = email_regex.search(text)

# Step 3: Get the match
print(match.group())

user@example.com


In [14]:
import re

# Step 1: FIXED Regex for dates (escaped forward slash)
date_regex = re.compile(r'\b\d{2}/\d{2}/\d{4}\b')  

# Step 2: Search a string
text = "Event on 12/25/2023 and 01/01/2024"
match = date_regex.search(text)

# Step 3: Extract the date
print(match.group())  # Output: 12/25/2023

12/25/2023


In [16]:
import re

# Step 1: Case-insensitive regex
word_regex = re.compile(r'python', re.IGNORECASE)

# Step 2: Search a string
text = "Python is great. PYTHON is versatile."
match = word_regex.search(text)

# Step 3: Get the match
print(match.group())

Python


In [17]:
import re

# Step 1: Regex for hashtags
hashtage_regex = re.compile(r'#\w+')   # Matches #hashtage

# Step 2: Find all matches
text = "Trending: #Python #DataScience #AI"
matches = hashtage_regex.findall(text)

# Step 3: Print all
print(matches)

['#Python', '#DataScience', '#AI']


In [19]:
text = "My email is user123@example.com"
match = re.search(r"\w+@\w+\.\w+", text)
print(match.group())

user123@example.com


In [20]:
text = "Emails: a@a.com, b@b.org, c@c.net"
emails = re.findall(r'\w+@\w+\.\w+', text)
print(emails)

['a@a.com', 'b@b.org', 'c@c.net']


In [22]:
text = "Name: John Doe, Age: 32"
match = re.search(r'Name: (\w+ \w+), Age: (\d+)', text)
print(match.group(1))
print(match.group(2))

John Doe
32


In [23]:
text = "The event is on 2025-05-01"
match = re.search(r"(\d{4})-(\d{2})-(\d{2})", text)
print(match.groups())

('2025', '05', '01')


In [26]:
text = "Prices: $5.50, $10.00, $3.25"
prices = re.findall(r'\$(\d+\.\d{2})', text)
print(prices)

['5.50', '10.00', '3.25']


In [27]:
text = "cat bat rat"
words = re.findall(r"(?:c|b|r)at", text)
print(words)

['cat', 'bat', 'rat']


In [32]:
text = "Call me at (123) 456-7890"
match = re.search(r"\((\d{3})\) (\d{3})-(\d{4})", text)
print(match.groups())

('123', '456', '7890')


In [30]:
text = "Color or Colour"
match = re.findall(r"Colou?r", text)
print(match)

['Color', 'Colour']


In [61]:
text = "Temp: 22C"
match = re.search(r"Temp: (?P<value>\d+)(?P<unit>[CF])", text)
print(match.group("value"))
print(match.group("unit"))

22
C


In [33]:
import re

text = "Today's highs: Paris 22C, London 18C, New York 72F, Tokyo 25C"

# Pattern with named groups for value and unit
temp_regex = re.compile(r"(?P<value>\d+)(?P<unit>[CF])")

# Use finditer() to get all matches as Match objects
matches = temp_regex.finditer(text)

# Print each temperature and unit
for match in matches:
    value = match.group("value")
    unit = match.group("unit")
    print(f"Temperature: {value}°{unit}")


Temperature: 22°C
Temperature: 18°C
Temperature: 72°F
Temperature: 25°C


In [35]:
import re

text = "Today's highs: Paris 22C, London 18C, New York 72F, Tokyo 25C"

# Regex to capture city and temperature (value + unit)
temp_city_regex = re.compile(r"(?P<city>[A-Za-z ]+?)\s+(?P<value>\d+)(?P<unit>[CF])")

# Use finditer to extract structured data
matches = temp_city_regex.finditer(text)

# Print results
for match in matches:
    city = match.group("city").strip()
    value = match.group("value")
    unit = match.group("unit")
    print(f"{city}: {value}°{unit}")


Paris: 22°C
London: 18°C
New York: 72°F
Tokyo: 25°C


In [41]:
import re

text = "Participants: Alice (23), Bob (31), Charlie (28), Dana (35)"

# Regex to match a name followed by an age in parentheses
name_age_regex = re.compile(r"(?P<name>[A-Za-z]+)\s*\((?P<age>\d+)\)")


# Find all matches using finditer
matches = name_age_regex.finditer(text)

# Print result
for match in matches:
    name = match.group("name")
    age = match.group("age")
    print(f"{name} is {age} years old")



Alice is 23 years old
Bob is 31 years old
Charlie is 28 years old
Dana is 35 years old


In [42]:
import re
pattern = re.compile(r'\d+') # Matches one or more digits
match = pattern.search('The price is 100 dollars')
print(match.group())

100


In [43]:
# Match a word
pattern = re.compile(r'cat')
match = pattern.search('The cat sat on the mat')
print(match.group())

cat


In [44]:
# Digits only
pattern = re.compile(r'\d{4}')
match = pattern.search('Year: 2025')
print(match.group())                     

2025


In [45]:
# Extract email
pattern = re.compile(r'\w+@\w+\.\w+')
match = pattern.search('My email is user@example.com')
print(match.group())

user@example.com


In [47]:
# Phone Number Format
pattern = re.compile(r'\d{3}-\d{3}-\d{4}')
match = pattern.search('Call me at 123-456-7890')
print(match.group())

123-456-7890


In [51]:
# Use of Dot (any character)
pattern = re.compile(r'c.t')
match = pattern.search('My cat cut is cute')
print(match.group())

cat


In [55]:
# Add capturing groups (if you need them)
import re

pattern = re.compile(r'c(.?)t')
matches = pattern.finditer('My cat cut is cute')

for match in matches:
    print(f"Full match: {match.group(0)}, Middle letter: {match.group(1)}")


Full match: cat, Middle letter: a
Full match: cut, Middle letter: u
Full match: cut, Middle letter: u


In [56]:
import re

text = "cat cut coat cute circuit comet court chart cheat"

pattern = re.compile(r'\bc\w*t\b')

matches = pattern.findall(text)

print("Words starting with 'c' and ending with 't':")
for word in matches:
    print("-", word)

Words starting with 'c' and ending with 't':
- cat
- cut
- coat
- circuit
- comet
- court
- chart
- cheat


In [57]:
# Using groups
pattern = re.compile(r'(\d+)-(\d+)-(\d+)')
match = pattern.search('123-456-7890')
print(match.group(2))

456


In [58]:
# Optional Character
pattern = re.compile(r'colou?r')
match = pattern.search('color or colour?')
print(match.group())
# The u? means the "u" is optional.


color


In [71]:
# Match start of string
pattern = re.compile(r'^Hello')
match = pattern.search('Hello world')
print(match.group())

Hello


In [59]:
# Match start of string
pattern = re.compile(r'^Hello')
match = pattern.search('Well, Hello world')
print(match.group())

AttributeError: 'NoneType' object has no attribute 'group'

In [60]:
# Match End of String
pattern = re.compile(r'end$')
match = pattern.search('This is the end')
print(match.group())

end


In [75]:
# Match Word Boundary
pattern = re.compile(r'\bcat\b')
match = pattern.search('a catfish is not a cat')
print(match.group())
# \b ensures "cat" is a separate word.

cat


In [63]:
# Extract First Name and Last Name
pattern = re.compile(r'(\w+) (\w+)')
match = pattern.search('Jane Doe')
print(match.group(1), match.group(2))

Jane Doe


In [79]:
# Match Repeated Words
pattern = re.compile(r'\b(\w+)\s+\1\b')
match = pattern.search('This is is a test')
print(match.group())
# Captures and matches repeated words.

is is


In [64]:
# Match Decimal numbers
pattern = re.compile(r'\d+\.\d+')
match = pattern.search('The value is 3.14')
print(match.group())

3.14


In [65]:
# Match a date
pattern = re.compile(r'\d{2}/\d{2}/\d{4}')
match = pattern.search('Today is 03/05/2025')
print(match.group())

03/05/2025


In [83]:
# Match Uppercase words
pattern = re.compile(r'\b[A-Z]{2,}\b')
match = pattern.search('I love the USA and NASA')
print(match.group())

USA


In [85]:
# Match HTML tags
pattern = re.compile(r'<(\w+)>.*?</\1>')
match = pattern.search('<b>Bold Text</b>')
print(match.group())

<b>Bold Text</b>


In [69]:
import re

html = """
<div class="product">
    <h2>Super Blender 3000</h2>
    <p>High-powered kitchen blender.</p>
</div>
<div class="product">
    <h2>Eco Water Bottle</h2>
    <p>BPR-free and sustainable.</p>
</div>
"""

# Pattern to match the content inside <h2> tags
pattern = re.compile(r'<(h2)>.*?</\1>')

matches = pattern.findall(html)
matches_full = pattern.finditer(html)

for match in matches_full:
    print(match.group())

<h2>Super Blender 3000</h2>
<h2>Eco Water Bottle</h2>


In [70]:
pattern = re.compile(r'<(\w+)>.*?</\1>')
text = '<i>Italic Text</i>'
match = pattern.search(text)

print(match.group())

<i>Italic Text</i>


In [71]:
pattern = re.compile(r'<(\w+)>.*?</\1>')
text = '<div>Content inside div</div>'
match = pattern.search(text)
print(match.group())

<div>Content inside div</div>


In [68]:
pattern = re.compile(r'<(\w+)>.*?</\1>')
text = '<span>Hello world</span>'
match = pattern.search(text)
print(match.group())

<span>Hello world</span>


In [72]:
# re.search() — Find First Email Address
text = "Contact me at alice@example.com for more info."
match = re.search(r'\w+@\w+\.\w+', text)
print(match.group())

alice@example.com


In [74]:
# re.findall() — Extract All Emails
text = "Send to john@abc.com, jane@xyz.org and foo@bar.net"
emails = re.findall(r'\w+@\w+\.\w+', text)
print(emails)

['john@abc.com', 'jane@xyz.org', 'foo@bar.net']


In [75]:
# re.search() — Grouping Phone Parts
text = "Phone: (123) 456-7890"
match = re.search(r'\((\d{3})\)\s*(\d{3})-(\d{4})', text)
print(match.groups())

('123', '456', '7890')


In [76]:
# re.findall() — All Words Starting with Capital
text = "Alice and Bob went to London and Paris"
caps = re.findall(r'\b[A-Z][a-z]*\b', text)
print(caps)

['Alice', 'Bob', 'London', 'Paris']


In [78]:
# re.search() — Match a Date (Group Day, Month, Year)
text = "Date of birth: 12-05-2000"
match = re.search(r'(\d{2})-(\d{2})-(\d{4})', text)
print(match.group(2))

05


In [79]:
# re.findall() — All Floating Point Numbers
text = "Values: 3.14, 0.99, and -12.5"
numbers = re.findall(r'-?\d+\.\d+', text)
print(numbers)

['3.14', '0.99', '-12.5']


In [80]:
# re.search() — Username from Email
text = "Contact: user123@example.com"
match = re.search(r'(\w+)@\w+\.\w+', text)
print(match.group(1))

user123


In [81]:
# re.findall() — Hashtags
text = "#fun #sun #PythonRocks"
tags = re.findall(r'#\w+', text)
print(tags)

['#fun', '#sun', '#PythonRocks']


In [82]:
# re.findall() — All HTML Tags
html = "<h1>Title</h1><p>Text</p>"
tags = re.findall(r'<[^>]+>', html)
print(tags)

['<h1>', '</h1>', '<p>', '</p>']


In [83]:
# re.search() — IP Address
text = "Ping 192.168.1.1"
match = re.search(r'(\d{1,3}\.){3}\d{1,3}', text)
print(match.group())


192.168.1.1


In [84]:
# re.findall() — Words Ending in ‘ing’
text = "running jumping swimming coding"
ings = re.findall(r'\b\w+ing\b', text)
print(ings)

['running', 'jumping', 'swimming', 'coding']


In [100]:
# re.search() — Extract First Word
text = "Hello there!"
match = re.search(r'^\w+', text)
print(match.group())  # Hello


Hello


In [101]:
# re.findall() — Match All Quoted Strings
text = 'He said "hello" and she replied "hi"'
quotes = re.findall(r'"(.*?)"', text)
print(quotes)

['hello', 'hi']


In [85]:
# re.findall() — Extract Domains from URLs
text = "Visit http://example.com and https://openai.com and http://blog.example.com"
domains = re.findall(r'https?://([\w\.]+)', text)
print(domains)

['example.com', 'openai.com', 'blog.example.com']


In [86]:
# re.search() — Extract Time (HH:MM)
text = "Meeting at 14:30"
match = re.search(r'(\d{2}):(\d{2})', text)
print(match.groups())

('14', '30')


In [87]:
# re.findall() — All Negative or Positive Integers
text = "-3 +7 -42 0"
numbers = re.findall(r'-?\d+', text)
print(numbers)

['-3', '7', '-42', '0']


In [89]:
# re.findall() — Repeated Letters
text = "Look at that treeee"
#repeats = re.findall(r'(.)\1+', text)
repeats = re.findall(r'((.)\2+)', text)

print(repeats)

[('oo', 'o'), ('eeee', 'e')]


In [170]:
import re

text = "This is is a test. The the result should match match repeated words."

# Regex pattern to match repeated words (case-insensitive)
pattern = r'\b(\w+)\s+\1\b'

# Use re.findall with re.IGNORECASE
repeats = re.findall(pattern, text, re.IGNORECASE)

print(repeats)


['is', 'The', 'match']


In [171]:
# re.findall() — File Extensions
text = "Files: report.pdf, data.csv, image.jpeg"
exts = re.findall(r'\.\w+', text)
print(exts)

['.pdf', '.csv', '.jpeg']


In [172]:
text = "Follow me @openai"
match = re.search(r'@(\w+)', text)
print(match.group(1))  # openai

openai


In [173]:
text = "Data science is awesome!"

words = re.findall(r'\b\w+\b', text)
print(words)  # ['Data', 'science', 'is', 'awesome']


['Data', 'science', 'is', 'awesome']


In [174]:
# Match All Numbers in Text
text = "There are 3 cats, 4 dogs, and 12 birds."
numbers = re.findall(r'\d+', text)
print(numbers)

['3', '4', '12']


In [175]:
# Find All Capitalized Words
text = "Alice went to New York in April."
caps = re.findall(r'\b[A-Z][a-z]*\b', text)
print(caps)

['Alice', 'New', 'York', 'April']


In [176]:
# Match All Email Addresses
text = "Contact us at help@example.com or admin@site.org"
emails = re.findall(r'\b[\w.-]+@[\w.-]+\.\w+\b', text)
emails = re.findall(r'\b[\w.-]+@[\w.-]+\.\w+\b', text)
print(emails)

['help@example.com', 'admin@site.org']


In [178]:
# Extract Hex Color Codes
text = "Colors: #ff0000, #00ff00, #0000ff"
colors = re.findall(r'#[0-9a-fA-F]{6}', text)
print(colors)

['#ff0000', '#00ff00', '#0000ff']


In [179]:
# Find All Words Longer Than 5 Letters
text = "Data science is fascinating and powerful"
long_words = re.findall(r'\b\w{6,}\b', text)
print(long_words)

['science', 'fascinating', 'powerful']


In [180]:
# Extract All Hashtags
text = "I love #Python and #MachineLearning!"
hashtags = re.findall(r'#\w+', text)
print(hashtags)

['#Python', '#MachineLearning']


In [183]:
# Find Repeated Words
text = "This is is a test test of repeated repeated words."
repeats = re.findall(r'\b(\w+)\s+\1\b', text)
print(repeats)

['is', 'test', 'repeated']


In [184]:
# Extract Time Strings (e.g., 12:30)
text = "The meeting is at 12:30 and lunch is at 14:00."
times = re.findall(r'\b\d{1,2}:\d{2}\b', text)
times = re.findall(r'\b\d{1,2}:\d{2}\b', text)
print(times)

['12:30', '14:00']


In [185]:
# Get All Words in Quotation Marks
text = 'He said "yes", then she said "no".'
quoted = re.findall(r'"(.*?)"', text)
print(quoted)

['yes', 'no']


In [188]:
# Match IPv4 Addresses
text = "Localhost IP is 127.0.0.1 and router is 192.168.1.1"
#ips = re.findall(r'\b(?:\d{1,3}\.){3}\d{1,3}\b', text)
ips = re.findall(r'\b(?:\d{1,3}\.){3}\d{1,3}\b', text)
print(ips)

['127.0.0.1', '192.168.1.1']


In [190]:
import re
text = "Localhost IP is 127.0.0.1 and router is 192.168.1.1"
ips = re.findall(r'\b(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})\b', text)

for ip in ips:
    print(".".join(ip))


127.0.0.1
192.168.1.1


In [193]:
# Strict IPv4 Validation
import re
text = "Valid: 192.168.1.1, Invalid: 256.512.999.0"
ips = re.findall(
    r'\b(?:(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\.){3}'
    r'(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\b', 
    text)
print(ips)

['192.168.1.1']


In [195]:
import re
import ipaddress

text = "Valid: 192.168.1.1, Invalid: 256.512.999.0"

# Step 1: Extract full IPs using strict IPv4 pattern
potential_ips = re.findall(
    r'\b(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)'
    r'(?:\.(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)){3}\b',
    text
)

# Step 2: Filter only valid IPs using ipaddress
valid_ips = [
    ip for ip in potential_ips
    if ipaddress.ip_address(ip)
]

print(valid_ips)  # ['192.168.1.1']


['192.168.1.1']


In [197]:
# Email Address Extraction
text = "Contact: user@example.com, invalid@.com, support@company.co.uk"
emails = re.findall(r'\b[A-Za-z0-9._%+-]+@[A-za-z0-9.-]+\.[A-Za-z]{2,}\b', text)
print(emails)

['user@example.com', 'support@company.co.uk']


In [198]:
# Credit Card Number Masking
text = "Cards: 4111-1111-1111-1111, 5500 0000 0000 0004"
masked = re.sub(r'\b(?:\d{4}[- ]?){3}\d{4}\b', '****-****-****-****', text)
print(masked)
                

Cards: ****-****-****-****, ****-****-****-****


In [199]:
# URL extraction
text = "Visit https://example.com/path or http://test.site"
urls = re.findall(r'https?://[^\s]+', text)
print(urls)  # Output: ['https://example.com/path', 'http://test.site']

['https://example.com/path', 'http://test.site']


In [202]:
# Password Strength Checker
password = "SecureP@ss123"
if re.fullmatch(r'(?=.*[A-Z])(?=.*[a-z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}$', password):
    print("Strong password")

Strong password


In [203]:
# Matching URLs with Port Numbers
text = "Server: http://localhost:8080, DB: postgres://db1:5432"
urls = re.findall(r'\b\w+://[^\s/:?#]+(?:\:\d+)?(?:/[^\s]*)?', text)

print(urls)


['http://localhost:8080', 'postgres://db1:5432']


In [204]:
# Extracting Hashtags and Mentions
text = "Trending: #Python @regex_team #100DaysOfCode"
tags = re.findall(r'(?:#|@)(\w+)', text)
print(tags)

['Python', 'regex_team', '100DaysOfCode']


In [205]:
text = "Trending: #Python @regex_team #100DaysOfCode"
tags = re.findall(r'(?:#|@)\w+', text)
print(tags)

['#Python', '@regex_team', '#100DaysOfCode']


In [208]:
# Finding Currency Values
text = "Price: $19.99, €30, ¥5000"
currencies = re.findall(r'[$€¥]\d+(?:\.\d{2})?', text, re.UNICODE)
print(currencies)

['$19.99', '€30', '¥5000']


In [119]:
# Splitting CamelCase Words
text = "camelCaseExample XMLHttpRequest"
words = re.sub(r'([a-z])([A-Z])', r'\1 \2', text)
print(words)

camel Case Example XMLHttp Request


In [127]:
# Validating ISBNs (10 or 13-digit)
import re

text = "ISBNs: 978-3-16-148410-0, 0-306-40615-2"

pattern = r'''
    \b
    (?: 
        \d{3}[- ]?\d{1,5}[- ]?\d{1,7}[- ]?\d{1,7}[- ]?\d      # ISBN-13
        |
        \d{1,5}[- ]?\d{1,7}[- ]?\d{1,7}[- ]?[\dX]             # ISBN-10
    )
    \b
'''

matches = re.finditer(pattern, text, re.VERBOSE)
isbns = [match.group(0) for match in matches]
print(isbns)



['978-3-16-148410-0', '0-306-40615-2']


In [129]:
import re

text = "ISBNs: 978-3-16-148410-0, 0-306-40615-2"

def validate_isbn(text):
    pattern = r'\b(?:(?:\d[- ]?){12}\d|(?:\d[- ]?){9}[\dX])\b'
    matches = re.finditer(pattern, text)
    valid_isbns = []
    
    for match in matches:
        isbn = match.group(0)
        cleaned = isbn.replace("-", "").replace(" ", "")
        if len(cleaned) == 13 and is_valid_isbn13(cleaned):
            valid_isbns.append(isbn)
        elif len(cleaned) == 10 and is_valid_isbn10(cleaned):
            valid_isbns.append(isbn)
    
    return valid_isbns

def is_valid_isbn13(isbn):
    total = sum((int(d) if i % 2 == 0 else int(d)*3) for i, d in enumerate(isbn[:-1]))
    check = (10 - total % 10) % 10
    return check == int(isbn[-1])

def is_valid_isbn10(isbn):
    total = sum((i + 1) * (10 if d == 'X' else int(d)) for i, d in enumerate(isbn))
    return total % 11 == 0

print(validate_isbn(text))


['978-3-16-148410-0', '0-306-40615-2']


In [209]:
phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
mo = phoneNumRegex.search('My number is 415-555-4242.')
print(mo.group(1))
print(mo.group(2))
print(mo.group(0))
print(mo.group())
print(mo.groups())
areaCode, mainNumber = mo.groups()
print(areaCode)
print(mainNumber)

415
555-4242
415-555-4242
415-555-4242
('415', '555-4242')
415
555-4242


In [210]:
phoneNumRegex = re.compile(r'(\(\d\d\d\)) (\d\d\d-\d\d\d\d)')
mo = phoneNumRegex.search('My phone number is (415) 555-4242.')
print(mo.group(1))
print(mo.group(2))

(415)
555-4242


In [212]:
heroRegex = re.compile (r'Batman|Tina Fey')
mo1 = heroRegex.search('Batman and Tina Fey')
print(mo1.group())
mo2 = heroRegex.search('Tina Fey and Batman')
print(mo2.group())

Batman
Tina Fey


In [213]:
batRegex = re.compile(r'Bat(man|mobile|copter|bat)')
mo = batRegex.search('Batmobile lost a wheel')
print(mo.group())
print(mo.group(1))

Batmobile
mobile


In [214]:
# re.compile Compiles a regular expression string into a Regex object.
# can reuse this object to run multiple matches (like search(), findall(), or finditer()).

phone_pattern = re.compile(r'\d{3}-\d{4}')
match = phone_pattern.search("Call 555-1234")  # like re.search


In [215]:
# re.search(pattern, text) or compiled.search(text)
# Searches the entire string for the first match only.
# Returns a match object or None.

import re
match = re.search(r'\d{3}-\d{4}', "Call 555-1234")
print(match.group())  # → '555-1234'


555-1234


In [216]:
# re.finditer(pattern, text) or compiled.finditer(text)
# Returns an iterator over all non-overlapping matches in the string.

import re
for match in re.finditer(r'\d+', "Age: 18, Room: 205"):
    print(match.group())  # prints '18' then '205'


18
205


In [217]:
# Example 1: Phone Number Variations
phoneRegex = re.compile(r'(\d{3})-(\d{3}-\d{4})')
mo = phoneRegex.search('My number is 415-555-4242')
print(mo.group())
print(mo.group(1))
print(mo.group(2))

415-555-4242
415
555-4242


In [218]:
# Example 2: Optional Title Prefix
nameRegex = re.compile(r'(Mr\.|Mrs\.|Ms\.)\s+(\w+)')
mo = nameRegex.search('Mrs. Robinson')
print(mo.group())
print(mo.group(1))
print(mo.group(2))

Mrs. Robinson
Mrs.
Robinson


In [219]:
# Example 3: Date Components
dateRegex = re.compile(r'(\d{2})/(\d{2})/(\d{4})')
mo = dateRegex.search('12/31/2023')
print(mo.groups())
print(mo.group(3))

('12', '31', '2023')
2023


In [220]:
# Example 4: Non-Capturing Group
verbRegex = re.compile(r'(?:walk|run|jump)ed')
mo = verbRegex.search('He jumped over')
print(mo.group())

jumped


In [221]:
# Example 5: Nested Groups
emailRegex = re.compile(r'((\w+)@(\w+\.\w+))')
mo = emailRegex.search('user@example.com')
print(mo.groups())
print(mo.group(0))
print(mo.group(1))
print(mo.group(2))
print(mo.group(3))

('user@example.com', 'user', 'example.com')
user@example.com
user@example.com
user
example.com


In [222]:
# Example 6: Password Strength Validation
pass_regex = re.compile(r'^(?=.*[A-Z])(?=.*\d)(?=.*[!@#$%^&*]).{8,}$')
mo = pass_regex.search('SecureP@ss1')
print(bool(mo))

True


In [223]:
# Example 7: HTML Tag extraction
html_regex = re.compile(r'<([a-z]+)[^>]*>(.*?)<\/\1>')
mo = html_regex.search('<div class="header">Hello</div>')
print(mo.group(1))
print(mo.group(2))

div
Hello


In [224]:
import re

html = '<div class="main">Hello World</div>'
pattern = re.compile(r'<([a-z]+)[^>]*>(.*?)</\1>')
match = pattern.search(html)

print(match.group(1))  # div
print(match.group(2))  # Hello World


div
Hello World


In [230]:
# Example 8: Log File Parsing
log_regex = re.compile(r'(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2}) - (\w+): (.*)')
mo = log_regex.search('2023-12-25 08:30:45 - ERROR: File not found')
print(mo.groups())


('2023-12-25', '08:30:45', 'ERROR', 'File not found')


In [231]:
# Example 9: Version Number Comparison
ver_regex = re.compile(r'v?(\d+)\.(\d+)\.(\d+)(?:-(\w+))?')
mo = ver_regex.search('v2.18.5-beta')
print(mo.groups())

('2', '18', '5', 'beta')


In [232]:
# Example 10: Sentence Tokenization
sent_regex = re.compile(r'([A-Z][^.!?]*[.!?])')
mo = sent_regex.findall('Hello! How are you? I am fine.')
print(mo)

['Hello!', 'How are you?', 'I am fine.']


In [233]:
# Example 11: XML/HTML Attribute Extraction
import re

#attr_regex = re.compile(r'<(\w+)\s+([a-z-]+)="([^"]+)"')
attr_regex = re.compile(r'<(\w+)\s+(\w+)="([^"]+)"')
mo = attr_regex.search('<img src="cat.jpg" alt="A cat">')

print(mo.group(1))  # 'img' (tag name)
print(mo.group(2))  # 'src' (1st attribute name)
print(mo.group(3))  # 'cat.jpg' (1st attribute value)


img
src
cat.jpg


In [234]:
import re

html = '<img src="cat.jpg" alt="A cat">'
tag_name = re.search(r'<(\w+)', html).group(1)
attrs = re.findall(r'(\w+)="([^"]+)"', html)

print("Tag name:", tag_name)
print("Attributes:", attrs)


Tag name: img
Attributes: [('src', 'cat.jpg'), ('alt', 'A cat')]


In [237]:
import re

lines = [
    "Contact us at support@example.com.",
    "Send it to admin@domain.org or hello@world.net.",
    "Invalid email here: test@.com"
]

email_pattern = re.compile(r'[\w\.-]+@[\w\.-]+\.\w+')

for line in lines:
    matches = email_pattern.findall(line)
    print(matches)

['support@example.com']
['admin@domain.org', 'hello@world.net']
[]


In [238]:
import re

numbers = [
    "415-555-1234",
    "1234567890",
    "(212) 555-6789",
    "invalid number"
]

phone_pattern = re.compile(r'(\(?\d{3}\)?[-.\s]?)?\d{3}[-.\s]?\d{4}')

for number in numbers:
    if phone_pattern.fullmatch(number):
        print(f"Valid: {number}")
    else:
        print(f"Invalid: {number}")

Valid: 415-555-1234
Valid: 1234567890
Valid: (212) 555-6789
Invalid: invalid number


In [239]:
import re

html_lines = [
    "<p>Hello <strong>World</strong></p>",
    "<div class='note'>Reminder</div>",
    "<a href='#'>Click here</a>"
]

tag_pattern = re.compile(r'<[^>]+>')

for line in html_lines:
    cleaned = tag_pattern.sub('', line)
    print(cleaned)

Hello World
Reminder
Click here


In [159]:
import re

# Basic use
mo = re.search(r'\d+', "User123")
print(mo.group())  # '123'

# Find all matches (needs defined text!)
text = "Contact us at info@example.com and support@openai.com"
emails = re.findall(r'[\w\.-]+@[\w\.-]+', text)

print(emails)  # ['info@example.com', 'support@openai.com']

# Extract groups
m = re.search(r'(\d+)-(\d+)', "2024-05")
print(m.group(1))  # '2024'
print(m.group(2))  # '05'


123
['info@example.com', 'support@openai.com']
2024
05


In [240]:
text = "Location: 25.038, 121.564; Backup: 24.986, 121.548"

pattern = re.compile(r'-?\d{1,3}\.\d+,\s*-?\d{1,3}\.\d+')

coords = pattern.findall(text)
print("Coordinates found:", coords)

Coordinates found: ['25.038, 121.564', '24.986, 121.548']


In [242]:
text = "Damage assessed on 2023-11-04. Follow-up on 2024-01-12."

pattern = re.compile(r'\b\d{4}-\d{2}-\d{2}\b')

dates = pattern.findall(text)
print("Dates found:", dates)

Dates found: ['2023-11-04', '2024-01-12']


In [244]:
text = "Estimated losses: $120M. Secondary losses: $2.5B."

pattern = re.compile(r'\$\d+(?:\.\d+)?[MB]?')

losses = pattern.findall(text)
print("Losses:", losses)

Losses: ['$120M', '$2.5B']


In [246]:
text = "Zone: FL-12A. Backup Zone: TX-05C. Nearby Zone: CA-07."

pattern = re.compile(r'\b[A-Z]{2}-\d{2}[A-Z]?\b')

zones = pattern.findall(text)
print("Zone IDs found:", zones)

Zone IDs found: ['FL-12A', 'TX-05C', 'CA-07']


In [248]:
text = "ALERT: Flood level HIGH. Code: RED. Next alert: MODERATE."

pattern = re.compile(r'\b(HIGH|MODERATE|SEVERE|LOW|RED|YELLOW)\b')

alerts = pattern.findall(text)
print("Alert levels found:", alerts)

Alert levels found: ['HIGH', 'RED', 'MODERATE']


In [249]:
# Example 12: Multi-line Comment Stripper

code = '''/* This is a 
multi-line comment */ int x = 5;'''
comment_regex = re.compile(r'/\*(.*?)\*/', re.DOTALL)
print(comment_regex.sub('', code))  # ' int x = 5;'

 int x = 5;


In [250]:
# Example 13: URL Query Parameter Parser
url_regex = re.compile(r'(\w+)=([^&]+)')
params = dict(url_regex.findall('page=2&size=10&sort=desc'))
print(params)

{'page': '2', 'size': '10', 'sort': 'desc'}


In [251]:
# Example 14: Nested Parentheses Matcher
paren_regex = re.compile(r'\(([^()]*(?:\([^()]*\)[^()]*)*)\)')
mo = paren_regex.search('3 * (2 + (5-1))')
print(mo.group(1))  # Output: '2 + (5-1)'

2 + (5-1)


In [252]:
def extract_all_parentheses(text):
    stack = []
    result = []

    for i, char in enumerate(text):
        if char == '(':
            stack.append(i)  # Push position of '(' onto the stack
        elif char == ')':
            if stack:
                start = stack.pop()
                result.append(text[start:i+1])  # Capture from '(' to ')'

    # Optional: return innermost to outermost
    return sorted(result, key=lambda x: text.index(x), reverse=True)

expr = '3 * (2 + (5 - (1 + 2)))'
matches = extract_all_parentheses(expr)
print(matches)


['(1 + 2)', '(5 - (1 + 2))', '(2 + (5 - (1 + 2)))']


In [253]:
# Example 15: Email Header Parser
header_regex = re.compile(
    r'^(From|To|Subject):\s*(.+(?:\n\s+.+)*)',
    re.MULTILINE
)
text = '''From: user@domain.com
Subject: Hello
   This spans
   multiple lines'''
print(header_regex.findall(text))


[('From', 'user@domain.com'), ('Subject', 'Hello\n   This spans\n   multiple lines')]


In [254]:
import re

config_regex = re.compile(
    r'^(Name|Description|Version):\s*(.+(?:\n\s+.+)*)',
    re.MULTILINE
)

text = '''Name: Flood Model v2
Description: This model simulates
   regional flood risk using
   rainfall and terrain data
Version: 1.3'''

print(config_regex.findall(text))


[('Name', 'Flood Model v2'), ('Description', 'This model simulates\n   regional flood risk using\n   rainfall and terrain data'), ('Version', '1.3')]


In [255]:
bib_regex = re.compile(
    r'^(Author|Title|Year):\s*(.+(?:\n\s+.+)*)',
    re.MULTILINE
)

text = '''Author: Smith, J.
Title: Climate Risk and Adaptation
   A Review of Global Models
Year: 2022'''

print(bib_regex.findall(text))


[('Author', 'Smith, J.'), ('Title', 'Climate Risk and Adaptation\n   A Review of Global Models'), ('Year', '2022')]


In [257]:
log_regex = re.compile(
    r'^(Timestamp|Message|Level):\s*(.+(?:\n\s+.+)*)',
    re.MULTILINE
)

text = '''Timestamp: 2024-11-20 10:30
Message: Severe flooding reported
   across multiple zones.
   Response team dispatched.
Level: CRITICAL'''

print(log_regex.findall(text))

[('Timestamp', '2024-11-20 10:30'), ('Message', 'Severe flooding reported\n   across multiple zones.\n   Response team dispatched.'), ('Level', 'CRITICAL')]


In [258]:
# Example 16: Extract Phone Numbers
re.findall(r'\d{3}-\d{3}-\d{4}', 'Call 415-555-1234 or 212-555-5678')  
# Output: ['415-555-1234', '212-555-5678']  

['415-555-1234', '212-555-5678']

In [259]:
# Example 17: Simple Email Validation
bool(re.match(r'\b\w+@\w+\.\w{2,}\b', 'test@example.com'))  # True  

True

In [260]:
# Example 18: Remove HTML Tags
re.sub(r'<[^>]+>', '', '<p>Hello <b>World</b></p>')  # 'Hello World'  

'Hello World'

In [261]:
# Example 19: Extract Dates (MM/DD/YYYY)
re.findall(r'\b\d{2}/\d{2}/\d{4}\b', 'Dates: 12/25/2023, 01/01/2024')

['12/25/2023', '01/01/2024']

In [262]:
# Example 20: Split on Multiple Delimiters
re.split(r'[,;]\s*','apple,banana; cherry')

['apple', 'banana', 'cherry']

In [263]:
# Example 21: Find Capitalized Words
re.findall(r'\b[A-Z][a-z]+\b', 'New York and London')

['New', 'York', 'London']

In [264]:
# Example 22: Extract Hashtags
re.findall(r'#\w+', 'Trending: #Python #DataScience')

['#Python', '#DataScience']

In [265]:
# Example 23: Remove Extra Whitespace
re.sub(r'\s+', ' ', 'Too   many   spaces')

'Too many spaces'

In [267]:
# Example 24: Match URLs
re.findall(r'https?://\S+', 'Visit https://example.com')

['https://example.com']

In [268]:
# Example 25: Validate Passwords
bool(re.match(r'^(?=.*[A-Z])(?=.*\d).{8,}$', 'Secure123'))

True

In [269]:
# Example 26: Extract prices
re.findall(r'\$\d+\.\d{2}', 'Cost: $19.99, $5.50')

['$19.99', '$5.50']

In [270]:
# Example 27: Find repeated words
re.findall(r'\b(\w+)\s+\1\b', 'the the quick brown fox')

['the']

In [271]:
# Example 28: Match Hex Color Codes
re.findall(r'#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{3})', '#FF5733 #abc')  # ['FF5733', 'abc']  

['FF5733', 'abc']

In [272]:
# Example 29: Extract twitter handles
re.findall(r'@\w+', 'Follow @user1 and @user2')  # ['@user1', '@user2']  

['@user1', '@user2']

In [273]:
# Example 30: Parse Log Levels
re.findall(r'(INFO|WARN|ERROR)','ERROR: File not found')

['ERROR']

In [274]:
# Example 31: Match IP Addresses
re.findall(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b','IP: 192.168.1.1')

['192.168.1.1']

In [275]:
# Example 32: Extract File Extensions
re.search(r'\.(\w+)$', 'document.pdf').group(1)  # 'pdf'  

'pdf'

In [276]:
# Example 33: Remove Punctuation
re.sub(r'[^\w\s]','','Hello, world!')

'Hello world'

In [277]:
# Example 34: Match Simple Dates (YYYY-MM-DD)
re.findall(r'\d{4}-\d{2}-\d{2}','2023-12-25, 2024-01-01')

['2023-12-25', '2024-01-01']

In [231]:
# Example 35: Validate usernames
bool(re.match(r'^[a-zA-Z0-9_]{3,16}$','user_123'))

True

In [282]:
batRegex = re.compile(r'Bat(man|mobile|copter|bat)')
mo = batRegex.search('Batmobile lost a wheel')
print(mo.group())
print(mo.group(1))

Batmobile
mobile


In [284]:
batRegex = re.compile(r'Bat(wo)?man')

mo1 = batRegex.search('The adventures of Batman')
print(mo1.group())

mo2 = batRegex.search('The adventures of Batwoman')
print(mo2.group())

Batman
Batwoman


In [286]:
phoneRegex = re.compile(r'(\d\d\d-)?\d\d\d-\d\d\d\d')
mo1 = phoneRegex.search('My number is 415-555-4242')
print(mo1.group())

mo2 = phoneRegex.search('mt number is 555-4242')
print(mo2.group())

415-555-4242
555-4242


In [289]:
batRegex = re.compile(r'Bat(wo)*man')
mo1 = batRegex.search('The adventure of Batman')
print(mo1.group())

mo2 = batRegex.search('The adventures of Batwoman')
print(mo2.group())

mo3 = batRegex.search('The adventures of Batwowowowoman')
print(mo3.group())

Batman
Batwoman
Batwowowowoman


In [291]:
batRegex = re.compile(r'Bat(wo)+man')
mo1 = batRegex.search('The Adventures of Batwoman')
print(mo1.group())


mo2 = batRegex.search('The Adventures of Batwowowowoman')
print(mo2.group())


mo3 = batRegex.search('The Adventures of Batman')
print(mo3 == None)

Batwoman
Batwowowowoman
True


In [297]:
import re

haRegex = re.compile(r'(Ha){3}', re.IGNORECASE)
mo1 = haRegex.search('Hahaha')
if mo1:
    print(mo1.group())  # Output: Hahaha
else:
    print('No match')

mo2 = haRegex.search('Ha')
print(mo2 == None)

Hahaha
True


In [298]:
greedyHaRegex = re.compile(r'(Ha){3,5}')
mo1 = greedyHaRegex.search('HaHaHaHaHa')
print(mo1.group())

nongreedyHaRegex = re.compile(r'(Ha){3,5}?')
mo2 = nongreedyHaRegex.search('HaHaHaHa')
print(mo2.group())

HaHaHaHaHa
HaHaHa


In [299]:
phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
mo = phoneNumRegex.search('Cell: 415-555-9999 Work: 212-555-0000')
mo.group()

'415-555-9999'

In [302]:
phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
phoneNumRegex.findall('Cell: 415-555-9999 Work: 212-555-0000')


['415-555-9999', '212-555-0000']

In [304]:
phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d)-(\d\d\d\d)')
phoneNumRegex.findall('Cell: 415-555-9999 Work: 212-555-0000')

[('415', '555', '9999'), ('212', '555', '0000')]

In [305]:
xmasRegex = re.compile(r'\d+\s\w+')
xmasRegex.findall('12 drummers, 11 pipers, 10 lords, 9 ladies, 8 maids, 7 swans, 6 geese, 5 rings, 4 birds, 3 hens, 2 doves, 1 partridge')

['12 drummers',
 '11 pipers',
 '10 lords',
 '9 ladies',
 '8 maids',
 '7 swans',
 '6 geese',
 '5 rings',
 '4 birds',
 '3 hens',
 '2 doves',
 '1 partridge']

In [306]:
vowelRegex = re.compile(r'[aeiouAEIOU]')
vowelRegex.findall('RoboCop eats babt food. BABY FOOD')

['o', 'o', 'o', 'e', 'a', 'a', 'o', 'o', 'A', 'O', 'O']

In [307]:
consonantsRegex = re.compile(r'[^aeiouAEIOU]')
consonantsRegex.findall('RoboCop eats babt food. BABY FOOD')

['R',
 'b',
 'C',
 'p',
 ' ',
 't',
 's',
 ' ',
 'b',
 'b',
 't',
 ' ',
 'f',
 'd',
 '.',
 ' ',
 'B',
 'B',
 'Y',
 ' ',
 'F',
 'D']

In [3]:
import re

text = "My phone number is 123-456-7890."

# Example 1: Finding a literal string
match1 = re.search('phone', text)
if match1:
    print(f"Match found: {match1.group()}")

# Example 2: Using \d to find digits
match2 = re.search(r'\d\d\d-\d\d\d-\d\d\d\d', text) # r' raw string for backslashes
if match2:
    print(f"Phone number found: {match2.group()}")
    print(f"Starts at: {match2.start()}, Ends at: {match2.end()}")
    print(f"Span: {match2.span()}")

# Example 3: Using . wildcard (briefly illustrate limitation if not careful)
text2 = "cat bat hat"
match3 = re.search(r'.at', text2)
if match3:
    print(f"Pattern '.at' found: {match3.group()}")

Match found: phone
Phone number found: 123-456-7890
Starts at: 19, Ends at: 31
Span: (19, 31)
Pattern '.at' found: cat


In [4]:
text2 = "cat bat hat" 

# Using \s to find whitespace
match_space = re.search(r'\s', text2)
if match_space:
    print(f"Pattern '\\s' found: '{match_space.group()}'")
    print(f"Starts at: {match_space.start()}, Ends at: {match_space.end()}")
    print(f"Span: {match_space.span()}")

Pattern '\s' found: ' '
Starts at: 3, Ends at: 4
Span: (3, 4)


In [5]:
import re

text = "The numbers are 10, 25, and 300. Also, 5."

# Example 1: Find all digits
numbers1 = re.findall(r'\d', text) # Finds individual digits
print(f"Individual digits: {numbers1}")

# Example 2: Find all sequences of one or more digits (using +)
numbers2 = re.findall(r'\d+', text)
print(f"All numbers: {numbers2}")

# Example 3: Finding all words (using \w+)
words = re.findall(r'\w+', text)
print(f"All words: {words}")

# Example 4: Using . to find patterns (e.g., three-letter words ending with 's')
text2 = "cats dogs birds frogs"
threes_s = re.findall(r'\w\ws', text2)
print(f"Three-letter words ending in 's': {threes_s}")

Individual digits: ['1', '0', '2', '5', '3', '0', '0', '5']
All numbers: ['10', '25', '300', '5']
All words: ['The', 'numbers', 'are', '10', '25', 'and', '300', 'Also', '5']
Three-letter words ending in 's': ['ats', 'ogs', 'rds', 'ogs']


In [7]:
sample_text = "Contact us at info@example.com or support@company.org. You can also reach out to alice.smith@mail.net for more details."

# Task 1: Find all email addresses (simplified pattern)
# Pattern explanation:
# \w+    : One or more word characters (for username part)
# @      : Literal '@' symbol
# \w+    : One or more word characters (for domain name)
# \.     : Literal '.' (escaped because '.' is a special regex character)
# \w{2,3}: Two or three word characters (for top-level domain like com, org, net)
email_pattern = r'\w+@\w+\.\w{2,3}'
found_emails = re.findall(email_pattern, sample_text)
print(f"Found Emails: {found_emails}")

# Fix: Include '.' in the character class for the username part
# [\w.]+ means match one or more word characters OR literal dots
email_pattern_fixed = r'[\w.]+@\w+\.\w{2,3}' # Changed \w+ to [\w.]+
found_emails_fixed = re.findall(email_pattern_fixed, sample_text)
print(f"Fixed Found Emails: {found_emails_fixed}")

Found Emails: ['info@example.com', 'support@company.org', 'smith@mail.net']
Fixed Found Emails: ['info@example.com', 'support@company.org', 'alice.smith@mail.net']


In [8]:
# Write a regular expression that matches a string representing a simple date in the format MM/DD/YYYY.
# Example: "01/25/2023"

import re
date_regex = re.compile(r'\d\d/\d\d/\d\d\d\d')
mo = date_regex.search('Today is 01/25/2023 and it\'s a good day.')
print(mo.group() if mo else "No match")
# Expected Output: 01/25/2023

01/25/2023


In [10]:
# Using re.findall(), extract all the numbers (sequences of one or more digits) from the following string:
# "I have 3 apples, 12 oranges, and 200 grapes."

import re
text = "I have 3 apples, 12 oranges, and 200 grapes."
numbers = re.findall(r'\d+', text)
print(numbers)
# Expected Output: ['3', '12', '200']

['3', '12', '200']


In [11]:
# Write a regex that matches phone numbers in the format XXX-XXX-XXXX or (XXX) XXX-XXXX. Assume X is a digit.

import re
phone_regex = re.compile(r'(\d{3}-\d{3}-\d{4})|(\(\d{3}\) \d{3}-\d{4})')
text1 = "My number is 555-123-4567."
text2 = "Call me at (123) 456-7890 please."
mo1 = phone_regex.search(text1)
mo2 = phone_regex.search(text2)
print(mo1.group() if mo1 else "No match 1")
print(mo2.group() if mo2 else "No match 2")

555-123-4567
(123) 456-7890


In [12]:
# Explain the difference between re.search() and re.match().

import re
text = "hello world"
print(re.search('world', text)) # <re.Match object; span=(6, 11), match='world'>
print(re.match('world', text))  # None (because 'world' is not at the beginning)

<re.Match object; span=(6, 11), match='world'>
None


In [13]:
# Write a regex to find words that start with "b" and end with "s".
# Example string: "boats bats bikes busses"

import re
text = "boats bats bikes busses"
pattern = re.compile(r'b\w*s') # \w* matches zero or more word characters
matches = pattern.findall(text)
print(matches)

['boats', 'bats', 'bikes', 'busses']


In [14]:
# How do you make a regex case-insensitive in Python?
import re
text = "Hello World"
mo = re.search(r'world', text, re.IGNORECASE)
print(mo.group() if mo else "No match")
# Expected Output: World

World


In [15]:
# What does .* mean in a regular expression? What about .*??

# .*: Matches any character (except newline) zero or more times. This is a "greedy" match, meaning it will try to match as much text as possible.

# .*?: Matches any character (except newline) zero or more times in a "non-greedy" (or "lazy") way. It will try to match as little text as possible.

import re
text = "<h1>Title 1</h1> <h1>Title 2</h1>"
# Greedy:
greedy_match = re.search(r'<h1>.*</h1>', text)
print(f"Greedy: {greedy_match.group()}")
# Non-greedy:
lazy_match = re.search(r'<h1>.*?</h1>', text)
print(f"Lazy: {lazy_match.group()}")
# Expected Output:
# Greedy: <h1>Title 1</h1> <h1>Title 2</h1>
# Lazy: <h1>Title 1</h1>

Greedy: <h1>Title 1</h1> <h1>Title 2</h1>
Lazy: <h1>Title 1</h1>


In [16]:
# Write a regular expression to match strings that are exactly 5 characters long, consisting only of letters (uppercase or lowercase).

import re
# ^ matches start of string, $ matches end of string
# [a-zA-Z] matches any letter, {5} matches exactly 5 times
pattern = re.compile(r'^[a-zA-Z]{5}$')
print(pattern.search("hello"))   # <re.Match object...>
print(pattern.search("world!"))  # None
print(pattern.search("four"))    # None
print(pattern.search("12345"))   # None

<re.Match object; span=(0, 5), match='hello'>
None
None
None


In [17]:
# How would you extract all the URLs starting with https:// from a given text? (Simplify the URL pattern to just https:// followed by non-whitespace characters).

import re
text = "Visit https://example.com or https://another.org/path and also http://insecure.net."
urls = re.findall(r'https://\S+', text) # \S+ matches one or more non-whitespace characters
print(urls)
# Expected Output: ['https://example.com', 'https://another.org/path']

['https://example.com', 'https://another.org/path']


In [18]:
# What does \b represent in a regular expression? Provide a simple example.

import re
text = "cat catcher concatenate"
# Find 'cat' as a whole word
matches_word = re.findall(r'\bcat\b', text)
print(f"Word 'cat': {matches_word}")

# Find 'cat' anywhere
matches_any = re.findall(r'cat', text)
print(f"Any 'cat': {matches_any}")
# Expected Output:
# Word 'cat': ['cat']
# Any 'cat': ['cat', 'cat', 'cat']

Word 'cat': ['cat']
Any 'cat': ['cat', 'cat', 'cat']


In [19]:
# Extracting Area Code and Number:

import re
phone_regex = re.compile(r'(\d{3})-(\d{3}-\d{4})')
mo = phone_regex.search('My number is 555-123-4567.')
if mo:
    print(f"Full match: {mo.group(0)}") # or just mo.group()
    print(f"Area Code: {mo.group(1)}")
    print(f"Main Number: {mo.group(2)}")
# Output:
# Full match: 555-123-4567
# Area Code: 555
# Main Number: 123-4567

Full match: 555-123-4567
Area Code: 555
Main Number: 123-4567


In [20]:
# Using findall() with Groups:

import re
email_regex = re.compile(r'(\w+)@(\w+\.\w+)') # (username)@(domain)
text = "Contact support@example.com or admin@test.org"
matches = email_regex.findall(text)
print(matches)
# Output: [('support', 'example.com'), ('admin', 'test.org')]

[('support', 'example.com'), ('admin', 'test.org')]


In [21]:
# Applying Quantifiers to Groups:

import re
# Matches 'ha' repeated 2 to 3 times, followed by '!'
ha_regex = re.compile(r'(ha){2,3}')
print(ha_regex.search('hahaha!').group())
print(ha_regex.search('hah!').group())
print(ha_regex.search('ha!')) # None
# Output:
# hahaha
# haha
# None

hahaha


AttributeError: 'NoneType' object has no attribute 'group'

In [22]:
import re

ha_regex = re.compile(r'(ha){2,3}')

# Test case 1: 'hahaha!'
match1 = ha_regex.search('hahaha!')
if match1:
    print(match1.group())
else:
    print("No match found for 'hahaha!'")

# Test case 2: 'hah!'
match2 = ha_regex.search('hah!')
if match2: # This condition will be False because match2 is None
    print(match2.group())
else:
    print("No match found for 'hah!'") # This will be printed

# Test case 3: 'ha!'
match3 = ha_regex.search('ha!')
if match3: # This condition will be False because match3 is None
    print(match3.group())
else:
    print("No match found for 'ha!'") # This will be printed

hahaha
No match found for 'hah!'
No match found for 'ha!'


In [23]:
# Named Groups (for clarity):
# You can name your groups using (?P<name>...) for easier access by name.

import re
phone_regex_named = re.compile(r'(?P<area_code>\d{3})-(?P<prefix>\d{3})-(?P<line_number>\d{4})')
mo = phone_regex_named.search('Call me at 987-654-3210.')
if mo:
    print(f"Area Code: {mo.group('area_code')}")
    print(f"Prefix: {mo.group('prefix')}")
    print(f"Line Number: {mo.group('line_number')}")
# Output:
# Area Code: 987
# Prefix: 654
# Line Number: 3210

Area Code: 987
Prefix: 654
Line Number: 3210


In [24]:
# Non-Capturing Groups (?:...):
# Sometimes you need a group for applying a quantifier or alternation, but you don't want to capture its content. Use (?:...).

import re
# Matches 'bat' or 'cat' but doesn't capture 'b' or 'c' separately
animal_regex = re.compile(r'(?:b|c)at')
print(animal_regex.findall("I saw a bat and a cat."))
# Compared to (b|c)at which would give [('b',), ('c',)]
print(re.findall(r'(b|c)at', "I saw a bat and a cat."))
# Output:
# ['bat', 'cat']
# ['b', 'c']

['bat', 'cat']
['b', 'c']


In [25]:
# re.sub() (Substitute)
# The re.sub() function allows you to find all occurrences of a pattern in a string and 
# replace them with a different string.
# Syntax: re.sub(pattern, replacement, string, count=0, flags=0)

text = "The price is $123.45 and the quantity is 7."
new_text = re.sub(r'\d', 'X', text)
print(new_text)

The price is $XXX.XX and the quantity is X.


In [26]:
# Replacing specific words:

import re
text = "I like apples and oranges. Apples are great."
new_text = re.sub(r'apples', 'bananas', text, re.IGNORECASE) # Case-insensitive replacement
print(new_text)

I like bananas and oranges. Apples are great.


In [27]:
# Using groups in replacement (backreferences):
# You can refer to captured groups in your replacement string using \1, \2, etc., or 
# \g<name> for named groups.

import re
# Swap first and last names (LastName, FirstName -> FirstName LastName)
name_format = re.compile(r'(\w+), (\w+)')
text = "Doe, John"
new_name = name_format.sub(r'\2 \1', text)
print(new_name)
# Output: John Doe

John Doe


In [28]:
# Replacing multiple spaces with a single space:

import re
text = "This   has   too  many   spaces."
cleaned_text = re.sub(r'\s+', ' ', text)
print(cleaned_text)
# Output: This has too many spaces.

This has too many spaces.


In [29]:
# Replacing with a function (more advanced):
# The replacement argument can also be a function that takes a Match object and 
# returns the replacement string.

import re
def censor_number(match_obj):
    number = match_obj.group(0)
    return '*' * len(number) # Replace number with asterisks of same length

text = "My secret code is 12345, not 678."
censored_text = re.sub(r'\d+', censor_number, text)
print(censored_text)
# Output: My secret code is *****, not ***.

My secret code is *****, not ***.


In [30]:
# Basic compilation and usage:
phone_pattern = re.compile(r'\d{3}-\d{3}-\d{4}')
text = "Call 123-456-7890 or 987-654-3210."
print(phone_pattern.findall(text))

['123-456-7890', '987-654-3210']


In [31]:
# Using flags with re.compile():
word_pattern = re.compile(r'python', re.IGNORECASE)
text = "Python is great. I love python."
print(word_pattern.findall(text))

['Python', 'python']


In [32]:
# Compiling a complex pattern once:
import re
# Compile a pattern for extracting dates in MM/DD/YYYY format
date_pattern = re.compile(r'(\d{2})/(\d{2})/(\d{4})')
log_data = [
    "Event 1 on 10/26/2023 at 14:00",
    "Event 2 on 11/01/2023 at 10:30",
    "No date here"
]
for line in log_data:
    mo = date_pattern.search(line)
    if mo:
        print(f"Date found: {mo.group()}")

Date found: 10/26/2023
Date found: 11/01/2023


In [33]:
# Using re.VERBOSE flag with re.compile():
import re

# Regex for a simple email: user@domain.tld
email_pattern_verbose = re.compile(r'''
    ([a-zA-Z0-9._%+-]+)    # username
    @                      # @ symbol
    ([a-zA-Z0-9.-]+)       # domain name
    (\.[a-zA-Z]{2,4})      # dot-something (e.g., .com, .org)
''', re.VERBOSE)

mo = email_pattern_verbose.search('my.email@example.com')
if mo:
    print(f"Username: {mo.group(1)}")
    print(f"Domain: {mo.group(2)}{mo.group(3)}")

Username: my.email
Domain: example.com


In [34]:
# Comparing performance (conceptual):
import re
# Less efficient for many iterations:
# for i in range(10000):
#     re.search(r'abc', 'abcdef')

# More efficient for many iterations:
compiled_pattern = re.compile(r'abc')
# for i in range(10000):
#     compiled_pattern.search('abcdef')
print("Using re.compile() is generally better for repeated use of the same pattern.")

Using re.compile() is generally better for repeated use of the same pattern.


In [35]:
# Matching any vowel:
import re
vowel_regex = re.compile(r'[aeiouAEIOU]')
text = "Hello World"
print(vowel_regex.findall(text))

['e', 'o', 'o']


In [36]:
# Matching a range of characters:
import re
# Match any lowercase letter from 'a' to 'f'
af_regex = re.compile(r'[a-f]')
text = "abcdefghijkl"
print(af_regex.findall(text))

['a', 'b', 'c', 'd', 'e', 'f']


In [37]:
# Combining ranges and individual characters:

# Match digits, plus signs, minus signs, or parentheses
math_chars = re.compile(r'[0-9+\-()]') # - needs to be escaped or at start/end
text = "Calculate (10 + 20) - 5"
print(math_chars.findall(text))
# Output: ['(', '1', '0', '+', '2', '0', ')', '-', '5']

['(', '1', '0', '+', '2', '0', ')', '-', '5']


In [39]:
# Negative Character Sets ([^...]):
import re
# Match any character that is NOT a vowel
not_vowel_regex = re.compile(r'[^aeiouAEIOU\s]') # also exclude whitespace
text = "Hello World"
print(not_vowel_regex.findall(text))

['H', 'l', 'l', 'W', 'r', 'l', 'd']


In [40]:
# Matching specific symbols/punctuation:
# Match any of !, ?, ., or ,
punctuation_regex = re.compile(r'[!?. ,]') # space is also a character
text = "Hello! How are you? I'm fine, thanks."
print(punctuation_regex.findall(text))

['!', ' ', ' ', ' ', '?', ' ', ' ', ',', ' ', '.']


In [41]:
# Anchors (^ and $)
# String must start with "Hello":
starts_hello = re.compile(r'^Hello')
print(starts_hello.search('Hello World'))
print(starts_hello.search('World Hello'))

<re.Match object; span=(0, 5), match='Hello'>
None


In [42]:
# String must end with "World":
ends_world = re.compile(r'World$')
print(ends_world.search('Hello World'))
print(ends_world.search('World Hello'))

<re.Match object; span=(6, 11), match='World'>
None


In [43]:
# Exact string match (entire string must match the pattern):
exact_word = re.compile(r'^apple$')
print(exact_word.search('apple'))
print(exact_word.search('apple sauce'))
print(exact_word.search('red apple'))

<re.Match object; span=(0, 5), match='apple'>
None
None


In [44]:
# Matching lines that start with a digit (when using re.MULTILINE):
text = "Line 1\n2nd Line\nLine 3"
# Matches lines starting with a digit
digit_start_line = re.compile(r'^\d', re.MULTILINE)
print(digit_start_line.findall(text))

['2']


In [45]:
# Matching lines that end with a specific word (with re.MULTILINE):
text = "This is the first line.\nSecond line here.\nThird line end."
# Matches lines ending with 'line.'
ends_line = re.compile(r'line\.$', re.MULTILINE)
print(ends_line.findall(text))
# Output: ['line.', 'line.']

['line.']
