## re module examples

1.  Write a small script using re.search(), re.findall(), and re.sub() to modify text.

In [2]:
"""
Key concepts in this example:
1. anchors
2. non-capturing group
3. use of search(), findall() and sub()
4. using a replacer function with sub()
"""

import re

def modify_phone_numbers(text, prefix_pattern, new_prefix):
    phone_pattern = r"0(?:7|1)\d{2}-\d{3}-\d{3}"

    if re.search(phone_pattern, text):
        # Optional: demonstrate findall by extracting all matches
        _ = re.findall(phone_pattern, text) # Finds all, but result not used for main modification

        # Define a function to handle each individual replacement
        def replace_leading_zero(match_obj):
            # Apply the specific '0' to '+254' transformation to the matched number
            return re.sub(prefix_pattern, new_prefix, match_obj.group(0))

        # alternative lambda replacer function
        replacer_funct = lambda match_obj: re.sub(prefix_pattern, new_prefix, match_obj.group(0))

        # Perform the main replacement using the callable for dynamic changes
        return re.sub(phone_pattern,  replacer_funct, text)
    else:
        return "Invalid phone number detected in the text."

# Example Usage:
original_text = "Call 0114-846-762 for support, or 0723-495-109 for sales. My landline is 020-123456."
print(f"Original: {original_text}")
modified_text = modify_phone_numbers(original_text, "^0", "+254")
print(f"Modified: {modified_text}")

# Test with invalid input
print(f"\nOriginal: Some random text 123-456-789.")
print(f"Modified: {modify_phone_numbers('Some random text 123-456-789.', '^0', '+254')}")

Original: Call 0114-846-762 for support, or 0723-495-109 for sales. My landline is 020-123456.
Modified: Call +254114-846-762 for support, or +254723-495-109 for sales. My landline is 020-123456.

Original: Some random text 123-456-789.
Modified: Invalid phone number detected in the text.


2. Extract numbers from "Order123 was placed on 2024-03-29" using \d+

In [2]:
input_text = "Order123 was placed on 2024-03-29"
re.findall(r'\d+', input_text) 

['123', '2024', '03', '29']

3. Extract words from "Hello, World! Python_Regex" using \w+.

In [3]:
input_text = "Hello, World! Python_Regex"
re.findall(r"\w+", input_text)

['Hello', 'World', 'Python_Regex']

3. Test greedy vs non-greedy patterns on "aaaab" with a+ vs a+?

In [4]:
input_text = "ab"
re.search(r"a+?", input_text)

<re.Match object; span=(0, 1), match='a'>

4. Extract domains from emails like "kelvin@example.com" using @(.+)$.

In [7]:
email ="kelvin@example.com" 
m = re.search(r"@(.+)$",email)
if m:
    print(m.group(1))
else:
    print("Invalid email format")

example.com


5. Find repeated words in "this is is a test" using \b(\w+)\b\s+\1.

In [10]:
sentence = "this is is a test"
m =  re.search(r"\b(\w+)\b\s+\1" , sentence)
m.group(1)

'is'

In [12]:
matches = re.findall(r"\b(\w+)\b\s+\1", sentence)
print(matches)  # ['is']

['is']


6. Validate dates (YYYY-MM-DD) using \b\d{4}-\d{2}-\d{2}\b.

In [22]:
date = "What happended on 2025-06-05? Can you remember anything?"
m = re.search(r"\b\d{4}-\d{2}-\d{2}\b", date) # word bounderies with \b
print(m)

<re.Match object; span=(18, 28), match='2025-06-05'>


7. Extract email address from the address of a text

In [21]:
s = "My email address is pilotkelvin0@gmail.com"

print(re.search(r"(\b[\w.-]{3,25}@[\w.-]+)", s))

<re.Match object; span=(20, 42), match='pilotkelvin0@gmail.com'>


8. Extract phone numbers using


In [42]:
phone_pattern = re.compile("(\+?\d{1,3}[-.\s]?\d{3}[-.\s]?\d{4})")

import re

phone_pattern = re.compile(
    r"""
    (\+?
    \d{1,3}
    [-.\s]?\d{3}
    [-.\s]?
    \d{4})
    """
    , re.X)

text = """
Contact us at:
+1 800 1234
+254-700-5678
254.722.9988
1234567890
+44 20 1234
44-203-4567
91 123 4567
+33.612.4567
+61-412-7890
001 234 5678
"""

matches = phone_pattern.findall(text)

# for match in matches:
#     print(match)
matches

['+1 800 1234',
 '+254-700-5678',
 '254.722.9988',
 '1234567890',
 '44-203-4567',
 '91 123 4567',
 '+33.612.4567',
 '+61-412-7890',
 '001 234 5678']

9. Validate passwords

In [47]:
password_pattern = re.compile(
    r"""
    (?=.*\d)     # atleast one digit
    (?=.*[a-z])  # atleast on lowercase letter
    (?=.*[A-Z])  # atleast one uppercase letter
    .{8,}        # at least 8 characters
    """
    , re.X)

# List of test passwords
test_passwords = [
    # ✅ Valid passwords
    "Abcdefg1",
    "Password1",
    "Secure123A",
    "A1b2c3d4",
    "MyPass2024",
    "Xyz789Pass",
    
    # ❌ Invalid passwords
    "abcdefg1",     # No uppercase
    "ABCDEFG1",     # No lowercase
    "Password",     # No digit
    "Pass1",        # Too short
    "12345678",     # No letters
    "abc123xyz",    # No uppercase
    "ABC123XYZ",    # No lowercase
]

# Validate each password
for pwd in test_passwords:
    is_valid = password_pattern.match(pwd)
    status = "✅ Valid" if is_valid else "❌ Invalid"
    print(f"{pwd:15} → {status}")

Abcdefg1        → ✅ Valid
Password1       → ✅ Valid
Secure123A      → ✅ Valid
A1b2c3d4        → ✅ Valid
MyPass2024      → ✅ Valid
Xyz789Pass      → ✅ Valid
abcdefg1        → ❌ Invalid
ABCDEFG1        → ❌ Invalid
Password        → ❌ Invalid
Pass1           → ❌ Invalid
12345678        → ❌ Invalid
abc123xyz       → ❌ Invalid
ABC123XYZ       → ❌ Invalid


7. Find all IPv4 adressess in a log file

In [36]:
# example IPv4 address ranges from 0.0.0.0 to 256.256.256.256
import re
log_data = "0.0.0.0 and 255.255.255.255"
log_data = """
2025-06-14 08:45:21 INFO User login from 192.168.1.10
2025-06-14 08:46:02 WARN Failed login attempt from 172.16.0.5
2025-06-14 08:47:35 INFO File uploaded by user from IP 10.0.0.123
2025-06-14 08:49:00 ERROR Connection timeout from 203.0.113.42
2025-06-14 08:50:10 DEBUG Ping received from 8.8.8.8
2025-06-14 08:51:42 INFO New session started by 192.0.2.33
2025-06-14 08:52:18 INFO Access granted to 198.51.100.7
2025-06-14 08:53:00 INFO Logout by user at 192.168.1.10
2025-06-14 08:54:20 WARN Suspicious activity from 123.45.67.89
"""

ipv4_pattern = r"""
    (?:
        (?:  
            0
            |[1-9][0-9]?     # 1 to 99
            | 1[0-9]{2}      # 100 to 199
            | 2[0-5]{2}      # 200 to 255
        )
    \.
    ){3}
    (?:       
            0
            |[1-9][0-9]?     # 1 to 99
            | 1[0-9]{2}      # 100 to 199
            | 2[0-5]{2}      # 200 to 255
    )
    """
re.findall(ipv4_pattern, log_data, re.X)

['192.168.1.10',
 '172.16.0.5',
 '10.0.0.12',
 '203.0.113.42',
 '8.8.8.8',
 '192.0.2.33',
 '198.51.100.7',
 '192.168.1.10',
 '123.45.67.89']