In [2]:
# RE 

In [3]:
import re

# Common functions

## match

In [4]:
# re.match only tries the pattern at position 0, so it succeeds here
# because the string begins with 'Hello'.
result = re.compile(r'Hello').match('Hello, world!')
if result is not None:
    print("Match found:", result.group())

Match found: Hello


## search

In [5]:
# Unlike re.match, re.search scans the whole string and returns the
# first location where the pattern occurs.
result = re.compile(r'world').search('Hello, world!')
if result is not None:
    print("Search found:", result.group())


Search found: world


## findall

In [6]:
# Collect every maximal run of digits (\d+) as a list of strings.
results = [hit.group() for hit in re.finditer(r'\d+', 'There are 2 apples and 3 oranges.')]
print("Find all numbers:", results)


Find all numbers: ['2', '3']


## sub

In [7]:
# Replace every occurrence of the pattern 'apples' with 'bananas'.
new_string = re.compile(r'apples').sub('bananas', 'I have apples and oranges.')
print("Replaced string:", new_string)


Replaced string: I have bananas and oranges.


## split

In [8]:
# Break the sentence apart wherever one or more whitespace characters occur.
parts = re.compile(r'\s+').split('Split this string by spaces.')
print("Split parts:", parts)


Split parts: ['Split', 'this', 'string', 'by', 'spaces.']


The following code demonstrates how to use the `re` module to:

1. Search for a specific phone number format in a string.
2. Search for an email address format in a string.
3. Find all sequences of digits in the string.


In [9]:
import re

text = "My phone number is 123-456-7890 and my email is example@example.com."

# Phone number: groups of 3, 3 and 4 digits joined by hyphens.
phone_pattern = r'\d{3}-\d{3}-\d{4}'
phone_match = re.compile(phone_pattern).search(text)
if phone_match is not None:
    print("Phone number found:", phone_match.group())

# Email: local part, '@', domain, then a dot followed by at least two letters.
email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
email_match = re.compile(email_pattern).search(text)
if email_match is not None:
    print("Email found:", email_match.group())

# Every maximal run of digits in the text (the phone number yields three runs).
digits = [run.group() for run in re.finditer(r'\d+', text)]
print("All digits found:", digits)


Phone number found: 123-456-7890
Email found: example@example.com
All digits found: ['123', '456', '7890']


# Multiplicity 


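In regular expressions, the characters `?`, `*`, `+`, and `.` are special metacharacters. The first three are quantifiers: they control how many times the preceding element may repeat. The `.` is a wildcard that stands for a single character. Here’s a breakdown of each one, along with examples of how to use them in Python with the `re` module.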

The ? metacharacter matches 0 or 1 occurrence of the preceding element. It makes the preceding character or group optional.

In [10]:
import re

text = "color colour"
pattern = r'colou?r'  # 'u?' lets the 'u' appear once or not at all

# Both spellings are picked up by the same pattern.
matches = re.compile(pattern).findall(text)
print("Matches for 'color' or 'colour':", matches)


Matches for 'color' or 'colour': ['color', 'colour']


The * metacharacter matches 0 or more occurrences of the preceding element.

In [11]:
import re

text = "aaab acd aaaa"
pattern = r'a*'  # zero or more consecutive 'a' characters

# Because 'a*' is satisfied by zero characters, findall also reports an
# empty match at each position where no 'a' begins, and at the end of the text.
matches = re.compile(pattern).findall(text)
print("Matches for 'a*':", matches)


Matches for 'a*': ['aaa', '', '', 'a', '', '', '', 'aaaa', '']


The + metacharacter matches 1 or more occurrences of the preceding element. It requires at least one occurrence.

In [12]:
import re

text = "aaab acd aaaa"
pattern = r'a+'  # at least one 'a', as many as are adjacent

# Only non-empty runs of 'a' are returned.
matches = re.compile(pattern).findall(text)
print("Matches for 'a+':", matches)


Matches for 'a+': ['aaa', 'a', 'aaaa']


The . metacharacter matches any single character except a newline.

In [13]:
import re

text = "cat bat rat"
pattern = r'.at'  # any one character, then the literal 'at'

# Each three-letter word ending in 'at' is matched.
matches = re.compile(pattern).findall(text)
print("Matches for '.at':", matches)


Matches for '.at': ['cat', 'bat', 'rat']


In [15]:
import re

# One or more 'o' characters followed by the literal text 'h!'
pattern = r'o+h!'

# Candidate strings, each annotated with the outcome the pattern gives
test_strings = [
    'h!',        # no leading 'o'            -> no match
    'oh!',       # one 'o' then 'h!'         -> match
    'oooh!',     # several 'o' then 'h!'     -> match
    'ooooooh!',  # many 'o' then 'h!'        -> match
    'o!',        # 'h' is missing            -> no match
    'oo!',       # 'h' is missing            -> no match
    'h',         # no 'o' and no '!'         -> no match
    'oh',        # '!' is missing            -> no match
    'oooh',      # '!' is missing            -> no match
]

# re.match tries the pattern at the start of each candidate and returns
# None when it does not fit there.
for string in test_strings:
    if re.match(pattern, string) is None:
        print(f"'{string}' does not match the pattern.")
    else:
        print(f"'{string}' matches the pattern.")


'h!' does not match the pattern.
'oh!' matches the pattern.
'oooh!' matches the pattern.
'ooooooh!' matches the pattern.
'o!' does not match the pattern.
'oo!' does not match the pattern.
'h' does not match the pattern.
'oh' does not match the pattern.
'oooh' does not match the pattern.


In [17]:
import re

# 'woodchuck' with either a lower- or upper-case first letter
pattern = r'[wW]oodchuck'

# Sample text containing correct spellings, misspellings, plurals and
# occurrences followed by punctuation
paragraph = """
In a small forest, there lived a woodchuck who loved to dig. One day, the Woodchuck decided to explore beyond its usual burrow. However, it noticed that the oodchuck was not a friendly creature. Nearby, a Wodchuck was trying to find its way home. The woodchucks were busy gathering food for the winter, while the Woodchucks were preparing for a big celebration. Suddenly, the woodchuck! and the Woodchuck! heard a rustling noise in the bushes.
"""

# Whitespace-separated tokens; punctuation stays attached to its word
test_strings = paragraph.split()

# Report only the tokens that begin with the pattern. re.match anchors at
# the start of the token, so trailing characters such as 's' or '!' are allowed.
for string in test_strings:
    if re.match(pattern, string) is None:
        continue
    print(f"'{string}' matches the pattern.")



'woodchuck' matches the pattern.
'Woodchuck' matches the pattern.
'woodchucks' matches the pattern.
'Woodchucks' matches the pattern.
'woodchuck!' matches the pattern.
'Woodchuck!' matches the pattern.


In [19]:
import re

# 'woodchuck' with either a lower- or upper-case first letter
pattern = r'[wW]oodchuck'

# Sample text containing correct spellings, misspellings, plurals and
# occurrences followed by punctuation
paragraph = """
In a small forest, there lived a woodchuck who loved to dig. One day, the Woodchuck decided to explore beyond its usual burrow. However, it noticed that the oodchuck was not a friendly creature. Nearby, a Wodchuck was trying to find its way home. The woodchucks were busy gathering food for the winter, while the Woodchucks were preparing for a big celebration. Suddenly, the woodchuck! and the Woodchuck! heard a rustling noise in the bushes.
"""

# Lazily iterate over every occurrence of the pattern in the full text
matches = re.finditer(pattern, paragraph)

# For each occurrence, show the token just before it and the token just after it.
# NOTE: the "next" token is whatever whitespace-delimited text starts right at
# the end of the match, so a match inside 'woodchucks' or 'woodchuck!' yields
# 's' or '!' rather than the following word.
for match in matches:
    start, end = match.span()
    preceding_tokens = paragraph[:start].split()
    following_tokens = paragraph[end:].split()
    previous_word = preceding_tokens[-1] if preceding_tokens else ''
    next_word = following_tokens[0] if following_tokens else ''
    print(f"{previous_word}, {match.group()}, {next_word}")


a, woodchuck, who
the, Woodchuck, decided
The, woodchuck, s
the, Woodchuck, s
the, woodchuck, !
the, Woodchuck, !


## Summary
```
? makes the preceding character or group optional (0 or 1 occurrence).
* matches 0 or more occurrences of the preceding character or group.
+ matches 1 or more occurrences of the preceding character or group.
. matches any single character except a newline.
```
These metacharacters are fundamental in constructing regular expressions and can be combined with other elements to create complex patterns for string matching and manipulation.