# Regular Expressions in Python (`re` module)

In [1]:
import re

### Basic Functions in the `re` Module
<table>
  <tr>
    <th>Function</th>
    <th>Description</th>
  </tr>
  <tr>
    <td>re.match()</td>
    <td>Checks for a match only at the beginning of the string</td>
  </tr>
  <tr>
    <td>re.search()</td>
    <td>Searches the string for a match anywhere</td>
  </tr>
  <tr>
    <td>re.findall()</td>
    <td>Returns all non-overlapping matches as a list</td>
  </tr>
  <tr>
    <td>re.finditer()</td>
    <td>Returns an iterator yielding match objects</td>
  </tr>
  <tr>
    <td>re.sub()</td>
    <td>Replaces matches with a specified string</td>
  </tr>
  <tr>
    <td>re.split()</td>
    <td>Splits the string at matches</td>
  </tr>
</table>

Metacharacters are special symbols that have unique meanings in regex patterns.

<table>
  <tr>
    <th>Symbol</th>
    <th>Description</th>
    <th>Example</th>
  </tr>
  <tr>
    <td>.</td>
    <td>Matches any character except newline</td>
    <td>r"a.c" → Matches "abc", "a1c"</td>
  </tr>
  <tr>
    <td>^</td>
    <td>Matches the start of a string</td>
    <td>r"^Hello" → Matches "Hello World" but not "World Hello"</td>
  </tr>
  <tr>
    <td>$</td>
    <td>Matches the end of a string</td>
    <td>r"World$" → Matches "Hello World" but not "World Hello"</td>
  </tr>
  <tr>
    <td>*</td>
    <td>Matches 0 or more repetitions</td>
    <td>r"ab*" → Matches "a", "ab", "abb", "abbb"</td>
  </tr>
  <tr>
    <td>+</td>
    <td>Matches 1 or more repetitions</td>
    <td>r"ab+" → Matches "ab", "abb", "abbb" (not "a")</td>
  </tr>
  <tr>
    <td>?</td>
    <td>Matches 0 or 1 occurrence</td>
    <td>r"colou?r" → Matches "color" and "colour"</td>
  </tr>
  <tr>
    <td>{n,m}</td>
    <td>Matches between n and m times</td>
    <td>r"\d{2,4}" → Matches 2 to 4 digit numbers</td>
  </tr>
  <tr>
    <td>[]</td>
    <td>Character set (any one of these)</td>
    <td>r"[aeiou]" → Matches any single lowercase vowel</td>
  </tr>
  <tr>
    <td>()</td>
    <td>Grouping</td>
    <td>r"(abc)+" → Matches "abc", "abcabc"</td>
  </tr>
  <tr>
    <td>|</td>
    <td>OR operator</td>
    <td>r"cat|dog" → Matches "cat" or "dog"</td>
  </tr>
  <tr>
    <td>\</td>
    <td>Escape special characters</td>
    <td>r"\$100" → Matches "$100"</td>
  </tr>
</table>

### Character Classes
<table>
  <tr>
    <th>Pattern</th>
    <th>Description</th>
    <th>Example</th>
  </tr>
  <tr>
    <td>\d</td>
    <td>Any digit (0-9)</td>
    <td>r"\d+" → Matches "123", "42"</td>
  </tr>
  <tr>
    <td>\D</td>
    <td>Any non-digit</td>
    <td>r"\D+" → Matches "abc"</td>
  </tr>
  <tr>
    <td>\w</td>
    <td>Any word character (alphanumeric + underscore)</td>
    <td>r"\w+" → Matches "hello", "Python3"</td>
  </tr>
  <tr>
    <td>\W</td>
    <td>Any non-word character</td>
    <td>r"\W+" → Matches "@!#$"</td>
  </tr>
  <tr>
    <td>\s</td>
    <td>Any whitespace (space, tab, newline)</td>
    <td>r"\s+" → Matches " " or "\t"</td>
  </tr>
  <tr>
    <td>\S</td>
    <td>Any non-whitespace</td>
    <td>r"\S+" → Matches "Hello"</td>
  </tr>
</table>

## 1. Basic Functions in `re` Module

In [4]:

# re.match() succeeds only when the pattern matches starting at index 0 of
# the string; it returns a match object on success and None otherwise.
match = re.match(r'Hello Wo', 'Hello World')
if match:
    print(match.group())
else:
    print("No match")

Hello Wo


In [5]:
# re.search() scans the whole string and returns the first place where the
# pattern matches, or None when it does not match anywhere.
search = re.search(r'World', 'Hello World')
if search is None:
    print("No match")
else:
    print(search.group())

World


In [6]:

# re.findall() collects every non-overlapping match into a list of strings.
# "2a3" produces two separate hits ('2' and '3') because the letter
# interrupts the run of digits.
digit_run = r'\d+'
numbers_text = 'Numbers: 1, 2a3, 456, 7890'
matches = re.findall(digit_run, numbers_text)
print(matches)


['1', '2', '3', '456', '7890']


In [7]:

# re.finditer() yields one match object per hit, so each result can be
# inspected individually (here via .group()) as the loop runs.
number_hits = re.finditer(r'\d+', 'Numbers: 123, 456, 789')
for match in number_hits:
    print(match.group())

123
456
789


In [8]:
# re.sub() replaces every match with the replacement string.
# 'cats?' covers both "cat" and "cats"; IGNORECASE lets it also hit "Cats".
text = "Cats are great!, cat"
cat_pattern = re.compile(r'cats?', flags=re.IGNORECASE)
new_text = cat_pattern.sub('dogs', text)
print(new_text)

dogs are great!, dogs


In [9]:
# re.split() cuts the string wherever the pattern matches.
# Inside [...] the '|' is a literal pipe character, not alternation, so this
# class means "a semicolon, a pipe, or a comma".
delimiters = r'[;|,]'
split_text = re.split(delimiters, "apple;banana,grape|orange")
print(split_text)

['apple', 'banana', 'grape', 'orange']


## 2. Regex Metacharacters

In [11]:

# Using metacharacters: \b is a word boundary, so the pattern below matches
# 'rain' only as a whole word (it would not match inside e.g. "brain" or
# "raining").
text = "The rain in Spain falls mainly in the plain."
pattern = r"\brain\b"
whole_word_hits = re.findall(pattern, text)
print(whole_word_hits)


['rain']


In [12]:
# Using character sets []: the class [aeiou] matches exactly one lowercase
# vowel per hit, so findall() returns the vowels in order of appearance.
found_vowels = re.findall(r"[aeiou]", "Python is great")
print(found_vowels)

['o', 'i', 'e', 'a']


In [13]:
# Using quantifiers *, +, ?
# '.*' means "any character, zero or more times"; here nothing sits between
# 'a' and 'b', so the whole string 'ab' is the match.
print(re.findall(r"a.*b", "ab"))  # Matches 'ab'
# '+' requires one or more 'a' before the 'b'; the input has two.
print(re.findall(r"a+b", "aab"))  # Matches 'aab'
# '?' makes the preceding 'u' optional, so both spellings are found.
print(re.findall(r"colou?r", "color colour"))  # Matches both

['ab']
['aab']
['color', 'colour']


## 3. Practical Examples

In [14]:

# Extracting email addresses
# Pattern layout: local part, '@', domain, then a dot followed by a
# top-level domain of at least two letters. This is a simple illustrative
# pattern, not a full validator for every legal address.
text = "Contact us at support@example.com or sales@example.co.uk"
email_pattern = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
emails = email_pattern.findall(text)
print(emails)


['support@example.com', 'sales@example.co.uk']


In [15]:
# Extracting phone numbers of the form ddd-ddd-dddd
# findall() extracts every matching substring; it does not validate the
# string as a whole (use re.fullmatch() to check a single candidate).
# The \b anchors keep the pattern from matching inside a longer run of
# digits, e.g. "1234-567-89012" no longer yields "234-567-8901".
# The second number below is too short (only 3 trailing digits), so it is
# skipped.
phone_number = "987-654-3210 123-123-134"
phone_pattern = re.compile(r'\b\d{3}-\d{3}-\d{4}\b')
match = phone_pattern.findall(phone_number)  # list of matched strings
print(match)

['987-654-3210']


In [16]:
# Finding all words starting with "P"
# \b anchors at a word boundary, 'P' is the required first letter, and \w+
# consumes the rest of the word (at least one more word character).
text = "Python is Powerful and Popular!"
words = [hit.group() for hit in re.finditer(r'\bP\w+', text)]
print(words)

['Python', 'Powerful', 'Popular']


In [17]:
# Using re.compile() for efficiency
# Compiling once produces a reusable pattern object whose methods
# (findall, search, sub, ...) mirror the module-level functions; this is
# handy when the same expression is applied many times.
year_and_code_text = "2025 is a year, and 1234 is a code."
pattern = re.compile(r'\d{4}')
matches = pattern.findall(year_and_code_text)
print(matches)

['2025', '1234']
