In [None]:
# Regular Expressions (regex) in Python — one of the most powerful tools for searching, validating, and manipulating text.

'''
Regular Expressions are patterns used to match strings (text).
They help you:
Validate inputs (like email, phone number)
Search for patterns in text
Replace parts of text
Extract specific information
Python provides regex features via the built-in re module.
'''

In [None]:
# Basic Pattern Matching – re.search()
# re.search() scans the whole string and stops at the first place the pattern matches.

import re

text = "I love Python programming"
match = re.compile("Python").search(text)

# A failed search yields None, so only report when something was found.
if match is not None:
    print("Found:", match[0])

# Output -> Found: Python

In [None]:
# Check if Pattern Exists – re.match()
# match() only succeeds when the pattern matches at the very beginning of the
# string; it returns a Match object on success and None otherwise.

# Keep both results in variables and print them: a notebook cell only echoes its
# last expression, and that one is None here, so the bare calls displayed nothing.
match_at_start = re.match("I love", "I love Python")
match_in_middle = re.match("Python", "I love Python")

print(match_at_start)    # <re.Match object; span=(0, 6), match='I love'>
print(match_in_middle)   # None -- "Python" is not at the start of the string


In [None]:
# Find All Occurrences – re.findall()
# findall() returns every non-overlapping match as a list of strings.

text = "cat bat mat rat"

# \b is a word boundary, so r"\bat\b" only matches a standalone word "at" and
# returns [] for this text. To match the word "bat" it must be spelled out in full.
matches = re.findall(r"\bbat\b", text)
print(matches)

# ['bat']

# To collect every word that ends in "at", allow word characters in front of it.
at_words = re.findall(r"\b\w+at\b", text)
print(at_words)

# ['cat', 'bat', 'mat', 'rat']


In [None]:
# Replace Text – re.sub()
# Every match of the pattern is swapped for the replacement; the input string
# itself is left untouched and a new string is returned.

text = "apple, banana, apple, mango"
new_text = re.sub(pattern="apple", repl="orange", string=text)
print(new_text)

# Output -> orange, banana, orange, mango


In [None]:
# Split String by Pattern – re.split()
# Each run of digits acts as a separator between the pieces.

text = "apple123banana456mango"
digit_runs = re.compile(r"\d+")
result = digit_runs.split(text)
print(result)

# Output -> ['apple', 'banana', 'mango']

# Reference tables. They are raw strings (r''' ... ''') because they contain regex
# escapes: in a normal string literal "\b" is a backspace character, and "\d",
# "\w", "\s" are invalid escape sequences that newer Python versions warn about.
r'''
Common Special Characters
| Symbol  | Meaning                      | Example   | Matches           |
| ------- | ---------------------------- | --------- | ----------------- |
| `.`     | Any character except newline | `a.b`     | `acb`, `a1b`      |
| `^`     | Start of string              | `^Hello`  | `"Hello world"`   |
| `$`     | End of string                | `world$`  | `"Hello world"`   |
| `*`     | 0 or more                    | `ab*`     | `a`, `ab`, `abb`  |
| `+`     | 1 or more                    | `ab+`     | `ab`, `abb`       |
| `?`     | 0 or 1                       | `colou?r` | `color`, `colour` |
| `{n}`   | Exactly n                    | `\d{3}`   | `123`             |
| `{n,}`  | n or more                    | `\d{2,}`  | `12`, `12345`     |
| `{n,m}` | Between n and m              | `\d{2,4}` | `12`, `1234`      |
| `[]`    | Character set                | `[aeiou]` | any vowel         |
| `|`     | OR                           | `cat|dog` | `cat` or `dog`    |
| `()`    | Group                        | `(ab)+`   | `abab`            |

'''
r'''
Common Escape Sequences
| Code | Meaning                        | Example   | Matches                   |
| ---- | ------------------------------ | --------- | ------------------------- |
| `\d` | Digit (0–9)                    | `\d+`     | `"123"`                   |
| `\D` | Non-digit                      | `\D+`     | `"abc"`                   |
| `\w` | Word char (letters, digits, _) | `\w+`     | `"Hello_123"`             |
| `\W` | Non-word char                  | `\W+`     | `"@#!"`                   |
| `\s` | Whitespace                     | `\s+`     | `"   "`                   |
| `\S` | Non-whitespace                 | `\S+`     | `"abc"`                   |
| `\b` | Word boundary                  | `\bcat\b` | `"cat"` (not `"catalog"`) |


'''

In [None]:
# Extracting Parts of Text
# Parentheses in the pattern create capture groups, numbered from 1.

text = "Name: Alice, Age: 25"
match = re.search(r"Name: (\w+), Age: (\d+)", text)

captured = match.groups()
print(captured)

# Output -> ('Alice', '25')

# Indexing the match object is shorthand for match.group(n).
print(match[1])  # Alice
print(match[2])  # 25



In [None]:
# Flags (Modifiers)

'''
| Flag                     | Description                                          | Example                                |
| ------------------------ | ---------------------------------------------------- | -------------------------------------- |
| `re.IGNORECASE` / `re.I` | Case-insensitive matching                            | `re.search("python", "PYTHON", re.I)`  |
| `re.MULTILINE` / `re.M`  | `^` and `$` match at the start and end of every line | `re.findall("^a", text, re.M)`         |
| `re.DOTALL` / `re.S`     | `.` matches newline too                              | `re.search("a.b", text, re.S)`         |
| `re.VERBOSE` / `re.X`    | Allow multiline regex with comments                  | `re.compile("[0-9]+  # digits", re.X)` |

'''

# Case-insensitive search: the lowercase pattern matches both spellings.
re.findall("python", "Python PYTHON", flags=re.IGNORECASE)

# Output -> ['Python', 'PYTHON']


In [None]:
# Validation Examples
# Email Validation
# Local part and domain may contain word characters, dots and hyphens; the domain
# must end in a dot followed by at least one word character.
pattern = r"^[\w\.-]+@[\w\.-]+\.\w+$"
# Compile right away so the validator keeps working even if `pattern` is reused.
email_regex = re.compile(pattern)


def is_valid_email(address):
    """Return True when the whole of `address` has the shape of an email address.

    fullmatch() is used instead of match(): with match(), the `$` anchor also
    matches just before a trailing newline, so an address followed by a newline
    character was accepted as valid.
    """
    return email_regex.fullmatch(address) is not None


print(is_valid_email("user@gmail.com"))    # True
print(is_valid_email("invalid@com"))       # False
print(is_valid_email("user@gmail.com\n"))  # False (trailing newline is rejected)


# Phone Number Validation
# An optional leading "+" followed by 10 to 13 digits.
pattern = r"^\+?\d{10,13}$"
# re.ASCII restricts \d to 0-9; by default \d also matches other Unicode digits.
phone_regex = re.compile(pattern, re.ASCII)


def is_valid_phone(number):
    """Return True when the whole of `number` is an optional "+" and 10-13 digits.

    fullmatch() is used instead of match(): with match(), the `$` anchor also
    matches just before a trailing newline, so such input was accepted.
    """
    return phone_regex.fullmatch(number) is not None


print(is_valid_phone("+12345678901"))  # True
print(is_valid_phone("12345"))         # False (too short)


# Extract Domain from Email
# The capture group takes the run of word characters, dots and hyphens after "@".
match = re.compile(r"@([\w\.-]+)").search("user@gmail.com")
domain = match[1]
print(domain)   # gmail.com


# Find All Numbers in Text
# \d+ picks up each maximal run of digits; the results are strings, not ints.
text = "Order 123 costs $45 and order 678 costs $90."
numbers = re.compile(r"\d+").findall(text)
print(numbers)


# Output -> ['123', '45', '678', '90']


In [None]:
# Quick reference. Written as a raw string so the regex escapes in the examples are
# kept literally instead of being treated as (invalid) string escape sequences.
# The example patterns use raw strings (r"...") for the same reason.
r'''
| Function       | Purpose                   | Example                        |
| -------------- | ------------------------- | ------------------------------ |
| `re.match()`   | Match from start          | `re.match("Hi", "Hi there")`   |
| `re.search()`  | Find first match anywhere | `re.search("Hi", "Say Hi")`    |
| `re.findall()` | Find all matches          | `re.findall(r"\d+", "a1b2")`   |
| `re.sub()`     | Replace                   | `re.sub("cat", "dog", text)`   |
| `re.split()`   | Split by pattern          | `re.split(r"\s+", text)`       |
| `re.compile()` | Precompile pattern        | `pattern = re.compile(r"\d+")` |

Real-World Use Cases
Validate emails, phone numbers, usernames
Extract IP addresses, log timestamps
Clean and normalize text data
Parse configuration files or logs
'''