## 1. Getting Started with re Module

In [None]:
import re

# Sample text for the first searches
text = "Hello, World!"

# re.search() scans the string and returns the first match object, or None
match = re.search(r"World", text)
if match is not None:
    print(f"Found: '{match[0]}'")
    print(f"Position: {match.span()[0]} to {match.span()[1]}")

# With the IGNORECASE flag the lowercase pattern still finds "World"
match = re.search(r"world", text, flags=re.I)
if match is not None:
    print(f"Case-insensitive: '{match[0]}'")

In [None]:
# The four most common re functions, applied to the same text
text = "The cat sat on the mat. The cat was happy."

# search(): only the first occurrence
first = re.search(r"cat", text)
print(f"search(): {first[0]} at position {first.start()}")

# findall(): every occurrence, as a list of strings
all_matches = re.findall(r"cat", text)
print(f"findall(): {all_matches}")

# finditer(): one match object per occurrence, produced lazily
print("finditer():")
for match in re.finditer(r"cat", text):
    print(f"  '{match[0]}' at {match.start()}")

# match(): succeeds only when the pattern sits at the very start
match_start = re.match(r"The", text)
print(f"match('The'): {None if match_start is None else match_start[0]}")

# "cat" is not at position 0, so match() returns None here
match_mid = re.match(r"cat", text)
print(f"match('cat'): {match_mid}")

## 2. Basic Patterns

In [None]:
# Literal characters
text = "Python 3.11 is awesome!"

# '.' is a metacharacter (it matches any character), so it must be escaped
# to match a literal dot; unescaped, '3.11' would also match e.g. "3x11".
# The pattern is defined outside the f-string because backslashes inside
# f-string expressions are a SyntaxError before Python 3.12.
version_pattern = r"3\.11"

print("Literal patterns:")
print(f"  'Python': {re.search(r'Python', text).group()}")
print(f"  '3.11': {re.search(version_pattern, text).group()}")

In [None]:
# Special characters (metacharacters)
print("Metacharacters:")

text = "Hello\nWorld"

# . matches any character (except newline)
print(f"  '.ello': {re.search(r'.ello', text).group()}")

# ^ matches start of string
print(f"  '^Hello': {re.search(r'^Hello', text).group()}")

# $ matches end of string (or just before a trailing newline)
print(f"  'World$': {re.search(r'World$', text).group()}")

# Escape special characters with \
text2 = "Price: $10.99"
# The search runs outside the f-string: a backslash inside an f-string
# expression is a SyntaxError before Python 3.12.
price_match = re.search(r"\$10\.99", text2)
# The label's backslashes are doubled because "\$" and "\." are invalid
# escape sequences in a non-raw string literal; the printed text is unchanged.
print(f"  '\\$10\\.99': {price_match.group()}")

In [None]:
# Special sequences
text = "Hello123 World_456"

# The searches run outside the f-strings: a backslash inside an f-string
# expression (e.g. {re.findall(r'\d+', text)}) is a SyntaxError before
# Python 3.12.

# \d - digit
digits = re.findall(r"\d+", text)

# \w - word character (letters, digits, and underscore)
words = re.findall(r"\w+", text)

# \s - whitespace
spaces = re.findall(r"\s+", text)

# \D, \W, \S - negated versions
non_digits = re.findall(r"\D+", text)

print("Special sequences:")
print(f"  \\d+ (digits): {digits}")
print(f"  \\w+ (words): {words}")
print(f"  \\s+ (spaces): {spaces}")
print(f"  \\D+ (non-digits): {non_digits}")

## 3. Character Classes

In [None]:
# Character classes: [...] matches exactly one character from the set
text = "Cat bat Rat mat 1at 2at"

print("Character classes:")

# Each pattern is printed as its own label, followed by what it finds.
for pattern in (
    r"[CbR]at",        # explicit set: C, b, or R
    r"[a-z]at",        # range: any lowercase letter
    r"[A-Za-z]at",     # several ranges: any letter
    r"[^Cb]at",        # negated set: any character except C or b
    r"[A-Za-z0-9]at",  # combined: letters and digits
):
    print(f"  {pattern}: {re.findall(pattern, text)}")

In [None]:
# Character classes applied to everyday data
text = "Email: user@domain.com, Phone: 123-456-7890"
hex_text = "Colors: #FF0000, #00FF00, #0000FF"

print("Practical examples:")

# Hex colors: '#' followed by hex digits in either letter case
print(f"  Hex colors: {re.findall(r'#[0-9A-Fa-f]+', hex_text)}")

# Phone parts: each run of consecutive digits
print(f"  Phone digits: {re.findall(r'[0-9]+', text)}")

# Capitalized words: one uppercase letter, then one or more lowercase
print(f"  Capitalized: {re.findall(r'[A-Z][a-z]+', text)}")

## 4. Quantifiers

In [None]:
# Quantifiers say how many times the preceding item may repeat
text = "goood gooood goooood gd god good"

print("Quantifiers:")

# Each pattern is printed as its own label, followed by what it finds.
for pattern in (
    r"go*d",      # *     - zero or more 'o'
    r"go+d",      # +     - one or more 'o'
    r"go?d",      # ?     - zero or one 'o'
    r"go{2}d",    # {n}   - exactly n times
    r"go{2,4}d",  # {n,m} - between n and m times
    r"go{3,}d",   # {n,}  - n or more times
):
    print(f"  {pattern}: {re.findall(pattern, text)}")

In [None]:
# Greedy vs non-greedy matching on the same input
html = "<p>First</p><p>Second</p>"

# .* is greedy by default: it runs to the LAST "</p>", giving one big match
greedy = re.findall(r'<p>.*</p>', html)

# .*? is non-greedy: it stops at the FIRST "</p>", giving one match per tag
non_greedy = re.findall(r'<p>.*?</p>', html)

print("Greedy vs Non-greedy:")
print(f"  Greedy: {greedy}")
print(f"  Non-greedy: {non_greedy}")

## 5. Practical Examples

In [None]:
# Validate email (simple)
def is_valid_email(email: str) -> bool:
    """Return True if *email* has the simple shape ``local@domain.tld``.

    This is a teaching-level check, not a full RFC 5322 validator: the local
    part and the domain may contain word characters, dots and hyphens, and
    the final label must be at least two ASCII letters.

    Args:
        email: The string to check.

    Returns:
        True when the whole string matches the pattern, False otherwise.
    """
    pattern = r'[\w.-]+@[\w.-]+\.[a-zA-Z]{2,}'
    # fullmatch() anchors both ends of the string. With match() and '$',
    # "user@example.com\n" was accepted because '$' also matches just
    # before a trailing newline.
    return re.fullmatch(pattern, email) is not None

# Sample addresses: the first two are well-formed, the rest are not
emails = [
    "user@example.com",
    "user.name@domain.co.uk",
    "invalid@",
    "@nodomain.com",
    "spaces in@email.com",
]

print("Email validation:")
for email in emails:
    # Mark each address with a check or a cross depending on the validator
    if is_valid_email(email):
        status = "✓"
    else:
        status = "✗"
    print(f"  {status} {email}")

In [None]:
# Validate phone number
def is_valid_phone(phone: str) -> bool:
    """Return True if *phone* looks like a 7- or 10-digit phone number.

    Accepts: 123-456-7890, (123) 456-7890, 1234567890, 456-7890.
    The area code is optional and may be wrapped in parentheses; groups may
    be separated by a hyphen, a dot, or a single whitespace character.

    Args:
        phone: The string to check.

    Returns:
        True when the whole string matches the pattern, False otherwise.
    """
    # The area code is either "(123)" or "123": the parentheses must be
    # balanced, so "(123 456-7890" and "123) 456-7890" are rejected.
    pattern = r'(?:(?:\(\d{3}\)|\d{3})[-.\s]?)?\d{3}[-.\s]?\d{4}'
    # fullmatch() anchors both ends of the string. With match() and '$', a
    # number followed by a trailing newline was accepted.
    return re.fullmatch(pattern, phone) is not None

# Sample numbers: the first four are accepted formats, the last two are not
phones = [
    "123-456-7890",
    "(123) 456-7890",
    "1234567890",
    "456-7890",
    "12-34-56",
    "abc-def-ghij",
]

print("Phone validation:")
for phone in phones:
    # Mark each number with a check or a cross depending on the validator
    if is_valid_phone(phone):
        status = "✓"
    else:
        status = "✗"
    print(f"  {status} {phone}")

In [None]:
# Extract data from text
text = """
Contact us:
Email: support@company.com or sales@company.org
Phone: 1-800-555-0123 or (555) 123-4567
Visit: https://www.company.com or http://shop.company.com
"""

print("Data extraction:")

# Extract emails: word characters, dots, or hyphens on both sides of '@'
emails = re.findall(r'[\w.-]+@[\w.-]+', text)
print(f"  Emails: {emails}")

# Extract phone numbers: an optional "1-" prefix, an area code written as
# "(555) " or "800-", then 3 digits, a hyphen, and 4 digits. The previous
# pattern could not cross the space after "(555)", so it returned only
# "123-4567" for the second number. Groups are non-capturing so findall()
# returns the whole match.
phones = re.findall(r'(?:\d-)?(?:\(\d{3}\)\s?|\d{3}-)\d{3}-\d{4}', text)
print(f"  Phones: {phones}")

# Extract URLs: scheme plus host name only (paths are not captured)
urls = re.findall(r'https?://[\w.-]+', text)
print(f"  URLs: {urls}")

## Summary

### Common Functions:

| Function | Purpose |
|----------|----------|
| `search()` | Find first match |
| `match()` | Match at beginning |
| `findall()` | Find all matches |
| `finditer()` | Iterator of matches |

### Metacharacters:

| Character | Meaning |
|-----------|----------|
| `.` | Any character except newline |
| `^` | Start of string |
| `$` | End of string |
| `\d` | Digit |
| `\w` | Word character |
| `\s` | Whitespace |

### Quantifiers:

| Quantifier | Meaning |
|------------|----------|
| `*` | 0 or more |
| `+` | 1 or more |
| `?` | 0 or 1 |
| `{n}` | Exactly n |
| `{n,m}` | n to m |
| `{n,}` | n or more |

### Next Lesson: Advanced Regular Expressions