##### Examples of Regex

In [3]:
import re
# Sample text for regex component demonstration
sample = "Data science involves 4 pillars, programming (Python or R), statistics, domain knowledge, and communication. Contact: info2@datascience.org"

# 1. Character classes
digits = re.findall(r'\d', sample)  # Find all digits
print(f"Digits: {digits}")

word_chars = re.findall(r'\w{5}', sample)  # Find all 5-character words
print(f"5-letter words: {word_chars}")

# 2. Quantifiers
python_variations = re.findall(r'Python\+?', sample)  # Find "Python" or "Python+"
print(f"Python variations: {python_variations}")

words_3_to_6 = re.findall(r'\b\w{3,6}\b', sample)  # Find words between 3-6 letters
print(f"Words with 3-6 letters: {words_3_to_6}")

# 3. Anchors
starts_with_d = re.findall(r'\bD\w*', sample)  # Words starting with D
print(f"Words starting with D: {starts_with_d}")

ends_with_ing = re.findall(r'\w+ing\b', sample)  # Words ending with "ing"
print(f"Words ending with 'ing': {ends_with_ing}")

# 4. Special characters and groups
programming_languages = re.findall(r'(Python|R)', sample)  # Find Python or R
print(f"Programming languages: {programming_languages}")

parentheses_content = re.findall(r'\((.*?)\)', sample)  # Find content inside parentheses
print(f"Content inside parentheses: {parentheses_content}")

Digits: ['4', '2']
5-letter words: ['scien', 'invol', 'pilla', 'progr', 'ammin', 'Pytho', 'stati', 'stics', 'domai', 'knowl', 'commu', 'nicat', 'Conta', 'info2', 'datas', 'cienc']
Python variations: ['Python']
Words with 3-6 letters: ['Data', 'Python', 'domain', 'and', 'info2', 'org']
Words starting with D: ['Data']
Words ending with 'ing': ['programming']
Programming languages: ['Python', 'R']
Content inside parentheses: ['Python or R']


##### Sample of In-line Comments for a Complex Pattern

In [4]:
# Complex regex pattern for email matching with comments using the verbose flag
email_pattern = re.compile(r'''
    \b                  # Word boundary
    [A-Za-z0-9._%+-]+   # Username part
    @                   # @ symbol
    [A-Za-z0-9.-]+      # Domain name
    \.                  # Dot
    [A-Z|a-z]{2,}       # Top level domain (at least 2 chars)
    \b                  # Word boundary
''', re.VERBOSE)
