# Regular Expressions (RegEx) Cheat Sheet with Examples

In [1]:
import re

# 1. Basic Patterns

In [None]:
# \d - matches a single digit character.
# re.findall scans the whole string and returns every match as a list of strings.
digit_sample = 'I have 3 apples and 5 oranges'
print(re.findall(r'\d', digit_sample))

In [None]:
# \w - matches a single "word" character: a letter, a digit, or an underscore.
# Punctuation and spaces (here ',', ' ', '!', '+', '@') are skipped.
word_char_pattern = re.compile(r'\w')
print(word_char_pattern.findall('Hello, World!+@'))

In [None]:
# \s - matches a single whitespace character such as a space, tab, or newline.
whitespace_sample = 'Hello World\tTab\nNewline'
print(re.findall(r'\s', whitespace_sample))

In [None]:
# . - matches any single character except a newline.
# r'h.t' therefore means: 'h', then exactly one arbitrary character, then 't'.
# The last candidate in the sample (h, newline, t) is not returned because
# '.' does not match a newline by default.
dot_sample = 'hit hat hot hut but h+t h\tt h\nt'
print(re.findall(r'h.t', dot_sample))

# 2. Quantifiers

In [None]:
# * - the preceding character or group may repeat zero or more times.
# Each (pattern, text) pair is searched in turn and its list of matches printed.
star_examples = (
    (r'ab*c', 'ac abc abbc fgt abcc'),  # 'b' may be absent, so 'ac' matches too
    (r'ab*', 'a ab abb abbb'),  # a lone 'a' already satisfies the pattern
)
for star_pattern, star_text in star_examples:
    print(re.findall(star_pattern, star_text))

In [None]:
# + - the preceding character or group must occur at least once (one or more times).
# Contrast with *, which also accepts zero occurrences: with + neither 'ac'
# nor the lone 'a' is a match.
plus_with_c = re.findall(r'ab+c', 'ac abc abbc fgt abcc')
plus_without_c = re.findall(r'ab+', 'a ab abb abbb')
print(plus_with_c)
print(plus_without_c)

In [None]:
# ? - makes the preceding character optional (zero or one occurrence),
# so a single pattern covers both spellings.
spelling_pattern = re.compile(r'colou?r')
print(spelling_pattern.findall('color colour'))

In [None]:
# {n} - the preceding pattern must occur exactly n times.
# '78' has only two digits, so it is not returned.
three_digit_runs = re.findall(r'\d{3}', '123 456 78')
print(three_digit_runs)

In [None]:
# {n,m} - the preceding pattern must occur at least n and at most m times.
# Matching is greedy, so a longer digit run is consumed up to four digits at
# a time (e.g. '101112' yields '1011' and then '12').
digit_runs_sample = '0 12 345 6789 101112 13141516'
print(re.findall(r'\d{2,4}', digit_runs_sample))

In [None]:
# [abc] - a character set: matches exactly one character out of those listed.
vowel_pattern = re.compile(r'[aeiou]')
print(vowel_pattern.findall('hello world'))  # the vowels, in order of appearance

In [None]:
# [^abc] - a negated set: matches exactly one character that is NOT listed.
non_vowels = re.findall(r'[^aeiou]', 'hello')
print(non_vowels)

In [None]:
# [a-z] - a range inside a set: matches one character between the two endpoints.
# The range is case-sensitive, so 'A' and 'B' are not matched by [a-c].
range_sample = 'abcdefgAB'
print(re.findall(r'[a-c]', range_sample))

In [None]:
# ^ - anchors the match to the start of the string.
# No re.MULTILINE flag is passed, so the 'hello' after the newline is not matched.
start_sample = 'hello world\nhello there'
print(re.findall(r'^hello', start_sample))

In [None]:
# $ - anchors the match to the end of the string.
# No re.MULTILINE flag is passed, so the 'world' before the newline is not matched.
end_anchor = re.compile(r'world$')
print(end_anchor.findall('hello world\nbye world'))

In [None]:
# Parentheses () form a capturing group, so parts of a match can be pulled out separately.
# r'(\d+)-(\d+)' captures the digit run before the hyphen and the digit run after it
# (\d+ means one or more digits).
# re.search stops at the first match, so only the first order number is reported.
my_str = 'Order: 123-456 has been processed but 789-012 is pending'
order_pattern = re.compile(r'(\d+)-(\d+)')
match = order_pattern.search(my_str)
print(match.groups())  # a tuple with one string per capturing group

In [None]:
# re.search only reports the first hit. re.findall returns every occurrence;
# when the pattern has several capturing groups, each result is a tuple of
# the captured strings.
order_pair_pattern = r'(\d+)-(\d+)'
matches = re.findall(order_pair_pattern, my_str)
for match in matches:
    print(match)

In [None]:
# | - alternation: the pattern matches if either side matches.
animal_pattern = re.compile(r'cat|dog')
pets_sentence = 'I have a cat and a dog, and another cat'

# search() returns only the first occurrence of "cat" or "dog".
match = animal_pattern.search(pets_sentence)
print(match.group())

# findall() returns every occurrence, in order, duplicates included.
matches = animal_pattern.findall(pets_sentence)
print(matches)

# Converting to a set drops the duplicates (a set has no guaranteed order).
unique_matches = set(matches)
print(unique_matches)

In [None]:
# Email pattern, piece by piece:
#   ^         start of the string
#   [\w\.-]+  one or more word characters, dots, or hyphens (the username)
#   @         the literal @ separating username and domain
#   [\w\.-]+  one or more word characters, dots, or hyphens (the domain name)
#   \.        a literal dot (escaped) before the top-level domain
#   \w+       one or more word characters (the top-level domain: com, org, ...)
#   $         end of the string, so the whole string has to be an email
email_pattern = r'^[\w\.-]+@[\w\.-]+\.\w+$'
email_match = re.match(email_pattern, 'user@example.com')
print(email_match.group())  # the address is valid, so the full string is printed

In [None]:
# https?://[^\s]+ - 'http', an optional 's', '://', then one or more
# non-whitespace characters, i.e. an http or https URL.
url_pattern = r'https?://[^\s]+'
url_sample = 'Visit https://example.com or http://test.org'
print(re.findall(url_pattern, url_sample))  # both URLs are returned

In [None]:
# re.sub replaces every match of a pattern with the given replacement string;
# here each URL in the text is swapped for the placeholder 'URL'.
redacted_text = re.sub(url_pattern, 'URL', 'Visit https://example.com or http://test.org')
print(redacted_text)

In [None]:
# Phone number pattern, piece by piece:
#   \(?       an optional opening parenthesis
#   \d{3}     exactly 3 digits (area code)
#   \)?       an optional closing parenthesis
#   [-.\s]?   an optional separator: hyphen, dot, or whitespace
#   \d{3}     exactly 3 digits (prefix)
#   [-.\s]?   an optional separator: hyphen, dot, or whitespace
#   \d{4}     exactly 4 digits (line number)
# Together this accepts US-style numbers written as (123) 456-7890,
# 123-456-7890, or 123.456.7890.
phone_pattern = r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
phone_sample = 'Call (123) 456-7890 or 987.654.3210'
print(re.findall(phone_pattern, phone_sample))  # both numbers are returned

In [None]:
# Decimal number pattern, piece by piece:
#   \d+   one or more digits before the decimal point
#   \.    a literal dot; it must be escaped because an unescaped '.' matches any character
#   \d+   one or more digits after the decimal point
# This finds numbers such as 10.99, 3.14, or 0.5 inside text.
decimal_pattern = re.compile(r'\d+\.\d+')
print(decimal_pattern.findall('Price: 10.99'))  # finds "10.99"