In [2]:
import re

In [3]:
# the dot matches any character, except a new line
# "escape" special characters with a "\" to match them as literals

crush  = 'Alicia&Keys'
print(crush)

match = re.search("licia", crush)

if match:
    print("Found a match:", match.group(0))

# the preceding r tells Python to match *raw strings*, i.e., not to interpolate special metacharacters
match = re.search(r"(a.)", crush)
if match:
    print("Found another match:", match.group(1))
else:
    print("No match")

Alicia&Keys
Found a match: licia
Found another match: a&


In [4]:
# character classes
crush = 'Alicia Keys'
match = re.search("Alicia ([a-zA-Z])", crush)
if match:
    print("Full match:", match.group(0))
    print("Captured:", match.group(1))
else:
    print("No match")

Full match: Alicia K
Captured: K


In [5]:
# QUANTIFIERS: "+" means "match the preceding character or character class one or more times"
crush = 'Allllllllllllicia Keys'
match = re.search("A(l+)icia Keys", crush)
if match:
    print("Full match:", match.group(0))
    print("There are", len(match.group(1)), "l's:", match.group(1))
else:
    print("No match")

Full match: Allllllllllllicia Keys
There are 12 l's: llllllllllll


In [6]:
crush = 'AliciaAiciaAicia Keys'
match = re.search("(Alicia)+", crush)
if match:
    print("Full match:", match.group(0))
    print("Captured:", match.group(1))
else:
    print("No match")

Full match: Alicia
Captured: Alicia


In [7]:
# QUANTIFIERS: "*" means "match the preceding character or character class 0 or more times", i.e., it's optional
crush = 'AliciaKeys'
match = re.search("Alicia(.*)Keys", crush)
if match:
    print("Full match:", match.group(0))
    print("Captured:" + "[" + match.group(1) + "]")
else:
    print("No match")

Full match: AliciaKeys
Captured:[]


In [8]:
# either, or
crush = 'Beyonce'
diva = re.search("Alicia|Beyonce", crush)
if match:
    print("Found a diva:", match.group())
else:
    print("No match:", match)

Found a diva: AliciaKeys


In [9]:
# capture and test for a match
crush = 'Beyonce'
match = re.search("(Be).*(y(on)ce)", crush)
if match:
    print("the third capture:", match.group(3))
else:
    print("No match")

the third capture: on


In [10]:
# more specific quantifiers
crush = 'AliciaAliciaAlicia Keys'
match = re.search("(Alicia){3,5}", crush)
if match:
    print("Found a match:", match.group(1))
else:
    print("No match:", match)

Found a match: Alicia


In [11]:
crush = 'AliciaAliciaAlicia\t\t\tKeys'
print('crush:', crush)
match = re.search("((Alicia){2,})(\s+)Keys", crush)
if match:
    print("the third capture:" , "[" + match.group(3) + "]")
else:
    print("No match:", match)

crush: AliciaAliciaAlicia			Keys
the third capture: [			]


In [12]:
# modifiers
crush = 'Alicia Keys'
match = re.search("alicia", crush, re.I)
if match:
    print("Found a match:", match.group())
else:
    print("No match:", match)

Found a match: Alicia


In [13]:
# use variables inside your regular expressions
cat1 = 'Peaches'
fact = 'We love ' + cat1
print(fact)

pat = re.compile(cat1)
match = pat.search(fact)

We love Peaches


In [14]:
# *, +, and {} are greedy, they match as much as they possibly can
crush = "<BOLD>Holy moly</BOLD>, it's <BOLD>Alicia Keys</BOLD>"
match = re.search("<BOLD>(.*)</BOLD>", crush)
if match:
    print("Found a match:" + "[" + match.group(1) + "]")
else:
    print("No match:", match)

Found a match:[Holy moly</BOLD>, it's <BOLD>Alicia Keys]


In [15]:
# add ? to make *, +, and {} NON-GREEDY
crush = "<BOLD>Holy moly</BOLD>, it's <BOLD>Alicia Keys</BOLD>"
match = re.search("<BOLD>(.*?)</BOLD>", crush)
if match:
    print("Found a match:" + "[" + match.group(1) + "]")
else:
    print("No match:", match)

Found a match:[Holy moly]
