A Regular Expression (RegEx) is a sequence of characters that defines a search pattern.

MetaCharacters:
[]	A set of characters
\	Signals a special sequence (can also be used to escape special characters)	"\d"
.	Any character (except newline character)	"he..o"
^	Starts with	"^hello"
$	Ends with	"planet$"
*	Zero or more occurrences	"he.*o"
+	One or more occurrences	"he.+o"
?	Zero or one occurrences	"he.?o"
{}	Exactly the specified number of occurrences (or a range, e.g. {2,3} for two to three)	"he.{2}o"
|	Either or	"falls|stays"
()	Capture and group

In [4]:
import re

# re.findall returns every non-overlapping match of the pattern as a list.
txt = "The rain in Spain"
x = re.findall(pattern="Spain", string=txt)
print(x)

['Spain']


In [6]:
# When the pattern contains a capturing group, re.findall returns the text
# captured by the group instead of the whole match, so only the alternative
# that matched is listed.
txt = "abxz"
x = re.compile("(a|b|c)xz").findall(txt)
print(x)

['b']


In [10]:
import re

# "^a...s$": starts with "a", then any three characters, then ends with "s".
pattern = '^a...s$'
test_string = 'abyss'
result = re.match(pattern, test_string)

# re.match returns a Match object on success and None otherwise.
print("Search successful." if result else "Search unsuccessful.")


Search successful.


In [22]:
# "?" makes the element to its left optional: zero or one occurrence.
pattern = 'ma?n'
test_string = 'man'
result = re.match(pattern, test_string)

if result is None:
  print("Search unsuccessful.")
else:
  print("Search successful.")

Search successful.


In [33]:
# "{m,n}" means at least m and at most n repetitions of the element to its
# left; 'a{2,3}' therefore matches "aa" or "aaa".
# re.match only tries to match at the very start of the string, and this
# string starts with "d", so the result is None even though "aa" and "aaa"
# occur later in the text.
pattern = 'a{2,3}'
test_string = 'daa adaaa'
result = re.match(pattern, test_string)

print("Search successful." if result else "Search unsuccessful.")



Search unsuccessful.


In [38]:
# Two to four consecutive digits, tried at the start of the string.
pattern = '[0-9]{2,4}'
test_string = '21wwwww'
result = re.match(pattern, test_string)

if result is None:
  print("Search unsuccessful.")
else:
  print("Search successful.")

Search successful.


| is used for alternation (or operator).

In [42]:
# Either "a" or "b" at the start of the string.
pattern = 'a|b'
test_string = 'ad'
result = re.match(pattern, test_string)

print("Search successful." if result else "Search unsuccessful.")

Search successful.


Parentheses () are used to group sub-patterns. For example, (a|b|c)xz matches a, b, or c followed by xz.

In [45]:
# The group limits the alternation to one character: a, b or c, then "xz".
pattern = '(a|b|c)xz'
test_string = 'axz'
result = re.match(pattern, test_string)

if result is None:
  print("Search unsuccessful.")
else:
  print("Search successful.")

Search successful.


# Special Sequences

In [49]:
# \A - Matches if the specified characters are at the start of a string.

# Use a raw string for the pattern: "\A" is not a valid Python string escape,
# so a plain literal triggers an invalid-escape warning. The raw literal has
# exactly the same value, so the regex is unchanged.
pattern = r'\Athe'
test_string = 'the sun'
result = re.match(pattern, test_string)

if result:
  print("Search successful.")
else:
  print("Search unsuccessful.")

Search successful.


\b - Matches if the specified characters are at the beginning or end of a word.
\B - Opposite of \b. Matches if the specified characters are not at the beginning or end of a word.
\d - Matches any decimal digit. Equivalent to [0-9]


In [54]:

# Program to extract numbers from a string

import re

string = 'hello 12 hi 89. a number is 34'
# Raw string so that "\d" reaches the regex engine untouched; in a plain
# literal it is an invalid string escape and triggers a warning.
pattern = r'\d+'

# findall returns every run of one or more digits, in order of appearance.
result = re.findall(pattern, string)
print(result)


['12', '89', '34']


In [55]:
import re

string = 'Twelve:12 Eighty nine:89.'
# Raw string so that "\d" reaches the regex engine untouched; in a plain
# literal it is an invalid string escape and triggers a warning.
pattern = r'\d+'

# re.split cuts the string at every run of digits and returns the pieces
# between them.
result = re.split(pattern, string)
print(result)

['Twelve:', ' Eighty nine:', '.']


In [58]:
# Program to remove all whitespaces
import re

# multiline string
string = 'abc 12\n de 23 \n f45 6'

# Matches every run of whitespace characters (spaces and newlines here).
# Raw string so that "\s" reaches the regex engine untouched; in a plain
# literal it is an invalid string escape and triggers a warning.
pattern = r'\s+'

# empty string
replace = ''

# re.sub replaces each match of the pattern with the replacement text.
new_string = re.sub(pattern, replace, string)
print(new_string)

abc12de23f456


In [59]:
import re

string = "Python is fun"

# check if 'Python' is at the beginning
# Raw string so that "\A" reaches the regex engine untouched; in a plain
# literal it is an invalid string escape and triggers a warning.
match = re.search(r'\APython', string)

if match:
  print("pattern found inside the string")
else:
  print("pattern not found")

pattern found inside the string


In [64]:

import re

string = '39801 356, 2102 1111'

# Three digit number followed by space followed by two digit number.
# Raw string so that "\d" reaches the regex engine untouched; in a plain
# literal it is an invalid string escape and triggers a warning.
pattern = r'(\d{3}) (\d{2})'

# match variable contains a Match object (or None when nothing matches).
match = re.search(pattern, string)

if match:
  # group() with no argument returns the whole matched text; the two
  # parenthesised parts are available as group(1) and group(2).
  print(match.group())
else:
  print("pattern not found")

801 35


In [78]:
import re

string = 'email43@gmail.com'

# One or more word characters, dots or hyphens, then "@", then a domain made
# of the same characters, a literal dot and a final run of word characters.
# Raw string so that "\w" and "\." reach the regex engine untouched; in a
# plain literal they are invalid string escapes and trigger a warning.
pattern = r'^[\w\.-]+@[\w\.-]+\.\w+$'
# Stricter alternative (letters and digits only, top-level domain of 2+ letters):
# pattern = '^[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]{2,}$'

# The pattern is anchored with ^ and $, so re.search and re.match would find
# the same match; run it once and bind both names.
result = match = re.match(pattern, string)

if result:
  print("Search successful.")
else:
  print("Search unsuccessful.")

Search successful.
