# Regular Expression

In [1]:
import re

In [2]:
# Sample sentence used as the search text by the regex examples below.
txt = "The rain falls mainly in Spain. It last for 11 days"

# Functions

## findall()

In [3]:
# findall() returns every non-overlapping match of 'ai' as a list of strings.
x = re.findall(pattern="ai", string=txt)
print(x)


['ai', 'ai', 'ai']


In [4]:
# findall() yields an empty list when the pattern never matches.
x = re.findall(pattern="Portugal", string=txt)
print(x)

[]


## search()

In [5]:
# Search for the first white-space character in the string.
# The pattern is a raw string so that "\s" reaches the regex engine untouched
# instead of being parsed as an (invalid) string escape sequence.
x = re.search(r"\s", txt)
print(txt)
# start() is the index at which the match begins.
print("The first white-space character is located in position:", x.start())

The rain falls mainly in Spain. It last for 11 days
The first white-space character is located in position: 3


In [6]:
# Locate the first letter 's'; start() gives the index where the match begins.
x = re.search(pattern="s", string=txt)
print("The first letter 's' is located in position:", x.start())

The first letter 's' is located in position: 13


In [7]:
# search() returns None when the pattern is not found anywhere in the string.
x = re.search(pattern="Portugal", string=txt)
print(x)

None


## split()

In [8]:
# Split the string at each white-space character.
# Raw string: keeps "\s" as a regex token rather than an invalid string escape.
x = re.split(r"\s", txt)

print(x)

['The', 'rain', 'falls', 'mainly', 'in', 'Spain.', 'It', 'last', 'for', '11', 'days']


In [9]:
# Split the string only at the first white-space occurrence.
# maxsplit is passed by keyword: passing it positionally is deprecated in
# recent Python versions and is harder to read.
x = re.split(r"\s", txt, maxsplit=1)

print(x)

['The', 'rain falls mainly in Spain. It last for 11 days']


## sub()

In [10]:
# Replace every white-space character with the character "0".
# Raw string: keeps "\s" as a regex token rather than an invalid string escape.
x = re.sub(r"\s", "0", txt)

print(x)

The0rain0falls0mainly0in0Spain.0It0last0for0110days


In [11]:
# Replace only the first 2 white-space characters with "0".
# count is passed by keyword: passing it positionally is deprecated in
# recent Python versions and is easy to confuse with the flags argument.
x = re.sub(r"\s", "0", txt, count=2)

print(x)

The0rain0falls mainly in Spain. It last for 11 days


# Rules

## Metacharacters

![jupyter](./metacharacters.png)

In [12]:
# [] : a set of characters
# Collect every lower-case letter that falls alphabetically between "a" and "m".
x = re.findall(pattern="[a-m]", string=txt)
print(x)

['h', 'e', 'a', 'i', 'f', 'a', 'l', 'l', 'm', 'a', 'i', 'l', 'i', 'a', 'i', 'l', 'a', 'f', 'd', 'a']


In [13]:
# Find all digit characters.
# For ASCII text, \d is the same as [0-9].
# Raw string: keeps "\d" as a regex token rather than an invalid string escape.
x = re.findall(r"\d", txt)
print(x)

['1', '1']


In [45]:
# . : any single character (except a newline)
# Find every sequence that starts with "f" followed by exactly two (any) characters.
# The pattern needs exactly two dots; "f....." would require five trailing
# characters and return six-character matches instead.
print(txt)
x = re.findall("f..", txt)
print(x)

The rain falls mainly in Spain. It last for 11 days
['fal', 'for']


In [16]:
# ^ : anchors the pattern to the start of the string
# Report whether the string starts with 'The'.
x = re.findall("^The", txt)
print("Yes, the string starts with 'The'" if x else "No match")

Yes, the string starts with 'The'


In [17]:
# $ : anchors the pattern to the end of the string
# Report whether the string ends with 'days'.
x = re.findall("days$", txt)
print("Yes, the string ends with 'days'" if x else "No match")

Yes, the string ends with 'days'


In [18]:
# * : zero or more occurrences of the preceding character
# "al*" matches an "a" followed by any number of "l" characters,
# e.g. 'a', 'al', 'all', 'alll'.
x = re.findall("al*", txt)
print(txt)
print(x)
print("Yes, there is at least one match!" if x else "No match")

The rain falls mainly in Spain. It last for 11 days
['a', 'all', 'a', 'a', 'a', 'a']
Yes, there is at least one match!


In [19]:
# + : one or more occurrences of the preceding character
# Check if the string contains "a" followed by 1 or more "l" characters,
# e.g. 'al', 'all', 'alll' (a bare 'a' does not match).
x = re.findall("al+", txt)
print(x)
print("Yes, there is at least one match!" if x else "No match")

['all']
Yes, there is at least one match!


In [20]:
# {} : an exact number of occurrences of the preceding character
# "al{2}" matches "a" followed by exactly two "l" characters, i.e. 'all'.
x = re.findall("al{2}", txt)
print(x)
print("Yes, there is at least one match!" if x else "No match")

['all']
Yes, there is at least one match!


In [21]:
# | : either / or
# Check if the string contains "falls" or "stays".
x = re.findall("falls|stays", txt)
print(x)
print("Yes, there is at least one match!" if x else "No match")

['falls']
Yes, there is at least one match!


## Special Sequences

![jupyter](./special_sequences.png)

In [22]:
# \A : matches only at the start of the string
# Check if the string starts with "The".
# Raw string: keeps "\A" as a regex token rather than an invalid string escape.
x = re.findall(r"\AThe", txt)
print(txt)
print(x)

if x:
  print("Yes, there is a match!")
else:
  print("No match")

The rain falls mainly in Spain. It last for 11 days
['The']
Yes, there is a match!


In [23]:
# \b : matches at a word boundary -- the start of a word when \b is placed
# first in the pattern, the end of a word when it is placed last.
#
# In a normal (non-raw) string literal, \b is the ASCII backspace character,
# so txt1 below contains a backspace, while the raw r"..." pattern keeps \b
# as the regex word-boundary token.
#
# Check if "fo" is present at the beginning of a WORD:
txt1 = 'The rain falls mainly in Spain. It last \bfor 11 days'
txt  = 'The rain falls mainly in Spain. It last for 11 days'

x = re.findall(r"\bfo", txt)
for shown in (txt1, txt, x):
  print(shown)

print("Yes, there is at least one match!" if x else "No match")



The rain falls mainly in Spain. It last for 11 days
The rain falls mainly in Spain. It last for 11 days
['fo']
Yes, there is at least one match!


In [24]:
# \b placed last: "ain" must sit at the end of a WORD.
print(txt)
x = re.findall(r"ain\b", txt)
print(x)
print("Yes, there is at least one match!" if x else "No match")

The rain falls mainly in Spain. It last for 11 days
['ain', 'ain']
Yes, there is at least one match!


In [25]:
# \B placed first: "ain" must be present, but NOT at the beginning of a word.
print(txt)
x = re.findall(r"\Bain", txt)
print(x)
print("Yes, there is at least one match!" if x else "No match")

The rain falls mainly in Spain. It last for 11 days
['ain', 'ain', 'ain']
Yes, there is at least one match!


In [26]:
# \B placed last: "ain" must be present, but NOT at the end of a word.
print(txt)
x = re.findall(r"ain\B", txt)
print(x)
print("Yes, there is at least one match!" if x else "No match")

The rain falls mainly in Spain. It last for 11 days
['ain']
Yes, there is at least one match!


In [27]:
# \d : a digit (0-9)
# Check if the string contains any digits.
# Raw string: keeps "\d" as a regex token rather than an invalid string escape.
x = re.findall(r"\d", txt)

print(x)

if x:
  print("Yes, there is at least one match!")
else:
  print("No match")

['1', '1']
Yes, there is at least one match!


In [28]:

# \D : any character that is NOT a digit
# Return a match at every non-digit character.
# Raw string: keeps "\D" as a regex token rather than an invalid string escape.
x = re.findall(r"\D", txt)

print(x)

if x:
  print("Yes, there is at least one match!")
else:
  print("No match")

['T', 'h', 'e', ' ', 'r', 'a', 'i', 'n', ' ', 'f', 'a', 'l', 'l', 's', ' ', 'm', 'a', 'i', 'n', 'l', 'y', ' ', 'i', 'n', ' ', 'S', 'p', 'a', 'i', 'n', '.', ' ', 'I', 't', ' ', 'l', 'a', 's', 't', ' ', 'f', 'o', 'r', ' ', ' ', 'd', 'a', 'y', 's']
Yes, there is at least one match!


In [29]:
# \s : a white-space character
# Return a match at every white-space character.
# Raw string: keeps "\s" as a regex token rather than an invalid string escape.
x = re.findall(r"\s", txt)

print(x)

if x:
  print("Yes, there is at least one match!")
else:
  print("No match")

[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
Yes, there is at least one match!


In [30]:
# \S : any character that is NOT white-space
# Return a match at every NON white-space character.
# Raw string: keeps "\S" as a regex token rather than an invalid string escape.
x = re.findall(r"\S", txt)

print(x)

if x:
  print("Yes, there is at least one match!")
else:
  print("No match")

['T', 'h', 'e', 'r', 'a', 'i', 'n', 'f', 'a', 'l', 'l', 's', 'm', 'a', 'i', 'n', 'l', 'y', 'i', 'n', 'S', 'p', 'a', 'i', 'n', '.', 'I', 't', 'l', 'a', 's', 't', 'f', 'o', 'r', '1', '1', 'd', 'a', 'y', 's']
Yes, there is at least one match!


In [31]:
# \w : a word character (letters, digits and the underscore _)
# For ASCII text this is equivalent to [a-zA-Z0-9_].
# Raw string: keeps "\w" as a regex token rather than an invalid string escape.
x = re.findall(r"\w", txt)

print(x)

if x:
  print("Yes, there is at least one match!")
else:
  print("No match")

['T', 'h', 'e', 'r', 'a', 'i', 'n', 'f', 'a', 'l', 'l', 's', 'm', 'a', 'i', 'n', 'l', 'y', 'i', 'n', 'S', 'p', 'a', 'i', 'n', 'I', 't', 'l', 'a', 's', 't', 'f', 'o', 'r', '1', '1', 'd', 'a', 'y', 's']
Yes, there is at least one match!


In [32]:
# \W : any character that is NOT a word character (e.g. "!", "?", ".", white-space)
# For ASCII text this is equivalent to [^a-zA-Z0-9_].
# Raw string: keeps "\W" as a regex token rather than an invalid string escape.
x = re.findall(r"\W", txt)

print(x)

if x:
  print("Yes, there is at least one match!")
else:
  print("No match")

[' ', ' ', ' ', ' ', ' ', '.', ' ', ' ', ' ', ' ', ' ']
Yes, there is at least one match!


In [33]:
# \Z : matches only at the end of the string (similar to $)
# Check if the string ends with "days".
# Raw string: keeps "\Z" as a regex token rather than an invalid string escape.
x = re.findall(r"days\Z", txt)

print(x)

if x:
  print("Yes, there is a match!")
else:
  print("No match")

['days']
Yes, there is a match!


## Sets

![jupyter](./sets.png)

In [34]:
# [arn] : matches any single character that is "a", "r" or "n".
x = re.findall("[arn]", txt)
print(x)
print("Yes, there is at least one match!" if x else "No match")

['r', 'a', 'n', 'a', 'a', 'n', 'n', 'a', 'n', 'a', 'r', 'a']
Yes, there is at least one match!


In [35]:
# [a-e] : matches any single lower-case character in the range "a" to "e".
x = re.findall("[a-e]", txt)
print(x)
print("Yes, there is at least one match!" if x else "No match")

['e', 'a', 'a', 'a', 'a', 'a', 'd', 'a']
Yes, there is at least one match!


In [36]:
# [^...] : a leading ^ inside a set negates it.
# Check if the string has characters other than "a", "r", "n" or a space.
x = re.findall("[^arn ]", txt)
print(x)
print("Yes, there is at least one match!" if x else "No match")

['T', 'h', 'e', 'i', 'f', 'l', 'l', 's', 'm', 'i', 'l', 'y', 'i', 'S', 'p', 'i', '.', 'I', 't', 'l', 's', 't', 'f', 'o', '1', '1', 'd', 'y', 's']
Yes, there is at least one match!


In [37]:
# [0123] : matches any one of the listed digits 0, 1, 2 or 3.
x = re.findall("[0123]", txt)
print(x)
print("Yes, there is at least one match!" if x else "No match")

['1', '1']
Yes, there is at least one match!


In [38]:
# [0-9] : matches any single digit from 0 to 9.
# Check if the string has any digits.
# The range must be 0-9 to cover every digit; "[0-3]" would miss 4 through 9.
x = re.findall("[0-9]", txt)

print(x)

if x:
  print("Yes, there is at least one match!")
else:
  print("No match")

['1', '1']
Yes, there is at least one match!


In [39]:
# [0-5][0-9] : a digit 0-5 followed by a digit 0-9,
# i.e. any two-digit number from 00 to 59.
x = re.findall("[0-5][0-9]", txt)
print(x)
print("Yes, there is at least one match!" if x else "No match")

['11']
Yes, there is at least one match!


In [40]:
# [a-zA-Z] : any single letter, lower case a-z or upper case A-Z.
x = re.findall("[a-zA-Z]", txt)
print(x)
print("Yes, there is at least one match!" if x else "No match")

['T', 'h', 'e', 'r', 'a', 'i', 'n', 'f', 'a', 'l', 'l', 's', 'm', 'a', 'i', 'n', 'l', 'y', 'i', 'n', 'S', 'p', 'a', 'i', 'n', 'I', 't', 'l', 'a', 's', 't', 'f', 'o', 'r', 'd', 'a', 'y', 's']
Yes, there is at least one match!


In [42]:
# Inside a set, "+" has no special meaning: [+] matches a literal "+" character.
# Check if the string has any + characters.
txt2 = 'The rain+ falls mainly in Spain. It last for 11+ days'
x = re.findall("[+]", txt2)
for shown in (txt2, x):
  print(shown)

print("Yes, there is at least one match!" if x else "No match")

The rain+ falls mainly in Spain. It last for 11+ days
['+', '+']
Yes, there is at least one match!


In [44]:

# Outside a set, "+" is a quantifier: "n+" matches one or more "n" characters,
# not a literal "+" sign.
x = re.findall("n+", txt2)
print(x)
print("Yes, there is at least one match!" if x else "No match")

['n', 'n', 'n', 'n']
Yes, there is at least one match!
