# Python RegEx:
    -- A RegEx or Regular Expression is a sequence of characters that forms a search pattern.

    -- RegEx can be used to check if a string contains the specified search pattern.

    -- Python has a built-in package called re, which can be used to work with Regular Expressions.

    -- Import the re module first. Once the re module is imported, we are ready to use regular expressions.

In [None]:
import re

In [None]:
# Example-01: check whether a string starts with "This" and ends with "Hasan".
import re

text = "This is Mehedi Hasan"

# "^" anchors the pattern to the start of the string and "$" to its end.
# The string does not end with "Mehedi", so this first search yields None ...
txt = re.compile("^This.*Mehedi$").search(text)
# ... whereas this search succeeds and the match covers the entire string.
txt = re.compile("^This.*Hasan$").search(text)
print(txt)


# RegEx Functions :
    -- The re module offers a set of functions that allows us to search a string for a match:

    # Function	    # Description
      -- findall	-- Returns a list containing all matches
      -- search	    -- Returns a Match object if there is a match anywhere in the string
      -- split	    -- Returns a list where the string has been split at each match
      -- sub	    -- Replaces one or many matches with a string




# Metacharacters : Metacharacters are characters with a special meaning.

# Character	        # Description	                                                Example	
[]	                A set of characters	                                          "[a-m]"	
\	                  Signals a special sequence
                    (can also be used to escape special characters)	              "\d"	

.	                  Any character (except newline character)	                    "he..o"	
^	                  Starts with	                                                  "^hello"	
$	                  Ends with	                                                    "planet$"	
*	                  Zero or more occurrences	                                    "he.*o"	
+	                  One or more occurrences	                                      "he.+o"	
?	                  Zero or one occurrences	                                      "he.?o"	
{}	                Exactly the specified number of occurrences	                  "he{2}o"	
|	                  Either or	                                                    "falls|stays"	
()	                Capture and group	 	


Note: Examples of using each metacharacter are given below:

In [None]:
# Example of []: a set of characters.
# Find every character of the text that belongs to a given set.

import re

text = "This is Mehedi Hasan"

# A range such as "a-m" is inclusive at both ends, so "m" itself is matched too.
txt = re.findall("[a-m]", text)

# Inside [] every character is taken literally: "[a,e,i,o,u]" would also match
# commas, so the vowels are listed without separators.
vowel_pattern = "[aeiou]"
txt = re.findall(vowel_pattern, text)   # each vowel found, in order, as a list
print(txt)

In [None]:
# Example of \: signals a special sequence (can also escape special characters).
# Find all digit characters.

import re

text = "And He is 35 dollars"

# Raw strings (r"...") pass the backslash through to the regex engine untouched;
# a plain "\d" is an invalid string escape that newer Python versions warn about.
txt = re.findall(r"\d", text)       # every single digit: ['3', '5']
txt = re.findall(r"\d\d", text)     # every pair of adjacent digits: ['35']
print(txt)

In [None]:
# Example of .: any character (except a newline).
# Look for "Meh", then any two characters, then an "i".

import re

txt = "This is Mehedi Hasan"
x = re.compile("Meh..i").findall(txt)   # findall hands the matches back as a list
print(x)


In [None]:
# Example of ^: starts with.
# Report whether the string begins with 'hello'.

import re

txt = "hello Mehedi"
x = re.compile("^hello").findall(txt)
# An empty list is falsy, so the result can select the message directly.
print("Yes, the string starts with 'hello'" if x else "No match")

In [None]:
# Example of $: ends with.
# Report whether the string ends with 'Hasan'.

import re

txt = "hello Mehedi Hasan"
x = re.compile("Hasan$").findall(txt)
# An empty list is falsy, so the result can select the message directly.
print("Yes, the string ends with 'Hasan'" if x else "No match")

In [None]:
# Example of *: zero or more occurrences.
# Search for a sequence that starts with "Me", followed by 0 or more (any) characters, and an "i".

# The import had been swallowed by the heading comment, which left this cell
# unable to run on its own.
import re

txt = "hello Mehedi Hasan"
x = re.findall("Me.*i", txt)    # "*" is greedy: it extends to the last "i" available
print(x)

In [None]:
# Example of +: one or more occurrences.
# Look for "Me", then at least one character of any kind, then an "i".

import re

txt = "hello Mehedi Hasan"
x = re.compile("Me.+i").findall(txt)
print(x)

In [None]:
# Example of ?: zero or one occurrence.
# Look for "he", then at most one character of any kind, then an "o".

import re

txt = "hello planet"
x = re.compile("he.?o").findall(txt)
print(x)

# Note: the result is empty because "hello" has two characters ("ll") between
# "he" and "o", while ".?" allows only zero or one.


In [None]:
# Example of {}: exactly the specified number of occurrences.
# Look for "Me", then exactly 3 characters of any kind, then an "i".

import re

txt = "hello Mehedi Hasan"
x = re.compile("Me.{3}i").findall(txt)
print(x)

In [None]:
# Example of |: either/or.
# Report whether the string contains either "Mehedi" or "stays".

import re

txt = "This is Mohammed Mehedi Hasan"
x = re.compile("Mehedi|stays").findall(txt)
print(x)
# A non-empty list means at least one of the alternatives was found.
print("Yes, there is at least one match!" if x else "No match")

# Special Sequences :
    -- A special sequence is a \ followed by one of the characters in the list below, and has a special meaning:

# Character	    # Description	                                                                                    # Example	
\A	            Returns a match if the specified characters are at the beginning of the string	                    "\AThe"	

\b	            Returns a match where the specified characters are at the beginning or at the end of a word         r"\bain"
                (the "r" in the beginning is making sure that the string is being treated as a "raw string")	    r"ain\b"
	
\B	            Returns a match where the specified characters are present, but NOT at the beginning                r"\Bain"
                (or at the end) of a word                                                                           r"ain\B"
                (the "r" in the beginning is making sure that the string is being treated as a "raw string")	
  


\d	            Returns a match where the string contains digits (numbers from 0-9)	                                "\d"	
\D	            Returns a match where the string DOES NOT contain digits	                                        "\D"	
\s	            Returns a match where the string contains a white space character	                                "\s"	
\S	            Returns a match where the string DOES NOT contain a white space character	                        "\S"	

\w	            Returns a match where the string contains any word characters 
                (characters from a to Z, digits from 0-9, and the underscore _ character)	                        "\w"

\W	            Returns a match where the string DOES NOT contain any word characters	                            "\W"	
\Z	            Returns a match if the specified characters are at the end of the string	                        "Spain\Z"	


In [None]:
# Example of \A: matches only if the given characters are at the very beginning of the string.
# Check if the string starts with "The".

import re

txt = "The First Independent City In Bangladesh! Jessore"
# Raw string: "\A" is not a valid Python string escape, so the r-prefix keeps
# the backslash for the regex engine without triggering an escape warning.
x = re.findall(r"\AThe", txt)
print(x)

if x:
    print("Yes, there is a match!")
else:
    print("No match")

In [None]:
# Example of \b: matches where the given characters sit at the beginning or end of a word.
# The r-prefix makes the pattern a raw string so the backslash reaches the regex engine.
# Report whether "rai" occurs at the beginning of a word.

import re

txt = "The rain in Spain"
x = re.compile(r"\brai").findall(txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")


In [None]:
# Another example of the special sequence \b.
# Report whether "ain" occurs at the end of a word.

import re

txt = "The rain in Spain"
x = re.compile(r"ain\b").findall(txt)   # one list entry per word ending in "ain"
print(x)

print("Yes, there is at least one match!" if x else "No match")

In [None]:
# Example of \B: matches where the given characters are present, but NOT at a word boundary.
# Report whether "ain" occurs somewhere other than the beginning of a word.

import re

txt = "The rain in Spain"
x = re.compile(r"\Bain").findall(txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")

In [None]:
# Another example of \B.
# Report whether "he" occurs somewhere other than the end of a word.

import re

txt = "This is Mehedi Hasan"
x = re.compile(r"he\B").findall(txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")

In [None]:
# Example of \d: matches any digit character (0-9).
# Check if the string contains any digits.

import re

txt = "He has 38 dollars and 59 cents"
# Raw strings keep "\d" intact for the regex engine; the non-raw spelling is an
# invalid string escape that newer Python versions warn about.
x = re.findall(r"\d", txt)          # every single digit, as a list
x = re.findall(r"\d\d", txt)        # every pair of adjacent digits, as a list
print(x)

if x:
    print("Yes, there is at least one match!")
else:
    print("No match")

In [None]:
# Example of \D: matches any character that is NOT a digit.
# Return a match at every non-digit character.

import re

txt = "He is 35"
# Raw string so the backslash reaches the regex engine without an escape warning.
x = re.findall(r"\D", txt)      # spaces are non-digits too, so they appear in the list
print(x)

if x:
    print("Yes, there is at least one match!")
else:
    print("No match")

In [None]:
# Example of \s: matches any white-space character.
# Return a match at every white-space character.

import re

txt = "This is Mehedi Hasan !"
# Raw string so the backslash reaches the regex engine without an escape warning.
x = re.findall(r"\s", txt)
print(x)

if x:
    print("Yes, there is at least one match!")
else:
    print("No match")

In [None]:
# Example of \S: matches any character that is NOT white space.
# Return a match at every non-white-space character.

import re

txt = "This is Mehedi"
# Raw string so the backslash reaches the regex engine without an escape warning.
x = re.findall(r"\S", txt)
print(x)

if x:
    print("Yes, there is at least one match!")
else:
    print("No match")

In [None]:
# Example of \w: matches any word character
# (letters a-z and A-Z, digits 0-9, and the underscore _ character).
# Return a match at every word character.

import re

txt = "He is 35 years"
# Raw string so the backslash reaches the regex engine without an escape warning.
x = re.findall(r"\w", txt)
print(x)

if x:
    print("Yes, there is at least one match!")
else:
    print("No match")

In [None]:
# Example of \W: matches any character that is NOT a word character.
# Return a match at every non-word character (such as "!", "?", white space, etc.).

import re

txt = "This is Mehedi and He is 35 years old ! ?"
# Raw string so the backslash reaches the regex engine without an escape warning.
x = re.findall(r"\W", txt)
print(x)

if x:
    print("Yes, there is at least one match!")
else:
    print("No match")

In [None]:
# Example of \Z: matches only if the given characters are at the very end of the string.
# Check if the string ends with "Mehedi".

import re

txt = "This Hasan Mehedi"
# Raw string: "\Z" is not a valid Python string escape, so the r-prefix keeps
# the backslash for the regex engine without triggering an escape warning.
x = re.findall(r"Mehedi\Z", txt)
print(x)

if x:
    print("Yes, there is a match!")
else:
    print("No match")

# Sets :
    -- A set is a set of characters inside a pair of square brackets [] with a special meaning:

# Set	        # Description	
[arn]	        Returns a match where one of the specified characters (a, r, or n) are present	
[a-n]	        Returns a match for any lower case character, alphabetically between a and n	
[^arn]	        Returns a match for any character EXCEPT a, r, and n	
[0123]	        Returns a match where any of the specified digits (0, 1, 2, or 3) are present	
[0-9]	        Returns a match for any digit between 0 and 9	
[0-5][0-9]	    Returns a match for any two-digit number from 00 to 59	
[a-zA-Z]	    Returns a match for any character alphabetically between a and z, lower case OR upper case	
[+]	            In sets, +, *, ., |, (), $, {} have no special meaning, so [+] means: return a match for any + character in the string

In [None]:
# Example of a set of listed characters, e.g. [arn]: matches any one of the listed characters.
# Here the set is [Meh]: report whether the string has any "M", "e" or "h" characters.

import re

txt = "This is Mehedi"
x = re.compile("[Meh]").findall(txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")

In [None]:
# Example of a range set, e.g. [a-n]: matches any lower-case character in the range.
# Here the range is [a-d]: report whether the string has any characters from "a" to "d".

import re

txt = "This is Mehedi"
x = re.compile("[a-d]").findall(txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")

In [None]:
# Example of a negated set, e.g. [^arn]: matches any character EXCEPT the listed ones.
# Report whether the string has characters other than those listed in the set.

import re

txt = "This is Mehedi"
# Without a space in the set, the spaces of the text count as matches ...
x = re.compile("[^Thisd]").findall(txt)
# ... with a space in the set, they are excluded as well.
x = re.compile("[^This d]").findall(txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")

In [None]:
# Example of [0123]: matches any one of the listed digits (0, 1, 2, or 3).
# Report whether the string has any 0, 1, 2, or 3 digits.

import re

txt = "He has 2 3 4 laptop"
x = re.compile("[0123]").findall(txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")

In [None]:
# Example of [0-9]: matches any digit between 0 and 9.
# Report whether the string has any digits.

import re

txt = "9 times before 11:45 AM"
x = re.compile("[0-9]").findall(txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")

In [None]:
# Example of [0-5][0-9]: matches any two-digit number from 00 to 59.
# Report whether the string has any two-digit numbers in that range.

import re

txt = "9 times before 11:45 AM"
x = re.compile("[0-5][0-9]").findall(txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")

In [None]:
# Example of [a-zA-Z]: matches any letter from a to z, lower case OR upper case.
# Report whether the string has any lower-case or upper-case letters.

import re

txt = "9 times before 11:45 PM"
x = re.compile("[a-zA-Z]").findall(txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")

In [None]:
# Example of [+]: inside a set, +, *, ., |, (), $ and {} have no special meaning,
# so [+] simply matches a literal "+" character.
# Here the set is [+*.|]: report whether the string has any "+", "*", "." or "|" characters.

import re

txt = "8 times before *.| 11:45 AM"
x = re.compile("[+*.|]").findall(txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")

In [None]:
# The findall() function
# findall() collects all matches into a list.

# Example-01: print a list of all matches.

import re

txt = "The rain in Spain"
x = re.compile("ai").findall(txt)
print(x)

# Matches appear in the list in the order in which they are found.

# Example-02: when nothing matches, the result is an empty list.

x = re.compile("Portugal").findall(txt)
print(x)

In [None]:
# The search() function
# search() scans the string for a match and returns a Match object if there is one.
# If there is more than one match, only the first occurrence is returned.

# Example-01: search for the first white-space character in the string.

import re

txt = "The rain in Spain"
# Raw string: "\s" is an invalid Python string escape that newer Python versions
# warn about; the r-prefix hands the backslash to the regex engine unchanged.
x = re.search(r"\s", txt)
print("The first white-space character is located in position:", x.start())


# If no match is found, the value None is returned.
# Example-02: make a search that returns no match.

txt = "The rain in Spain"
x = re.search("Portugal", txt)
print(x)

In [None]:
# The split() function:
# split() returns a list where the string has been cut at each match.

# Example-01: split at each white-space character.

import re

txt = "This is Mehedi Hasan"
# Raw string: keeps "\s" intact for the regex engine without an escape warning.
x = re.split(r"\s", txt)
print(x)


# The number of cuts can be capped with the maxsplit parameter. It is passed by
# keyword because supplying it positionally is deprecated as of Python 3.13.
# Example-02: split the string only at the first 1, 2 and 3 occurrences.

txt = "The rain in Spain"
x = re.split(r"\s", txt, maxsplit=1)
print(x)

x = re.split(r"\s", txt, maxsplit=2)
print(x)

x = re.split(r"\s", txt, maxsplit=3)
print(x)

In [None]:
# The sub() function:
# sub() replaces the matches with the text of your choice.


# Example-01: replace every white-space character with the number 9.

import re

txt = "The rain in Spain"
# Raw string: keeps "\s" intact for the regex engine without an escape warning.
x = re.sub(r"\s", "9", txt)
print(x)


# The number of replacements can be capped with the count parameter. It is passed
# by keyword because supplying it positionally is deprecated as of Python 3.13.
# Example-02: replace the first 2 occurrences.

txt = "The rain in Spain"
x = re.sub(r"\s", "9", txt, count=2)
print(x)

In [None]:
# Match Object:
# A Match object carries information about a search and its result.

# Note: when nothing matches, None is returned instead of a Match object.
# Example-01: run a search that returns a Match object.

import re

txt = "The rain in Spain"
x = re.compile("ai").search(txt)
print(x)    # prints the Match object's representation


# The Match Object : 
    -- The Match object has properties and methods used to retrieve information about the search, and the result:
    -- .span() returns a tuple containing the start-, and end positions of the match.
    -- .string returns the string passed into the function
    -- .group() returns the part of the string where there was a match

In [None]:
# Example-01: print the position (start and end) of the first match.
# The pattern looks for a word that starts with an upper-case "S".

import re

txt = "The rain in Spain"
word_starting_with_s = re.compile(r"\bS\w+")

x = word_starting_with_s.search(txt)
print(x.span())


# Example-02: print the string that was passed into the function.

x = word_starting_with_s.search(txt)
print(x.string)


# Example-03: print the part of the string where there was a match.
# The pattern again looks for a word that starts with an upper-case "S".

x = word_starting_with_s.search(txt)
print(x.group())

# Note: when nothing matches, None is returned instead of a Match object.