# RegEx Module

In [1]:
import re

# RegEx Functions

## The findall() Function
### returns a list containing all matches.

In [2]:
# Sample sentence reused by the cells that follow.
txt = "The rain in Spain"
# Collect every non-overlapping occurrence of the literal "ai".
x1 = re.findall(pattern="ai", string=txt)
x1

['ai', 'ai']

In [3]:
# A pattern that never occurs yields an empty list rather than None.
x2 = re.findall(pattern="Portugal", string=txt)
x2

[]

## The search() Function
### searches the string for a match and returns a Match object if there is one.
### (If there is more than one match, only the first occurrence of the match will be returned)

In [4]:
# Find the first whitespace character in txt.
# The pattern is a raw string so that the backslash reaches the regex engine
# untouched; "\s" in a normal string literal is an invalid escape sequence and
# triggers a warning on recent Python versions.
x3 = re.search(r"\s", txt)
x3

<re.Match object; span=(3, 4), match=' '>

In [5]:
# Index in the searched string where the match stored in x3 begins.
x3.start()

3

In [6]:
# Index just past the last character of the match stored in x3.
x3.end()

4

In [7]:
# "Portugal" does not occur in txt, so re.search() returns None.
x4 = re.search("Portugal", txt)
x4  # a bare None produces no cell output

In [8]:
# print() makes the None result visible.
print(x4)

None


In [9]:
# txt contains more than one "a"; search() reports only the first occurrence.
x5 = re.search("a", txt)
x5

<re.Match object; span=(5, 6), match='a'>

In [10]:
# Start index of the first "a" found in txt.
x5.start()

5

In [11]:
# End index (exclusive) of the first "a" found in txt.
x5.end()

6

## Match Object
### A Match Object is an object containing information about the search and the result.

### Note: If there is no match, the value None will be returned, instead of the Match Object.

In [12]:
# Search txt for the first occurrence of the literal "ai".
x6 = re.search("ai", txt)
print(x6)  # prints the Match object's repr (span and matched text)

<re.Match object; span=(5, 7), match='ai'>


### The Match object has properties and methods used to retrieve information about the search, and the result:

### .span() returns a tuple containing the start and end positions of the match.
### .string returns the string passed into the function
### .group() returns the part of the string where there was a match

In [13]:
# The regular expression looks for any word that starts with an upper case "S":
# \b anchors at a word boundary, S is the literal letter, \w+ takes the rest of the word.
x7 = re.search(r"\bS\w+", txt)
print(x7.span())

(12, 17)


In [14]:
# First element of the span tuple: where the matched word begins.
x7.start()

12

In [15]:
# Second element of the span tuple: the index just past the matched word.
x7.end()

17

In [16]:
# Print the string passed into the function:
print(x7.string)

The rain in Spain


In [17]:
# Print the part of the string where there was a match.

# The regular expression looks for any word that starts with an upper case "S":

x7 = re.search(r"\bS\w+", txt)
x7

<re.Match object; span=(12, 17), match='Spain'>

In [18]:
# group() with no argument returns the text that matched.
print(x7.group())

Spain


In [19]:
# Three capturing groups: the part before "@", the part between "@" and the
# literal dot, and the part after the dot.
x7_1 = re.match(
    r'(\w+)@(\w+)\.(\w+)',
    'username@hackerrank.com',
)
x7_1.group()  # no argument: the whole matched text

'username@hackerrank.com'

In [20]:
x7_1.group(0)       # The entire match, same as group() with no argument.

'username@hackerrank.com'

In [21]:
x7_1.group(1)       # The first parenthesized subgroup (text before the "@").

'username'

In [22]:
x7_1.group(2)       # The second parenthesized subgroup (between "@" and the dot).

'hackerrank'

In [23]:
x7_1.group(3)       # The third parenthesized subgroup (text after the dot).

'com'

In [24]:
x7_1.group(1,2,3)   # Multiple arguments return a tuple with one entry per requested group.

('username', 'hackerrank', 'com')

## Example: first alphanumeric character that is repeated consecutively

In [25]:
s = "MohammedHacherrankTook"
# \1 must repeat whatever group 1 captured, so the pattern finds the first
# alphanumeric character that occurs twice in a row.
ex = re.search(r'([a-zA-Z0-9])\1', s)
# Group 1 holds that character, which is also the first character of the whole match.
print(ex.group(1))

m


### groups() returns a tuple containing all the subgroups of the match.

In [26]:
# All captured subgroups of the e-mail match, in order, as a tuple.
x7_1.groups()

('username', 'hackerrank', 'com')

### groupdict()
### Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name

In [27]:
# (?P<name>...) gives a capturing group a name; groupdict() keys the
# captured text by those names.
x7_2 = re.match(
    r"(?P<first_name>\w+) (?P<last_name>\w+)",
    "Malcolm Reynolds",
)
x7_2.groupdict()

{'first_name': 'Malcolm', 'last_name': 'Reynolds'}

## The split() Function
### returns a list where the string has been split at each match:

In [28]:
# Split txt at every whitespace character.
# Raw string: "\s" in a normal string literal is an invalid escape sequence
# and triggers a warning on recent Python versions.
x8 = re.split(r"\s", txt)
print(x8)

['The', 'rain', 'in', 'Spain']


### Split the string only at the first occurrence:

In [29]:
# Split txt at the first whitespace character only.
# The pattern is a raw string so "\s" is not treated as a (invalid) string escape,
# and maxsplit is passed by keyword because passing it positionally is deprecated.
x9 = re.split(r"\s", txt, maxsplit=1)
print(x9)

['The', 'rain in Spain']


## The sub() Function
### replaces the matches with the text of your choice:

In [30]:
# Replace every whitespace character in txt with "9".
# Raw string: "\s" in a normal string literal is an invalid escape sequence
# and triggers a warning on recent Python versions.
x10 = re.sub(r"\s", "9", txt)
print(x10)

The9rain9in9Spain


### You can control the number of replacements by specifying the count parameter:

In [31]:
# Replace only the first two whitespace characters in txt with "9".
# The pattern is a raw string so "\s" is not treated as a (invalid) string escape,
# and count is passed by keyword because passing it positionally is deprecated.
x11 = re.sub(r"\s", "9", txt, count=2)
print(x11)

The9rain9in Spain


## The match() Function
### checks for a match only at the beginning of the string:

In [32]:
# re.match() only tries the pattern at index 0; "abcdef" starts with "a",
# so the "c" further along is not found and the result is None.
x12 = re.match(pattern="c", string="abcdef")
x12

In [33]:
# print() makes the None result visible.
print(x12)

None


In [34]:
# This string begins with "c", so the match at index 0 succeeds.
x13 = re.match(pattern="c", string="cdef")
x13

<re.Match object; span=(0, 1), match='c'>