- `search` finds the first occurrence and returns a versatile Match object
- `findall` / replace (`sub`) => easy
- `finditer` to go through all the matches.
    - Can be combined with groups "()" to process each match

In [20]:
import re
# Sample text containing three "Spain<digit>" tokens for the search/findall demos
txt = "The rain in Spain1 and Spain2 Spain9"

## _Search_ finds only the first match and returns it as a Match object

In [21]:
# The pattern is a literal " Spain" followed by exactly one digit; search stops
# at the first place it matches.
x = re.search(" Spain[0-9]", txt)
print(x)
# span() is the (start, end) index pair; group() with no argument is the matched text
print(x.span(), x.group())
# start()/end() give the same indices one at a time; .string is the text that was searched
print(x.start(), x.end(), x.string)

<re.Match object; span=(11, 18), match=' Spain1'>
(11, 18)  Spain1
11 18 The rain in Spain1 and Spain2 Spain9


## _Findall_ returns all matches

In [22]:
# With no capturing group in the pattern, findall returns the matched substrings as a list
re.findall(" Spain[0-9]", txt)

[' Spain1', ' Spain2', ' Spain9']

## Replace

In [24]:
txt = "The rain in Spain"
# Replace every whitespace character with "9". The pattern is a raw string
# because "\s" in a plain literal is an invalid escape sequence (a warning on
# current Python versions).
re.sub(r"\s", "9", txt)

'The9rain9in9Spain'

## Using group
https://pynative.com/python-regex-capturing-groups/

In [34]:
target_string = "The price of PINEAPPLE ice cream is 20"
# Match upper case word => [A-Z]+
# Match the price => \d+
# \b = word boundary
result = re.search(r"(\b[A-Z]+\b).+(\b\d+)", target_string)
print(result.groups())
# group(0) is the whole match and groups 1..N are the captures. N is taken from
# the match itself so the loop keeps working if the pattern gains or loses a group.
for i in range(len(result.groups()) + 1):
    print(i, result.group(i))
# With capturing groups, findall returns one tuple of captures per match
print(re.findall(r"(\b[A-Z]+\b).+(\b\d+)", target_string))

('PINEAPPLE', '20')
0 PINEAPPLE ice cream is 20
1 PINEAPPLE
2 20
[('PINEAPPLE', '20')]


In [30]:
target_string = "The price of PINEAPPLE ice cream is 20 but CHOCOLATE is 30"
# Match upper case word => [A-Z]+
# Match the price => [\d]+
# The ".+" between the two groups is greedy: it runs on to the last number, so
# the first upper-case word is paired with the final price and CHOCOLATE is
# consumed inside ".+" instead of starting a match of its own.
result = re.search(r"(\b[A-Z]+\b).+(\b\d+)", target_string)
print(result.groups())
# For the same reason findall yields a single tuple covering both items
re.findall(r"(\b[A-Z]+\b).+(\b\d+)", target_string)

('PINEAPPLE', '30')


[('PINEAPPLE', '30')]

## Use finditer to return every match as a Match object

In [54]:
target_string = "The price of ice-creams PINEAPPLE 20 MANGO 30 CHOCOLATE 40 some ending words"

# two groups, each enclosed in its own ( and ) brackets
# group 1: a whole upper-case word
# group 2: a whole number
# a single "." (one arbitrary character, here the space) separates them, so
# each word is paired with the number that immediately follows it
# you can compile a pattern or pass it directly to re.finditer()
pattern = re.compile(r"(\b[A-Z]+\b).(\b\d+\b)")

# finditer yields one Match object per occurrence; print its two groups and the match itself
for i, match in enumerate(pattern.finditer(target_string)):
    print(i, match.group(1), match.group(2))
    print(match)


0 PINEAPPLE 20
<re.Match object; span=(24, 36), match='PINEAPPLE 20'>
1 MANGO 30
<re.Match object; span=(37, 45), match='MANGO 30'>
2 CHOCOLATE 40
<re.Match object; span=(46, 58), match='CHOCOLATE 40'>


### Attempt to process each match separately

In [66]:
def process_group(match):
    """Return the replacement characters for one word/number match.

    Group 1 contributes its first two characters, lower-cased; group 2 is
    read as an integer and increased by 5. The result is a flat list of
    single characters rather than a string.
    """
    word, number = match.group(1, 2)
    prefix = word[:2].lower()
    bumped = str(int(number) + 5)
    return [*prefix, *bumped]
    
# Rebuild the string as a list of characters: copy the text between matches
# unchanged and substitute each match with the output of process_group.
list_match = list(pattern.finditer(target_string))
last_end = 0  # index just past the previous match (0 before the first one)
tmp = []
for i, match in enumerate(list_match):
    # Untouched text between the previous match and this one
    tmp_str = target_string[last_end:match.start()]
    tmp.extend(tmp_str)
    print(i, tmp_str, last_end, match.start())

    last_end = match.end()
    tmp.extend(process_group(match))

# Tail after the final match. Slice from last_end rather than match.end(): when
# nothing matched, the loop variable is unbound (or left over from an earlier
# cell), whereas last_end is still 0 and the whole string is copied through.
tmp.extend(target_string[last_end:])
print("Original string: " + target_string)
print("Processed string: "+''.join(tmp))
        

0 The price of ice-creams  0 24
1   36 37
2   45 46
Original string: The price of ice-creams PINEAPPLE 20 MANGO 30 CHOCOLATE 40 some ending words
Processed string: The price of ice-creams pi25 ma35 ch45 some ending words
