# Regex

In [1]:
import re

In [2]:
txt = 'Inside a Room'

In [3]:
re.search('Room',txt)

<re.Match object; span=(9, 13), match='Room'>

In [4]:
match = re.search('Room',txt)

In [5]:
match.end()

13

In [6]:
match.start()

9

In [12]:
match.span()

(9, 13)

In [23]:
new_txt = 'Rooming Inside A Room'

In [24]:
re.findall('Room',new_txt)

['Room', 'Room']

In [25]:
finds = re.findall('Room',new_txt)

In [26]:
finds

['Room', 'Room']

In [27]:
finds.count('Room')

2

In [29]:
# re.fullmatch succeeds only when the *entire* string matches the pattern.
# new_txt contains more than just 'Room', so the result here is None.
# Stored under its own name because `value` is bound to the re.split result below.
full_match = re.fullmatch('Room',new_txt)

In [30]:
mail = 'example@gmail.com'

In [32]:
value = re.split('@',mail)

In [33]:
value

['example', 'gmail.com']

In [39]:
tex = 'Room of a Room of a Rooming'

In [40]:
re.finditer('Room',tex)

<callable_iterator at 0x1db2a669e08>

In [45]:
# re.finditer yields one Match object per occurrence of the pattern.
# A dedicated loop variable is used so that the `match` object bound by the
# earlier re.search cell is not silently overwritten.
for room_match in re.finditer('Room',tex):
    print(room_match.group())

Room
Room
Room


# Part-2 - Special Pattern Codes.

![image.png](attachment:image.png)

In [47]:
text = '123-456-7890'

In [49]:
# Raw string (r'...') so that the backslashes in \d reach the regex engine
# unchanged; in a normal string literal \d is an invalid escape sequence.
# The hyphen needs no escaping outside a character class.
result = re.search(r'\d\d\d-\d\d\d-\d\d\d\d',text)

In [50]:
result

<re.Match object; span=(0, 12), match='123-456-7890'>

![image.png](attachment:image.png)

In [53]:
# The r prefix makes this a raw string, so Python passes the backslashes in \d
# through to the regex engine instead of treating them as string escapes.
# {3} and {4} are quantifiers: exactly that many repetitions of the preceding \d.
result1 = re.search(r'\d{3}-\d{3}-\d{4}',text)

In [54]:
result1

<re.Match object; span=(0, 12), match='123-456-7890'>

In [55]:
result1.group()

'123-456-7890'

In [57]:
result1.group(0)

'123-456-7890'

In [58]:
type(result1.group(0))

str

#### To get a subset of the phone number
- Use `re.compile` to build one pattern from several pattern codes, wrapping each part in parentheses so that it can be retrieved separately.

In [68]:
# Each pair of parentheses defines a capture group, so the individual parts of
# the number can be read back from the match with .group(1), .group(2), .group(3).
compiler = re.compile(r'(\d{3})-(\d{3})-(\d{4})')

In [69]:
# Search with the pre-compiled pattern object directly.
result2 = compiler.search(text)

In [70]:
result2

<re.Match object; span=(0, 12), match='123-456-7890'>

In [71]:
result2.group()

'123-456-7890'

In [72]:
result2.group(0)

'123-456-7890'

In [73]:
result2.group(1)

'123'

# Part-3 - Additional Regex Syntax

### OR

In [75]:
re.search(r'cat','The cat is here')

<re.Match object; span=(4, 7), match='cat'>

In [76]:
re.search(r'dog','The cat is here')

In [77]:
# Searching for 'dog' alone found no match. The | operator means OR:
# the pattern matches if either alternative ('cat' or 'dog') is found.
re.search(r'cat|dog','The cat is here')

<re.Match object; span=(4, 7), match='cat'>

In [78]:
re.search(r'cat|dog','The dog is here')

<re.Match object; span=(4, 7), match='dog'>

### Wild Card Operator

In [79]:
re.findall(r'at','The cat in the hat sat there.')

['at', 'at', 'at']

In [80]:
re.findall(r'.at','The cat in the hat sat there.')

['cat', 'hat', 'sat']

In [81]:
re.findall(r'.at','The cat in the hat sat there and splat.')

['cat', 'hat', 'sat', 'lat']

In [83]:
# Each dot matches exactly one character (any character except a newline,
# so spaces count too); three dots capture the three characters before 'at'.
re.findall(r'...at','The cat in the hat sat there and splat.')

['e cat', 'e hat', 'splat']

#### Starts With & Ends With

In [85]:
# Starts With
re.findall(r'^6','6 is the number')

['6']

In [87]:
# Ends With
re.findall(r'6$','The number is 6')

['6']

In [88]:
# Starts With
re.findall(r'^\d','6 is the number')

['6']

In [89]:
# Starts With
re.findall(r'^\d','77 is the number')

['7']

In [92]:
# \D matches every character that is not a digit; gluing those characters
# back together yields the sentence with the digits removed.
non_digit_chars = re.findall(r'\D','there are 3 numbers 34 inside this 5 sentence.')
''.join(non_digit_chars)

'there are  numbers  inside this  sentence.'

In [110]:
phrase = 'there are 3 numbers 34 inside this 5 sentence.'

In [111]:
pattern =r'[\d]'

In [112]:
re.findall(pattern,phrase)

['3', '3', '4', '5']

In [116]:
pattern =r'[\d]+'  # + means "one or more", so consecutive digits come back as a single match
re.findall(pattern,phrase)

['3', '34', '5']

In [117]:
pattern =r'[^\d]+'  # ^ as the first character inside [ ] negates the set: runs of non-digit characters
re.findall(pattern,phrase)

['there are ', ' numbers ', ' inside this ', ' sentence.']

In [121]:
pattern =r'[^\d]'  # Without the +, every non-digit character is returned as a separate match
re.findall(pattern,phrase)

['t',
 'h',
 'e',
 'r',
 'e',
 ' ',
 'a',
 'r',
 'e',
 ' ',
 ' ',
 'n',
 'u',
 'm',
 'b',
 'e',
 'r',
 's',
 ' ',
 ' ',
 'i',
 'n',
 's',
 'i',
 'd',
 'e',
 ' ',
 't',
 'h',
 'i',
 's',
 ' ',
 ' ',
 's',
 'e',
 'n',
 't',
 'e',
 'n',
 'c',
 'e',
 '.']

In [123]:
pattern =r'[\d]+'  # One or more consecutive digits
match = re.search(pattern,phrase)

In [124]:
match.group() # Picks only the first one.

'3'

In [154]:
# A common use case is getting rid of the punctuation in a sentence.
phrase = 'This is a string! But it has punctuation. How can we remove it?'
pattern =  r'[^!.?]+' # Inside [ ], a leading ^ negates the set: match runs of characters that are not !, . or ?

In [155]:
re.findall(pattern,phrase)

['This is a string', ' But it has punctuation', ' How can we remove it']

In [156]:
pattern =  r'[^!. ?]+' # Exclude spaces as well, so each word becomes its own match
re.findall(pattern,phrase)

['This',
 'is',
 'a',
 'string',
 'But',
 'it',
 'has',
 'punctuation',
 'How',
 'can',
 'we',
 'remove',
 'it']

#### Trying to understand the use of [   ]  with and without ^

In [142]:
# [^\d] is a negated character class: it matches every character that is not a digit.
re.findall(r'[^\d]','6 is the num6ber')

[' ', 'i', 's', ' ', 't', 'h', 'e', ' ', 'n', 'u', 'm', 'b', 'e', 'r']

In [139]:
# [\d] matches a digit wherever it occurs in the string (the same as plain \d).
re.findall(r'[\d]','6 is the num6ber')

['6', '6']

In [145]:
# Outside of [ ], ^ anchors the match to the start of the string, so only the leading digit is found.
re.findall(r'^\d','6 is the num6ber')

['6']

In [137]:
re.findall(r'\d','6 is the num6ber')

['6', '6']