# Regex Examples

In [1]:
import re

In [54]:
# re.IGNORECASE (short form: re.I) lets [a-z] match upper-case letters as well.
pattern_ignore_case = re.compile(r"[a-z]*", flags=re.IGNORECASE)

In [55]:
# "*" also allows zero repetitions, so besides the words findall() returns an
# empty-string match at every space and at the end of the text.
pattern_ignore_case.findall("Hello kusumakar how are you")

['Hello', '', 'kusumakar', '', 'how', '', 'are', '', 'you', '']

## Ignoring new lines

In [56]:
# re.DOTALL (short form: re.S) only changes what "." matches. This pattern
# contains no ".", so the flag does not change which text is matched here.
pattern_ignore_new_line = re.compile(r"[a-z]+", flags=re.DOTALL)

In [58]:
# This sample text contains no newline, so it does not exercise re.S; the
# result would be the same without the flag.
pattern_ignore_new_line.findall("hello kusumakar  whats up")

['hello', 'kusumakar', 'whats', 'up']

## Date Logic

In [42]:
# Raw string: \w, \W and \s are regex escapes, not string escapes, and Python
# warns about them as invalid escape sequences in a normal string literal.
# The pattern matches three runs of word characters separated by single
# non-word characters, followed by optional whitespace.
pattern_date = re.compile(r"\w+\W\w+\W\w+\s*")

In [43]:
# The pattern only checks the shape word-separator-word-separator-word; it
# does not validate the values (e.g. "20/20/2020" is accepted).
pattern_date.findall("2020-20-12 20/20/2020 ")

['2020-20-12 ', '20/20/2020 ']

## re.DOTALL or re.S
### The dot (`.`): in the default mode, it matches any character except a newline. If the DOTALL flag has been specified, it matches any character including a newline.

## re.LOCALE or re.L
### re.LOCALE makes \w, \W, \b, \B and case-insensitive matching depend on the current locale settings. In Python 3 this flag can only be used with bytes patterns.

In [72]:
# Raw string so that \w reaches the regex engine instead of being treated as
# an (invalid) string escape sequence.
patter_locale = re.compile(r"\w*")

In [75]:
# Flags belong in re.compile(): the second positional argument of
# Pattern.findall() is the start index `pos`. Passing re.LOCALE (== 4) there
# silently skipped the first four characters, so the first result was 'o'.
# re.LOCALE is also only valid for bytes patterns, so it cannot be applied to
# this str pattern.
patter_locale.findall("hello kusumakar how are you")

['hello', '', 'kusumakar', '', 'how', '', 'are', '', 'you', '']

# re.VERBOSE or re.X
## This flag allows you to write regular expressions that look nicer and are more readable by allowing you to visually separate logical sections of the pattern and add comments.

In [165]:
# Raw string so \w, \s and \. reach the regex engine untouched.
# NOTE: "[A|The]" is a character class (any one of A, |, T, h, e), not the
# alternation "A or The", which would be written (?:A|The). The matching
# behaviour is kept as it was because the sample output below relies on it.
# re.S is a no-op here: the pattern contains no unescaped ".".
verbose_pattern = re.compile(r"""
[A|The]+[\w\s]+   # one or more of the characters A, |, T, h, e, then word characters or whitespace
\.                # a literal full stop ends the match

""",re.VERBOSE|re.IGNORECASE|re.S)

In [166]:
# Both sentences are returned. The second starts with "That", which matches
# only because [A|The] is a character class rather than an alternation.
verbose_pattern.findall("A cat that was running.That cat has fallen into the river.")

['A cat that was running.', 'That cat has fallen into the river.']

# Grouping

## Grouping is a powerful feature that enables operations such as:
- Creating sub expressions to apply quantifiers
- Limiting the scope of alternation
- Extracting information from the matched text
- Using extracted information in the regex

### Grouping is achieved by using ( ). A pattern written inside ( ) is treated as one unit.

#### i.  Regular expression for ababababc

In [173]:
# The parentheses make "ab" a single unit, so "+" repeats the whole pair.
pattern1 = re.compile(r"(ab)+c")

In [176]:
# search() returns the first match; here (ab)+ takes all five "ab" pairs
# before the final "c", so the match spans the whole string.
pattern1.search("abababababc")

<re.Match object; span=(0, 11), match='abababababc'>

## Capturing
### Capturing is another important feature of grouping. Groups capture the text they match, which can then be used in other operations such as sub, or in the regex itself.

In [210]:
# Three capturing groups, each of which may repeat one or more times.
pattern1 = re.compile(
    r"(ab)+"  # group 1
    r"(cd)+"  # group 2
    r"(ef)+"  # group 3
)

In [211]:
# finditer() yields one match object per non-overlapping match.
it=pattern1.finditer(r"ababcdefefef")

In [213]:
# Take the first match. Despite its name, `groups` is a match object (its
# .group() method is called in the next cell), not a tuple of groups.
groups=next(it)

In [217]:
# A repeated group keeps only its last repetition, hence a single
# "ab", "cd" and "ef" even though the text repeats them.
print(*groups.group(1, 2, 3))

ab cd ef


# Named groups
## Syntax: `(?P<name>pattern)`

In [386]:
# Raw string so that \w is not treated as an (invalid) string escape.
# Two named groups, "first" and "second", separated by a literal hyphen.
patter = re.compile(r"(?P<first>\w+)-(?P<second>\w+)")

In [387]:
# search() returns a match object whose named groups are read in the next cells.
match=patter.search("hello-world")

In [388]:
# Look the group up by its name: the text before the hyphen.
match.group('first')

'hello'

In [389]:
# Look the group up by its name: the text after the hyphen.
match.group('second')

'world'

In [420]:
# Sample log file name: a name, an underscore, a hyphen-separated date and ".log".
logfile = "VolDis_log_23-10-2020.log"

In [428]:
# Raw string so \w, \d and \W reach the regex engine untouched.
# The dot before "log" is escaped so that it matches only a literal "." and
# not any character (unescaped, "...2020xlog" would also have matched).
#   file: word characters, one or more underscores, word characters
#   date: three digit runs separated by non-word characters
patt = re.compile(r"(?P<file>\w+_+\w+)_(?P<date>\d+\W+\d+\W+\d+)\.log")

In [429]:
# Apply the file-name pattern to the sample log file name.
match=patt.search(logfile)

In [430]:
# The part of the name before the date.
match.group('file')

'VolDis_log'

In [431]:
# The date part of the name.
match.group('date')

'23-10-2020'

In [434]:
# A second file name with the same layout, used to demonstrate sub().
new_file= "Voldis_log_24-10-2020.log"

In [437]:
# Raw string so that \g is not treated as an (invalid) string escape.
# \g<name> inserts the text captured by the named group; the whole match
# (including the ".log" suffix) is replaced by "<date>-<file>".
patt.sub(r"\g<date>-\g<file>", new_file)

'24-10-2020-Voldis_log'

# Atomic Groups
## These are special groups in the regex module. They are designed to improve performance: once the regex engine has matched an atomic group it does not backtrack into it, so when a match fails it doesn't keep retrying with every character in the data.