## What are regular expressions?

In [None]:
# Minimal cell: write a greeting to standard output.
print("Hello")

Hello


In [None]:
# Sample file names with mixed month spellings and letter case.
filenames = [
    "nov-12.txt",
    "november-14.txt",
    "Oct-17.txt",
    "Nov-22.txt",
]

## Regular Expression Example

In [None]:
# Sample sentence containing two e-mail addresses and one bare "@handle".
text = (
    'Hi there you here exa_mple@example.com @blabla '
    'some more text here and there another@example.de'
)

In [None]:
import re

# Loose e-mail matcher: one or more non-space characters, "@", one or more
# non-space characters, a literal dot, then a lowercase-letter suffix.
# The dot is escaped so it matches only "." (an unescaped "." matches any
# character), and the pattern is a raw string so the backslash reaches the
# regex engine unchanged.
pattern = re.compile(r"[^ ]+@[^ ]+\.[a-z]+")
matches = pattern.findall(text)
matches

['exa_mple@example.com', 'another@example.de']

## Meta characters

In [None]:
.        Matches any single character
\        Escapes one of the meta characters to treat it as a regular character
[...]    Matches a single character or a range that is contained within brackets
         e.g. [_-] and [-_] are equivalent: order inside brackets does not matter, but outside brackets it does
+        Matches the preceding element one or more times
?        Matches the preceding element zero or one time
*        Matches the preceding element zero or more times
{m,n}    Matches the preceding element at least m and not more than n times
^        Matches the beginning of a line or string
$        Matches the end of a line or string
[^...]   Matches a single character or a range that is not contained within the brackets
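(?:...|...) "Or" operator: matches either alternative (non-capturing group)
()       Groups a sub-expression, e.g. so that a following ? makes the whole group optional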

In [None]:
import re

text = 'Hi there you here exa_mple@example.com @blabla.com some more text here and there another@example.de another@exampl.ne'

# E-mail matcher restricted to the ".com" and ".de" top-level domains.
# - Raw string: "\." in a normal string literal is an invalid escape sequence.
# - No "+" after the group: the suffix must be exactly one of the alternatives
#   (otherwise "comde" would be accepted).
# - Trailing \b: the suffix must end at a word boundary, so ".community" is
#   not mistaken for ".com".
pattern = re.compile(r"[^ ]+@[^ ]+\.(?:com|de)\b")
matches = pattern.findall(text)
matches


['exa_mple@example.com', 'another@example.de']

## Extract URLs from Text

In [None]:
# Read the entire file into one string, then echo it.
with open('urls.txt') as file:  # text read mode is the default
    content = file.read()

print(content)

http://google.com
https://example.com
http://www.wikipedia.com
http://pythonhow.com
https://python.org


In [None]:
import re

# "http://" or "https://", an optional literal "www." prefix, then a run of
# characters other than space/newline that ends in ".com".
# Both dots are escaped so they match only "." (the original "www." would also
# accept e.g. "wwwx"), and the pattern is a raw string so the backslashes are
# passed to the regex engine unchanged.
pattern = re.compile(r"https?://(?:www\.)?[^ \n]+\.com")
matches = pattern.findall(content)
matches

['http://google.com',
 'https://example.com',
 'http://www.wikipedia.com',
 'http://pythonhow.com']

## Extract IP addresses using regex

In [None]:
# Read the entire file into one string, then echo it.
with open('ips.txt') as file:  # text read mode is the default
    content = file.read()
print(content)

912.131.120.111
912.131.134.000
912.131.129.129


In [None]:
import re

# Four dot-separated groups of three digits where the third group is 120-129.
# Written as a raw string so "\." reaches the regex engine as an escaped dot
# ("\." in a normal string literal is an invalid escape sequence); the
# redundant "{1}" quantifier is dropped.
pattern = re.compile(r"[0-9]{3}\.[0-9]{3}\.12[0-9]\.[0-9]{3}")
matches = pattern.findall(content)
matches

['912.131.120.111', '912.131.129.129']

## Filter filenames

In [None]:
from pathlib import Path

# Collect the bare names (final path component) of every entry in ./files.
root_dir = Path('files')
filenames = root_dir.iterdir()  # lazy iterator; consumed by the comprehension below
filenames_str = [entry.name for entry in filenames]
filenames_str

['Nov-12.txt',
 'billy_Nov-13.txt',
 'november-14.txt',
 'nov-20.txt',
 'Nov-22.txt',
 'November-24.txt',
 'Oct-17.txt']

In [None]:
import re

# "nov" followed by optional letters, a hyphen, a day number 1-20, then a
# literal ".txt"; case-insensitive.
# The dot is escaped so only a real "." is accepted before "txt".
# The pattern is deliberately left unanchored, so names with a prefix
# (e.g. "billy_Nov-13.txt") still match.
pattern = re.compile(r"nov[a-z]*-(?:[1-9]|1[0-9]|20)\.txt", re.IGNORECASE)
# search() returns a match object or None, which is all a filter needs.
matches = [filename for filename in filenames_str if pattern.search(filename)]
matches

['Nov-12.txt', 'billy_Nov-13.txt', 'november-14.txt', 'nov-20.txt']

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=e50798d4-8475-4246-be17-3e2d4aac3759' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>