## Regular Expressions

Regular expressions are an essential part of Python. They form a tiny, highly specialized programming language embedded inside Python and made available through the <code>re</code> module. Regular expressions are typically used for searching and replacing text.

### Import module

In [392]:
import re

### Functions

### Basic Features

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>.</code></td>
    <td>any character</td>
    <td>Matches any character except the newline <code>\n</code>.</td>
  </tr>
</table>

In [393]:
# Sample text; note that the sixth item has a newline between 'gr' and 'y'.
string = 'gray, grey, grAy, gr4y, gr@y, gr\ny grep'

# '.' matches any single character except the newline, so 'gr\ny' is skipped.
re.findall('gr.y', string)

['gray', 'grey', 'grAy', 'gr4y', 'gr@y']

In [394]:
# Two dots match any two characters after 'gr', which additionally finds 'grep'.
re.findall('gr..', string)

['gray', 'grey', 'grAy', 'gr4y', 'gr@y', 'grep']

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>x|y</code></td>
    <td>alternation</td>
    <td>Matches either <code>x</code> or <code>y</code>.</td>
  </tr>
</table>

In [396]:
string = 'colour and color are the same'

# check if the string contains either 'color' or 'colour'
re.findall('colour|color', string)

['colour', 'color']

Alternation is eager.

In [397]:
# Alternatives are tried from left to right and the first one that matches
# is taken, so the longer alternative 'ab' is never reached here.
re.findall('a|ab', 'ab')

['a']

### Anchors

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>^x (or \Ax) </code></td>
    <td>string start</td>
    <td>String starts with <code>x</code>.</td>
  </tr>
  <tr>
    <td><code>^x</code></td>
    <td>line start (optional)</td>
    <td>Line starts with <code>x</code>. Requires the <code>re.MULTILINE</code> flag.</td>
  </tr>
</table>

In [398]:
string = 'python is awesome and easy to learn'

# find the first word of the string
# (raw string: '\w' is not a valid Python string escape, so it must reach re untouched)
re.findall(r'^\w+', string)

['python']

In [399]:
# same but with \A (raw string keeps the backslashes intact for the regex engine)
re.findall(r'\A\w+', string)

['python']

In [400]:
string = '   hi awesomepy!'

# left trim. Replace one or more whitespaces at the beginning of the line.
# (raw string: '\s' is a regex shorthand, not a Python string escape)
string = re.sub(r'^\s+', '', string)
string

'hi awesomepy!'

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>x$ (or x\Z)</code></td>
    <td>string end</td>
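    <td>String ends with <code>x</code>. Note that <code>$</code> also matches just before a trailing newline, whereas <code>\Z</code> matches only at the very end of the string.</td>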
  </tr>
  <tr>
    <td><code>x$</code></td>
    <td>line end (optional)</td>
    <td>Line ends with <code>x</code>. Requires the <code>re.MULTILINE</code> flag.</td>
  </tr>
</table>

In [401]:
string = 'python is awesome and easy to learn'

# find the last word of the string
# (raw string: '\w' is not a valid Python string escape, so it must reach re untouched)
re.findall(r'\w+$', string)

['learn']

In [402]:
# same but with \Z (raw string keeps the backslashes intact for the regex engine)
re.findall(r'\w+\Z', string)

['learn']

In [403]:
string = 'hi awesomepy!     '

# right trim. Replace one or more whitespaces at the end of the line.
# (raw string: '\s' is a regex shorthand, not a Python string escape)
string = re.sub(r'\s+$', '', string)
string

'hi awesomepy!'

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>\bword\b (also \bx or x\b)</code></td>
    <td>word boundaries</td>
    <td>Matches a <code>word</code> that has word boundaries, i.e. whitespaces or special characters.</td>
  </tr>
  <tr>
    <td><code>\Bword\B (also \Bx or x\B)</code></td>
    <td>non-word boundaries</td>
    <td>Matches a <code>word</code> that has non-word boundaries, i.e. it is surrounded by letters, digits or underscores.</td>
  </tr>
  <tr>
    <td><code>\bword\B (or \Bword\b)</code></td>
    <td>mixed boundaries</td>
    <td>Matches a <code>word</code> that has mixed boundaries.</td>
  </tr>
</table>

In [404]:
string = 'is is is4 1is is% $is iiss 1is2 1is_ 4is& $is% is\n'

# Find every 'is' that has a non-word character on both sides. The leading
# and trailing '.' pull those neighbouring characters into the result.
re.findall(r'.\bis\b.', string)

[' is ', ' is%', '$is ', '$is%']

In [405]:
# The same, but with a word character on both sides of 'is'.
re.findall(r'.\Bis\B.', string)

['iiss', '1is2', '1is_']

In [406]:
# Mixed: a non-word character before 'is' and a word character after it.
re.findall(r'.\bis\B.', string)

[' is4']

In [407]:
# Swap the boundaries: a word character before 'is', a non-word character after it.
re.findall(r'.\Bis\b.', string)

['1is ', '4is&']

### Quantifiers

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>x?</code></td>
    <td>optional character</td>
    <td>The character <code>x</code> is optional.</td>
  </tr>
  <tr>
    <td><code>x??</code></td>
    <td>optional character (lazy)</td>
    <td>The character x is optional. Lazy, so the x is excluded in the match if possible.</td>
  </tr>
</table>

In [408]:
string = "ab abc bc ca"
# Greedy optional 'c': it is included in the match whenever it is present.
re.findall('abc?', string)

['ab', 'abc']

In [409]:
# Lazy optional 'c': it is left out of the match, even in 'abc'.
re.findall('abc??', string)

['ab', 'ab']

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>x*</code></td>
    <td>0 or more</td>
    <td>Zero or more occurrences of <code>x</code>.</td>
  </tr>
  <tr>
    <td><code>x*?</code></td>
    <td>0 or more (lazy)</td>
    <td>Zero or more occurrences of <code>x</code>, but as few times as possible</td>
  </tr>
</table>

In [410]:
string = '10, 101, 102, 1022, 102222, 1010, 112'

# find every "10" followed by zero or more "2" digits (greedy: takes all the "2"s):
re.findall('102*', string)

['10', '10', '102', '1022', '102222', '10', '10']

In [411]:
string = '10, 101, 102, 1022, 102222, 1010, 112'

# the same search, but lazy: '2*?' takes as few "2" digits as possible, i.e. none:
re.findall('102*?', string)

['10', '10', '10', '10', '10', '10', '10']

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>x+</code></td>
    <td>1 or more</td>
    <td>One or more occurrences of <code>x</code>.</td>
  </tr>
  <tr>
    <td><code>x+?</code></td>
    <td>1 or more (lazy)</td>
    <td>One or more occurrences of <code>x</code>, but as few times as possible.</td>
  </tr>
</table>

In [412]:
string = '10, 101, 102, 1022, 102222, 1010, 201'

# find every "10" followed by one or more "2" digits (greedy: takes all the "2"s):
re.findall('102+', string)

['102', '1022', '102222']

In [413]:
string = '10, 101, 102, 1022, 102222, 1010, 201'

# the same search, but lazy: '2+?' stops after the first "2":
re.findall('102+?', string)

['102', '102', '102']

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>x{n}</code></td>
    <td>fixed</td>
    <td>Exactly <code>n</code> occurrences of <code>x</code>.</td>
  </tr>
  <tr>
    <td><code>x{n,}</code></td>
    <td><code>n</code> or more</td>
    <td><code>n</code> or more occurrences of <code>x</code>.</td>
  </tr>
  <tr>
    <td><code>x{,m}</code></td>
    <td>0 through <code>m</code></td>
    <td>From zero to <code>m</code> occurrences of <code>x</code>.</td>
  </tr>
  <tr>
    <td><code>x{n,m}</code></td>
    <td><code>n</code> through <code>m</code></td>
    <td>From <code>n</code> to <code>m</code> occurrences of <code>x</code>.</td>
  </tr>
</table>

In [414]:
string = 'python pyython pyyyyython'
# exactly one 'y' between 'p' and 'thon'
re.findall('py{1}thon', string)

['python']

In [415]:
# one or more 'y'
re.findall('py{1,}thon', string)

['python', 'pyython', 'pyyyyython']

In [416]:
# at most two 'y' (zero would be accepted as well)
re.findall('py{,2}thon', string)

['python', 'pyython']

In [417]:
# between one and two 'y'
re.findall('py{1,2}thon', string)

['python', 'pyython']

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>x{n,}?</code></td>
    <td><code>n</code> or more (lazy)</td>
    <td><code>n</code> or more occurrences of <code>x</code>, but as few times as possible.</td>
  </tr>
  <tr>
    <td><code>x{,m}?</code></td>
    <td>0 through <code>m</code> (lazy)</td>
    <td>From zero to <code>m</code> occurrences of <code>x</code>, but as few times as possible.</td>
  </tr>
  <tr>
    <td><code>x{n,m}?</code></td>
    <td><code>n</code> through <code>m</code> (lazy)</td>
    <td>From <code>n</code> to <code>m</code> occurrences of <code>x</code>, but as few times as possible.</td>
  </tr>
</table>

In [418]:
# Three runs of 'y': one, three and six characters long.
string = 'py pyyy pyyyyyy'

# Lazy bounded repeat: each match stops at the minimum of three 'y', so the
# six-character run is split in two  ->  py p/yyy/ p/yyy//yyy/
regex = re.compile('y{3,6}?')
regex.findall(string)

['yyy', 'yyy', 'yyy']

In [419]:
# Three runs of 'y': one, three and six characters long.
string = 'py pyyy pyyyyyy'

# Lazy open-ended repeat: still stops at the minimum of three 'y'
# ->  py p/yyy/ p/yyy//yyy/
regex = re.compile('y{3,}?')
regex.findall(string)

['yyy', 'yyy', 'yyy']

In [420]:
# Three runs of 'y': one, three and six characters long.
string = 'py pyyy pyyyyyy'

# Lazy repeat with a minimum of zero: the empty string is tried first at
# every position, so the result consists of empty matches and single 'y's.
regex = re.compile('y{,3}?')
regex.findall(string)

['',
 '',
 'y',
 '',
 '',
 '',
 'y',
 '',
 'y',
 '',
 'y',
 '',
 '',
 '',
 'y',
 '',
 'y',
 '',
 'y',
 '',
 'y',
 '',
 'y',
 '',
 'y',
 '']

### Sets and Ranges

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>[xyz]</code></td>
    <td>set</td>
    <td>Matches where one of the specified characters (<code>x</code>, <code>y</code>, or <code>z</code>) are present. Any character except <code>^-]\</code> can be used. </td>
  </tr>
  <tr>
    <td><code>[x-z]</code></td>
    <td>range</td>
    <td>Matches where one of the specified characters (from <code>x</code> to <code>z</code>) are present. Any character except <code>^-]\</code> can be used. </td>
  </tr>
</table>

In [421]:
string = 'x-axis, y-axis and z-axis'

# find all three axes: the set [xyz] matches exactly one of the listed letters
re.findall('[xyz]-axis', string)

['x-axis', 'y-axis', 'z-axis']

In [422]:
# same result, but written as the range from x to z.
re.findall('[x-z]-axis', string)

['x-axis', 'y-axis', 'z-axis']

In [423]:
# show how to escape special and reserved characters
string = 'char=x char=y char=z char=^ char=[ char=] char=- char=" char=§ char=$ char=\\ char=* char=( char=)'

# find all special characters.
# Raw string: the regex-level escapes (\^ \[ \] \- \\) reach the regex engine
# unchanged instead of relying on Python passing unknown escapes through.
re.findall(r'char=[\^\[\]\-"§$\\*()]', string)

['char=^',
 'char=[',
 'char=]',
 'char=-',
 'char="',
 'char=§',
 'char=$',
 'char=\\',
 'char=*',
 'char=(',
 'char=)']

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>[^xyz]</code></td>
    <td>negated set</td>
    <td>Matches for any character EXCEPT <code>x</code>, <code>y</code>, and <code>z</code>. Any character except <code>^-]\</code> can be used.</td>
  </tr>
  <tr>
    <td><code>[^x-z]</code></td>
    <td>negated range</td>
    <td>Matches for any character EXCEPT characters from <code>x</code> to <code>z</code>. Any character except <code>^-]\</code> can be used.</td>
  </tr>
</table>

In [424]:
string = 'x-axis, y-axis and z-axis'

# find all axes except z: [^z] matches any single character other than 'z'.
re.findall('[^z]-axis', string)

['x-axis', 'y-axis']

In [425]:
# all axes except the ones from x to z: nothing is left, so the result is empty.
re.findall('[^x-z]-axis', string)

[]

In [426]:
# show how to escape special and reserved characters
string = 'char=x char=y char=z char=^ char=[ char=] char=- char=" char=§ char=$ char=\\ char=* char=( char=)'

# find all except the special ones.
# Raw string: the regex-level escapes (\^ \[ \] \- \\) reach the regex engine
# unchanged instead of relying on Python passing unknown escapes through.
re.findall(r'char=[^\^\[\]\-"§$\\*()]', string)

['char=x', 'char=y', 'char=z']

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>[123]</code></td>
    <td>set</td>
      <td>Matches where any of the specified digits (<code>1</code>, <code>2</code>, or <code>3</code>) are present.</td>
  </tr>
  <tr>
    <td><code>[1-3]</code></td>
    <td>range</td>
    <td>Matches where any of the specified digits from <code>1</code> to <code>3</code> are present.</td>
  </tr>
</table>

In [427]:
string = 'x1 x2 x3 x4 x5'
# 'x' followed by one of the digits 1, 2 or 3
re.findall('x[123]', string)

['x1', 'x2', 'x3']

In [428]:
re.findall('x[1-3]', string) # the same set written as the range 1-3

['x1', 'x2', 'x3']

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>[^123]</code></td>
    <td>negated set</td>
      <td>Matches any digits EXCEPT <code>1</code>, <code>2</code>, and <code>3</code>.</td>
  </tr>
  <tr>
    <td><code>[^1-3]</code></td>
    <td>negated range</td>
    <td>Matches any digits EXCEPT digits from <code>1</code> to <code>3</code>.</td>
  </tr>
</table>

In [429]:
# 'x' followed by any character other than 1, 2 or 3
re.findall('x[^123]', string)

['x4', 'x5']

In [430]:
re.findall('x[^1-3]', string) # the same negated set written as a range

['x4', 'x5']

### Shorthands

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>\w</code></td>
    <td>word character</td>
    <td>Matches a word character, including digits and the underscore. In ASCII mode (<code>re.ASCII</code>) this is equivalent to <code>[a-zA-Z0-9_]</code>; by default it also matches Unicode word characters.</td>
  </tr>
</table>

In [433]:
string = 'column1\tcolumn2\n'

# Find the names of the columns using only \w. The tab \t and the newline \n
# are not word characters, so they are ignored.
re.findall(r'\w+', string)

['column1', 'column2']

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>\W</code></td>
    <td>non-word character</td>
    <td>Matches a non-word character. In ASCII mode (<code>re.ASCII</code>) this is equivalent to <code>[^a-zA-Z0-9_]</code>.</td>
  </tr>
</table>

In [434]:
# Now only the tab and the newline are found.
re.findall(r'\W+', string)

['\t', '\n']

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>\d</code></td>
    <td>digit</td>
    <td>Matches a digit. In ASCII mode (<code>re.ASCII</code>) this is equivalent to <code>[0-9]</code>; by default it also matches other Unicode decimal digits.</td>
  </tr>
</table>

In [435]:
string = 'gravitational constant: 6.67408e-11 m^3 kg^-1 s^-2'

# Find value of the constant.
# The decimal point is escaped: a bare '.' would match any character.
re.findall(r'\d+\.\d+e-?\d+', string)

['6.67408e-11']

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>\D</code></td>
    <td>non-digit</td>
    <td>Matches a non-digit. In ASCII mode (<code>re.ASCII</code>) this is equivalent to <code>[^0-9]</code>.</td>
  </tr>
</table>

In [436]:
string = 'gravitational constant: 6.67408e-11 m^3 kg^-1 s^-2'

# Find all runs of non-digits.
re.findall(r'\D+', string)

['gravitational constant: ', '.', 'e-', ' m^', ' kg^-', ' s^-']

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>\s</code></td>
    <td>whitespace</td>
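    <td>Matches a whitespace character. In ASCII mode (<code>re.ASCII</code>) this is equivalent to <code>[ \t\n\r\f\v]</code>; by default it also matches other Unicode whitespace characters.</td>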
  </tr>
</table>

In [437]:
string = "   a sting with a neccessary trimming     "

# Select the trimmed part of the string: the lazy group captures everything
# between the leading and the trailing whitespace.
re.findall(r'^[\s]*(.*?)[\s]*$', string)

['a sting with a neccessary trimming']

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>\S</code></td>
    <td>non-whitespace</td>
    <td>Matches a non-whitespace character. In ASCII mode (<code>re.ASCII</code>) this is equivalent to <code>[^ \t\n\r\f\v]</code>.</td>
  </tr>
</table>

In [438]:
string = 'column1 \t column2 \n'

# Find all runs of "non-whitespace" characters.
re.findall(r'\S+', string)

['column1', 'column2']

### Parentheses

#### (x)
Capture and group

In [441]:
string = 'awesomepy.com'

# find 'com' and replace it with 'edu'
re.sub('com', 'edu', string)

'awesomepy.edu'

In [442]:
string = 'awesomepy.com'

# now we keep 'awesomepy' (group 1) and append '.edu'; the dot in the pattern
# is escaped so that it matches only a literal '.'
re.sub(r'(awesomepy)\.com', r'\1.edu', string)

'awesomepy.edu'

#### (?aiLmsux)x
Matching using inline flags.
 - a - ASCII-only matching
 - i - ignore case
 - L - locale dependent
 - m - multi-line
 - s - dot matches all
 - u - Unicode matching
 - x - verbose

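a - ASCII-only matching. The flag changes only the shorthands (<code>\w</code>, <code>\b</code>, <code>\d</code>, <code>\s</code> and their negations) and case-insensitive matching, so it makes no difference for the literal patterns below.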

In [443]:
string = 'awesomepy.cöm'
# ascii() returns the repr of the string with non-ASCII characters escaped ('ö' becomes \xf6)
ascii_string = ascii(string)
print(ascii_string)

'awesomepy.c\xf6m'


In [444]:
# find 'cöm' and replace it with 'com', writing the 'ö' literally in the pattern
re.sub('(?:(cöm))', 'com', string)

'awesomepy.com'

In [445]:
# the same replacement with the inline 'a' (ASCII-only) flag; the 'ö' is written
# as the string escape \xf6, which Python turns into 'ö' before the pattern is compiled
re.sub('(?a:(c\xf6m))', 'com', string)

'awesomepy.com'

In [446]:
# the same without the 'a' flag: the result is identical, so the flag makes no
# difference for this pattern
re.sub('(?:(c\xf6m))', 'com', string)

'awesomepy.com'

i - ignore case

In [447]:
string = 'awesomepy.com aWesomepy.com'

# case-insensitive match via the inline 'i' flag; group 1 keeps the original
# capitalisation (raw string so that '\.' reaches the regex engine unchanged)
re.sub(r'(?i:(awesomepy)\.com)', r'\1.edu', string)

'awesomepy.edu aWesomepy.edu'

x - verbose

In [448]:
# Text containing a number in scientific notation.
string = "electron mass: 9.10938356e-31 kg"

# With re.X (verbose) the whitespace and the '#' comments inside the pattern
# are ignored, which lets each part of the expression be annotated.
a = re.compile(r"""\d +  # the integral part
                   \.    # the decimal point
                   \d *  # some fractional digits""", re.X)

a.findall(string)

['9.10938356']

### Special Groups
Lookaround groups return only a result: match or no match. They do not consume characters in the string, but only assert whether a match is possible or not. Lookarounds make it possible to write regular expressions that could not be written without them.

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>x(?=y)</code></td>
    <td>Positive lookahead</td>
    <td>Matches a <code>x</code> that is followed by a <code>y</code>, without making the <code>y</code> part of the match.</td>
  </tr>
</table>

In [449]:
string = "streets"

# 't(?=s)' matches a 't' only when an 's' follows; the 's' itself is not
# consumed, so only the second 't' in "streets" is replaced.
patt = re.compile('t(?=s)')
patt.sub('*', string)

'stree*s'

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>x(?!y)</code></td>
    <td>Negative lookahead</td>
    <td>Matches an <code>x</code> that is NOT followed by a <code>y</code>, without making the <code>y</code> part of the match.</td>
  </tr>
</table>

In [450]:
string = "streets"

# 't(?!s)' matches a 't' only when no 's' follows, so only the first 't'
# in "streets" is replaced.
patt = re.compile('t(?!s)')
patt.sub('*', string)

's*reets'

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>(?<=y)x</code></td>
    <td>Positive lookbehind</td>
    <td>Same as positive lookahead, but works backwards.</td>
  </tr>
</table>

In [451]:
string = 'streets'

# '(?<=s)t' matches a 't' only when an 's' stands directly before it, so only
# the first 't' in "streets" is replaced.
patt = re.compile('(?<=s)t')
patt.sub('*', string)

's*reets'

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>(?<&#33;y)x</code></td>
    <td>Negative lookbehind</td>
    <td>Same as negative lookahead, but works backwards.</td>
  </tr>
</table>

In [452]:
string = 'streets'

# '(?<!s)t' matches a 't' only when no 's' stands directly before it, so only
# the second 't' in "streets" is replaced.
patt = re.compile('(?<!s)t')
patt.sub('*', string)

'stree*s'

### Control Characters

<table class="w3-section w3-table w3-bordered">
  <tr>
    <th>Syntax</th>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>\t</code></td>
    <td>tab</td>
    <td>Matches a tab character (ASCII 0x09).</td>
  </tr>
  <tr>
    <td><code>\n</code></td>
    <td>newline</td>
    <td>Matches a new line character (ASCII 0x0A).</td>
  </tr>
  <tr>
    <td><code>\r</code></td>
    <td>carriage return</td>
    <td>Matches a carriage return character (ASCII 0x0D).</td>
  </tr>
  <tr>
    <td><code>\f</code></td>
    <td>form feed</td>
    <td>Matches a form feed character (ASCII 0x0C).</td>
  </tr>
  <tr>
    <td><code>\v</code></td>
    <td>vertical tab</td>
    <td>Matches a vertical tab character (ASCII 0x0B).</td>
  </tr>
  <tr>
    <td><code>\a</code></td>
    <td>alert control character</td>
    <td>Matches "alert" or "bell" control character (ASCII 0x07).</td>
  </tr>
  <tr>
    <td><code>\b</code></td>
    <td>backspace control character</td>
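    <td>Matches the "backspace" control character (ASCII 0x08), but only inside a character class such as <code>[\b]</code>; elsewhere in a pattern <code>\b</code> is a word boundary.</td>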
  </tr>
</table>

In [508]:
string = 'tab:\t, cariage return:\r, form feed:\f, vertical tab:\v newline:\n'

# find the word (plus colon) that directly precedes each control character;
# the lookahead checks for the control character without adding it to the match.
re.findall(r'\w+:(?=[\t\r\f\v\n])', string)

['tab:', 'return:', 'feed:', 'tab:', 'newline:']

## Cookbook

### Find IPv4 Address

In [453]:
strings = ['192.168.0.1',
           '255.255.255.0',
           '0.0.0.0',
           '127.0.0.1',
           '256.0.0.1']

# Four dot-separated octets. Each octet is 250-255, 200-249 or 0-199 (a
# leading zero is tolerated). Written as a raw string so that '\d' and '\.'
# reach the regex engine unchanged, and compiled once outside the loop.
patt = re.compile(r'^(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)$')

for s in strings:
    print(patt.search(s))

<re.Match object; span=(0, 11), match='192.168.0.1'>
<re.Match object; span=(0, 13), match='255.255.255.0'>
<re.Match object; span=(0, 7), match='0.0.0.0'>
<re.Match object; span=(0, 9), match='127.0.0.1'>
None


### Password Validator

In [454]:
passwords = [
    'qwerty',
    'qwerty123',
    '12345678',
    'xbv5sFs_67TT',
    'AbCFfff&/FF678_',
]

# Every rule is a lookahead placed at the start of the string. Lookaheads do
# not consume anything, so a valid password yields an empty match at position 0.
patt = re.compile(r"""^                   # starts with
                      (?=.*[a-z])         # at least 1 lowercase alphabetical character
                      (?=.*[A-Z])         # at least 1 uppercase alphabetical character
                      (?=.*[0-9])         # at least 1 numeric character
                      (?=.*[!@#\$%\^&\*]) # at least 1 special character
                      (?=.{8,})           # at least 8 characters""", re.X)

for password in passwords:
    print(patt.search(password))

None
None
None
None
<re.Match object; span=(0, 0), match=''>


### Find URL (HTTP, HTTPS or FTP)

In [511]:
urls = ['example.com',
       'example.com/index.html',
       'not-valid-url@smt.com',
       'http://example.com',
       'ftp://example.com/contacts',
       'https://example.edu/send.php']

# Optional scheme, host name, a dot, a 2-4 character top-level domain and an
# optional path/query. The URL is matched exactly once: repeating the whole
# expression with an outer '(...)*' would also accept the empty string and
# several URLs glued together, and would nest quantifiers inside each other.
# The outer group is kept (without '*') so that the group numbers stay the same.
patt = re.compile(r'^(((https?|ftp):\/\/)?([\w\-\.])+(\.)([\w]){2,4}([\w\/+=%&_\.~?\-]*))$')

for s in urls:
    print(patt.search(s))

<re.Match object; span=(0, 11), match='example.com'>
<re.Match object; span=(0, 22), match='example.com/index.html'>
None
<re.Match object; span=(0, 18), match='http://example.com'>
<re.Match object; span=(0, 26), match='ftp://example.com/contacts'>
<re.Match object; span=(0, 28), match='https://example.edu/send.php'>


### Find E-Mail Address

In [513]:
emails = ['name.surname@example.com',
         'name!notvalid@#redirect.it',
         'qwerty123445@sample.edu']

# Local part, '@', then one or more dot-separated domain labels.
# The local-part character class lists the plain ASCII apostrophe ('), not a
# typographic quote, which is why the pattern is quoted with double quotes.
patt = re.compile(r"\b[\w.!#$%&'*+\/=?^`{|}~-]+@[\w-]+(?:\.[\w-]+)*\b")

for s in emails:
    print(patt.search(s))

<re.Match object; span=(0, 24), match='name.surname@example.com'>
None
<re.Match object; span=(0, 23), match='qwerty123445@sample.edu'>


### HTML File Validator

    <html>.*?<head>.*?<title>.*?</title>.*?</head>.*?<body[^>]*>.*?</body>.*?</html>

### Other Resources
- https://docs.python.org/3/library/re.html
- https://docs.python.org/3/howto/regex.html
- https://www.regular-expressions.info/
- https://www.w3schools.com/python/python_regex.asp