In [None]:
# Regular Expressions (Regex)

# Textual searching using a search pattern
# Checks if a string contains the specified pattern
# In Python, we use the re module

import re

In [None]:
# re methods
# re.findall() - returns a list of all matches
# re.search() - returns a Match object if there is a match
# re.split() - returns a list where the string has been split at each match
# re.sub() - replaces one or many matches in the string

txt = "Welcome to Skillstorm!"

# ^ and $ pin the pattern to both ends, so the entire string has to fit it.
welcome_pattern = re.compile("^Welcome.*storm!$")
match = welcome_pattern.search(txt)
match

<re.Match object; span=(0, 22), match='Welcome to Skillstorm!'>

In [None]:
# Regex Operators
# https://docs.python.org/3/library/re.html
# if you use an escape, make sure to make it a raw string with r"My String"

# \A or ^ - match at the beginning of the string
# \b - matches at a word boundary (the beginning or end of a word)
# \B - matches at a position that is NOT the beginning or end of a word
# \d - matches a digit
# \D - matches not a digit
# \s - matches a whitespace character
# \S - matches not whitespace
# \w - matches a-zA-Z0-9_
# \W - matches any non word character
# \Z or $ - matches at the end of the string

# Meta Characters
# [] - A set of characters
# . - any character
# ^ - starts with (if used in a set [] it becomes NOT)
# $ - ends with
# * - Zero or more of
# + - One or more of
# ? - Zero or one of
# {n} - matches exactly n occurrences
# | - Either or
# () - Capture and group

# So our last example we had:
# ^Welcome - String starts with 'Welcome'
# .* - Matches any number of characters
# storm!$ - String ends with 'storm!'

# One compiled pattern, used two ways: match() only tries the start of the
# string, while findall() collects every non-overlapping hit.
word_pattern = re.compile(r"[\w]+")

match = word_pattern.match(txt)
print(match)

match = word_pattern.findall(txt)
print(match)

<re.Match object; span=(0, 7), match='Welcome'>
['Welcome', 'to', 'Skillstorm']


In [None]:
# Look Behind and Look Ahead

# Sometimes we want to check for a pattern before or after our match
# For this we can use the Look-Behind and Look-Ahead operators
# (?<=abc) - matches a string that is preceded by 'abc'
# (?=abc) - matches a string that is followed by 'abc'

txt = 'Square: A plane figure with four equal straight sides and four right angles.'

# The look-behind requires ': ' immediately before the match without
# including it in the result, so only the definition text is returned.
definition_pattern = re.compile(r"(?<=: ).*")
matches = definition_pattern.findall(txt)
matches

['A plane figure with four equal straight sides and four right angles.']

In [1]:
# Getting started writing regular expressions
#
# It can be a bit complex at first.
# Fortunately we have some options.
# Common use cases have predefined patterns
# Emails - r'([A-Za-z0-9]+[._-])*[A-Za-z0-9]+@[A-Za-z0-9-]+(\.[A-Za-z]{2,})+'
# Or we can test our expressions with a tool
# https://regexr.com/

In [2]:
# List repetition copies references, not objects: all five slots of arr
# refer to the one inner list created here.
shared_inner = []
arr = [shared_inner] * 5
print(arr)

[[], [], [], [], []]


In [3]:
# arr was built with [[]]*(5), so its five slots all reference one list;
# appending through index 3 therefore shows up in every slot.
arr[3].append("at")

In [4]:
# Show arr after the append: every slot now contains "at".
arr

[['at'], ['at'], ['at'], ['at'], ['at']]