# NLP Basics: Learning how to use regular expressions

### Using regular expressions in Python

Python's built-in `re` module is the most commonly used regex resource. More details can be found in the [official `re` documentation](https://docs.python.org/3/library/re.html).

In [None]:
import re

# Baseline sentence: every word is separated by a single space.
re_test = 'This is a made up string to test 2 different regex methods'
# Same words, but with irregular runs of spaces between some of them.
re_test_messy = 'This      is a made up     string to test 2    different regex methods'
# Same words again, separated by assorted punctuation instead of whitespace.
re_test_messy1 = 'This-is-a-made/up.string*to>>>>test----2""""""different~regex-methods'

### Splitting a sentence into a list of words

In [None]:
# Split at every single whitespace character (\s).
# The pattern is a raw string so the backslash reaches the regex engine
# untouched instead of being treated as a (invalid) string escape.
re.split(r'\s', re_test)

In [None]:
# A lone \s splits at each individual space, so a run of several spaces
# produces empty strings in the result.
re.split(r'\s', re_test_messy)

In [None]:
# \s+ treats each run of one or more whitespace characters as one separator.
re.split(r'\s+', re_test_messy)

### Splitting on non-word characters and finding matches

In [None]:
# re_test_messy1 contains no whitespace, so splitting on \s+ finds no
# separator and returns the whole string as a single element.
re.split(r'\s+', re_test_messy1)

In [None]:
# Split on runs of one or more non-word characters
# (\W matches anything that is not a letter, digit or underscore).
re.split(r'\W+', re_test_messy1)

In [None]:
# Capitalizing a shorthand class negates it: \s matches whitespace,
# whereas \S matches any character that is NOT whitespace.
# Find every run of one or more non-whitespace characters.
re.findall(r'\S+', re_test)

In [None]:
# Runs of non-whitespace are found regardless of how many spaces separate them.
re.findall(r'\S+', re_test_messy)

In [None]:
# \w+ collects each run of word characters, skipping the punctuation between them.
re.findall(r'\w+', re_test_messy1)

In [None]:
# Three near-identical sentences that differ only in the PEP-like token.
pep8_test = 'I try to follow PEP8 guidelines'    # the intended spelling
pep7_test = 'I try to follow PEP7 guidelines'    # wrong digit
peep8_test = 'I try to follow PEEP8 guidelines'  # extra letter

In [None]:
# [a-z]+ matches runs of lowercase ASCII letters only.
re.findall(r'[a-z]+', pep8_test)

In [None]:
# [A-Z]+ matches runs of uppercase letters: it captures 'I' and 'PEP' but not the 8.
re.findall(r'[A-Z]+', pep8_test)

In [None]:
# Uppercase letters OR digits: both ranges share one character class,
# so a run may mix them freely.
re.findall(r'[A-Z0-9]+', pep8_test)

In [None]:
# Uppercase letters AND digits: one or more uppercase letters
# immediately followed by one or more digits.
re.findall(r'[A-Z]+[0-9]+', pep8_test)

In [None]:
# [0-9]+ accepts any digits, so the pattern also matches 'PEP7'.
re.findall(r'[A-Z]+[0-9]+', pep7_test)

In [None]:
# [A-Z]+ accepts any number of uppercase letters, so 'PEEP8' matches as well.
re.findall(r'[A-Z]+[0-9]+', peep8_test)

In [None]:
# Replace every uppercase-letters-then-digits token with the given text.
re.sub(r'[A-Z]+[0-9]+', "Leah's PEP8 Python styleguide", pep8_test)

In [None]:
# The pattern matches 'PEP7' too, so the wrong digit is replaced by the given text.
re.sub(r'[A-Z]+[0-9]+', "Leah's PEP8 Python styleguide", pep7_test)

In [None]:
# The pattern matches 'PEEP8' too, so the misspelled token is replaced by the given text.
re.sub(r'[A-Z]+[0-9]+', "Leah's PEP8 Python styleguide", peep8_test)

### Other examples of regex methods

- re.search()
- re.match()
- re.fullmatch()
- re.finditer()
- re.escape()