##### Splitting Strings on Any of Multiple Delimiters

In [1]:
# Sample text whose fields are separated inconsistently: by spaces, ';' and ','
# (some followed by a space, some not). Used by the split examples below.
line = 'asdf fjdk; afed, fjek,asdf, foo'
line

'asdf fjdk; afed, fjek,asdf, foo'

In [8]:
import re
# Pitfall: ';,' is not a set of alternatives -- it matches only the literal
# two-character sequence ";,". That sequence never occurs in `line`, so the
# string comes back unsplit as a single-element list.
re.split(r';,', line)

['asdf fjdk; afed, fjek,asdf, foo']

In [10]:
# A character class splits on ANY single ';', ',' or whitespace character.
# Two adjacent delimiters (e.g. "; ") are split separately, which is why
# empty strings appear in the result.
re.split(r'[;,\s]', line)

['asdf', 'fjdk', '', 'afed', '', 'fjek', 'asdf', '', 'foo']

In [12]:
# Appending \s* makes a delimiter plus any whitespace that follows it count
# as one separator, so no empty fields are produced.
re.split(r'[;,\s]\s*', line)

['asdf', 'fjdk', 'afed', 'fjek', 'asdf', 'foo']

In [16]:
# With a capture group in the pattern, the matched delimiters are kept in the
# result list. Only ';' and ',' are delimiters here, so the surrounding
# spaces stay attached to the fields.
re.split(r'(;|,)', line)

['asdf fjdk', ';', ' afed', ',', ' fjek', ',', 'asdf', ',', ' foo']

##### Checking the Start or End of a String for Specific Text Patterns (Filename Extensions, URL Schemes, and So On)

In [19]:
import os
# Names of the entries in the current working directory that end in '.ipynb'.
[f for f in os.listdir() if f.endswith('.ipynb')]

['Berkeley.ipynb', 'Last_N_items.ipynb', 'Strings.ipynb', 'Winery Data.ipynb']

In [20]:
# Check whether the current directory contains at least one '.ipynb' notebook.
# The generator expression lets any() stop at the first match.
any(f.endswith('.ipynb') for f in os.listdir())

True

In [25]:
from urllib.request import urlopen
def read_data(name):
    """Fetch the raw contents of a remote resource.

    Parameters
    ----------
    name : str
        Location of the data. Only names beginning with ``http:``,
        ``https:`` or ``ftp:`` are handled.

    Returns
    -------
    bytes or None
        The response body for a recognised URL scheme; ``None`` for any
        other name (local paths are not read by this function).
    """
    # str.startswith accepts a tuple, so all schemes are tested in one call.
    if name.startswith(('http:', 'https:', 'ftp:')):
        # The context manager closes the response even if read() raises,
        # instead of leaving the connection open until garbage collection.
        with urlopen(name) as response:
            return response.read()
    # Not a recognised URL scheme: nothing is read.
    return None

# Live HTTP request -- this cell needs network access to run.
read_data('http://www.bing.com/')



In [28]:
# str.startswith accepts a tuple of candidate prefixes and is True if the
# string begins with any one of them.
choices = ('http:', 'ftp:')
url = 'http://www.python.org'
url.startswith(choices)

True

In [31]:
import re
# Regex equivalent of the startswith() check: re.match only tries the pattern
# at the beginning of the string and returns a match object on success.
url = 'http://www.python.org'
re.match('http:|https:|ftp:', url)

<_sre.SRE_Match object; span=(0, 5), match='http:'>

##### Matching Strings Using Shell Wildcard Patterns

In [32]:
from fnmatch import fnmatch, fnmatchcase

# Shell-style wildcard: '*' matches any run of characters.
fnmatch('foo.txt', '*.txt')

True

In [40]:
# '?' matches exactly one character of any kind -- here '@' and '1'.
fnmatch('@1oo.txt', '??oo.txt')

True

In [47]:
# '[0-9]' matches exactly one digit; the '*' that follows absorbs the
# remaining characters up to the literal text after it.
fnmatch('Dat1235.txt', 'Dat[0-9]*.txt'), fnmatch('Dat1235.1txt', 'Dat[0-9]*.[0-9]txt')

(True, True)

In [49]:
# Wildcard patterns are not limited to filenames; here they filter
# street addresses.
addresses = [
'5412 N CLARK ST',
'1060 W ADDISON ST',
'1039 W GRANVILLE AVE',
'2122 N CLARK ST',
'4802 N BROADWAY',
]

# Keep addresses that start with '10' plus two more digits and a space, and
# end in 'ST'. '1039 W GRANVILLE AVE' has the right number but the wrong suffix.
[address for address in addresses if fnmatchcase(address, '10[0-9][0-9] *ST')]

['1060 W ADDISON ST']

##### Matching and Searching for Text Patterns

In [57]:
# Date-like sample text: three digit runs separated by '/'.
text1 = '11/27/2017'

# NOTE: `re` is already imported in earlier cells; this repeat is harmless
# but redundant.
import re
# re.match tests the pattern at the start of the string only.
if re.match(r'\d+/\d+/\d+', text1):
    print('yes')
else:
    print('no')

yes


In [67]:
# Precompile the pattern so it can be reused; the three parenthesised groups
# capture the three numeric parts of the date separately.
datepart = re.compile(r'(\d+)/(\d+)/(\d+)')
if datepart.match(text1):
    print("True")

True


In [68]:
m = datepart.match(text1)
# group(0) is the whole matched text, but its value is NOT shown: a notebook
# cell echoes only its final expression.
m.group(0)
# groups() returns all captured sub-groups as a tuple -- this is the output.
m.groups()

('11', '27', '2017')

In [69]:
# group(1) is the first captured sub-group (the first number in the date).
m.group(1)

'11'

##### Searching and Replacing Case-Insensitive Text

In [72]:
# re.IGNORECASE makes 'python' match regardless of letter case; findall
# returns each occurrence exactly as it appears in the text.
text = 'UPPER PYTHON, lower python, Mixed Python'
re.findall('python', text, flags=re.IGNORECASE)

['PYTHON', 'python', 'Python']

In [73]:
# Replace every case-insensitive occurrence of 'python' with 'snake'.
# Limitation: the replacement is inserted verbatim, so the original casing
# (PYTHON / Python) is not carried over to the substituted text.
re.sub('python', 'snake', text, flags=re.IGNORECASE)

'UPPER snake, lower snake, Mixed snake'