In [1]:
import re

# Fields are separated by a semicolon, a comma, or whitespace, optionally
# followed by more whitespace.
line = 'asdf fjdk; afed, fjek,asdf,   foo'
delimiter = re.compile(r'[;,\s]\s*')
fields = delimiter.split(line)
print(fields)

['asdf', 'fjdk', 'afed', 'fjek', 'asdf', 'foo']


In [2]:
from fnmatch import fnmatch, fnmatchcase

# Shell-style wildcard matching against a list of file names.
names = ['Dat1.csv', 'Dat2.csv', 'config.ini', 'foo.py']
csv_data_files = list(filter(lambda name: fnmatch(name, 'Dat*.csv'), names))
print(csv_data_files)

addresses = [
    '5412 N CLARK ST',
    '1060 W ADDISON ST',
    '1039 W GRANVILLE AVE',
    '2122 N CLARK ST',
    '4802 N BROADWAY',
]

# The same wildcard syntax works on arbitrary strings, not only file names;
# each pattern is applied to the whole address list in turn.
for pattern in ('* ST', '54[0-9][0-9] *CLARK*'):
    matching = [addr for addr in addresses if fnmatchcase(addr, pattern)]
    print(matching)

['Dat1.csv', 'Dat2.csv']
['5412 N CLARK ST', '1060 W ADDISON ST', '2122 N CLARK ST']
['5412 N CLARK ST']


In [3]:
from calendar import month_abbr

# Sample sentence containing two month/day/year dates.
text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'

# Compile the matcher once, then collect every date it finds.
datepat = re.compile(r'\d+/\d+/\d+')
dates_found = datepat.findall(text)
print(dates_found)

# The three capture groups let the replacement template reorder the pieces
# into year-month-day.
year_first = re.sub(r'(\d+)/(\d+)/(\d+)', r'\3-\1-\2', text)
print(year_first)

def change_date(m):
    """Rewrite a month/day/year match as 'day, Mon, year'.

    Meant to be passed as the replacement callback to ``sub`` with a pattern
    whose groups 1, 2 and 3 capture the month, day and year digits.

    Args:
        m: Match object exposing three captured groups of digits.

    Returns:
        The reformatted date, e.g. '27, Nov, 2012'. If the captured month is
        not in the range 1-12, the matched text is returned unchanged.
    """
    month = int(m.group(1))
    # month_abbr has 13 entries: index 0 is an empty placeholder and anything
    # above 12 would raise IndexError, so leave such matches untouched.
    if not 1 <= month <= 12:
        return m.group(0)
    mon_name = month_abbr[month]
    return f'{m.group(2)}, {mon_name}, {m.group(3)}'

# Rebind datepat to a grouped version of the pattern so the callback can
# read month, day and year individually.
datepat = re.compile(r'(\d+)/(\d+)/(\d+)')
reworded = datepat.sub(change_date, text)
print(reworded)

['11/27/2012', '3/13/2013']
Today is 2012-11-27. PyCon starts 2013-3-13.
Today is 27, Nov, 2012. PyCon starts 13, Mar, 2013.


In [4]:
# The same word appears in three different casings.
text = 'UPPER PYTHON, lower python, Mixed Python'

# A single case-insensitive pattern serves both the search and the replace.
python_pat = re.compile('python', flags=re.IGNORECASE)
print(python_pat.findall(text))
print(python_pat.sub('snake', text))

def matchcase(word):
    """Build a ``re.sub`` callback that inserts *word* in the matched text's case.

    Args:
        word: Replacement string to insert.

    Returns:
        A function taking a match object and returning *word* upper-cased,
        lower-cased or capitalized to mirror the matched text; *word* is
        returned as given when none of those cases applies.
    """
    def replace(m):
        matched = m.group()
        if matched.isupper():
            return word.upper()
        elif matched.islower():
            return word.lower()
        # Slice rather than index: an empty match (possible with patterns
        # such as 'x*') then falls through instead of raising IndexError.
        elif matched[:1].isupper():
            return word.capitalize()
        else:
            return word
    return replace

# Same case-insensitive substitution, but the callback picks the casing of
# each replacement.
case_aware = matchcase('snake')
print(re.sub('python', case_aware, text, flags=re.IGNORECASE))

['PYTHON', 'python', 'Python']
UPPER snake, lower snake, Mixed snake
UPPER SNAKE, lower snake, Mixed Snake


In [5]:
text1 = 'Computer says "no."'
text2 = 'Computer says "no." Phone says "yes."'

# Greedy: .* runs to the last closing quote, so text2 yields one long match.
str_pat = re.compile(r'\"(.*)\"')
for sample in (text1, text2):
    print(str_pat.findall(sample))

# Non-greedy: .*? stops at the nearest closing quote, giving one match per
# quoted phrase.
str_pat2 = re.compile(r'\"(.*?)\"')
quoted = str_pat2.findall(text2)
print(quoted)

['no.']
['no." Phone says "yes.']
['no.', 'yes.']


In [10]:
text1 = '/* this is a comment */'
# Exactly three quotes open the literal, so the sample begins with '/*'.
text2 = '''/* this is a
 multiline comment */
'''
# By default '.' does not match a newline, so this pattern only finds
# comments that sit on a single line.
comment = re.compile(r'/\*(.*?)\*/')
print(comment.findall(text1))
print(comment.findall(text2))

# Option 1: allow newlines explicitly with a non-capturing alternation.
comment2 = re.compile(r'/\*((?:.|\n)*?)\*/')
print(comment2.findall(text2))

# Option 2: keep the simple pattern and let '.' match newlines via re.DOTALL.
comment3 = re.compile(r'/\*(.*?)\*/', re.DOTALL)
print(comment3.findall(text2))

[' this is a comment ']
[]
[' this is a\n multiline comment ']
[' this is a\n multiline comment ']


In [21]:
import unicodedata

a = 'pýtĥöñ is awesome\n'
# NFD normalization decomposes each accented letter into its base letter
# followed by combining marks.
b = unicodedata.normalize('NFD', a)
# Encoding to ASCII while ignoring errors drops the combining marks and
# leaves only the base letters.
ascii_bytes = b.encode('ascii', errors='ignore')
print(ascii_bytes.decode('ascii'))

python is awesome

