# 7.2 태그를 남기지 않는 그룹

## 7.2.2 태깅 문제 고치기

In [1]:
import re

# Numbers with optional ",ddd" thousands groups and an optional decimal
# part. Both groups are non-capturing (?:...), so findall() yields the
# full matched text rather than the contents of a group.
pat = r'\d{1,3}(?:,\d{3})*(?:\.\d*)?\b'
s = '12,000 monkeys on 100 typewriters for 53.12 days.'
number_re = re.compile(pat)
lst = number_re.findall(s)
for item in lst:
    print(item)

12,000
100
53.12


# 7.3 탐욕적 일치 vs 게으른 일치

In [2]:
import re

# Greedy match: '.*' consumes as much as possible, so the match runs
# from the first '<' to the LAST '>' on the line.
pat = r'<.*>'
# Raw literal: '\h' is not a valid escape sequence, and a non-raw string
# containing it raises a SyntaxWarning on recent Python versions. The
# resulting text (backslash included) is exactly the same.
the_line = r'<h1>This is an HTML heading.<\h1>'
m = re.match(pat, the_line)
print(m.group())

<h1>This is an HTML heading.<\h1>


In [3]:
# Lazy match: '.*?' consumes as little as possible, so the match stops
# at the FIRST '>' it can reach.
pat = r'<.*?>'
# Raw literal: '\h' is not a valid escape sequence, and a non-raw string
# containing it raises a SyntaxWarning on recent Python versions. The
# resulting text (backslash included) is exactly the same.
the_line = r'<h1>This is an HTML heading.<\h1>'
m = re.match(pat, the_line)
print(m.group())

<h1>


In [4]:
# Three lines of sample markup, each with one opening and one closing
# angle-bracket pair (six pairs in total), ending with a newline.
# The raw prefix keeps every backslash literal; without it '\b' would
# become a backspace character.
s = r'''<h1>This is the first heading<\h1>
<h1>This is the second heading<\h1>
<b>This is in bold<\b>
'''

In [5]:
# Lazy match: each '<...>' pair is found separately.
pat = r'<.*?>'
# DOTALL lets '.' match newlines as well.
tag_re = re.compile(pat, re.DOTALL)
lst = tag_re.findall(s)
print('There are', len(lst), 'tags.')

There are 6 tags.


In [6]:
# Greedy match: with DOTALL, '.*' also runs across newlines, so a single
# match spans from the first '<' to the last '>' in the whole text.
pat = r'<.*>'
tag_re = re.compile(pat, re.DOTALL)
lst = tag_re.findall(s)
print('There are', len(lst), 'tags.')

There are 1 tags.


In [7]:
# Sample text: three sentences, each ending in a period, wrapped across
# line breaks so that sentences do not line up with lines.
s = '''Here is a single sentence. Here is
another sentence, ending in a period. And
here is yet another.
'''

In [8]:
# A "sentence" here is the shortest run of characters (newlines included,
# thanks to DOTALL) that ends in '.', '?' or '!'.
pat = r'.*?[.?!]'
sentence_re = re.compile(pat, re.DOTALL)
lst = sentence_re.findall(s)
print('There are ', len(lst), 'sentences.')

There are  3 sentences.


# 7.4 전방탐색 기능

In [1]:
# Sample text whose periods are not all sentence ends: it contains the
# abbreviations "U.S.A." and "temp.", a decimal number, and a line break
# in the middle of a sentence.
s = '''See the U.S.A. today. It's right here, not
a world away. Average temp. is 66.5.'''

In [4]:
import re

# A sentence starts with a capital letter and ends at the first '.', '!'
# or '?' that is followed by either a space plus a capital letter or an
# end of line/text. The lookahead (?=...) checks that context without
# consuming it, so the next sentence's capital stays available.
pat = r'[A-Z].*?[.!?](?= [A-Z]|$)'
sentence_re = re.compile(pat, re.DOTALL | re.MULTILINE)
m = sentence_re.findall(s)

for i in m:
    print('->', i)

-> See the U.S.A. today.
-> It's right here, not
a world away.
-> Average temp. is 66.5.


In [7]:
# Counter-example: the same idea written WITHOUT a lookahead group.
# Here '|' splits the entire pattern in two alternatives (everything
# before it, or a bare '$'), and the ' [A-Z]' text is consumed as part
# of the match instead of merely being checked.
pat = r'[A-Z].*?[.!?] [A-Z]|$'
sentence_re = re.compile(pat, re.DOTALL)
m = sentence_re.findall(s)
for i in m:
    print('->', i)

-> See the U.S.A. today. I
-> 


# 7.5 다중 패턴 확인하기(전방탐색)

In [8]:
# pat1 consumes the candidate: 8 to 12 word or punctuation characters,
# up to the end of the string.
pat1 = r'(\w|[!@#$%^&*+-]){8,12}$'
# pat2..pat4 are zero-width lookaheads; each only asserts that some
# character class occurs somewhere ahead, without consuming anything.
pat2 = r'(?=.*[a-zA-Z])'        # at least one letter
pat3 = r'(?=.*\d)'              # at least one digit
pat4 = r'(?=.*[!@#$%^&*+-])'    # at least one punctuation character

# Lookaheads first, then the consuming pattern, so every condition is
# tested from the same starting position.
pat = ''.join((pat2, pat3, pat4, pat1))

In [9]:
import re

# Check one candidate password against the combined pattern and report
# the verdict.
passwd = 'HenryThe5!'
print('It passed the test!' if re.match(pat, passwd)
      else 'Insufficiently strong password')

It passed the test!


# 7.6 부정적 전방탐색

In [1]:
import re
# Negative lookahead: match 'abc' only where it is NOT immediately
# followed by another 'abc'.
pat = r'abc(?!abc)'
s = 'The magic of abcabc.'
m = re.compile(pat).findall(s)
print(m)

['abc']


In [2]:
# Same negative lookahead, now case-insensitive: the lookahead also
# treats 'ABC' as 'abc', so only the second occurrence is reported.
pat = r'abc(?!abc)'
s = 'The magic of abcABC.'
m = re.compile(pat, re.I).findall(s)
print(m)

['ABC']


In [3]:
# Sample text whose periods are not all sentence ends: it contains the
# abbreviations "U.S.A." and "temp.", a decimal number, and a line break
# in the middle of a sentence.
s = '''See the U.S.A. today. It's right here, not
a world away. Average temp. is 70.5.'''

In [4]:
# A sentence ends at the first '.', '!' or '?' that is NOT followed by
# a space plus a lowercase letter or digit, and NOT followed directly by
# a word character (which rules out "U.S.A." and "70.5").
pat = r'[A-Z].*?[.!?](?! [a-z0-9]|\w)'
sentence_re = re.compile(pat, re.DOTALL)
m = sentence_re.findall(s)
for i in m:
    print('->', i)

-> See the U.S.A. today.
-> It's right here, not
a world away.
-> Average temp. is 70.5.


In [5]:
# Drop every newline before splitting into sentences.
# NOTE(review): the newline is deleted rather than replaced by a space,
# so the words on either side of a line break are joined together.
s = s.replace('\n', '')
pat = r'[A-Z].*?[.!?](?! [a-z0-9]|\w)'
sentence_re = re.compile(pat, re.DOTALL)
m = sentence_re.findall(s)
for i in m:
    print('->', i)

-> See the U.S.A. today.
-> It's right here, nota world away.
-> Average temp. is 70.5.


# 7.7 명명 그룹

In [6]:
# Two named groups, 'first' and 'last': runs of word characters
# separated by exactly one space.
pat = r'(?P<first>\w+) (?P<last>\w+)'

In [7]:
import re
# Match a sample name from the start of the string; the resulting match
# object is kept in m for later lookups by group name.
s = 'Jane Austen'
m = re.compile(pat).match(s)

In [8]:
# Look the groups up by name; m[name] is shorthand for m.group(name).
print('first name = ', m['first'])
print('last name = ', m['last'])

first name =  Jane
last name =  Austen


In [9]:
# Print the name as "Last, First"; group() with several names returns
# the matched texts as a tuple, in the order requested.
print(', '.join(m.group('last', 'first')))

Austen, Jane


In [10]:
# As before, plus an optional 'mid' group between first and last name:
# one word character, a period and a space. Note that the separating
# space is INSIDE the 'mid' group, so it is part of the captured text.
pat = r'(?P<first>\w+) (?P<mid>\w\. )?(?P<last>\w+)'

In [11]:
def recorg_name(in_s):
    """Return the name in *in_s* rearranged as "Last, First[ M.]".

    The input is matched from its start against the module-level ``pat``,
    which must define the named groups ``first``, ``mid`` and ``last``.

    Args:
        in_s: A name such as 'Jane Austen' or 'Jane Q. Austen'.

    Returns:
        The last name, a comma, the first name and, when a middle
        initial was matched, that initial; no trailing whitespace is
        carried over from the middle group.

    Raises:
        ValueError: If *in_s* does not match ``pat``.
    """
    m = re.match(pat, in_s)
    if m is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            f'name does not match the expected pattern: {in_s!r}')
    s = m.group('last') + ', ' + m.group('first')
    mid = m.group('mid')
    if mid:
        # The middle group may capture its separator space along with the
        # initial; strip it so the result does not end in a stray blank.
        s += ' ' + mid.strip()
    return s

In [12]:
# Repeated-word detector: (?P=word) is a backreference that must match
# the same text the 'word' group captured. With the I flag the
# comparison ignores case, so 'The the' counts as a repeat.
pat = r'(?P<word>\w+) (?P=word)'
m = re.compile(pat, re.I).search('The the dog.')

# 7.8 re.split 함수

In [15]:
# Separator pattern: either a comma followed by any number of spaces,
# or a run of one or more spaces.
pat = r', *| +'

In [16]:
import re
# Break the text apart at every separator matched by pat and show the
# resulting list of pieces.
splitter = re.compile(pat)
lst = splitter.split('3, 5 7 8, 10, 11')
print(lst)

['3', '5', '7', '8', '10', '11']


In [18]:
# Tokenize a space-separated expression with the separator pattern.
s = '3 2 * 2 15 * + 4 +'
toks = re.compile(pat).split(s)
# NOTE(review): this prints the original string, not the token list in
# toks - confirm that is intended.
print(s)

3 2 * 2 15 * + 4 +
