# Regex

    reference : https://wikidocs.net/1669

### Re module

In [1]:
import re

> re.match

    문자열의 처음부터 정규식과 매치되는지 조사
    -> 한번만 찾음

> re.search

    문자열 전체를 검색해 정규식과 매치되는지 조사

> re.findall

    정규식과 매치되는 모든 문자열을 list 형태로 return

> re.finditer

    정규식과 매치되는 모든 문자열을 match 객체의 iterator 형태로 return

#### Usage
pattern = re.compile(패턴)
matched = pattern.search(문자열)

> matched.group
    
    매치된 문자열 return

> start
    
    매치된 문자열의 first index

> end
    
    매치된 문자열의 끝 index (마지막 문자의 index + 1)

> span
    
    (start index, end index) 형태의 tuple

### Meta characters

    . ^ $ * + ? { } [ ] \ | ( )

In [2]:
def util(p, m):
    """Print a short report describing the outcome of a regex search.

    Args:
        p: Compiled pattern object; its ``pattern`` attribute is printed.
        m: Match object produced by the search, or ``None`` when the
            search found nothing.

    Returns:
        None. The report is written to standard output.
    """
    # Identity check: ``None`` is a singleton, and ``==`` could be
    # overridden by an object's ``__eq__``.
    if m is None:
        print("Nothing matched")
        return

    print(f"Pattern : {p.pattern}")
    print(f"Original String : {m.string}")
    print(f"Matched : {m.group()}")
    print(f"Start Index : {m.start()}")
    # ``end()`` is exclusive: one past the last matched character.
    print(f"End Index : {m.end()}")

> Dot(.)

    개행문자(\n)를 제외한 모든 문자 1개와 매치

In [3]:
# '.' stands in for exactly one character, so 'k.r' matches "kar" at the
# start of "karma".
pattern = re.compile('k.r')
matched = pattern.search("karma")

util(pattern, matched)

Pattern : k.r
Original String : karma
Matched : kar
Start Index : 0
End Index : 3


In [4]:
# The character two positions after 'k' in "karma" is 'r', not 'a', so
# 'k.a' finds nothing and search() returns None.
pattern = re.compile('k.a')
matched = pattern.search("karma")

util(pattern, matched)

Nothing matched


> character class []

    대괄호 내부의 문자 중 하나와 매치 (쉼표도 구분자가 아닌 일반 문자로 취급됨)

In [5]:
# A character class matches one character out of the listed set.
# NOTE(review): the commas are literal members of the set, so this class
# also accepts ','; the conventional spelling is '[abcd]'.
pattern = re.compile('k[a,b,c,d]r')
matched = pattern.search("karma")

util(pattern, matched)

Pattern : k[a,b,c,d]r
Original String : karma
Matched : kar
Start Index : 0
End Index : 3


-을 사용해서 a-z, A-Z, 0-9와 같은 식으로 문자의 범위를 표현할 수 있음 (예: [a-zA-Z0-9])

In [6]:
# Ranges such as a-z can be combined inside one character class.
# NOTE(review): the ', ' separators are literal, so this class also
# accepts a comma or a space; '[a-zA-Z0-9]' is the conventional spelling.
pattern = re.compile('k[a-z, A-Z, 0-9]r')
matched = pattern.search("karma")

util(pattern, matched)

Pattern : k[a-z, A-Z, 0-9]r
Original String : karma
Matched : kar
Start Index : 0
End Index : 3


In [7]:
# 'a' is not a member of the class, so "kar" is rejected and nothing matches.
# NOTE(review): the commas are literal members of the set; '[bcd]' is the
# conventional spelling.
pattern = re.compile('k[b,c,d]r')
matched = pattern.search("karma")

util(pattern, matched)

Nothing matched


> [.]

    [] 안의 . 은 메타 문자가 아니라 문자 '.' 자체와 매치
    pattern : a[.]b
        a.b ---> match
        a0b ---> not match

In [8]:
# Inside a character class '.' is a literal dot, so this needs an 'a'
# directly followed by '.'; "karma1221" contains no '.', hence no match.
pattern = re.compile('a[.]')
matched = pattern.search("karma1221")

util(pattern, matched)

Nothing matched


> 반복

    + : 바로 앞의 문자가 1번 이상 반복되는 문자열과 매치됨
    * : 바로 앞의 문자가 0번 이상 반복되는 문자열과 매치됨
    {m,n} : 바로 앞의 문자가 m번 이상 n번 이하 반복되는 문자열과 매치됨 (쉼표 뒤에 공백을 넣으면 반복으로 해석되지 않음)

In [9]:
# 'b+' requires at least one 'b'; the match starts at the last 'a'
# (index 2) and consumes all three 'b's plus the first 'c'.
pattern = re.compile('ab+c')
matched = pattern.search("aaabbbccc")

util(pattern, matched)

Pattern : ab+c
Original String : aaabbbccc
Matched : abbbc
Start Index : 2
End Index : 7


In [10]:
# 'b*' allows zero or more 'b's; on this input it matches the same
# "abbbc" span as 'ab+c'.
pattern = re.compile('ab*c')
matched = pattern.search("aaabbbccc")

util(pattern, matched)

Pattern : ab*c
Original String : aaabbbccc
Matched : abbbc
Start Index : 2
End Index : 7


In [11]:
# 'b{1,3}' accepts between one and three 'b's; the input has exactly
# three, so "abbbc" matches.
pattern = re.compile('ab{1,3}c')
matched = pattern.search("aaabbbccc")

util(pattern, matched)

Pattern : ab{1,3}c
Original String : aaabbbccc
Matched : abbbc
Start Index : 2
End Index : 7


In [12]:
# 'b{4,5}' needs at least four consecutive 'b's; the input only has
# three, so nothing matches.
pattern = re.compile('ab{4,5}c')
matched = pattern.search("aaabbbccc")

util(pattern, matched)

Nothing matched


> ?

    {0,1}과 동일한 의미 (0번 이상 1번 이하 반복)

In [13]:
# 'b?' makes the 'b' optional; here it is present, so the match is "bc"
# (the last 'b' followed by the first 'c').
pattern = re.compile('b?c')
matched = pattern.search("aaabbbccc")

util(pattern, matched)

Pattern : b?c
Original String : aaabbbccc
Matched : bc
Start Index : 5
End Index : 7


In [14]:
# No 'a' sits directly before a 'c' in the input, so the optional 'a'
# matches zero times and only the first 'c' is returned.
pattern = re.compile('a?c')
matched = pattern.search("aaabbbccc")

util(pattern, matched)

Pattern : a?c
Original String : aaabbbccc
Matched : c
Start Index : 6
End Index : 7


> or(|)

In [15]:
# '|' is alternation: either "karma" or "veiga" may match. Only "veiga"
# occurs in the subject string.
pattern = re.compile('karma|veiga')
matched = pattern.search("My Favorite Champion : veiga")

util(pattern, matched)

Pattern : karma|veiga
Original String : My Favorite Champion : veiga
Matched : veiga
Start Index : 23
End Index : 28


> ^

    문자열의 처음과 매치 (패턴이 문자열의 맨 앞에서 시작해야 함)

In [16]:
# '^' anchors the pattern to the beginning of the string; the subject
# starts with "010", so it matches at index 0.
pattern = re.compile('^010')
matched = pattern.search("010-1234-5678")

util(pattern, matched)

Pattern : ^010
Original String : 010-1234-5678
Matched : 010
Start Index : 0
End Index : 3


In [17]:
# "1234" occurs in the subject, but not at the very beginning, so the
# '^' anchor prevents a match.
pattern = re.compile('^1234')
matched = pattern.search("010-1234-5678")

util(pattern, matched)

Nothing matched


> $

    문자열의 끝과 매치 (패턴이 문자열의 맨 끝에서 끝나야 함)

In [18]:
# '$' anchors the pattern to the end of the string; the subject ends
# with "5678", so the match spans indices 9 to 13.
pattern = re.compile('5678$')
matched = pattern.search("010-1234-5678")

util(pattern, matched)

Pattern : 5678$
Original String : 010-1234-5678
Matched : 5678
Start Index : 9
End Index : 13


> Word Boundary(\b)

In [19]:
# '\b' is a word boundary (raw string keeps the backslash intact).
# "Name" is surrounded by spaces here, so both boundaries hold.
pattern = re.compile(r'\bName\b')
matched = pattern.search("My Name Is Karma")

util(pattern, matched)

Pattern : \bName\b
Original String : My Name Is Karma
Matched : Name
Start Index : 3
End Index : 7


In [20]:
# In "MyNameIs" the text "Name" is embedded between other word
# characters, so neither '\b' boundary holds and nothing matches.
pattern = re.compile(r'\bName\b')
matched = pattern.search("MyNameIs Karma")

util(pattern, matched)

Nothing matched


> Grouping()

In [21]:
# Parentheses create a capturing group. search() stops at the first
# occurrence of "1234" (indices 4 to 8), not the second one.
pattern = re.compile('(1234)')
matched = pattern.search("010-1234-1234")

util(pattern, matched)

Pattern : (1234)
Original String : 010-1234-1234
Matched : 1234
Start Index : 4
End Index : 8
