In [1]:
import re

In [2]:
# Sample text for the character-class examples: one all-caps word,
# capitalised words, commas, spaces and three trailing dots.
string = 'HELLO, There, How, Are, You...'

In [3]:
# re.search returns a Match for the first hit only: the leading run of capitals.
re.search("[A-Z]+",string)

<re.Match object; span=(0, 5), match='HELLO'>

In [4]:
# re.findall collects every non-overlapping run of one or more capitals.
re.findall("[A-Z]+",string)

['HELLO', 'T', 'H', 'A', 'Y']

In [7]:
# {2} matches exactly two capitals at a time, so 'HELLO' yields 'HE' and 'LL'
# and the leftover single 'O' is not matched.
re.findall("[A-Z]{2}",string)

['HE', 'LL']

In [8]:
# {2,} means two or more, so the whole run 'HELLO' is a single match and
# isolated capitals are skipped.
re.findall("[A-Z]{2,}",string)

['HELLO']

In [9]:
# Without a quantifier the class matches one character at a time: any letter,
# whitespace character or comma. The raw string stops Python from treating \s
# as an (invalid) string escape.
re.findall(r"[A-Za-z\s,]", string)

['H',
 'E',
 'L',
 'L',
 'O',
 ',',
 ' ',
 'T',
 'h',
 'e',
 'r',
 'e',
 ',',
 ' ',
 'H',
 'o',
 'w',
 ',',
 ' ',
 'A',
 'r',
 'e',
 ',',
 ' ',
 'Y',
 'o',
 'u']

In [11]:
# search stops at the first character in the class; .group() returns its text.
# Raw string so \s reaches the regex engine untouched.
re.search(r"[A-Za-z\s,]", string).group()

'H'

In [12]:
# + makes the class greedy, so the match runs until the first character
# outside the class (the trailing dots). Raw string keeps \s intact.
re.search(r"[A-Za-z\s,]+", string).group()

'HELLO, There, How, Are, You'

In [15]:
# Inside a character class ? is a literal question mark, not a quantifier, so
# on this text the result is the same as for [A-Za-z\s,]+.
# Raw string keeps \s intact.
re.findall(r"[A-Z?a-z\s,]+", string)

['HELLO, There, How, Are, You']

In [16]:
# Outside the class ? makes the capital optional: each match is at most one
# capital followed by lowercase letters, whitespace or commas.
# Raw string keeps \s intact.
re.findall(r"[A-Z]?[a-z\s,]+", string)

['O, ', 'There, ', 'How, ', 'Are, ', 'You']

In [20]:
# A leading ^ negates the class: runs of characters that are NOT letters,
# commas or whitespace. Raw string keeps \s intact.
re.findall(r"[^A-Za-z,\s]+", string)

['...']

In [19]:
# Same negated class with search: .group() gives the first such run as text.
# Raw string keeps \s intact.
re.search(r"[^A-Za-z,\s]+", string).group()

'...'

In [21]:
# Negated class: runs of anything that is not a capital letter, so every
# capital acts as a separator between matches.
re.findall("[^A-Z]+",string)

[', ', 'here, ', 'ow, ', 're, ', 'ou...']

# Groups

### Groups let us pull out sections of a match and store them

In [22]:
# Sample sentence containing three "<name> has <digit> <animal>" phrases.
string1 = 'John has 6 cats but I think my friend Susan has 3 dogs and Mike has 8 fishes'

In [25]:
# Pattern shape: <letters> <word> <single digit> <word>. With no groups,
# findall returns the full text of each match. Raw string keeps \w and \d
# from being read as (invalid) string escapes.
re.findall(r"[A-Za-z]+ \w+ \d \w+", string1)

['John has 6 cats', 'Susan has 3 dogs', 'Mike has 8 fishes']

In [26]:
# With exactly one capture group, findall returns only that group's text
# (here the name) instead of the whole match. Raw string keeps \w and \d intact.
re.findall(r"([A-Za-z]+) \w+ \d \w+", string1)

['John', 'Susan', 'Mike']

In [27]:
# With two capture groups, findall returns one (group1, group2) tuple per match.
# Raw string keeps \w and \d intact.
re.findall(r"([A-Za-z]+) \w+ \d (\w+)", string1)

[('John', 'cats'), ('Susan', 'dogs'), ('Mike', 'fishes')]

In [29]:
# Three capture groups: name, digit and trailing word, one tuple per match.
# Raw string keeps \w and \d intact.
info = re.findall(r"([A-Za-z]+) \w+ (\d) (\w+)", string1)
info

[('John', '6', 'cats'), ('Susan', '3', 'dogs'), ('Mike', '8', 'fishes')]

In [30]:
# zip is lazy: the cell shows only the iterator object, not its items.
zip(info)

<zip at 0x24de059b840>

In [31]:
# * unpacks the list so each tuple becomes a separate argument to zip;
# the result is still a lazy iterator.
zip(*info)

<zip at 0x24de05698c0>

In [32]:
# Given a single iterable, zip wraps each element in a 1-tuple.
list(zip(info))

[(('John', '6', 'cats'),),
 (('Susan', '3', 'dogs'),),
 (('Mike', '8', 'fishes'),)]

In [33]:
# Unpacking transposes the rows: one tuple of names, one of digits, one of animals.
list(zip(*info))

[('John', 'Susan', 'Mike'), ('6', '3', '8'), ('cats', 'dogs', 'fishes')]

In [34]:
# re.search returns a Match object for the first occurrence only.
# Raw string keeps \w and \d from being read as (invalid) string escapes.
match = re.search(r"([A-Za-z]+) \w+ (\d) (\w+)", string1)
match

<re.Match object; span=(0, 15), match='John has 6 cats'>

In [35]:
# With no argument, group() returns the entire matched text.
match.group()

'John has 6 cats'

In [36]:
# group(0) is equivalent to group(): the whole match.
match.group(0)

'John has 6 cats'

In [37]:
# groups() returns every captured group as a tuple.
match.groups()

('John', '6', 'cats')

In [38]:
# First captured group: the name.
match.group(1)

'John'

In [39]:
# Second captured group: the digit, returned as a string.
match.group(2)

'6'

In [40]:
# Third captured group: the word after the digit.
match.group(3)

'cats'

In [42]:
# Passing several group numbers returns a tuple of those groups.
match.group(1,3)

('John', 'cats')

In [43]:
# (start, end) indices of the whole match within string1.
match.span()

(0, 15)

In [44]:
# (start, end) indices of group 1.
match.span(1)

(0, 4)

In [45]:
# (start, end) indices of group 2.
match.span(2)

(9, 10)

In [46]:
# (start, end) indices of group 3.
match.span(3)

(11, 15)

In [47]:
# Start index of group 3 only, i.e. the first element of span(3).
match.start(3)

11

In [49]:
# findall with three groups: a list of (name, digit, word) tuples.
# Raw string keeps \w and \d from being read as (invalid) string escapes.
re.findall(r"([A-Za-z]+) \w+ (\d) (\w+)", string1)

[('John', '6', 'cats'), ('Susan', '3', 'dogs'), ('Mike', '8', 'fishes')]

In [50]:
# findall returns a plain list, not a Match object, so .group() does not exist.
# The error is the point of this cell; catch and print it so that
# Restart & Run All does not stop here.
try:
    re.findall(r"([A-Za-z]+) \w+ (\d) (\w+)", string1).group()
except AttributeError as exc:
    print(f"AttributeError: {exc}")

AttributeError: 'list' object has no attribute 'group'

In [51]:
# Indexing the list gives the tuple of groups for the first match.
# Raw string keeps \w and \d intact.
re.findall(r"([A-Za-z]+) \w+ (\d) (\w+)", string1)[0]

('John', '6', 'cats')

In [52]:
# Each findall element is a plain tuple of strings, so it has no .group() either.
# The error is the point of this cell; catch and print it so that
# Restart & Run All does not stop here.
try:
    re.findall(r"([A-Za-z]+) \w+ (\d) (\w+)", string1)[0].group(1)
except AttributeError as exc:
    print(f"AttributeError: {exc}")

AttributeError: 'tuple' object has no attribute 'group'

In [53]:
# Wrapping the whole pattern in an outer group makes the full matched text
# the first element of every tuple. Raw string keeps \w and \d intact.
re.findall(r"(([A-Za-z]+) \w+ (\d) (\w+))", string1)

[('John has 6 cats', 'John', '6', 'cats'),
 ('Susan has 3 dogs', 'Susan', '3', 'dogs'),
 ('Mike has 8 fishes', 'Mike', '8', 'fishes')]

In [54]:
# Keep the (full text, name, digit, word) tuples for the loop that follows.
# Raw string keeps \w and \d from being read as (invalid) string escapes.
data = re.findall(r"(([A-Za-z]+) \w+ (\d) (\w+))", string1)
data

[('John has 6 cats', 'John', '6', 'cats'),
 ('Susan has 3 dogs', 'Susan', '3', 'dogs'),
 ('Mike has 8 fishes', 'Mike', '8', 'fishes')]

In [55]:
# Print the first element of every tuple (the full matched text);
# the remaining elements are discarded by the starred placeholder.
for full_text, *_ in data:
    print(full_text)

John has 6 cats
Susan has 3 dogs
Mike has 8 fishes


In [80]:
# finditer yields one Match object per occurrence, lazily. The iterator can be
# consumed only once. Raw string keeps \w and \d from being read as
# (invalid) string escapes.
it = re.finditer(r"([A-Za-z]+) \w+ (\d) (\w+)", string1)

In [65]:
# Each call pulls the next Match from the shared iterator `it`, so re-running
# this cell moves on to the following match; next() raises StopIteration once
# the iterator is exhausted.
next(it).group()  # re-run up to 3 times: string1 contains three matches

'Susan has 3 dogs'

In [72]:
# Same shared iterator `it`: each run consumes one more Match and shows its
# captured groups; next() raises StopIteration once the iterator is exhausted.
next(it).groups()  # re-run up to 3 times: string1 contains three matches

('Mike', '8', 'fishes')

In [77]:
# Loop over a fresh finditer() rather than the shared `it`: an iterator is
# consumed by use, so a reused one would skip matches already taken by next()
# and print nothing on a second pass.
for element in re.finditer(r"([A-Za-z]+) \w+ (\d) (\w+)", string1):
    # group() accepts several indices and returns them in the order requested.
    print(element.group(1, 3, 2))

('John', 'cats', '6')
('Susan', 'dogs', '3')
('Mike', 'fishes', '8')


In [79]:
# Loop over a fresh finditer() rather than the shared `it`, which is already
# exhausted after an earlier loop and would print nothing.
for element in re.finditer(r"([A-Za-z]+) \w+ (\d) (\w+)", string1):
    # group() with no argument is the full matched text.
    print(element.group())

John has 6 cats
Susan has 3 dogs
Mike has 8 fishes


In [81]:
# Loop over a fresh finditer() rather than the shared `it`, which is already
# exhausted after an earlier loop and would print nothing.
for element in re.finditer(r"([A-Za-z]+) \w+ (\d) (\w+)", string1):
    # groups() is the tuple of all captured groups for this match.
    print(element.groups())

('John', '6', 'cats')
('Susan', '3', 'dogs')
('Mike', '8', 'fishes')


# Naming the groups

In [82]:
# Sample "<city>, <state> <zip code>" text for the named-group examples.
string2 = 'New York, New York 11369'

In [86]:
# Three numbered groups: text before the comma, text after it, and the
# trailing digits. Raw string keeps \s and \d from being read as (invalid)
# string escapes.
match1 = re.search(r"([A-Za-z\s]+), ([A-Za-z\s]+) (\d+)", string2)
match1

<re.Match object; span=(0, 24), match='New York, New York 11369'>

In [87]:
# Whole matched text.
match1.group()

'New York, New York 11369'

In [88]:
# Group 1: the text before the comma.
match1.group(1)

'New York'

In [89]:
# Group 2: the text between the comma and the digits.
match1.group(2)

'New York'

In [90]:
# Group 3: the trailing digits, returned as a string.
match1.group(3)

'11369'

`(?P<name>...)` names a group: the group name goes inside the `<>`, followed by the regular expression for that group.

`(?P<City>...)`      `(?P<State>...)`    `(?P<Zipcode>...)`

In [92]:
# Same pattern with named groups: (?P<name>...) labels each capture.
# Raw string keeps \s and \d from being read as (invalid) string escapes.
re.search(r"(?P<City>[A-Za-z\s]+), (?P<State>[A-Za-z\s]+) (?P<Zipcode>\d+)", string2)

<re.Match object; span=(0, 24), match='New York, New York 11369'>

In [94]:
# Compile once so the pattern object can be reused across searches.
# Raw string keeps \s and \d from being read as (invalid) string escapes.
pattern = re.compile(r"(?P<City>[A-Za-z\s]+), (?P<State>[A-Za-z\s]+) (?P<Zipcode>\d+)")
pattern

re.compile(r'(?P<City>[A-Za-z\s]+), (?P<State>[A-Za-z\s]+) (?P<Zipcode>\d+)',
re.UNICODE)

In [95]:
# Call search() on the compiled pattern itself; this is the idiomatic use of a
# compiled regex and returns the same Match as re.search(pattern, string2).
match1 = pattern.search(string2)
match1

<re.Match object; span=(0, 24), match='New York, New York 11369'>

In [97]:
# Named groups can be fetched by name instead of position.
match1.group('City')

'New York'

In [99]:
# Text captured by the group named State.
match1.group("State")

'New York'

In [100]:
# Text captured by the group named Zipcode (a string of digits).
match1.group('Zipcode')

'11369'

In [102]:
# Named groups are still numbered, so positional access keeps working.
match1.group(1)

'New York'

In [103]:
# Whole matched text, unaffected by group naming.
match1.group()

'New York, New York 11369'

In [104]:
# groups() still returns a plain tuple in group order.
match1.groups()

('New York', 'New York', '11369')

In [105]:
# groupdict() maps each group name to the text it captured.
match1.groupdict()

{'City': 'New York', 'State': 'New York', 'Zipcode': '11369'}