In [1]:
import re

In [2]:
def show_all_matches(regexes, subject, re_length=6):
    print('Sentence:')
    print()
    print('    {}'.format(subject))
    print()
    print(' regexp{} | matches'.format(' ' * (re_length - 6)))
    print(' ------{} | -------'.format(' ' * (re_length - 6)))
    for regexp in regexes:
        fmt = ' {:<%d} | {!r}' % re_length
        matches = re.findall(regexp, subject)
        if len(matches) > 8:
            matches = matches[:8] + ['...']
        print(fmt.format(regexp, matches))


In [3]:
sentence = 'Mary had a little lamb. 1 little lamb. Not 10, not 12, not 22, just one.'

show_all_matches([
    r'a',
    r'm',
    r'M',
    r'Mary',
    r'little',
    r'1',
    r'10',
    r'22'
], sentence)


Sentence:

    Mary had a little lamb. 1 little lamb. Not 10, not 12, not 22, just one.

 regexp | matches
 ------ | -------
 a      | ['a', 'a', 'a', 'a', 'a']
 m      | ['m', 'm']
 M      | ['M']
 Mary   | ['Mary']
 little | ['little', 'little']
 1      | ['1', '1', '1']
 10     | ['10']
 22     | ['22']


In [14]:
show_all_matches([
    r'\bo\w+', # any word that starts with an 'o'
    r'^\s', # starts with a space
    r'^M', # starts with 'M'
    r'\w+\.\b', # ends with a period
], sentence)


Sentence:

    Mary had a little lamb. 1 little lamb. Not 10, not 12, not 22, just one.

 regexp | matches
 ------ | -------
 \bo\w+ | ['one']
 ^\s    | []
 ^M     | ['M']
 \w+\.\b | []


In [34]:
re.findall(r'\w+\.', sentence)

['lamb.', 'lamb.', 'one.']

# Exercises

Using the repo setup directions, setup a new local and remote repository named natural-language-processing-exercises. The local version of your repo should live inside of ~/codeup-data-science. This repo should be named natural-language-processing-exercises

Save this work in your natural-language-processing-exercises repo. Then add, commit, and push your changes.

Unless a specific file extension is specified, you may do your work either in a python script (.py) or a jupyter notebook (.ipynb).

Do your work for this exercise in a file named regex_exercises.

### 1. Write a function named is_vowel. It should accept a string as input and use a regular expression to determine if the passed string is a vowel. While not explicity mentioned in the lesson, you can treat the result of re.search as a boolean value that indicates whether or not the regular expression matches the given string.

In [92]:
def is_vowel(character):
    regexp = "[aeiou]"
    
    return re.search(regexp, character) != None

In [93]:
is_vowel("a")

True

### 2. Write a function named is_valid_username that accepts a string as input. A valid username starts with a lowercase letter, and only consists of lowercase letters, numbers, or the _ character. It should also be no longer than 32 characters. The function should return either True or False depending on whether the passed string is a valid username.
```
>>> is_valid_username('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')
False
>>> is_valid_username('codeup')
True
>>> is_valid_username('Codeup')
False
>>> is_valid_username('codeup123')
True
>>> is_valid_username('1codeup')
False
```

In [260]:
def is_valid_username(word):
    regexp = "^(?!(^-|-$|.{32,}))[a-z]\w+[^_]"
    return re.search(regexp,word) != None

In [261]:
is_valid_username('1codeup')

False

In [262]:
is_valid_username('1_codeup')

False

In [263]:
is_valid_username('cod_eup')

True

### 3. Write a regular expression to capture phone numbers. It should match all of the following:
```
(210) 867 5309
+1 210.867.5309
867-5309
210-867-5309
```

In [234]:
word = '''(210) 867 5309
+1 210.867.5309
867-5309
210-867-5309'''
regexp = "\+{1}?(?:[ ()-\.]*\d){10,11}|(?:[ ()-\.]*\d){10}|(?:[ ()-\.]*\d){7}"

results = re.findall(regexp,word)
for i in results:
    print(i)

(210) 867 5309
+1 210.867.5309
867-5309
210-867-5309


### 4. Use regular expressions to convert the dates below to the standardized year-month-day format.
```
02/04/19
02/05/19
02/06/19
02/07/19
02/08/19
02/09/19
02/10/19
```

In [235]:
wrong_dates = '''
02/04/19
02/05/19
02/06/19
02/07/19
02/08/19
02/09/19
02/10/19
'''
regexp = "(\d+)/(\d+)/(\d+)"
month, day, year = re.search(regexp, wrong_dates).groups()
print(year)

19


### 5. Write a regex to extract the various parts of these logfile lines:
```
    GET /api/v1/sales?page=86 [16/Apr/2019:193452+0000] HTTP/1.1 {200} 510348 "python-requests/2.21.0" 97.105.19.58
    POST /users_accounts/file-upload [16/Apr/2019:193452+0000] HTTP/1.1 {201} 42 "User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36" 97.105.19.58
    GET /api/v1/items?page=3 [16/Apr/2019:193453+0000] HTTP/1.1 {429} 3561 "python-requests/2.21.0" 97.105.19.58
```

In [236]:
log ='''
    GET /api/v1/sales?page=86 [16/Apr/2019:193452+0000] HTTP/1.1 {200} 510348 "python-requests/2.21.0" 97.105.19.58
    POST /users_accounts/file-upload [16/Apr/2019:193452+0000] HTTP/1.1 {201} 42 "User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36" 97.105.19.58
    GET /api/v1/items?page=3 [16/Apr/2019:193453+0000] HTTP/1.1 {429} 3561 "python-requests/2.21.0" 97.105.19.58
'''
regexp = re.compile('''
(?P<day>\d{2})/
(?P<month>[A-Z]\w{2})/
(?P<year>\d{4})
''',re.VERBOSE)
[re.search(regexp, log).groupdict() for dates in log]

[{'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '2019'},
 {'day': '16', 'month': 'Apr', 'year': '

### Bonus Exercise

### You can find a list of words on your mac at /usr/share/dict/words. Use this file to answer the following questions:

```
- How many words have at least 3 vowels?
- How many words have at least 3 vowels in a row?
- How many words have at least 4 consonants in a row?
- How many words start and end with the same letter?
- How many words start and end with a vowel?
- How many words contain the same letter 3 times in a row?
- What other interesting patterns in words can you find?
```