# Regular Expressions Exercises


## Imports

In [1]:
import re

import pandas as pd

## 1. Write a function named `is_vowel`. It should accept a string as input and use a regular expression to determine if the passed string is a vowel. While not explicitly mentioned in the lesson, you can treat the result of `re.search` as a boolean value that indicates whether or not the regular expression matches the given string.

In [2]:
# Pattern for a string that consists of exactly one vowel, in either case.
vowel_re = r'^[AEIOUaeiou]{1}$'

# Try the pattern on a vowel, a consonant, and a longer word; a match object
# is always truthy, so "matched" is the same as "result is not None".
for candidate in ('A', 'B', 'Hello'):
    print(re.search(vowel_re, candidate) is not None)

True
False
False


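* `^` asserts position at the start of the string (it only means "start of a line" when `re.MULTILINE` is set)
* `[AEIOUaeiou]` matches a single character from the listed vowels
* `{1}` matches the previous token exactly one time (redundant here, because a character class already matches exactly one character)
* `$` asserts position at the end of the string; note that in Python it also matches just before a single trailing newline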

In [3]:
def is_vowel(string):
    """Return True if ``string`` is exactly one vowel (a, e, i, o, u), in either case.

    Parameters
    ----------
    string : str
        The text to test.

    Returns
    -------
    bool
        True only when the entire string is a single vowel character;
        False for consonants, empty strings, and anything longer than one
        character.
    """
    # fullmatch is used instead of search with ^...$ because `$` also matches
    # just before a trailing newline, which would let 'a\n' count as a vowel.
    return bool(re.fullmatch(r'[AEIOUaeiou]', string))

In [4]:
# A single uppercase vowel: expected to be accepted.
is_vowel('A')

True

In [5]:
# A single consonant: expected to be rejected.
is_vowel('B')

False

In [6]:
# A longer word is rejected even though it contains vowels.
is_vowel('Codeup')

False

In [7]:
# Starting with a vowel is not enough; the whole string must be one vowel.
is_vowel('apple')

False

## 2. Write a function named `is_valid_username` that accepts a string as input. A valid username starts with a lowercase letter, and only consists of lowercase letters, numbers, or the `_` character. It should also be no longer than 32 characters. The function should return either `True` or `False` depending on whether the passed string is a valid username.

```python
>>> is_valid_username('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')
False
>>> is_valid_username('codeup')
True
>>> is_valid_username('Codeup')
False
>>> is_valid_username('codeup123')
True
>>> is_valid_username('1codeup')
False
```

In [8]:
def is_valid_username(string):
    """Return True if ``string`` is a valid username.

    A valid username starts with a lowercase letter, contains only lowercase
    letters, digits, or underscores, and is at most 32 characters long.

    Parameters
    ----------
    string : str
        The candidate username.

    Returns
    -------
    bool
        True when the whole string satisfies the rules above, else False.
    """
    # One leading lowercase letter plus up to 31 further allowed characters
    # gives the 32-character maximum. fullmatch is used rather than search
    # with ^...$ because `$` would also accept a trailing newline.
    username_re = r'[a-z][a-z0-9_]{0,31}'

    return bool(re.fullmatch(username_re, string))


# Examples from the exercise statement.
assert not is_valid_username('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')
assert is_valid_username('codeup')
assert not is_valid_username('Codeup')
assert is_valid_username('codeup123')
assert not is_valid_username('1codeup')
assert is_valid_username('code_up')

# Boundary and edge cases.
assert is_valid_username('a' * 32)
assert not is_valid_username('')
assert not is_valid_username('codeup\n')

## 3. Write a regular expression to capture phone numbers. It should match all of the following:

```
(210) 867 5309
+1 210.867.5309
867-5309
210-867-5309
```

In [9]:
def is_phone_number(string):
    """Return True if ``string`` is, in its entirety, a phone number.

    Accepted shapes are an optional country code (``+1``), an optional
    three-digit area code (bare or wrapped in parentheses), then a three-digit
    and a four-digit group. Groups may be separated by a single space, dot,
    or hyphen, or not separated at all.

    Parameters
    ----------
    string : str
        The text to test.

    Returns
    -------
    bool
        True when the whole string is a phone number in one of the accepted
        shapes, else False.
    """
    # Raw string: avoids invalid-escape warnings for \+ and \d.
    # Verbose mode ignores layout whitespace but keeps the literal space
    # inside each [ .-] character class.
    phone_number_re = r'''
        (?:\+?\d{1,3}[ .-]?)?            # optional country code, e.g. +1
        (?:(?:\(\d{3}\)|\d{3})[ .-]?)?   # optional area code; parentheses must be balanced
        \d{3}                            # first three digits of the local number
        [ .-]?                           # optional separator
        \d{4}                            # last four digits of the local number
    '''

    # fullmatch anchors the pattern at both ends, so text that merely
    # contains a run of digits somewhere is not reported as a phone number.
    return bool(re.fullmatch(phone_number_re, string, re.VERBOSE))


# Examples from the exercise statement.
assert is_phone_number('(210) 867 5309')
assert is_phone_number('+1 210.867.5309')
assert is_phone_number('867-5309')
assert is_phone_number('210-867-5309')

# Strings that only contain digits somewhere must not be accepted.
assert not is_phone_number('abc1234567xyz')
assert not is_phone_number('12345')
assert not is_phone_number('(210 867 5309')

## 4. Use regular expressions to convert the dates below to the standardized year-month-day format.

```
02/04/19
02/05/19
02/06/19
02/07/19
02/08/19
02/09/19
02/10/19
```

In [10]:
# The raw dates from the exercise, in MM/DD/YY form.
date_list = [
    "02/04/19",
    "02/05/19",
    "02/06/19",
    "02/07/19",
    "02/08/19",
    "02/09/19",
    "02/10/19",
]

date_list

['02/04/19',
 '02/05/19',
 '02/06/19',
 '02/07/19',
 '02/08/19',
 '02/09/19',
 '02/10/19']

In [11]:
# Wrap the raw strings in a Series so the vectorised `.str` methods are available.
dates = pd.Series(data=date_list)
dates

0    02/04/19
1    02/05/19
2    02/06/19
3    02/07/19
4    02/08/19
5    02/09/19
6    02/10/19
dtype: object

In [12]:
# Capture month, day, and two-digit year, then re-emit them as YYYY-MM-DD.
# The century is written literally as "20" because the inputs carry only two-digit years.
date_pattern = r'(\d{2})/(\d{2})/(\d{2})'
iso_template = r'20\3-\1-\2'
dates.str.replace(date_pattern, iso_template, regex=True)

0    2019-02-04
1    2019-02-05
2    2019-02-06
3    2019-02-07
4    2019-02-08
5    2019-02-09
6    2019-02-10
dtype: object

## 5. Write a regex to extract the various parts of these logfile lines:

```
GET /api/v1/sales?page=86 [16/Apr/2019:193452+0000] HTTP/1.1 {200} 510348 "python-requests/2.21.0" 97.105.19.58

POST /users_accounts/file-upload [16/Apr/2019:193452+0000] HTTP/1.1 {201} 42 "User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36" 97.105.19.58

GET /api/v1/items?page=3 [16/Apr/2019:193453+0000] HTTP/1.1 {429} 3561 "python-requests/2.21.0" 97.105.19.58
```

In [13]:
# The three sample log lines from the exercise, one per Series element.
lines = pd.Series([
    'GET /api/v1/sales?page=86 [16/Apr/2019:193452+0000] HTTP/1.1 {200} 510348 "python-requests/2.21.0" 97.105.19.58',
    'POST /users_accounts/file-upload [16/Apr/2019:193452+0000] HTTP/1.1 {201} 42 "User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36" 97.105.19.58',
    'GET /api/v1/items?page=3 [16/Apr/2019:193453+0000] HTTP/1.1 {429} 3561 "python-requests/2.21.0" 97.105.19.58',
])

# One named group per field, in the order the fields appear on the line:
# method, path, [timestamp], http_version, {status}, bytes, "user_agent", ip.
# The pattern is written across several lines and therefore must be compiled
# with re.VERBOSE, which makes the regex engine ignore the line breaks.
logfile_re = r'''
^(?P<method>GET|POST)
\s+
(?P<path>.*?)
\s+
\[(?P<timestamp>.*?)\]
\s+
(?P<http_version>.*?)
\s+
\{(?P<status>\d+)\}
\s+
(?P<bytes>\d+)
\s+
"(?P<user_agent>.*)"
\s+
(?P<ip>.*)$
'''

# Each named group becomes a column of the resulting DataFrame.
lines.str.extract(logfile_re, flags=re.VERBOSE)

Unnamed: 0,method,path,timestamp,http_version,status,bytes,user_agent,ip
0,GET,/api/v1/sales?page=86,16/Apr/2019:193452+0000,HTTP/1.1,200,510348,python-requests/2.21.0,97.105.19.58
1,POST,/users_accounts/file-upload,16/Apr/2019:193452+0000,HTTP/1.1,201,42,User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; ...,97.105.19.58
2,GET,/api/v1/items?page=3,16/Apr/2019:193453+0000,HTTP/1.1,429,3561,python-requests/2.21.0,97.105.19.58
