# Regular Expressions

In [1]:
import re

In [2]:
# Literal patterns and alternation. Matching is case-sensitive by default,
# so the all-lowercase alternatives find nothing in this text.
example = 'Hello World'
for pattern in ('Hello', 'Hello|World', 'hello|world'):
    print(re.findall(pattern, example))

['Hello']
['Hello', 'World']
[]


In [3]:
# Sample text mixing upper-case letters, lower-case letters and digits.
example = 'AbcDeF12'

In [4]:
# A character class matches exactly one character out of the listed set.
# NOTE: [A-z] is not "all letters". The ASCII range from 'A' to 'z' also
# covers the six symbols [ \ ] ^ _ ` that sit between 'Z' and 'a', so the
# two letter ranges are spelled out explicitly instead.
print('All chars: ', re.findall(r'[A-Za-z]', example))
print('Upper-case chars: ', re.findall(r'[A-Z]', example))
print('Lower-case chars: ', re.findall(r'[a-z]', example))
print('Digits chars: ', re.findall(r'[0-9]', example))

All chars:  ['A', 'b', 'c', 'D', 'e', 'F']
Upper-case chars:  ['A', 'D', 'F']
Lower-case chars:  ['b', 'c', 'e']
Digits chars:  ['1', '2']


In [5]:
# '+' collects a whole run of digits; both spellings of the digit class
# give the same result for this ASCII input.
for digit_pattern in (r'[0-9]+', r'\d+'):
    print('Number: ', re.findall(digit_pattern, example))

Number:  ['12']
Number:  ['12']


In [6]:
example = 'Hello World'
# '^' anchors at the start and '$' at the end of the text; as the first
# character inside brackets, '^' negates the set instead.
for pattern in (r'^He', r'ld$', r'[^ld]'):
    print(re.findall(pattern, example))

['He']
['ld']
['H', 'e', 'o', ' ', 'W', 'o', 'r']


In [7]:
example = 'abcabc'
# {2} binds only to the item right before it: 'abc{2}' means 'abcc', while
# '(abc){2}' repeats the whole group (findall then reports the group text).
for pattern in (r'abc', r'abc{2}', r'(abc){2}'):
    print(re.findall(pattern, example))

['abc', 'abc']
[]
['abc']


In [8]:
# '.' matches any single character except a newline, '+' means one or more
# repetitions and '?' means zero or one.
any_char, one_or_more, zero_or_one = r'a.c', r'(abc)+', r'(abc)?'
print(re.findall(any_char, example))
print(re.findall(one_or_more, example))
print(re.findall(zero_or_one, example))

['abc', 'abc']
['abc']
['abc', 'abc', '']


In [9]:
# 'he' with an optional leading 's', or the literal 'it'. No word boundaries
# are used, so the 'he' inside 'they' is reported as well.
sentence = 'she he it they'
print(re.findall(r's?he|it', sentence))

['she', 'he', 'it', 'he']


## Tokenizer

In [10]:
# Naive tokenizer: cut the sentence at every single space character.
example = 'Welcome to Bilgi University !'
tokens = example.split(sep=' ')
print(tokens)

['Welcome', 'to', 'Bilgi', 'University', '!']


In [11]:
# With no space before '!', splitting on spaces leaves the punctuation
# attached to the last word.
example = 'Welcome to Bilgi University!'
tokens = str.split(example, ' ')
print(tokens)

['Welcome', 'to', 'Bilgi', 'University!']


In [12]:
# Put a space in front of every '!' so that splitting on spaces turns the
# exclamation mark into a token of its own.
formatted_example = example.replace('!', ' !')
tokens = formatted_example.split(sep=' ')
print(tokens)

['Welcome', 'to', 'Bilgi', 'University', '!']


In [13]:
# A token is either a run of word characters or, failing that, a run of
# any other non-whitespace characters (e.g. punctuation).
token_regex = re.compile(r'\w+|\S+')
tokens = token_regex.findall(example)
print(tokens)

['Welcome', 'to', 'Bilgi', 'University', '!']


In [14]:
# Keep only runs of word characters; punctuation is dropped altogether.
tokens = re.findall(pattern=r'\w+', string=example)
print(tokens)

['Welcome', 'to', 'Bilgi', 'University']


In [15]:
# Without a quantifier every non-whitespace character is its own match.
tokens = [match.group(0) for match in re.finditer(r'\S', example)]
print(tokens)

['W', 'e', 'l', 'c', 'o', 'm', 'e', 't', 'o', 'B', 'i', 'l', 'g', 'i', 'U', 'n', 'i', 'v', 'e', 'r', 's', 'i', 't', 'y', '!']


## Email Checker

In [16]:
email = 'ozgur.ozdemir@gmail.com'
# Partial check of the domain part only: '@', a name, a literal dot and a
# suffix. The dot is escaped because a bare '.' matches any character, which
# would let an address such as 'user@gmailcom' pass.
email_pattern = r'@\w+\.\w+'
if re.search(email_pattern, email):
    print('The email format is correct')
else:
    print('The email format is incorrect')

The email format is correct


In [17]:
email = 'ozgur_ozdemir@gmail.com'
# Local part: word characters, dots and hyphens. The '-' is placed last in
# the class so it is a literal; written as '[.-_]' it would be the range from
# '.' to '_', which does not even contain '-'.
# Domain: labels separated by literal (escaped) dots, at least two of them.
# One flat character class replaces the nested quantifier '(\w+...)+', which
# can backtrack exponentially on text that does not match.
email_pattern = r'[\w.-]+@[\w-]+(?:\.[\w-]+)+'
# fullmatch requires the whole string to be an address, not just a part of it.
if re.fullmatch(email_pattern, email):
    print('The email format is correct')
else:
    print('The email format is incorrect')

The email format is correct


## Phone Checker

In [18]:
phone = '5551112233'
phone_pattern = r'\d{10}'
# fullmatch demands exactly ten digits and nothing else; search would also
# accept longer numbers or ten digits embedded in other text.
if re.fullmatch(phone_pattern, phone):
    print('The phone format is correct')
else:
    print('The phone format is incorrect')

The phone format is correct


In [19]:
phone = '(555)1112233'
# Parentheses are escaped so they match literally instead of forming a group.
phone_pattern = r'\(\d{3}\)\d{7}'
# fullmatch rejects extra characters before or after the number, which
# search would silently ignore.
if re.fullmatch(phone_pattern, phone):
    print('The phone format is correct')
else:
    print('The phone format is incorrect')

The phone format is correct


## Date Checker

In [20]:
date = '10-10-1955'
# Two dash-separated numeric fields followed by a four-digit year that is
# strictly greater than 1950:
#   19(5[1-9]|[6-9][0-9])  -> 1951..1999
#   [2-9][0-9]{3}          -> 2000..9999 (missing before, so e.g. 2024 failed)
date_pattern = r'[0-9]+-[0-9]+-(?:19(?:5[1-9]|[6-9][0-9])|[2-9][0-9]{3})'
# fullmatch keeps trailing characters after the year from being ignored.
if re.fullmatch(date_pattern, date):
    print('The date is later than 1950')
else:
    # Also reached for the year 1950 itself and for text that is not in the
    # expected shape, hence "not later" rather than "earlier".
    print('The date is not later than 1950')

The date is later than 1950
