In [1]:
import re

## Basic Syntax

- `.`: Matches any single character except newline
- `^`: Matches the start of the string
- `$`: Matches the end of the string
- `*`: Matches 0 or more repetitions of the preceding element
- `+`: Matches 1 or more repetitions of the preceding element
- `?`: Matches 0 or 1 repetition of the preceding element
- `{n}`: Matches exactly n repetitions of the preceding element
- `{n,}`: Matches at least n repetitions of the preceding element
- `{n,m}`: Matches between n and m repetitions of the preceding element
- `|`: Alternation, matches either the pattern before or the pattern after the symbol

## Character Classes

- `[abc]`: Matches any one of the characters a, b, or c
- `[^abc]`: Matches any character that is not a, b, or c
- `[a-z]`: Matches any character from a to z
- `[A-Z]`: Matches any character from A to Z
- `[0-9]`: Matches any digit
- `\d`: Matches any digit (equivalent to [0-9])
- `\D`: Matches any non-digit
- `\w`: Matches any word character (equivalent to [a-zA-Z0-9_])
- `\W`: Matches any non-word character
- `\s`: Matches any whitespace character
- `\S`: Matches any non-whitespace character

## Special Characters

- `\`: Escapes a special character
- `()` : Defines a group
- `(?:...)`: Non-capturing group
- `(?=...)`: Positive lookahead assertion
- `(?!...)`: Negative lookahead assertion

## Examples

- `abc`: Matches the string "abc"
- `abc|def`: Matches "abc" or "def"
- `^abc`: Matches any string that starts with "abc"
- `abc$`: Matches a string that ends with "abc"
- `a.b`: Matches any string containing "a", any character, then "b"
- `a*`: Matches 0 or more 'a's
- `a+`: Matches 1 or more 'a's
- `a?`: Matches 0 or 1 'a'
- `\d{2,4}`: Matches between 2 and 4 digits

In [2]:
s = '''
<a class="product-link" href="https://amazon.com/categories/ski">Ski</a>
<a class="product-link" href="https://amazon.com/p/1234567890/awesome-product-1">Coffee beans</a>
<a class="product-link" href="https://amazon.com/p/6454343333/ok-product-2">Backcountry Ski</a>
<a class="product-link" href="https://amazon.com/p/6543565454/great-product-1">Book</a>
<a href="https://amazon.com/about-us">About Us</a>
'''

In [4]:
# Extract only the product links
# Expected output:
# https://amazon.com/p/1234567890/awesome-product-1
# https://amazon.com/p/6454343333/ok-product-2
# https://amazon.com/p/6543565454/great-product-1

re.findall(r'<a class="product-link" href="https://amazon.com/p/1234567890/awesome-product-1">', s)

['<a class="product-link" href="https://amazon.com/p/1234567890/awesome-product-1">']

In [9]:
re.findall(r'<a href="https://amazon.com/p/\d+/.+">', s)

['<a href="https://amazon.com/p/1234567890/awesome-product-1">',
 '<a href="https://amazon.com/p/6454343333/ok-product-2">',
 '<a href="https://amazon.com/p/6543565454/great-product-1">']

In [10]:
re.findall(r'<a href="(https://amazon.com/p/\d+/.+)">', s)

['https://amazon.com/p/1234567890/awesome-product-1',
 'https://amazon.com/p/6454343333/ok-product-2',
 'https://amazon.com/p/6543565454/great-product-1']

In [8]:
b = '''<a href="https://amazon.com/p/1234567890/awesome-product-1"> <img></img></a>'''

re.findall(r'<a href="(https://amazon.com/p/6546543565454/.+?)">', s)


[]

# Practice Problems

### Problem 1: Email Extraction

**Problem**: Extract emails from a given string.  
**String**: "Contact us at support@example.com or sales@example.org"

In [9]:
import re

def extract_emails(text):
    pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
    emails = re.findall(pattern, text)
    return emails

# Example usage:
input_string = "Contact us at support@example.com or sales@example.com and I can be reached at rgulla@uw.edu."
result = extract_emails(input_string)

print(result)

['support@example.com', 'sales@example.com', 'rgulla@uw.edu']


### Problem 2: Phone Number Validation

**Problem**: Validate and extract US phone numbers in the format xxx-xxx-xxxx.  
**String**: "My numbers are 123-456-7890 or 333-333-3333"

In [10]:
def extract_phone_numbers(text):
    pattern = r'\b\d{3}-\d{3}-\d{4}\b'
    phone_numbers = re.findall(pattern, text)
    return phone_numbers

# Example usage:
input_string = "My numbers are 123-456-7890 or 333-333-3333."
result = extract_phone_numbers(input_string)

print(result)

['123-456-7890', '333-333-3333']


### Problem 3: Password Strength Check

**Problem**: Check if a password is at least 8 characters long, contains a digit, an uppercase, and a lowercase letter.  
**String**: "Password1"

In [22]:
import re

def is_valid_password(password):
    # Check length
    if len(password) < 8:
        return False
    
    # Check for at least one digit, one uppercase, and one lowercase letter using regular expressions
    pattern = re.compile(r'^(?=.*\d)(?=.*[a-z])(?=.*[A-Z]).{8,}$')
    return bool(re.match(pattern, password))

# Example usage:
password_to_check = "Password1"
result = is_valid_password(password_to_check)

print(result)

password_to_check = "password!"
result = is_valid_password(password_to_check)

print(result)

True
False


### Problem 4: Extracting Domain Name

**Problem**: Extract the domain name from an email address.  
**String**: "user@example.com"

In [14]:
def extract_domain(email):
    pattern = re.compile(r'@([a-zA-Z0-9.-]+)$')
    match = re.search(pattern, email)
    
    if match:
        return match.group(1)
    else:
        return None

# Example usage:
email_address = "user@example.com"
domain_name = extract_domain(email_address)

print(domain_name)

example.com


### Problem 5: Validating an IP Address

**Problem**: Check if a string is a valid IPv4 address.  
**String**: "192.168.1.1"

In [19]:
import re

def is_valid_ipv4(ip_str):
    # Define the regular expression pattern for IPv4
    ipv4_pattern = re.compile(r'^(\d{1,3}\.){3}\d{1,3}$')

    # Check if the string matches the pattern
    if ipv4_pattern.match(ip_str):
        # Further validate each octet
        octets = ip_str.split('.')
        for octet in octets:
            if not (0 <= int(octet) <= 255):
                return False
        return True
    else:
        return False

# Test the function
ip_address = "192.168.1.1"
result = is_valid_ipv4(ip_address)

if result:
    print(f"{ip_address} is a valid IPv4 address.")
else:
    print(f"{ip_address} is not a valid IPv4 address.")
ip_address = "192.168.1_1"
result = is_valid_ipv4(ip_address)

if result:
    print(f"{ip_address} is a valid IPv4 address.")
else:
    print(f"{ip_address} is not a valid IPv4 address.")
    
    

192.168.1.1 is a valid IPv4 address.
192.168.1_1 is not a valid IPv4 address.


In [21]:
import datetime
import pytz

def world_clock():
    time_zones = ['America/New_York', 'Europe/London', 'Asia/Tokyo', 'Australia/Sydney']

    for tz in time_zones:
        current_time = datetime.datetime.now(pytz.timezone(tz))
        print(f"{tz}: {current_time.strftime('%Y-%m-%d %H:%M:%S')}")

if __name__ == "__main__":
    world_clock()

America/New_York: 2024-01-30 18:58:34
Europe/London: 2024-01-30 23:58:34
Asia/Tokyo: 2024-01-31 08:58:34
Australia/Sydney: 2024-01-31 10:58:34
