In [1]:
import re

def clean_text(text):
    """
    Normalize text by stripping symbols/punctuation and lowercasing it.

    Args:
        text (str): Raw input text.

    Returns:
        str: The input with every character other than ASCII letters,
            digits, and whitespace removed, then converted to lowercase.
    """
    # Drop anything that is not an ASCII letter, a digit, or whitespace
    stripped = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    return stripped.lower()

def extract_emails(text):
    """
    Find every email-like substring in the given text.

    Args:
        text (str): The text to scan.

    Returns:
        list: The matched email addresses, in the order they appear
            (empty if none are found).
    """
    # local-part @ domain . top-level-domain (at least two letters)
    matcher = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
    # The pattern has no capture groups, so each whole match is one address
    return [hit.group(0) for hit in matcher.finditer(text)]

# Sample inputs for the two helpers
test_input_clean = 'Hello, World! Welcome to NLP 101.'
test_input_emails = 'Contact us at support@example.com and sales@example.org.'

# Show the sentence before and after cleaning
cleaned_text = clean_text(test_input_clean)
print(f"Original: {test_input_clean}")
print(f"Cleaned: {cleaned_text}")

# Show the contact sentence and the addresses found in it
extracted_emails = extract_emails(test_input_emails)
print(f"Original: {test_input_emails}")
print(f"Extracted Emails: {extracted_emails}")


Original: Hello, World! Welcome to NLP 101.
Cleaned: hello world welcome to nlp 101
Original: Contact us at support@example.com and sales@example.org.
Extracted Emails: ['support@example.com', 'sales@example.org']
