In [1]:
import re
import requests
from bs4 import BeautifulSoup
from wordcloud import WordCloud
import matplotlib.pyplot as plt

def clean_text(text):
    """
    Normalizes text by stripping special characters and lowercasing it.

    Only ASCII letters, digits and whitespace survive; every other
    character (punctuation, underscores, accented letters, ...) is dropped.

    Args:
        text (str): The text to be cleaned.

    Returns:
        str: The cleaned, lowercased text.
    """
    # Strip everything outside [a-zA-Z0-9] and whitespace, then lowercase.
    return re.sub(r'[^a-zA-Z0-9\s]', '', text).lower()

def extract_emails(text):
    """
    Finds every email address that appears in the given text.

    Args:
        text (str): The text to search for email addresses.

    Returns:
        list: The matched email addresses, in order of appearance
            (an empty list when none are found).
    """
    # local-part @ domain . TLD (TLD is at least two ASCII letters)
    matcher = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
    return matcher.findall(text)

def fetch_webpage_title(url, timeout=10):
    """
    Fetches a webpage and returns its title.

    Args:
        url (str): The URL of the webpage.
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout, requests can block indefinitely on an
            unresponsive host. Defaults to 10.

    Returns:
        str: The title of the webpage, 'No title found' when the page has
            no (or an empty) <title>, or an error message starting with
            'Error fetching the webpage:' when the request fails.
    """
    # Only the network call can raise RequestException, so keep the try
    # body limited to it.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        return f"Error fetching the webpage: {e}"

    soup = BeautifulSoup(response.text, 'html.parser')
    title_tag = soup.title
    if title_tag is None:
        return 'No title found'

    # `.string` is None when <title> is empty or contains nested tags;
    # fall back to the concatenated text so a str is always returned.
    title = title_tag.string
    if title is None:
        title = title_tag.get_text()
    return str(title) if title else 'No title found'

def generate_wordcloud(text, output_filename, *, width=800, height=400,
                       background_color='white'):
    """
    Generates a WordCloud from the given text and saves it as an image.

    Args:
        text (str): The text to generate the WordCloud from.
        output_filename (str): The filename to save the WordCloud image.
        width (int): Width of the WordCloud canvas. Defaults to 800.
        height (int): Height of the WordCloud canvas. Defaults to 400.
        background_color (str): Background color of the image.
            Defaults to 'white'.
    """
    # The rendering options are keyword-only so existing two-argument calls
    # keep producing the same 800x400 white-background image.
    cloud = WordCloud(width=width, height=height,
                      background_color=background_color)
    cloud.generate(text)
    cloud.to_file(output_filename)
    print(f"WordCloud saved as {output_filename}")

# Demo: exercise each helper defined above and print the results.
# NOTE: these statements run at module/cell level, so executing them issues a
# live HTTP request and saves an image file named 'wordcloud.png'.

# Text cleaning: show the input next to its cleaned, lowercased form.
test_input_clean = 'Hello, World! Welcome to NLP 101.'
cleaned_text = clean_text(test_input_clean)
print(f"Original: {test_input_clean}")
print(f"Cleaned: {cleaned_text}")

# Email extraction: the sample sentence contains two addresses.
test_input_emails = 'Contact us at support@example.com and sales@example.org.'
extracted_emails = extract_emails(test_input_emails)
print(f"Original: {test_input_emails}")
print(f"Extracted Emails: {extracted_emails}")

# Webpage title fetching: on a request failure the helper returns an error
# message string instead of raising, so the print below always runs.
test_url = 'https://example.com'
webpage_title = fetch_webpage_title(test_url)
print(f"URL: {test_url}")
print(f"Webpage Title: {webpage_title}")

# WordCloud generation: render a short sample phrase to 'wordcloud.png'.
test_wordcloud_text = 'data science machine learning artificial intelligence'
generate_wordcloud(test_wordcloud_text, 'wordcloud.png')


Original: Hello, World! Welcome to NLP 101.
Cleaned: hello world welcome to nlp 101
Original: Contact us at support@example.com and sales@example.org.
Extracted Emails: ['support@example.com', 'sales@example.org']
URL: https://example.com
Webpage Title: Example Domain
WordCloud saved as wordcloud.png
