# 1.1 Removing HTML Tags: Using RE

In [1]:
import re

def remove_html_tags(text):
    clean = re.compile('<.*?>')  # Regular expression to find HTML tags
    return re.sub(clean, '', text)

html_text = "<h1>This is a Heading</h1><p>This is a paragraph.</p>"
clean_text = remove_html_tags(html_text)

print(clean_text)


This is a HeadingThis is a paragraph.


#1.2 Removing HTML Tags: Using HTML parser like BeautifulSoup

In [2]:
from bs4 import BeautifulSoup

def remove_html_tags(text):
    soup = BeautifulSoup(text, "html.parser")
    return soup.get_text()

html_text = "<h1>This is a Heading</h1><p>This is a paragraph.</p>"
clean_text = remove_html_tags(html_text)

print(clean_text)


This is a HeadingThis is a paragraph.


#2.1 Removing Emojies: Using Regular Expressions (Unicode Ranges for Emojis)

In [3]:
import re

def remove_emojis(text):
    # Define a regex pattern that matches emoji characters
    emoji_pattern = re.compile(
        "["
        u"\U0001F600-\U0001F64F"  # Emoticons
        u"\U0001F300-\U0001F5FF"  # Miscellaneous Symbols and Pictographs
        u"\U0001F680-\U0001F6FF"  # Transport and Map Symbols
        u"\U0001F1E0-\U0001F1FF"  # Flags (iOS)
        u"\U00002702-\U000027B0"  # Dingbats
        u"\U000024C2-\U0001F251"  # Enclosed characters
        "]+", flags=re.UNICODE
    )
    return emoji_pattern.sub(r'', text)

# Example usage
text_with_emojis = "Hello 😀! How are you? 🚀💡"
clean_text = remove_emojis(text_with_emojis)

print(f"Original: {text_with_emojis}")
print(f"Without Emojis: {clean_text}")


Original: Hello 😀! How are you? 🚀💡
Without Emojis: Hello ! How are you? 


#2.2 Removing Emojies: Using emoji library (If you prefer third-party library)

In [None]:
!pip install emoji

In [5]:
import emoji

def remove_emojis(text):
    return emoji.replace_emoji(text, replace='')

# Example usage
text_with_emojis = "Hello 😀! How are you? 🚀💡"
clean_text = remove_emojis(text_with_emojis)

print(f"Original: {text_with_emojis}")
print(f"Without Emojis: {clean_text}")


Original: Hello 😀! How are you? 🚀💡
Without Emojis: Hello ! How are you? 


# 3.1 Spelling Check: Using TextBlob

In [None]:
!pip install textblob


In [7]:
from textblob import TextBlob

def correct_spelling(text):
    blob = TextBlob(text)
    corrected_text = blob.correct()
    return str(corrected_text)

# Example usage
text_with_typos = "I havv a speling misstake in this sentnce."
corrected_text = correct_spelling(text_with_typos)

print(f"Original: {text_with_typos}")
print(f"Corrected: {corrected_text}")


Original: I havv a speling misstake in this sentnce.
Corrected: I have a spelling mistake in this sentence.


# 3.2 Spelling Check: Using pyspellchecker

In [None]:
!pip install pyspellchecker

In [9]:
from spellchecker import SpellChecker

def correct_spelling(text):
    spell = SpellChecker()
    words = text.split()

    # Find misspelled words
    misspelled = spell.unknown(words)

    # Correct the misspelled words
    corrected_words = [spell.correction(word) if word in misspelled else word for word in words]

    return ' '.join(corrected_words)

# Example usage
text_with_typos = "I havv a speling misstake in this sentnce."
corrected_text = correct_spelling(text_with_typos)

print(f"Original: {text_with_typos}")
print(f"Corrected: {corrected_text}")


Original: I havv a speling misstake in this sentnce.
Corrected: I have a spelling mistake in this sentence
