# **Tokenization**

In [1]:
import re

# Sample sentence to split into lowercase word tokens.
text = "This is a sample sentence. Tokenization is the first step in NLP!"
# Lowercase the sentence, then collect every run of word characters (\w+);
# punctuation and whitespace only act as separators and are dropped.
tokens = [match.group(0) for match in re.finditer(r'\w+', text.lower())]
print(tokens)

['this', 'is', 'a', 'sample', 'sentence', 'tokenization', 'is', 'the', 'first', 'step', 'in', 'nlp']


# **Singular to Plural Conversion**

In [2]:
import re

# Irregular nouns whose plural cannot be derived from a suffix rule.
# Looked up by exact (case-sensitive) match before any rule is tried.
_IRREGULAR_PLURALS = {
    "child": "children",
    "man": "men",
    "woman": "women",
    "tooth": "teeth",
    "foot": "feet",
    "mouse": "mice",
    "ox": "oxen",
    # Add more irregular forms as needed
}

# Ordered (compiled pattern, replacement) suffix rules; the FIRST match wins,
# so specific endings must come before the generic ones they overlap with
# (e.g. "axis"/"octopus"/"class" before the catch-all "ends in s" rule).
# Every replacement re-inserts the captured stem via \1 -- without it the
# matched stem letters would be deleted from the result.
_PLURAL_RULES = [
    (re.compile(pattern), replacement)
    for pattern, replacement in (
        (r'(?i)(quiz)$', r'\1zes'),                 # quiz -> quizzes (case insensitive)
        (r'(ax|test)is$', r'\1es'),                 # axis -> axes
        (r'(octop|vir)us$', r'\1i'),                # octopus -> octopi
        (r'(alias|status|bus)$', r'\1es'),          # bus -> buses
        (r'([ml])ouse$', r'\1ice'),                 # louse -> lice
        (r'(matr|vert|ind)(?:ix|ex)$', r'\1ices'),  # matrix, vertex, index
        (r'(x|ch|ss|sh)$', r'\1es'),                # box, church, class, dish
        (r's$', 's'),                               # already ends in s: leave as is
        (r'([^aeiouy]|qu)y$', r'\1ies'),            # consonant + y -> ies
        (r'(hive)$', r'\1s'),                       # hive -> hives
        (r'([^f])fe$', r'\1ves'),                   # knife -> knives (not giraffe)
        (r'([^f])f$', r'\1ves'),                    # leaf -> leaves (not cliff)
        (r'(buffal|tomat|potat)o$', r'\1oes'),      # buffalo, tomato, potato
        (r'([ti])um$', r'\1a'),                     # datum -> data
        (r'(p)erson$', r'\1eople'),                 # person -> people
        (r'(m)an$', r'\1en'),                       # fireman -> firemen
    )
]


def pluralize(word):
    """Return the English plural of a singular noun using suffix rules.

    Lookup order:
      1. An empty string is returned unchanged.
      2. Exact matches in the irregular-noun table (e.g. "child" -> "children").
      3. The first matching suffix rule in ``_PLURAL_RULES``.
      4. Otherwise ``word + 's'``.

    Args:
        word: Singular noun as a string. The irregular lookup is
            case-sensitive; only the "quiz" rule ignores case.

    Returns:
        The pluralized string. Words already ending in "s" that match no
        more specific rule are returned unchanged.
    """
    if not word:
        return word

    if word in _IRREGULAR_PLURALS:
        return _IRREGULAR_PLURALS[word]

    for pattern, replacement in _PLURAL_RULES:
        # subn reports whether the rule matched, so one call both tests and rewrites.
        plural, hits = pattern.subn(replacement, word, count=1)
        if hits:
            return plural

    return word + 's'

# Get user input. Surrounding whitespace is stripped because pluralize() uses an
# exact-match irregular lookup and end-anchored ($) suffix rules, both of which
# a stray trailing space would defeat (e.g. "child " would not become "children").
input_word = input("Enter a singular word: ").strip()
plural_word = pluralize(input_word)

print(f"Singular: {input_word} => Plural: {plural_word}")


Enter a singular word: leaf
Singular: leaf => Plural: leaves


# **Case Conversion and Stemming**

In [3]:
import re
from nltk.stem import PorterStemmer

def switch_case(text):
    """Flip the case of each word in ``text``.

    Words are the ``\\w+`` runs found between word boundaries. A word that is
    entirely uppercase is lowercased; every other word (lowercase or mixed
    case) is uppercased. Punctuation and the original spacing are discarded:
    the converted words are joined with single spaces.
    """
    flipped = []
    for token in re.findall(r'\b\w+\b', text):
        if token.isupper():
            flipped.append(token.lower())
        else:
            flipped.append(token.upper())
    return ' '.join(flipped)

def stemming(text):
    """Stem every word in ``text`` with NLTK's ``PorterStemmer``.

    Words are the ``\\w+`` runs found between word boundaries, so punctuation
    is dropped. Each word is passed to ``PorterStemmer.stem`` and the results
    are joined with single spaces.
    """
    stemmer = PorterStemmer()
    words = re.findall(r'\b\w+\b', text)
    return ' '.join(stemmer.stem(token) for token in words)

# Example sentence used by both demonstrations below.
input_text = "Natural Language Processing is a fascinating field."

# Compute both transformations of the same input first...
switched_text = switch_case(input_text)
stemmed_text = stemming(input_text)

# ...then report them: case-switched text first, stemmed text second.
print("Switched Case Text:", switched_text)
print("Stemmed Text:", stemmed_text)

Switched Case Text: NATURAL LANGUAGE PROCESSING IS A FASCINATING FIELD
Stemmed Text: natur languag process is a fascin field
