In [2]:
# Download necessary NLTK data
# `nltk` is imported here because the notebook's main import cell comes after
# this one; without it this cell raises NameError on a fresh kernel
# (Restart Kernel -> Run All).
import nltk

nltk.download('treebank')  # corpus read via nltk.corpus.treebank below
nltk.download('punkt')     # tokenizer data; word_tokenize is used below

[nltk_data] Downloading package treebank to
[nltk_data]     C:\Users\Sahyadri\AppData\Roaming\nltk_data...
[nltk_data]   Package treebank is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Sahyadri\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [7]:
import nltk
from nltk.corpus import treebank
from nltk.tag import UnigramTagger, BigramTagger, RegexpTagger
from nltk.tokenize import word_tokenize

# Work on a small sample: the first 300 tagged sentences of the NLTK treebank corpus.
corpus = treebank.tagged_sents()[:300]

# Split the sample: the first 250 sentences train the taggers, the rest of the
# slice is held out (it is not used by the code shown in this cell).
train_data, test_data = corpus[:250], corpus[250:]

# Define patterns for rule-based tagger
# Each entry is (regular expression, tag). The list is ordered from specific
# to general so that a first-match-wins consumer (RegexpTagger tries patterns
# in order) sees suffix rules such as 'es' and "'s" before the generic 's'
# rule, and only falls through to the catch-all 'NN' when nothing else matches.
patterns = [
    (r'.*ing$', 'VBG'),    # gerunds
    (r'.*ed$', 'VBD'),     # past tense verbs
    (r'.*es$', 'VBZ'),     # 3rd person singular present verbs
    (r'.*ould$', 'MD'),    # modals
    (r'.*\'s$', 'POS'),    # possessive nouns
    (r'.*s$', 'NNS'),      # plural nouns
    # The decimal point must be escaped: an unescaped '.' matches any
    # character, so tokens like '1a2' or '12-34' were wrongly tagged 'CD'.
    (r'^-?[0-9]+(\.[0-9]+)?$', 'CD'),  # cardinal numbers
    (r'.*', 'NN')          # nouns (default)
]

# Rule-based tagger: assigns tags from the regular-expression patterns alone.
rule_based_tagger = RegexpTagger(patterns)

# Stochastic tagger chain trained on train_data. Backoff order:
# bigram tagger -> unigram tagger -> rule-based tagger.
unigram_tagger = UnigramTagger(train=train_data, backoff=rule_based_tagger)
bigram_tagger = BigramTagger(train=train_data, backoff=unigram_tagger)

def tag_sentence(sentence, tagger):
    """Tokenize a sentence and tag its tokens.

    Args:
        sentence: Raw sentence text; it is split with ``word_tokenize``.
        tagger: Any object exposing ``tag(tokens)``, such as the taggers
            built in this notebook.

    Returns:
        Whatever ``tagger.tag`` returns for the token list.
    """
    return tagger.tag(word_tokenize(sentence))

# Example sentences used to compare the two tagging approaches
sentences = [
    "He is reading a book",
    "She watched the movie yesterday",
    "Dogs are barking loudly"
]

# Tag every example with both taggers and print the results one after the
# other; the trailing empty string produces the blank line between sentences.
for sentence in sentences:
    rule_based_tags = tag_sentence(sentence, rule_based_tagger)
    stochastic_tags = tag_sentence(sentence, bigram_tagger)
    print(
        f"Sentence: {sentence}",
        f"Rule-Based Tags: {rule_based_tags}",
        f"Stochastic Tags: {stochastic_tags}",
        "",
        sep="\n",
    )


Sentence: He is reading a book
Rule-Based Tags: [('He', 'NN'), ('is', 'NNS'), ('reading', 'VBG'), ('a', 'NN'), ('book', 'NN')]
Stochastic Tags: [('He', 'PRP'), ('is', 'VBZ'), ('reading', 'VBG'), ('a', 'DT'), ('book', 'NN')]

Sentence: She watched the movie yesterday
Rule-Based Tags: [('She', 'NN'), ('watched', 'VBD'), ('the', 'NN'), ('movie', 'NN'), ('yesterday', 'NN')]
Stochastic Tags: [('She', 'PRP'), ('watched', 'VBD'), ('the', 'DT'), ('movie', 'NN'), ('yesterday', 'NN')]

Sentence: Dogs are barking loudly
Rule-Based Tags: [('Dogs', 'NNS'), ('are', 'NN'), ('barking', 'VBG'), ('loudly', 'NN')]
Stochastic Tags: [('Dogs', 'NNS'), ('are', 'VBP'), ('barking', 'VBG'), ('loudly', 'NN')]

