In [1]:
import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag

# Sample text
text = "This is a sample sentence to demonstrate PoS tagging."

# Tokenize the text into words
words = word_tokenize(text)

# Perform PoS tagging
pos_tags = pos_tag(words)

# Display the PoS tags
print(pos_tags)


[('This', 'DT'), ('is', 'VBZ'), ('a', 'DT'), ('sample', 'JJ'), ('sentence', 'NN'), ('to', 'TO'), ('demonstrate', 'VB'), ('PoS', 'NNP'), ('tagging', 'NN'), ('.', '.')]


In [2]:
import nltk

def find_word_with_most_tags():
    # Load the Brown corpus
    nltk.download('brown')
    corpus = nltk.corpus.brown.tagged_words()

    # Create a dictionary to store the tags for each word
    word_tags = {}

    # Iterate through the tagged words and populate the dictionary
    for word, tag in corpus:
        if word not in word_tags:
            word_tags[word] = set()
        word_tags[word].add(tag)

    # Find the word with the most distinct tags
    max_tags_word = max(word_tags, key=lambda word: len(word_tags[word]))
    max_tags_count = len(word_tags[max_tags_word])
    max_tags = word_tags[max_tags_word]

    return max_tags_word, max_tags_count, max_tags

# Find the word with the most distinct tags
word, count, tags = find_word_with_most_tags()

# Print the result
print(f"The word '{word}' has the greatest number of distinct tags, which are: {tags}")
print(f"It has {count} distinct tags.")


[nltk_data] Downloading package brown to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package brown is already up-to-date!


The word 'that' has the greatest number of distinct tags, which are: {'WPS', 'QL', 'CS-HL', 'DT', 'NIL', 'WPO-NC', 'WPS-HL', 'WPO', 'CS', 'WPS-NC', 'DT-NC', 'CS-NC'}
It has 12 distinct tags.


In [7]:
import nltk
nltk.download('tagsets')


[nltk_data] Downloading package tagsets to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping help\tagsets.zip.


True

In [8]:
import nltk

def list_tags_in_decreasing_frequency():
    # Load the Brown corpus
    nltk.download('brown')
    corpus = nltk.corpus.brown.tagged_words()

    # Calculate frequency distribution of tags
    tag_freq_dist = nltk.FreqDist(tag for (word, tag) in corpus)

    # List tags in decreasing frequency
    tags_in_decreasing_frequency = tag_freq_dist.most_common()

    return tags_in_decreasing_frequency

# Get the list of tags in decreasing frequency
tags_in_decreasing_frequency = list_tags_in_decreasing_frequency()

# Print the 20 most frequent tags and their meanings
print("20 most frequent tags and their meanings:")
for tag, frequency in tags_in_decreasing_frequency[:20]:
    print(f"Tag: {tag}, Frequency: {frequency}")
    nltk.help.upenn_tagset(tag)
    print()


[nltk_data] Downloading package brown to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package brown is already up-to-date!


20 most frequent tags and their meanings:
Tag: NN, Frequency: 152470
NN: noun, common, singular or mass
    common-carrier cabbage knuckle-duster Casino afghan shed thermostat
    investment slide humour falloff slick wind hyena override subhumanity
    machinist ...

Tag: IN, Frequency: 120557
IN: preposition or conjunction, subordinating
    astride among uppon whether out inside pro despite on by throughout
    below within for towards near behind atop around if like until below
    next into if beside ...

Tag: AT, Frequency: 97959
No matching tags found.

Tag: JJ, Frequency: 64028
JJ: adjective or numeral, ordinal
    third ill-mannered pre-war regrettable oiled calamitous first separable
    ectoplasmic battery-powered participatory fourth still-to-be-named
    multilingual multi-disciplinary ...

Tag: ., Frequency: 60638
.: sentence terminator
    . ! ?

Tag: ,, Frequency: 58156
,: comma
    ,

Tag: NNS, Frequency: 55110
NNS: noun, common, plural
    undergraduates scotches bric

In [14]:
import nltk
import re

def is_valid_tag(tag):
    # Regular expression to match valid tags (uppercase letters and symbols)
    return re.match(r'^[A-Z$-]+$', tag) is not None

def find_nouns_following_tags():
    # Load the Brown corpus
    nltk.download('brown')
    corpus = nltk.corpus.brown.tagged_words()

    # Create a dictionary to store nouns following tags
    nouns_following_tags = {}

    # Iterate over the tagged words in the corpus
    for i in range(len(corpus) - 1):
        # Check if the current word is a noun
        if corpus[i][1].startswith('N'):
            # Get the tag of the following word
            following_tag = corpus[i+1][1]
            # Add the following tag to the dictionary if it's valid
            if is_valid_tag(following_tag):
                if following_tag not in nouns_following_tags:
                    nouns_following_tags[following_tag] = 1
                else:
                    nouns_following_tags[following_tag] += 1

    return nouns_following_tags

# Get the nouns following tags
nouns_following_tags = find_nouns_following_tags()

# Sort the dictionary by frequency
sorted_nouns_following_tags = {k: v for k, v in sorted(nouns_following_tags.items(), key=lambda item: item[1], reverse=True)}

# Print the results
print("Tags most commonly found after nouns and their frequencies:")
for tag, frequency in sorted_nouns_following_tags.items():
    try:
        meaning = nltk.help.upenn_tagset(tag)
        print(f"Tag: {tag}, Frequency: {frequency}, Meaning: {meaning}")
    except AttributeError:
        print(f"Tag: {tag}, Frequency: {frequency}, Meaning: Not Found")


[nltk_data] Downloading package brown to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package brown is already up-to-date!


Tags most commonly found after nouns and their frequencies:
IN: preposition or conjunction, subordinating
    astride among uppon whether out inside pro despite on by throughout
    below within for towards near behind atop around if like until below
    next into if beside ...
Tag: IN, Frequency: 60188, Meaning: None
NN: noun, common, singular or mass
    common-carrier cabbage knuckle-duster Casino afghan shed thermostat
    investment slide humour falloff slick wind hyena override subhumanity
    machinist ...
Tag: NN, Frequency: 16824, Meaning: None
CC: conjunction, coordinating
    & 'n and both but either et for less minus neither nor or plus so
    therefore times v. versus vs. whether yet
Tag: CC, Frequency: 16080, Meaning: None
No matching tags found.
Tag: NP, Frequency: 9661, Meaning: None
VBD: verb, past tense
    dipped pleaded swiped regummed soaked tidied convened halted registered
    cushioned exacted snubbed strode aimed adopted belied figgered
    speculated wore appr