In [3]:
import nltk
from nltk.corpus import brown

try:
    nltk.data.find('corpora/brown')
except LookupError:
    nltk.download('brown')

try:
    nltk.data.find('taggers/averaged_perceptron_tagger_eng')
except LookupError:
    nltk.download('averaged_perceptron_tagger_eng')



def pos_tag_brown(text=None):
    """
    Performs Part-of-Speech (POS) tagging on text from the Brown Corpus.

    Args:
        text (str, optional): The input text. If None, uses a random sentence from Brown.

    Returns:
        list: A list of (word, tag) tuples.
        or None if there is an issue or the text is empty
    """

    if text is None:
        # Get a random sentence from the Brown Corpus
        sentences = brown.sents()
        if not sentences:  # Check if sentences is empty
            print("Brown corpus is empty or not loaded correctly.")
            return None
        
        text = " ".join(sentences[0]) # Join the first sentence for demonstration.  You can change this.


    if not text:  # Check if text is empty
        print("Input text is empty.")
        return None

    tokens = nltk.word_tokenize(text)  # Tokenize the text
    tagged_tokens = nltk.pos_tag(tokens)  # Perform POS tagging

    return tagged_tokens


# Example usage:
tagged_text = pos_tag_brown()  # Using a random sentence from Brown
if tagged_text:
    print("Tagged Text (using random sentence from Brown):")
    print(tagged_text)

# Or use your own text:
my_text = "The quick brown fox jumps over the lazy dog."
tagged_my_text = pos_tag_brown(my_text)
if tagged_my_text:
    print("\nTagged My Text:")
    print(tagged_my_text)


# Demonstrating using a specific section of the brown corpus.
# You can specify which fileids to use.  See brown.fileids() for the options.
# Here's an example of using the 'ca01' fileid.

ca01_words = brown.words(fileids=['ca01'])
ca01_text = " ".join(ca01_words[:100]) # using first 100 words. Adjust as needed.
tagged_ca01 = pos_tag_brown(ca01_text)
if tagged_ca01:
    print("\nTagged 'ca01' section:")
    print(tagged_ca01)


# Example of iterating through the tagged words and tags:
if tagged_my_text:
    for word, tag in tagged_my_text:
        print(f"Word: {word}, Tag: {tag}")

[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /Users/robbieardison/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


Tagged Text (using random sentence from Brown):
[('The', 'DT'), ('Fulton', 'NNP'), ('County', 'NNP'), ('Grand', 'NNP'), ('Jury', 'NNP'), ('said', 'VBD'), ('Friday', 'NNP'), ('an', 'DT'), ('investigation', 'NN'), ('of', 'IN'), ('Atlanta', 'NNP'), ("'s", 'POS'), ('recent', 'JJ'), ('primary', 'JJ'), ('election', 'NN'), ('produced', 'VBD'), ('``', '``'), ('no', 'DT'), ('evidence', 'NN'), ('``', '``'), ('that', 'IN'), ('any', 'DT'), ('irregularities', 'NNS'), ('took', 'VBD'), ('place', 'NN'), ('.', '.')]

Tagged My Text:
[('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('.', '.')]

Tagged 'ca01' section:
[('The', 'DT'), ('Fulton', 'NNP'), ('County', 'NNP'), ('Grand', 'NNP'), ('Jury', 'NNP'), ('said', 'VBD'), ('Friday', 'NNP'), ('an', 'DT'), ('investigation', 'NN'), ('of', 'IN'), ('Atlanta', 'NNP'), ("'s", 'POS'), ('recent', 'JJ'), ('primary', 'JJ'), ('election', 'NN'), ('produced', 'VBD'), ('``',