<a href="https://colab.research.google.com/github/rahul0772/python-ml-ai-relearning/blob/main/Python%20Basics/day_50_word_analyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ==========================================
# ðŸ§  ADVANCED INTERMEDIATE PYTHON PROBLEM
# WORD FREQUENCY ANALYZER
# ==========================================

# ---------------------------------------------------------
# STEP 1: DEFINE A FUNCTION
# ---------------------------------------------------------
# A function is like a MACHINE.
# You give it something â†’ it works â†’ it gives result back.

def word_frequency_analyzer(text):
    """
    This function takes a paragraph (text),
    cleans it,
    counts word frequencies,
    and returns a sorted list of words by frequency.
    """

    # -----------------------------------------------------
    # STEP 2: MAKE TEXT LOWERCASE
    # -----------------------------------------------------
    # Why?
    # Because "Apple" and "apple" should be counted same.
    # lower() converts ALL letters to small letters.
    text = text.lower()

    # -----------------------------------------------------
    # STEP 3: REMOVE PUNCTUATION
    # -----------------------------------------------------
    # Punctuation means symbols like:
    # . , ! ? ; :
    # We donâ€™t want them because:
    # "apple," and "apple" should be same word.

    cleaned_text = ""  # empty string to build clean text

    # Loop through every character in text
    for char in text:

        # isalpha() â†’ checks if character is a letter (a-z)
        # or space " "
        if char.isalpha() or char == " ":
            cleaned_text += char  # keep it

        # else:
        # we ignore punctuation completely

    # -----------------------------------------------------
    # STEP 4: SPLIT INTO WORDS
    # -----------------------------------------------------
    # split() breaks sentence into list of words
    words = cleaned_text.split()

    # Example:
    # "i love python"
    # becomes ["i", "love", "python"]

    # -----------------------------------------------------
    # STEP 5: CREATE EMPTY DICTIONARY
    # -----------------------------------------------------
    # Dictionary stores:
    # word â†’ number of times it appears

    word_count = {}

    # -----------------------------------------------------
    # STEP 6: COUNT WORD FREQUENCIES
    # -----------------------------------------------------
    # Loop through each word
    for word in words:

        # If word already exists in dictionary
        if word in word_count:
            word_count[word] += 1  # increase count

        else:
            word_count[word] = 1  # first time seeing word

    # Now dictionary looks like:
    # {"python": 3, "is": 2, "great": 1}

    # -----------------------------------------------------
    # STEP 7: SORT WORDS BY FREQUENCY
    # -----------------------------------------------------
    # sorted() sorts things
    # items() gives (key, value) pairs
    # key=lambda item: item[1]
    # means: sort by the COUNT (value)

    sorted_words = sorted(
        word_count.items(),
        key=lambda item: item[1],  # sort using frequency
        reverse=True               # biggest first
    )

    # -----------------------------------------------------
    # STEP 8: RETURN RESULT
    # -----------------------------------------------------
    return sorted_words


# ==========================================
# TEST OUR FUNCTION
# ==========================================

paragraph = """
Python is amazing. Python is powerful.
Python is easy to learn and powerful to use.
Learning Python makes coding powerful and fun.
"""

# Call our function
result = word_frequency_analyzer(paragraph)

# Print results
print("Word Frequencies (Highest First):")
print("-----------------------------------")

for word, count in result:
    print(word, "â†’", count)

# ==========================================
# EXTRA: FIND MOST COMMON WORD
# ==========================================

most_common_word = result[0]  # first element (highest frequency)

print("\nMost common word:")
print(most_common_word[0], "appears", most_common_word[1], "times")

Word Frequencies (Highest First):
-----------------------------------
python â†’ 3
is â†’ 3
to â†’ 2
and â†’ 2
powerful â†’ 2
amazing â†’ 1
powerfulpython â†’ 1
easy â†’ 1
learn â†’ 1
uselearning â†’ 1
makes â†’ 1
coding â†’ 1
fun â†’ 1

Most common word:
python appears 3 times
