## Count Words

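Use the code below to count the words in the Markdown cells of a notebook or the visible words in an HTML document. If `nbformat` or `bs4` is not installed, run `%pip install nbformat` or `%pip install beautifulsoup4` (the package that provides `bs4`) in a cell first.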

In [6]:
import nbformat
from bs4 import BeautifulSoup
import re

### Count Words in Markdown Cells in a Notebook

In [7]:
# Part 1: Count words in Markdown cells of a Jupyter Notebook
def count_markdown_words(notebook_path):
    """Count whitespace-separated words in a notebook's Markdown cells.

    The file at ``notebook_path`` is opened as UTF-8 text and parsed with
    ``nbformat.read(..., as_version=4)``. For every cell whose ``cell_type``
    is ``"markdown"``, the cell ``source`` is split on whitespace and the
    number of resulting tokens is added to the total. No Markdown rendering
    is done, so a markup token that stands alone between spaces (for example
    a heading's leading ``#``) is counted like any other word.

    Args:
        notebook_path: Path to the ``.ipynb`` file to inspect.

    Returns:
        The total token count across all Markdown cells (0 if there are none).
        The same number is also printed in a one-line summary.
    """
    with open(notebook_path, "r", encoding="utf-8") as notebook_file:
        notebook = nbformat.read(notebook_file, as_version=4)

    total_words = 0
    for cell in notebook["cells"]:
        # Code and raw cells are not part of the written report text.
        if cell["cell_type"] != "markdown":
            continue
        total_words += len(cell["source"].split())

    print(f"Total words in Markdown cells: {total_words}")
    return total_words


In [8]:
# Count the words in the Markdown cells of the homework report notebook.
# The function prints a summary line and also returns the total, which the
# notebook then displays as this cell's output value.
count_markdown_words('./Homework_09_Report.ipynb')

Total words in Markdown cells: 32


32

### Count Visible Words in an HTML File

In [10]:
def count_visible_words(html_path):
    """Count whitespace-separated words in the text of an HTML file.

    The file at ``html_path`` is opened as UTF-8 text and parsed with
    BeautifulSoup's ``"html.parser"``. All ``<script>`` and ``<style>``
    elements are removed from the parsed tree, the remaining text is
    extracted with a single space between text nodes, whitespace runs are
    collapsed, and the resulting tokens are counted. "Visible" here means
    only that script and style contents are excluded; text from every other
    element is counted.

    Args:
        html_path: Path to the HTML file to inspect.

    Returns:
        The number of whitespace-separated tokens in the extracted text.
        The same number is also printed in a one-line summary.
    """
    with open(html_path, "r", encoding="utf-8") as html_file:
        document = BeautifulSoup(html_file, "html.parser")

    # Script and style bodies are code, not prose, so drop them before
    # extracting any text.
    for hidden_tag in document.find_all(["script", "style"]):
        hidden_tag.decompose()

    # A space separator keeps words in adjacent elements from fusing together.
    raw_text = document.get_text(separator=" ")
    normalized_text = re.sub(r'\s+', ' ', raw_text).strip()
    word_count = len(normalized_text.split())

    print(f"Total visible words in HTML: {word_count}")
    return word_count

In [11]:
# Count the words in the report's HTML file; the function prints a summary
# line and returns the count, which is displayed as this cell's output value.
count_visible_words('./Homework_09_Report.html')


Total visible words in HTML: 33


33