# Quantifying Shakespeare
Analyzing the frequencies of different letters in Shakespeare's work.

Data from: bit.ly/shakespeare-txt

In this Notebook, we will
1. Read all of Shakespeare's works into a list of strings.
2. Count the frequencies of letters used.
3. Visualize the frequencies with a bar graph.

In [1]:
from io import TextIOWrapper

def read_lines(filename: str) -> list[str]:
    """Read a .txt file into a list of strings for each line."""
    lines: list[str] = []
    file_handle: TextIOWrapper = open(filename, "r")
    for line in file_handle:
        # strip leading and trailing whitespace: "    a dog    "  -> "a dog"
        line = line.strip()
        # make everything lowercase
        line = line.lower
        lines.append(line)
    file_handle.close()
    return lines

shakes_lines: list[str] = read_lines("../data/t8.shakespeare.txt")
print(len(shakes_lines))


124456


In [2]:
def tally(counts: dict[str, int], key:str) -> None:
    """Counts the number of letters in a line."""
    if key in counts:
        counts[key] += 1
    else:
        counts[key] = 1

In [3]:
d: dict[str, int] = {}
tally(d, "a")
#d = {"a": 1}
print(d)
tally(d, "a")
tally(d, "b")
print(d)
#d = {"a": 2, "b": 1}

{'a': 1}
{'a': 2, 'b': 1}


In [4]:
def count_letters(lines: list[str]) -> dict[str, int]:
    """Loops through every character and tallys how may of each letter there is inside the text."""
    counts: dict[str, int] = {}
    for line in lines:
        for char in line:
            if char.isalpha():
                tally(counts, char)
    return counts

shakes_letters: dict[str, int] = count_letters(shakes_lines)
print(shakes_letters)

TypeError: 'builtin_function_or_method' object is not iterable

# Sorting in Lists and Dictionaries


In [None]:
# Get list of items of a dictionary
shakes_letters_list = list(shakes_letters.items())

shakes_letters_list = sorted(shakes_letters_list)

shakes_letters_sorted = dict(shakes_letters_list)
shakes_letters_sorted


# Visualize Data in Bar Graph

In [None]:
from matplotlib import pyplot

pyplot.title("Letter Frequencies")
pyplot.xlabel("Letters")
pyplot.ylabel("Frequency")
x_values: list[str] = shakes_letters_sorted.keys() #my letters
y_values: list[int] = shakes_letters_sorted.values() # my letter counts
pyplot.bar(x_values, y_values)