# Quantifying Shakespeare
Analyzing the frequencies of different letters in Shakespeare's works.

(Data from: bit.ly/shakespeare-txt)

In this Notebook, we will: 
1. Read all of Shakespeare's works into a list of strings
2. Count the frequencies of letters used
3. Visualize the frequencies with a bar graph 

## Reading Lines 

In [None]:
from io import TextIOWrapper

def read_lines(filename: str) -> list[str]:
    """Read a text file into a list of cleaned-up lines.

    Every line has its leading and trailing whitespace removed (which also
    drops the newline) and is then converted to lowercase. Blank lines are
    kept as empty strings, so the result has one entry per line of the file.

    Args:
        filename: Path of the text file to read.

    Returns:
        The stripped, lowercase lines in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
    """
    # The with-block closes the file even if reading fails part-way through,
    # which a manual open()/close() pair would not do.
    with open(filename, "r") as file_handle:
        # " A " -> "a": strip surrounding whitespace first, then lowercase.
        return [line.strip().lower() for line in file_handle]

# Load the text file at ../data/t8.shakespeare.txt through read_lines and
# print how many lines were read.
shakespeare_lines: list[str] = read_lines("../data/t8.shakespeare.txt")
print(len(shakespeare_lines))

In [None]:
def tally(counts: dict[str, int], key: str) -> None:
    """Add one to the count stored for key, mutating counts in place.

    A key that is not yet in counts is inserted with a count of 1.
    """
    previous: int = counts.get(key, 0)
    counts[key] = previous + 1

In [None]:
# Try tally out on a scratch dictionary, printing the dictionary after each
# call; the comment above each print shows the contents expected at that point.
d: dict[str, int] = {}
tally(d, "a")
# d is now {"a": 1}
print(d)
tally(d, "a")
# d is now {"a": 2}
print(d)
tally(d, "b")
# d is now {"a": 2, "b": 1}
print(d)

In [None]:
def count_letters(lines: list[str]) -> dict[str, int]:
    """Tally how often each alphabetic character occurs across lines.

    Characters for which str.isalpha() is false (digits, punctuation,
    whitespace) are ignored. Case is not changed here, so "A" and "a" are
    counted separately unless the caller has already normalized the text.
    """
    frequencies: dict[str, int] = {}
    for text in lines:
        # Keep only the alphabetic characters of this line, then count each one.
        alphabetic = (character for character in text if character.isalpha())
        for character in alphabetic:
            tally(frequencies, character)
    return frequencies

# Count the letters in the lines loaded earlier and print the resulting
# letter -> count dictionary.
shakes_letters: dict[str, int] = count_letters(shakespeare_lines)
print(shakes_letters)

## Sorting Dictionaries

In [None]:
# Turn the letter -> count dictionary into a list of (letter, count) pairs
print(shakes_letters)
list_version_shakes: list[tuple[str, int]] = [*shakes_letters.items()]
print(list_version_shakes)

# Order the pairs; tuples compare by their first element (the letter) first
sorted_letters_list: list[tuple[str, int]] = list(list_version_shakes)
sorted_letters_list.sort()
print(sorted_letters_list)
# Rebuild a dictionary whose insertion order follows the sorted pairs
sorted_letters_dict: dict[str, int] = {
    letter: count for letter, count in sorted_letters_list
}
print(sorted_letters_dict)

In [None]:
from matplotlib import pyplot

# Split the sorted counts into parallel sequences: letters for the x-axis and
# their counts for the bar heights.
labels: list[str] = [*sorted_letters_dict]
values: list[int] = [sorted_letters_dict[letter] for letter in labels]

pyplot.title("Letters Frequency")
pyplot.xlabel("Letters")
pyplot.ylabel("Frequency")
pyplot.bar(labels, values)