
This module contains a code example related to<br>
Think Python, 2nd Edition<br>
by Allen Downey<br>
http://thinkpython2.com<br>
Copyright 2015 Allen Downey<br>
License: http://creativecommons.org/licenses/by/4.0/<br>


In [None]:
from __future__ import print_function, division

In [None]:
import sys

In [None]:
import matplotlib.pyplot as plt

In [None]:
from analyze_book1 import process_file

In [None]:
def rank_freq(hist):
    """Pairs every frequency in a histogram with its rank.

    Rank 1 goes to the largest frequency; equal frequencies receive
    consecutive ranks.

    hist: map from word to frequency

    returns: list of (rank, freq) tuples, in order of increasing rank
    """
    # largest frequency first, so list position + 1 is the rank
    ordered = sorted(hist.values(), reverse=True)

    # number the frequencies starting from 1
    return list(enumerate(ordered, start=1))

In [None]:
def print_ranks(hist):
    """Prints one line per histogram entry: its rank, then its frequency.

    Lines appear in the order produced by rank_freq.

    hist: map from word to frequency
    """
    for pair in rank_freq(hist):
        # pair is (rank, freq); print puts a single space between them
        print(*pair)

In [None]:
def plot_ranks(hist, scale='log'):
    """Draws frequency against rank on the current matplotlib figure.

    The same scale is applied to both axes.

    hist: map from word to frequency
    scale: string 'linear' or 'log'
    """
    # split the (rank, freq) pairs into two parallel sequences
    ranks, freqs = zip(*rank_freq(hist))

    # start from an empty figure
    plt.clf()

    # both axes share the requested scale
    for set_scale in (plt.xscale, plt.yscale):
        set_scale(scale)

    plt.xlabel('rank')
    plt.ylabel('frequency')
    plt.title('Zipf plot')

    # thick solid red line
    plt.plot(ranks, freqs, 'r-', linewidth=3)
    plt.show()

In [None]:
def main(script, filename='emma.txt', flag='plot'):
    """Prints or plots the rank vs. frequency data for the words in a file.

    script: first command-line argument (sys.argv[0]); not used
    filename: name of the text file to analyze
    flag: string 'print' to list the (rank, freq) pairs, or 'plot' to
          draw them; any other value prints a usage message and returns
    """
    # check the flag first, so a bad one is reported right away rather
    # than after the whole file has been read and counted
    if flag not in ('print', 'plot'):
        print('Usage: zipf.py filename [print|plot]')
        return

    hist = process_file(filename, skip_header=True)

    # either print the results or plot them
    if flag == 'print':
        print_ranks(hist)
    else:
        plot_ranks(hist)

In [None]:
if __name__ == '__main__':
    # sys.argv is unpacked positionally into main's parameters, so any
    # argument left off the command line takes main's default value
    cli_args = sys.argv
    main(*cli_args)