-
Notifications
You must be signed in to change notification settings - Fork 61
/
doit3.py
64 lines (52 loc) · 2.6 KB
/
doit3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# examples taken from http://joshh.ug/SRPC/assignments/assignment3.html
# datafiles taken from http://joshh.ug/SRPC/assignments/datafiles3.zip
# for this one (but not #4) we need all_words.tsv
# with a few typo's fixed, as indicated below
# <mise-en-place>
# needed imports from 2 solution modules
# 1.
# def read_entire_wfile(wfile): Returns the counts and years for all words
# def read_wfile(word, year_range, wfile): Returns the counts and years for the word
# def read_total_counts(tfile): Returns the total number of words
import one_gram_reader
from one_gram_reader import *
# 2.
# def total_occurrences(word_data, word): Returns total occurrences of word
# def count_letters(word_data): Returns a list of length 26 corresponding to letter freqs
# def bar_plot_of_letter_frequencies(word_data) Plots frequencies of letters
### Task 1: Plotting letter frequencies
# def plot_aggregate_counts(word_data, words): Plots distribution of word frequencies, and annotates words
### Task 2: Plotting aggregate word counts
# def get_occurences_in_year(word_data, word, year): Gets number of occurrences of word during year specified
# def get_average_word_length(word_data, year): Gets the average length of all words from year specified
# def plot_average_word_length(word_data, year_range):Make a plot of average word length of year range specified
### Task 3: Plotting average word lengths vs. time
# def normalize_counts(years, counts, total): Returns the normalized count
# def plot_words(words, year_range, wfile, tfile): Plots the relative popularity of words over range specified
import one_gram_plotter
from one_gram_plotter import *
# </mise-en-place>
# example1
years, counts = read_wfile("request", [2005, 2007], "very_short.tsv")
print(years)
print(counts)
# example2
total_counts = read_total_counts("total_counts.csv")
print(total_counts[1507])
print(total_counts[1525])
# example3
## import one_gram_reader
years, counts = one_gram_reader.read_wfile("request", [2000, 2010], "very_short.tsv")
total = one_gram_reader.read_total_counts("total_counts.csv")
print(years)
first_observed_year = years[0]
print(first_observed_year)
print(counts[0])
print(total[first_observed_year])
normalized_counts = normalize_counts(years, counts, total)
print(normalized_counts[0]) #equal to 646179 / 14425183957
# example4
## SLOW:
## plot_words(["horse", "fish", "dog"], [1800, 2000], "all_words.tsv", "total_counts.csv")
# example5
plot_words(["horse", "fish", "pain", "joy"], [1800, 2000], "all_words.tsv", "total_counts.csv")