/
frequencies.py
60 lines (51 loc) · 2.01 KB
/
frequencies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env python3
from util import map_colors_serial
import collections
import json
import logging
import pickle
# Module-level logger named 'frequencies'; it emits DEBUG and above.
log = logging.getLogger('frequencies')
log.setLevel(logging.DEBUG)
# A StreamHandler built without arguments writes to sys.stderr.
# NOTE: the handler is attached at import time, so importing this module
# is enough to turn on debug output for the 'frequencies' logger.
streamHandler = logging.StreamHandler()
log.addHandler(streamHandler)
# Not referenced in the visible part of this file; presumably the input
# consumed by util.map_colors_serial -- TODO confirm.
COLORS_FILE = 'data/colors.txt'
# Output paths used by write_to_files(): a tab-separated text report and a
# pickled Counter for each of the two frequency measures.
FREQ_BY_PIXEL_COUNT_TXT = 'data/freq_by_pixel_count.txt'
FREQ_BY_PIXEL_COUNT_PKL = 'data/freq_by_pixel_count.pkl'
FREQ_BY_OCCURRENCE_TXT = 'data/freq_by_occurrence.txt'
FREQ_BY_OCCURRENCE_PKL = 'data/freq_by_occurrence.pkl'
def get_frequencies():
    """Tally color frequencies over everything map_colors_serial visits.

    A callback is handed to ``map_colors_serial``; it receives ``(url,
    colors)`` and, for each entry in ``colors``, uses ``entry[1]``
    (converted to a tuple) as the color key and ``entry[0]`` as the amount
    added to the pixel-count total.
    NOTE(review): how often and with what data ``map_colors_serial``
    invokes the callback is defined in ``util`` -- presumably once per
    indexed URL; confirm there.

    Returns:
        A ``(by_pixel_count, by_occurrence)`` pair of ``Counter`` objects
        keyed by color tuple: the first sums ``entry[0]`` per color, the
        second counts how many entries mentioned the color.
    """
    pixel_totals = collections.Counter()
    occurrence_totals = collections.Counter()

    def tally(url, colors):
        # Both counters are updated together, so they always share keys.
        for entry in colors:
            rgb = tuple(entry[1])  # tuple() makes the color hashable
            weight = entry[0]
            pixel_totals[rgb] += weight
            occurrence_totals[rgb] += 1
        log.debug('Indexed {}'.format(url))

    map_colors_serial(tally)
    return pixel_totals, occurrence_totals
def load_freq_by_pixel_count():
    """Return the object unpickled from ``FREQ_BY_PIXEL_COUNT_PKL``.

    ``write_to_files`` stores the pixel-count ``Counter`` at that path.
    Unpickling can execute arbitrary code, so the file must be trusted.

    Raises:
        OSError: if the pickle file cannot be opened (e.g. it has not been
            generated yet).
    """
    with open(FREQ_BY_PIXEL_COUNT_PKL, mode='rb') as pkl_file:
        counts = pickle.load(pkl_file)
    return counts
def load_freq_by_occurrence():
    """Return the object unpickled from ``FREQ_BY_OCCURRENCE_PKL``.

    ``write_to_files`` stores the occurrence ``Counter`` at that path.
    Unpickling can execute arbitrary code, so the file must be trusted.

    Raises:
        OSError: if the pickle file cannot be opened (e.g. it has not been
            generated yet).
    """
    with open(FREQ_BY_OCCURRENCE_PKL, mode='rb') as pkl_file:
        counts = pickle.load(pkl_file)
    return counts
def write_to_files():
    """Compute the color frequencies and persist them as text and pickle.

    Four files are written, in this order: the pixel-count text report,
    the occurrence text report, the pixel-count pickle and the occurrence
    pickle. Each text report has one ``<color>\\t<value>`` line per color.
    A debug message is logged after each file.

    Both text reports list colors in the same order: ascending pixel
    count.
    NOTE(review): the occurrence report is therefore not sorted by
    occurrence -- confirm that the shared ordering is intended.
    """
    pixel_totals, occurrence_totals = get_frequencies()
    # sorted() is stable, so colors with equal pixel counts keep the
    # Counter's insertion order.
    ordered_colors = sorted(pixel_totals, key=pixel_totals.get)

    def dump_text(path, counts):
        # One tab-separated line per color, in the shared ordering.
        with open(path, 'w') as out:
            for color in ordered_colors:
                out.write('{}\t{}\n'.format(color, counts[color]))
        log.debug('Wrote {}...'.format(path))

    def dump_pickle(path, counts):
        # The whole Counter is pickled for the load_freq_* functions.
        with open(path, 'wb') as out:
            pickle.dump(counts, out)
        log.debug('Wrote {}...'.format(path))

    dump_text(FREQ_BY_PIXEL_COUNT_TXT, pixel_totals)
    dump_text(FREQ_BY_OCCURRENCE_TXT, occurrence_totals)
    dump_pickle(FREQ_BY_PIXEL_COUNT_PKL, pixel_totals)
    dump_pickle(FREQ_BY_OCCURRENCE_PKL, occurrence_totals)
# Script entry point: regenerate every frequency file when run directly.
if __name__ == '__main__':
    write_to_files()