-
Notifications
You must be signed in to change notification settings - Fork 35
/
tasks.py
41 lines (31 loc) · 1.03 KB
/
tasks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# tasks.py
import os
import json
import time
import uuid
import collections
from nltk.corpus import stopwords
# English stop words, loaded once at import time from NLTK's stopwords corpus.
# NOTE(review): presumably requires the NLTK 'stopwords' corpus to be
# installed locally before this module can be imported -- confirm.
COMMON_WORDS = set(stopwords.words('english'))
# Directory (next to this file) that input text files are read from.
DATA_DIRECTORY = os.path.join(os.path.dirname(__file__), 'data')
# Directory (next to this file) that result files are written to.
OUTPUT_DIRECTORY = os.path.join(os.path.dirname(__file__), 'output')
def save_file(filename, data):
    """Write ``data`` to a uniquely named text file in OUTPUT_DIRECTORY.

    The file is named ``<filename>_<uuid4 hex>.txt`` so that repeated or
    concurrent calls for the same ``filename`` never overwrite each other.

    Args:
        filename: Prefix used for the output file name.
        data: Text to write to the file.
    """
    # The output directory may be missing (e.g. on a fresh checkout);
    # create it up front so open() does not fail with FileNotFoundError.
    os.makedirs(OUTPUT_DIRECTORY, exist_ok=True)

    random_str = uuid.uuid4().hex
    out_name = f'{filename}_{random_str}.txt'
    # Keep the file name and the file handle under separate names.
    with open(os.path.join(OUTPUT_DIRECTORY, out_name), 'w') as out_file:
        out_file.write(data)
def get_word_counts(filename):
    """Count words in a data file and save the 20 most common ones.

    Reads ``filename`` from DATA_DIRECTORY, tallies whitespace-separated
    tokens, drops every entry listed in COMMON_WORDS, and hands the 20 most
    frequent remaining words to save_file() as a JSON object. Afterwards it
    sleeps for two seconds and prints the id of the current process.

    Args:
        filename: Name of the file inside DATA_DIRECTORY to process.
    """
    source_path = os.path.join(DATA_DIRECTORY, filename)
    tally = collections.Counter()

    # Tally the tokens one line at a time.
    with open(source_path, 'r') as handle:
        for text in handle:
            tokens = text.split()
            tally.update(tokens)

    # Deleting from a Counter ignores missing keys, so stop words that never
    # occurred in the file are skipped silently.
    for stop_word in COMMON_WORDS:
        del tally[stop_word]

    # Persist the top 20 words as JSON.
    top_words = dict(tally.most_common(20))
    save_file(filename, json.dumps(top_words))

    # Simulate a long-running task.
    time.sleep(2)

    print(f'Processed {filename} with process id: {os.getpid()}')
if __name__ == '__main__':
    import sys

    # get_word_counts() needs a filename, so take the file names (relative
    # to DATA_DIRECTORY) from the command line and process each in turn.
    if not sys.argv[1:]:
        sys.exit(
            f'usage: python {os.path.basename(__file__)} '
            'FILENAME [FILENAME ...]'
        )
    for name in sys.argv[1:]:
        get_word_counts(name)