-
Notifications
You must be signed in to change notification settings - Fork 0
/
logMapper.py
42 lines (31 loc) · 880 Bytes
/
logMapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env python
#coding:gbk
import sys
# Python 2 only: switch the implicit str/unicode codec to UTF-8 (presumably so
# that printing decoded text to a pipe does not fail under the ASCII default).
# Python 3 has no builtin reload() and no sys.setdefaultencoding(), so the
# hack is skipped there.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf-8')

# Maps the 2nd whitespace-separated field of each log line (uid) to the 3rd
# field (word) of the first line on which that uid appears.
word2count = {}

# The log is read as raw bytes and each line is decoded as GBK, silently
# dropping undecodable bytes. Binary mode keeps the bytes -> text step
# explicit and identical on Python 2 and Python 3.
with open('query-log.txt', 'rb') as fread:
    # Kept for backward compatibility: the script has always created (or
    # truncated) result.txt even though results are printed to stdout.
    # The handle is closed immediately instead of being leaked.
    open('result.txt', 'w').close()

    for raw_line in fread:
        fields = raw_line.strip().decode('gbk', 'ignore').split()
        if len(fields) < 3:
            # Blank or truncated record: skip it instead of aborting the
            # whole run with an IndexError.
            continue
        uid, word = fields[1], fields[2]
        # setdefault keeps the first word stored for a uid; later words for
        # the same uid are ignored.
        word2count.setdefault(uid, word)

# Fold entries whose key has the form "<word>\t<count>" into a per-word total.
# The keys are snapshotted because the loop may insert new entries, and the
# unpacking lives inside the try so that keys without a tab-separated integer
# count are skipped (previously the unpack raised an uncaught ValueError).
# NOTE(review): keys built above come from str.split() and therefore never
# contain a tab, so this pass currently skips every entry - confirm whether
# it is still needed.
for key in list(word2count):
    try:
        word, count = key.strip().split('\t', 1)
        count = int(count)
    except ValueError:
        continue
    word2count[word] = word2count.get(word, 0) + count

# Emit one "<key> <value>" pair per line on stdout. The parenthesised
# single-argument form prints the same text on Python 2 and Python 3.
for key in word2count:
    print("%s %s" % (key, word2count[key]))