hist.py: replace token-based prefix extraction with regex truncation

Before this patch each stdin line was split on whitespace and only the
leading words were kept, stopping at the first word containing an
uppercase letter, a digit, a quote or '?' (helper token()); lines
starting with '*' were also echoed to stdout.

After this patch the helper and the echo are gone.  Each line is instead
cut at the first quote character, at the first " for", " to", " is",
" at", " from" or " then" (no word boundary, so " total" is cut as
well), at the first space followed by a digit, and at the first space
followed by 'c' plus two characters from [0-9a-f?]; trailing whitespace
(including the newline) is then stripped.  Interior whitespace is no
longer normalised the way split()/join() did.

NOTE(review): this patch was recovered from a copy whose newlines and
indentation had been collapsed.  The 4-space indentation and the exact
position of blank lines are reconstructed to satisfy the hunk header
(14 old / 14 new lines) - confirm against the real hist.py before
applying, or apply with whitespace-insensitive matching.

diff --git a/hist.py b/hist.py
index defe6f977..c4a0a8361 100644
--- a/hist.py
+++ b/hist.py
@@ -2,14 +2,14 @@
 from itertools import takewhile
 import sys, re
 
-def token(s):
-    if re.search(r'[A-Z0-9\'\"\?]', s): return False
-    return True
-
 t = []
 for s in sys.stdin:
-    if s[0] == '*': print s.strip()
-    t.append(' '.join(list(takewhile(token, s.split()))))
+    s = re.sub(r'''['"].*''', '', s)
+    s = re.sub(r' (for|to|is|at|from|then).*', '', s)
+    s = re.sub(r' [0-9].*', '', s)
+    s = re.sub(r' c[0-9a-f\?]{2}.*', '', s)
+    s = re.sub(r'\s+$', '', s)
+    t.append(s)
 
 for s, c in Counter(t).most_common():
     print '%d\t"%s"' % (c, s)