Permalink
Browse files

Refactored code and cleaned up output

  • Loading branch information...
1 parent e6c01f4 commit a245966990ec06e981555aee2b3163545aa87b3b @ptwobrussell committed Dec 23, 2010
Showing with 18 additions and 8 deletions.
  1. +18 −8 python_code/the_tweet__count_retweets.py
@@ -3,10 +3,17 @@
import sys
import couchdb
from couchdb.design import ViewDefinition
+from prettytable import PrettyTable
-server = couchdb.Server('http://localhost:5984')
DB = sys.argv[1]
-db = server[DB]
+
+try:
+ server = couchdb.Server('http://localhost:5984')
+ db = server[DB]
+except couchdb.http.ResourceNotFound, e:
+ print """CouchDB database '%s' not found.
+Please check that the database exists and try again.""" % DB
+ sys.exit(1)
if len(sys.argv) > 2 and sys.argv[2].isdigit():
FREQ_THRESHOLD = int(sys.argv[2])
@@ -15,7 +22,6 @@
# Map entities in tweets to the docs that they appear in
-
def entityCountMapper(doc):
if doc.get('text'):
import re
@@ -39,15 +45,19 @@ def summingReducer(keys, values, rereduce):
reduce_fun=summingReducer, language='python')
view.sync(db)
-# Print out a nicely formatted table. Sorting by value in the client is cheap and easy
+# Sorting by value in the client is cheap and easy
# if you're dealing with hundreds or low thousands of tweets
entities_freqs = sorted([(row.key, row.value) for row in
db.view('index/retweet_entity_count_by_doc',
- group=True)], key=lambda x: x[1])
+ group=True)], key=lambda x: x[1], reverse=True)
+
+fields = ['Entity', 'Count']
+pt = PrettyTable(fields=fields)
+[pt.set_field_align(f, 'l') for f in fields]
-print 'Entity'.ljust(100), 'Count'.rjust(5)
-print '-' * 110
for (entity, freq) in entities_freqs:
if freq > FREQ_THRESHOLD and entity != '@':
- print entity.ljust(100), str(freq).rjust(5)
+ pt.add_row([entity, freq])
+
+pt.printt()

0 comments on commit a245966

Please sign in to comment.