Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Update the existing.py with better logging and bring it down for production single core system
  • Loading branch information...
commit d75a06892e2ac3fed6f9d49a87015c6c0dca5041 1 parent 8424c21
@mitechie authored
Showing with 15 additions and 3 deletions.
  1. +15 −3 scripts/readability/existing.py
View
18 scripts/readability/existing.py
@@ -8,6 +8,7 @@
import transaction
from ConfigParser import ConfigParser
+from logging.handlers import TimedRotatingFileHandler
from os import path
from Queue import Queue
@@ -17,11 +18,13 @@
from bookie.models import Bmark
from bookie.models import Readable
-PER_TRANS = 24
+PER_TRANS = 9
LOG = logging.getLogger(__name__)
+LOG.setLevel(logging.DEBUG)
+LOG.addHandler(TimedRotatingFileHandler('existing.log', when='midnight'))
# Set up some global variables
-num_fetch_threads = 8
+num_fetch_threads = 3
def parse_args():
@@ -61,8 +64,9 @@ def fetch_content(i, q):
"""Our threaded worker to fetch the url contents"""
while True:
hash_id, url = q.get()
- print 'Q' + str(i) + ' getting content for ' + hash_id + ' ' + url
+ LOG.debug("Q%d getting content for %s %s" % (i, hash_id, url))
read = ReadUrl.parse(url)
+ LOG.debug("Q%d completed parsing for %s %s" % (i, hash_id, url))
parsed[hash_id] = read
q.task_done()
@@ -159,6 +163,13 @@ def fetch_content(i, q):
read = parsed[bmark.hash_id]
+ LOG.debug("%s: %s %d %s %s" % (
+ hashed.hash_id,
+ read.url,
+ len(read.content) if read.content else -1,
+ read.is_error(),
+ read.status_message))
+
if not read.is_image():
if not bmark.readable:
bmark.readable = Readable()
@@ -175,5 +186,6 @@ def fetch_content(i, q):
bmark.readable.status_message = read.status_message
# let's do some count/transaction maint
+ LOG.debug('COMMIT')
transaction.commit()
transaction.begin()
Please sign in to comment.
Something went wrong with that request. Please try again.