Permalink
Browse files

Merge pull request #6 from joemcmahon/master

Sort threads by URL, then time, to better group conversations.
  • Loading branch information...
2 parents 4095694 + f160619 commit c823fac8d3526bf030b4d5cf12c06db1f0778a2c @lehrblogger committed Jan 4, 2013
Showing 1 changed file with 2 additions and 2 deletions.
  1. +2 −2 okc_arrow_fetcher.py
@@ -98,7 +98,7 @@ def strptime(self, string, format='%b %d, %Y – %I:%M%p'):
return datetime.strptime(string.strip(), format)
def write_messages(self, file_name):
- self.messages.sort(key = lambda message: message.timestamp) # sort by time
+ self.messages.sort(key = lambda message: (message.thread_url, message.timestamp)) # sort by thread URL, then time
f = codecs.open(file_name, encoding='utf-8', mode='w') # ugh, otherwise i think it will try to write ascii
for message in self.messages:
print "writing message for thread: " + message.thread_url
@@ -155,7 +155,7 @@ def _fetch_thread(self, thread_url):
return message_list
# http://stackoverflow.com/questions/1765848/remove-a-tag-using-beautifulsoup-but-keep-its-contents/1766002#1766002
- def _strip_tags(self, html, invalid_tags=['a', 'span', 'strong', 'div']):
+ def _strip_tags(self, html, invalid_tags=['em', 'a', 'span', 'strong', 'div']):
soup = BeautifulSoup(html)
for tag in soup.findAll(True):
if tag.name in invalid_tags:

0 comments on commit c823fac

Please sign in to comment.