Commit 4bcd2db: initial reddit_top import
pkrumins committed Nov 29, 2009 (0 parents)

Showing 3 changed files with 1,016 additions and 0 deletions.
195 changes: 195 additions & 0 deletions pyredditstories.py
@@ -0,0 +1,195 @@
#!/usr/bin/python
#
# Peteris Krumins (peter@catonmat.net)
# http://www.catonmat.net -- good coders code, great reuse
#
# Released under GNU GPL
#
# Developed as a part of reddit top program.
# Read how it was designed:
# http://www.catonmat.net/blog/follow-reddit-from-the-console
#

import sys
import time
import socket
import urllib2
import simplejson as json
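# simplejson is the one external dependency; it can be installed with
# `easy_install simplejson` (see readme.txt).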

version = "1.0"

reddit_url = 'http://www.reddit.com/'
subreddit_url = 'http://www.reddit.com/r/%s/'

class RedesignError(Exception):
    """
    Raised when Reddit appears to have been redesigned.
    """
    pass

class SeriousError(Exception):
    """
    Raised when something unexpected happens.
    """
    pass

class Story(dict):
"""
Encapsulates the information about a single Reddit story.
After the object is constructed it contains the following attributes:
* position
* reddit_name
* id
* title
* url
* user
* score
* human_time
* unix_time
* comments
"""

    def __repr__(self):
        fields = (self.position, str(self.reddit_name), str(self.id),
                  str(self.title), str(self.url), str(self.user),
                  self.score, str(self.human_time), self.unix_time,
                  self.comments)
        inner = ', '.join(repr(x) for x in fields)
        return ''.join(('{', inner, '}'))

def stories_per_page():
""" Returns stories per single web page """
return 25

def get_stories(subreddit='front_page', pages=1, new=False):
"""
    Finds all stories across 'pages' pages of 'subreddit' and returns a
    list of Story objects representing them.
If the 'subreddit' is 'front_page' gets stories from http://www.reddit.com/
Otherwise gets stories from http://www.reddit.com/r/<subreddit>/
If 'new' is True, gets new stories from http://www.reddit.com/new/
If 'new' is True and 'subreddit' is set, gets stories from
http://www.reddit.com/r/<subreddit>/new/
"""

stories = []
if subreddit == 'front_page':
url = reddit_url
else:
url = subreddit_url % subreddit
if new: url += 'new/'
url += '.json'
base_url = url

for i in range(pages):
content = _get_page(url)
entries = _extract_stories(content)
stories.extend(entries)
url = _get_next_page(content, base_url)
if not url:
break

for pos, story in enumerate(stories):
story.position = pos+1
story.reddit_name = subreddit

    return stories

def _extract_stories(content):
"""
Given a Reddit JSON page, extract stories and return a list of Story objects
"""

stories = []
reddit_json = json.loads(content)

items = reddit_json['data']['children']
for pos, item in enumerate(items):
data = item['data']

story = Story()
story.id = data['id']
story.title = data['title']
story.url = data['url']
story.user = data['author']
story.score = int(data['score'])
story.unix_time = int(data['created_utc'])
story.human_time = time.ctime(story.unix_time)
story.comments = int(data['num_comments'])

stories.append(story)

return stories

def _get_page(url, timeout=10):
    """ Gets and returns the web page at 'url', timing out after 'timeout' seconds. """

    # setdefaulttimeout() returns None, so the old value has to be read
    # with getdefaulttimeout() before it is changed.
    old_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)

    request = urllib2.Request(url)
    request.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)')

    try:
        response = urllib2.urlopen(request)
        content = response.read()
    except (urllib2.HTTPError, urllib2.URLError, socket.error, socket.sslerror), e:
        raise SeriousError, e
    finally:
        socket.setdefaulttimeout(old_timeout)

    return content

def _get_next_page(content, base_url):
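    """
    Given a Reddit JSON page and the base URL it came from, returns the
    URL of the next page, or None if this was the last page.
    """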
reddit_json = json.loads(content)
after = reddit_json['data']['after']
if after:
return base_url + '?after=' + after

def print_stories_paragraph(stories):
"""
    Given a list of Story objects, prints each one as a paragraph of 'key: value' lines.
"""

for story in stories:
print 'position:', story.position
print 'reddit_name:', story.reddit_name.encode('utf-8')
print 'id:', story.id
print 'title:', story.title.encode('utf-8')
print 'url:', story.url.encode('utf-8')
print 'score:', story.score
print 'comments:', story.comments
print 'user:', story.user.encode('utf-8')
print 'unix_time:', story.unix_time
print 'human_time:', story.human_time
print

if __name__ == '__main__':
from optparse import OptionParser

description = "A program by Peteris Krumins (http://www.catonmat.net)"
usage = "%prog [options]"

parser = OptionParser(description=description, usage=usage)
parser.add_option("-s", action="store", dest="subreddit", default="front_page",
help="Subreddit to retrieve stories from. Default: front_page.")
parser.add_option("-p", action="store", type="int", dest="pages",
default=1, help="How many pages of stories to output. Default: 1.")
parser.add_option("-n", action="store_true", dest="new",
help="Retrieve new stories. Default: nope.")
options, args = parser.parse_args()

try:
stories = get_stories(options.subreddit, options.pages, options.new)
except RedesignError, e:
print >>sys.stderr, "Reddit has redesigned: %s!" % e
sys.exit(1)
except SeriousError, e:
print >>sys.stderr, "Serious error: %s!" % e
sys.exit(1)

print_stories_paragraph(stories)
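
The module can also be used as a library from other Python code. A minimal
sketch (the 'programming' subreddit and the top-5 cutoff are arbitrary
choices for illustration):

    # Fetch two pages of programming stories and print the five
    # highest-scored ones.
    import pyredditstories

    stories = pyredditstories.get_stories(subreddit='programming', pages=2)
    stories.sort(key=lambda s: s.score, reverse=True)
    for story in stories[:5]:
        print '%5d  %s' % (story.score, story.title.encode('utf-8'))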

107 changes: 107 additions & 0 deletions readme.txt
@@ -0,0 +1,107 @@
This is the Reddit Top program. It's a top-like program for monitoring stories
on reddit.com from the console.

It was written by Peteris Krumins (peter@catonmat.net).
His blog is at http://www.catonmat.net -- good coders code, great reuse.

The code is licensed under the GNU GPL license.

The code was written as a part of the article "Follow Reddit from the Console"
on catonmat.net. The whole article can be read at:

http://www.catonmat.net/blog/follow-reddit-from-the-console/

Some parts of the code are explained in another article, "How Reddit Top and
Hacker Top Programs Were Made". It can be read here:

http://www.catonmat.net/blog/how-reddit-top-and-hacker-top-programs-were-made/

------------------------------------------------------------------------------

Table of contents:

[1] The Reddit Top program.
[2] Program's usage.
[3] Keyboard shortcuts.
[4] Future TODO improvements.


[1]-The-Reddit-Top-program----------------------------------------------------

This program monitors Reddit (http://reddit.com) for new stories and
displays them in the console via ncurses.

The program is written in the Python programming language and is meant to
be run on Unix-type operating systems, such as Linux.

It uses one external Python module:

  * simplejson - for parsing the JSON story data that Reddit returns.
It can be installed via `easy_install simplejson` command or can be
downloaded from http://undefined.org/python/#simplejson

See my original article for a screenshot:

http://www.catonmat.net/blog/follow-reddit-from-the-console/


[2]-Reddit-Top-usage----------------------------------------------------------

Usage: ./reddit_top.py [-h|--help] - displays help message

Usage: ./reddit_top.py [-s|--subreddit subreddit]
[-i|--interval interval]
[-u|--utf8 <on|off>]
[-n|--new]

-s or --subreddit specifies which subreddit to monitor.
The default is Reddit's front page - http://www.reddit.com
Some examples are 'programming', 'science', 'wtf', 'linux' and others.
See http://www.reddit.com/reddits/ for all the possible subreddits!

-i or --interval specifies refresh interval.
The default refresh interval is 1 minute. Here are a few
examples: 10s (10 seconds), 12m (12 minutes), 2h (2 hours).

-u or --utf8 turns on utf8 output mode.
    Default: off. Use this only if you know for sure that your terminal
    supports UTF-8; otherwise the output might turn into garbage.

-n or --new follows only the newest (just submitted) reddit stories.
Default: follow front page stories.
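
For example, to follow the newest programming stories, refreshing every
30 seconds:

    ./reddit_top.py -s programming -n -i 30s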


[3]-Keyboard-shortcuts--------------------------------------------------------

q - quits the program.
u - forces an update of the stories.
m - changes the display mode.
up/down arrows (or j/k) - scroll the story list up or down.


[4]-Future-TODO-improvements--------------------------------------------------

* Add a feature to open a story in a web browser. (Someone suggested using
  the webbrowser module; see the sketch after this list.)

* Fix it to work on Windows. (Perhaps try the Console module)

* Merge it with the "Hacker Top" program (see below) and create a "Social
  Top" program. Then write plugins for Digg and other websites.

Hacker Top is here:
http://www.catonmat.net/blog/follow-hacker-news-from-the-console/

* Add the ability to log in and vote for favorite stories.
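
A minimal sketch of the browser-opening idea from the first TODO item,
using the standard webbrowser module (the open_story() helper name is a
hypothetical choice):

    # Opens a story's URL in the user's default web browser.
    # webbrowser is in the Python standard library.
    import webbrowser

    def open_story(story):
        webbrowser.open(story.url)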


------------------------------------------------------------------------------


Have fun using it!


Sincerely,
Peteris Krumins
http://www.catonmat.net
