Commit 4bcd2db: initial reddit_top import
pkrumins committed Nov 29, 2009 (0 parents)

Showing 3 changed files with 1,016 additions and 0 deletions.
195 changes: 195 additions & 0 deletions pyredditstories.py
@@ -0,0 +1,195 @@
#!/usr/bin/python
#
# Peteris Krumins (peter@catonmat.net)
# http://www.catonmat.net -- good coders code, great reuse
#
# Released under GNU GPL
#
# Developed as a part of reddit top program.
# Read how it was designed:
# http://www.catonmat.net/blog/follow-reddit-from-the-console
#

import sys
import time
import socket
import urllib2
import simplejson as json
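# simplejson is the one external dependency; it can be installed with
# `easy_install simplejson` (see readme.txt).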

version = "1.0"

reddit_url = 'http://www.reddit.com/'
subreddit_url = 'http://www.reddit.com/r/%s/'

class RedesignError(Exception):
    """
    Raised when Reddit appears to have been redesigned.
    """
    pass

class SeriousError(Exception):
    """
    Raised when something unexpected happens.
    """
    pass

class Story(dict):
"""
Encapsulates the information about a single Reddit story.
After the object is constructed it contains the following attributes:
* position
* reddit_name
* id
* title
* url
* user
* score
* human_time
* unix_time
* comments
"""

    def __repr__(self):
        fields = (self.position, str(self.reddit_name), str(self.id),
                  str(self.title), str(self.url), str(self.user),
                  self.score, str(self.human_time), self.unix_time,
                  self.comments)
        inner = ', '.join(repr(x) for x in fields)
        return ''.join(('{', inner, '}'))

def stories_per_page():
""" Returns stories per single web page """
return 25

def get_stories(subreddit='front_page', pages=1, new=False):
"""
    Finds all stories across 'pages' pages of 'subreddit' and returns a
    list of Story objects representing them.
If the 'subreddit' is 'front_page' gets stories from http://www.reddit.com/
Otherwise gets stories from http://www.reddit.com/r/<subreddit>/
If 'new' is True, gets new stories from http://www.reddit.com/new/
If 'new' is True and 'subreddit' is set, gets stories from
http://www.reddit.com/r/<subreddit>/new/
"""

stories = []
if subreddit == 'front_page':
url = reddit_url
else:
url = subreddit_url % subreddit
if new: url += 'new/'
url += '.json'
base_url = url

for i in range(pages):
content = _get_page(url)
entries = _extract_stories(content)
stories.extend(entries)
url = _get_next_page(content, base_url)
if not url:
break

for pos, story in enumerate(stories):
story.position = pos+1
story.reddit_name = subreddit

    return stories

def _extract_stories(content):
"""
Given a Reddit JSON page, extract stories and return a list of Story objects
"""

stories = []
reddit_json = json.loads(content)

items = reddit_json['data']['children']
for pos, item in enumerate(items):
data = item['data']

story = Story()
story.id = data['id']
story.title = data['title']
story.url = data['url']
story.user = data['author']
story.score = int(data['score'])
story.unix_time = int(data['created_utc'])
story.human_time = time.ctime(story.unix_time)
story.comments = int(data['num_comments'])

stories.append(story)

return stories

def _get_page(url, timeout=10):
    """ Gets and returns the web page at 'url', timing out after 'timeout' seconds. """

    # setdefaulttimeout() returns None, so the old value has to be read
    # with getdefaulttimeout() before it is changed.
    old_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)

    request = urllib2.Request(url)
    request.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)')

    try:
        response = urllib2.urlopen(request)
        content = response.read()
    except (urllib2.HTTPError, urllib2.URLError, socket.error, socket.sslerror), e:
        raise SeriousError, e
    finally:
        socket.setdefaulttimeout(old_timeout)

    return content

def _get_next_page(content, base_url):
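    """
    Given a Reddit JSON page and the base URL it came from, returns the
    URL of the next page, or None if this was the last page.
    """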
reddit_json = json.loads(content)
after = reddit_json['data']['after']
if after:
return base_url + '?after=' + after

def print_stories_paragraph(stories):
"""
    Given a list of Story objects, prints each one as a paragraph of 'key: value' lines.
"""

for story in stories:
print 'position:', story.position
print 'reddit_name:', story.reddit_name.encode('utf-8')
print 'id:', story.id
print 'title:', story.title.encode('utf-8')
print 'url:', story.url.encode('utf-8')
print 'score:', story.score
print 'comments:', story.comments
print 'user:', story.user.encode('utf-8')
print 'unix_time:', story.unix_time
print 'human_time:', story.human_time
print

if __name__ == '__main__':
from optparse import OptionParser

description = "A program by Peteris Krumins (http://www.catonmat.net)"
usage = "%prog [options]"

parser = OptionParser(description=description, usage=usage)
parser.add_option("-s", action="store", dest="subreddit", default="front_page",
help="Subreddit to retrieve stories from. Default: front_page.")
parser.add_option("-p", action="store", type="int", dest="pages",
default=1, help="How many pages of stories to output. Default: 1.")
parser.add_option("-n", action="store_true", dest="new",
help="Retrieve new stories. Default: nope.")
options, args = parser.parse_args()

try:
stories = get_stories(options.subreddit, options.pages, options.new)
except RedesignError, e:
print >>sys.stderr, "Reddit has redesigned: %s!" % e
sys.exit(1)
except SeriousError, e:
print >>sys.stderr, "Serious error: %s!" % e
sys.exit(1)

print_stories_paragraph(stories)
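
The module can also be used as a library from other Python code. A minimal
sketch (the 'programming' subreddit and the top-5 cutoff are arbitrary
choices for illustration):

    # Fetch two pages of programming stories and print the five
    # highest-scored ones.
    import pyredditstories

    stories = pyredditstories.get_stories(subreddit='programming', pages=2)
    stories.sort(key=lambda s: s.score, reverse=True)
    for story in stories[:5]:
        print '%5d  %s' % (story.score, story.title.encode('utf-8'))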

107 changes: 107 additions & 0 deletions readme.txt
@@ -0,0 +1,107 @@
This is the Reddit Top program. It's a top-like program for monitoring stories
on reddit.com from the console.

It was written by Peteris Krumins (peter@catonmat.net).
His blog is at http://www.catonmat.net -- good coders code, great reuse.

The code is licensed under the GNU GPL license.

The code was written as a part of the article "Follow Reddit from the Console"
on catonmat.net. The whole article can be read at:

http://www.catonmat.net/blog/follow-reddit-from-the-console/

Some parts of the code are explained in another article, "How Reddit Top and
Hacker Top Programs Were Made". It can be read here:

http://www.catonmat.net/blog/how-reddit-top-and-hacker-top-programs-were-made/

------------------------------------------------------------------------------

Table of contents:

[1] The Reddit Top program.
[2] Program's usage.
[3] Keyboard shortcuts.
[4] Future TODO improvements.


[1]-The-Reddit-Top-program----------------------------------------------------

This program monitors Reddit (http://reddit.com) for new stories and
displays them in the console via ncurses.

The program is written in the Python programming language and is meant to
be run on Unix-type operating systems, such as Linux.

It uses one external Python module:

  * simplejson - for parsing the JSON story data that Reddit returns.
It can be installed via `easy_install simplejson` command or can be
downloaded from http://undefined.org/python/#simplejson

See my original article for a screenshot:

http://www.catonmat.net/blog/follow-reddit-from-the-console/


[2]-Reddit-Top-usage----------------------------------------------------------

Usage: ./reddit_top.py [-h|--help] - displays help message

Usage: ./reddit_top.py [-s|--subreddit subreddit]
[-i|--interval interval]
[-u|--utf8 <on|off>]
[-n|--new]

-s or --subreddit specifies which subreddit to monitor.
The default is Reddit's front page - http://www.reddit.com
Some examples are 'programming', 'science', 'wtf', 'linux' and others.
See http://www.reddit.com/reddits/ for all the possible subreddits!

-i or --interval specifies refresh interval.
The default refresh interval is 1 minute. Here are a few
examples: 10s (10 seconds), 12m (12 minutes), 2h (2 hours).

-u or --utf8 turns on utf8 output mode.
    Default: off. Use this only if you know for sure that your terminal
    supports UTF-8; otherwise the output might turn into garbage.

-n or --new follows only the newest (just submitted) reddit stories.
Default: follow front page stories.
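
For example, to follow the newest programming stories, refreshing every
30 seconds:

    ./reddit_top.py -s programming -n -i 30s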


[3]-Keyboard-shortcuts--------------------------------------------------------

q - quits the program.
u - forces an update of the stories.
m - changes the display mode.
up/down arrows (or j/k) - scroll the story list up or down.


[4]-Future-TODO-improvements--------------------------------------------------

* Add a feature to open a story in a web browser. (Someone suggested using
  the webbrowser module; see the sketch after this list.)

* Fix it to work on Windows. (Perhaps try the Console module)

* Merge it with the "Hacker Top" program (see below) and create a "Social
  Top" program. Then write plugins for Digg and other websites.

Hacker Top is here:
http://www.catonmat.net/blog/follow-hacker-news-from-the-console/

* Add the ability to log in and vote for favorite stories.
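
A minimal sketch of the browser-opening idea from the first TODO item,
using the standard webbrowser module (the open_story() helper name is a
hypothetical choice):

    # Opens a story's URL in the user's default web browser.
    # webbrowser is in the Python standard library.
    import webbrowser

    def open_story(story):
        webbrowser.open(story.url)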


------------------------------------------------------------------------------


Have fun using it!


Sincerely,
Peteris Krumins
http://www.catonmat.net
