#!/usr/bin/python
#
# Peteris Krumins (peter@catonmat.net)
# http://www.catonmat.net -- good coders code, great reuse
#
# Released under GNU GPL
#
# Developed as a part of reddit top program.
# Read how it was designed:
# http://www.catonmat.net/blog/follow-reddit-from-the-console
#
import re
import sys
import time
import socket
import urllib2
import datetime

try:
    import json
except ImportError:
    import simplejson as json

version = "1.0"
reddit_url = 'http://www.reddit.com/'
subreddit_url = 'http://www.reddit.com/r/%s/'

class RedesignError(Exception):
    """
    An exception class thrown when it seems that Reddit has redesigned
    """
    pass

class SeriousError(Exception):
    """
    An exception class thrown when something unexpected happened
    """
    pass

class Story(dict):
    """
    Encapsulates the information about a single Reddit story.
    After the object is constructed it contains the following attributes:
    * position
    * reddit_name
    * id
    * title
    * url
    * user
    * score
    * human_time
    * unix_time
    * comments
    """
    def __repr__(self):
        inner = ', '.join([repr(x) for x in (self.position, str(self.reddit_name),
            str(self.id), str(self.title),
            str(self.url), str(self.user), self.score, str(self.human_time),
            self.unix_time, self.comments)])
        return ''.join(('{', inner, '}'))
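
# A Story behaves like a small record: the attributes listed in the docstring
# map one-to-one onto fields of Reddit's listing JSON (filled in by
# _extract_stories and get_stories below). The values here are made-up,
# purely illustrative:
#
#   >>> story.position, story.score, story.comments
#   (1, 1234, 56)
#   >>> story.title
#   u'An example story title'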

def stories_per_page():
    """ Returns stories per single web page """
    return 25

def get_stories(subreddit='front_page', pages=1, new=False):
    """
    Finds all stories across 'pages' pages on a 'subreddit' and returns a
    list of Story objects representing stories.
    If the 'subreddit' is 'front_page' gets stories from http://www.reddit.com/
    Otherwise gets stories from http://www.reddit.com/r/<subreddit>/
    If 'new' is True, gets new stories from http://www.reddit.com/new/
    If 'new' is True and 'subreddit' is set, gets stories from
    http://www.reddit.com/r/<subreddit>/new/
    """
    stories = []
    if subreddit == 'front_page':
        url = reddit_url
    else:
        url = subreddit_url % subreddit
    if new: url += 'new/'
    url += '.json'
    base_url = url
    for i in range(pages):
        content = _get_page(url)
        entries = _extract_stories(content)
        stories.extend(entries)
        url = _get_next_page(content, base_url)
        if not url:
            break
    for pos, story in enumerate(stories):
        story.position = pos+1
        story.reddit_name = subreddit
    return stories
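
# Example usage, as a sketch (a Python 2 runtime and network access assumed;
# the subreddit name 'programming' is just an illustration):
#
#   >>> stories = get_stories(subreddit='programming', pages=2)
#   >>> len(stories)          # up to 2 pages x 25 stories
#   50
#   >>> stories[0].position, stories[0].reddit_name
#   (1, 'programming')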

def _extract_stories(content):
    """
    Given a Reddit JSON page, extract stories and return a list of Story objects
    """
    stories = []
    reddit_json = json.loads(content)
    items = reddit_json['data']['children']
    for pos, item in enumerate(items):
        data = item['data']
        story = Story()
        story.id = data['id']
        story.title = data['title']
        story.url = data['url']
        story.user = data['author']
        story.score = int(data['score'])
        story.unix_time = int(data['created_utc'])
        story.human_time = time.ctime(story.unix_time)
        story.comments = int(data['num_comments'])
        stories.append(story)
    return stories
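
# The listing JSON this parser expects looks roughly like the sketch below.
# Only the fields read above are shown, everything else Reddit returns is
# ignored, and the concrete values are invented:
#
#   {
#     "data": {
#       "children": [
#         {"data": {"id": "abc123", "title": "...", "url": "...",
#                   "author": "someone", "score": 42,
#                   "created_utc": 1234567890, "num_comments": 7}},
#         ...
#       ],
#       "after": "t3_abc123"
#     }
#   }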

def _get_page(url, timeout=10):
    """ Gets and returns a web page at url with timeout 'timeout'. """
    # setdefaulttimeout() returns None, so remember the old value explicitly
    # before overriding it, and restore it on every exit path.
    old_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    request = urllib2.Request(url)
    request.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)')
    try:
        response = urllib2.urlopen(request)
        content = response.read()
    except (urllib2.HTTPError, urllib2.URLError, socket.error, socket.sslerror), e:
        socket.setdefaulttimeout(old_timeout)
        raise SeriousError, e
    socket.setdefaulttimeout(old_timeout)
    return content
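
# Example (hypothetical call; any HTTP, URL, or socket failure is surfaced
# as a SeriousError to the caller):
#
#   >>> content = _get_page('http://www.reddit.com/.json', timeout=5)
#   >>> content[:1]
#   '{'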

def _get_next_page(content, base_url):
    """
    Given a Reddit JSON page, returns the URL of the next page of the listing,
    or None if this is the last page.
    """
    reddit_json = json.loads(content)
    after = reddit_json['data']['after']
    if after:
        return base_url + '?after=' + after
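
# Pagination sketch: Reddit reports the full name of the last story in the
# listing as 'after', and the next page is fetched by passing that token back
# as a query parameter. With an invented token it would look like:
#
#   http://www.reddit.com/.json?after=t3_abc123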

def print_stories_paragraph(stories):
    """
    Given a list of Stories, prints them out paragraph by paragraph
    """
    for story in stories:
        print 'position:', story.position
        print 'reddit_name:', story.reddit_name.encode('utf-8')
        print 'id:', story.id
        print 'title:', story.title.encode('utf-8')
        print 'url:', story.url.encode('utf-8')
        print 'score:', story.score
        print 'comments:', story.comments
        print 'user:', story.user.encode('utf-8')
        print 'unix_time:', story.unix_time
        print 'human_time:', story.human_time
        print
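
# One printed paragraph looks roughly like this (all values invented):
#
#   position: 1
#   reddit_name: programming
#   id: abc123
#   title: An example story title
#   url: http://example.com/
#   score: 1234
#   comments: 56
#   user: someone
#   unix_time: 1234567890
#   human_time: Fri Feb 13 23:31:30 2009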

if __name__ == '__main__':
    from optparse import OptionParser

    description = "A program by Peteris Krumins (http://www.catonmat.net)"
    usage = "%prog [options]"

    parser = OptionParser(description=description, usage=usage)
    parser.add_option("-s", action="store", dest="subreddit", default="front_page",
        help="Subreddit to retrieve stories from. Default: front_page.")
    parser.add_option("-p", action="store", type="int", dest="pages",
        default=1, help="How many pages of stories to output. Default: 1.")
    parser.add_option("-n", action="store_true", dest="new",
        help="Retrieve new stories. Default: nope.")
    options, args = parser.parse_args()

    try:
        stories = get_stories(options.subreddit, options.pages, options.new)
    except RedesignError, e:
        print >>sys.stderr, "Reddit has redesigned: %s!" % e
        sys.exit(1)
    except SeriousError, e:
        print >>sys.stderr, "Serious error: %s!" % e
        sys.exit(1)

    print_stories_paragraph(stories)
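
# Invocation sketch (run with a Python 2 interpreter; the script name
# 'reddit_stories.py' is a placeholder for whatever this file is saved as):
#
#   $ python reddit_stories.py                      # front page, 1 page
#   $ python reddit_stories.py -s programming -p 2  # /r/programming, 2 pages
#   $ python reddit_stories.py -n                   # new stories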