Commit 4bcd2db (0 parents): 3 changed files with 1,016 additions and 0 deletions.
#!/usr/bin/python
#
# Peteris Krumins (peter@catonmat.net)
# http://www.catonmat.net -- good coders code, great reuse
#
# Released under GNU GPL
#
# Developed as a part of reddit top program.
# Read how it was designed:
# http://www.catonmat.net/blog/follow-reddit-from-the-console
#

import re
import sys
import time
import socket
import urllib2
import datetime
import simplejson as json

version = "1.0"

reddit_url = 'http://www.reddit.com/'
subreddit_url = 'http://www.reddit.com/r/%s/'

class RedesignError(Exception):
    """
    An exception class thrown when it seems that Reddit has redesigned
    """
    pass

class SeriousError(Exception):
    """
    An exception class thrown when something unexpected happened
    """
    pass

class Story(dict):
    """
    Encapsulates the information about a single Reddit story.
    After the object is constructed it contains the following attributes:
    * position
    * reddit_name
    * id
    * title
    * url
    * user
    * score
    * human_time
    * unix_time
    * comments
    """

    def __repr__(self):
        inner = ', '.join([repr(x) for x in (self.position, str(self.reddit_name),
                          str(self.id), str(self.title),
                          str(self.url), str(self.user), self.score,
                          str(self.human_time), self.unix_time, self.comments)])
        return ''.join(('{', inner, '}'))

def stories_per_page():
    """ Returns stories per single web page """
    return 25

def get_stories(subreddit='front_page', pages=1, new=False):
    """
    Finds all stories across 'pages' pages on a 'subreddit' and returns a
    list of Story objects representing stories.
    If the 'subreddit' is 'front_page' gets stories from http://www.reddit.com/
    Otherwise gets stories from http://www.reddit.com/r/<subreddit>/
    If 'new' is True, gets new stories from http://www.reddit.com/new/
    If 'new' is True and 'subreddit' is set, gets stories from
    http://www.reddit.com/r/<subreddit>/new/
    """

    stories = []
    if subreddit == 'front_page':
        url = reddit_url
    else:
        url = subreddit_url % subreddit
    if new: url += 'new/'
    url += '.json'
    base_url = url

    for i in range(pages):
        content = _get_page(url)
        entries = _extract_stories(content)
        stories.extend(entries)
        url = _get_next_page(content, base_url)
        if not url:
            break

    for pos, story in enumerate(stories):
        story.position = pos+1
        story.reddit_name = subreddit

    return stories

def _extract_stories(content):
    """
    Given a Reddit JSON page, extract stories and return a list of Story objects
    """

    stories = []
    reddit_json = json.loads(content)

    items = reddit_json['data']['children']
    for pos, item in enumerate(items):
        data = item['data']

        story = Story()
        story.id = data['id']
        story.title = data['title']
        story.url = data['url']
        story.user = data['author']
        story.score = int(data['score'])
        story.unix_time = int(data['created_utc'])
        story.human_time = time.ctime(story.unix_time)
        story.comments = int(data['num_comments'])

        stories.append(story)

    return stories

def _get_page(url, timeout=10):
    """ Gets and returns a web page at url with timeout 'timeout'. """

    # setdefaulttimeout() returns None, so remember the previous value explicitly
    old_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)

    request = urllib2.Request(url)
    request.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)')

    try:
        response = urllib2.urlopen(request)
        content = response.read()
    except (urllib2.HTTPError, urllib2.URLError, socket.error, socket.sslerror), e:
        socket.setdefaulttimeout(old_timeout)
        raise SeriousError, e

    socket.setdefaulttimeout(old_timeout)
    return content

def _get_next_page(content, base_url):
    reddit_json = json.loads(content)
    after = reddit_json['data']['after']
    if after:
        return base_url + '?after=' + after

def print_stories_paragraph(stories):
    """
    Given a list of Stories, prints them out paragraph by paragraph
    """

    for story in stories:
        print 'position:', story.position
        print 'reddit_name:', story.reddit_name.encode('utf-8')
        print 'id:', story.id
        print 'title:', story.title.encode('utf-8')
        print 'url:', story.url.encode('utf-8')
        print 'score:', story.score
        print 'comments:', story.comments
        print 'user:', story.user.encode('utf-8')
        print 'unix_time:', story.unix_time
        print 'human_time:', story.human_time
        print   # blank line separates the story paragraphs

if __name__ == '__main__':
    from optparse import OptionParser

    description = "A program by Peteris Krumins (http://www.catonmat.net)"
    usage = "%prog [options]"

    parser = OptionParser(description=description, usage=usage)
    parser.add_option("-s", action="store", dest="subreddit", default="front_page",
        help="Subreddit to retrieve stories from. Default: front_page.")
    parser.add_option("-p", action="store", type="int", dest="pages",
        default=1, help="How many pages of stories to output. Default: 1.")
    parser.add_option("-n", action="store_true", dest="new",
        help="Retrieve new stories. Default: nope.")
    options, args = parser.parse_args()

    try:
        stories = get_stories(options.subreddit, options.pages, options.new)
    except RedesignError, e:
        print >>sys.stderr, "Reddit has redesigned: %s!" % e
        sys.exit(1)
    except SeriousError, e:
        print >>sys.stderr, "Serious error: %s!" % e
        sys.exit(1)

    print_stories_paragraph(stories)
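As a quick illustration of the functions above, here is a minimal usage sketch.
It assumes the file is importable as a module named `reddit` (a placeholder,
since the filename is not shown here), and that it runs under Python 2 with
simplejson installed:

    import reddit   # placeholder name for the module above

    # Fetch two pages of new stories from the programming subreddit
    # and print a one-line summary of each.
    stories = reddit.get_stories(subreddit='programming', pages=2, new=True)
    for story in stories:
        print story.score, story.comments, story.title.encode('utf-8')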
This is the Reddit Top program. It's a top-like program for monitoring stories
on reddit.com from the console.

It was written by Peteris Krumins (peter@catonmat.net).
His blog is at http://www.catonmat.net -- good coders code, great reuse.

The code is licensed under the GNU GPL license.

The code was written as a part of the article "Follow Reddit from the Console"
on my website. The whole article can be read at:

http://www.catonmat.net/blog/follow-reddit-from-the-console/

I explained some parts of the code in this program in another article, "How
Reddit Top and Hacker Top Programs Were Made". It can be read here:

http://www.catonmat.net/blog/how-reddit-top-and-hacker-top-programs-were-made/

------------------------------------------------------------------------------

Table of contents:

[1] The Reddit Top program.
[2] Program's usage.
[3] Keyboard shortcuts.
[4] Future TODO improvements.


[1]-The-Reddit-Top-program----------------------------------------------------

This program monitors Reddit ( http://reddit.com ) for new stories and
displays them in the console via ncurses.

The program is written in the Python programming language and is supposed to
be run on Unix type operating systems, such as Linux.

It uses one external Python module:

* simplejson - for parsing the reddit stories.
  It can be installed via the `easy_install simplejson` command or can be
  downloaded from http://undefined.org/python/#simplejson
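On Python 2.6 and later, the standard-library json module can stand in for
simplejson if installing it is inconvenient. A common fallback import looks
like the sketch below (the module in this commit imports simplejson directly):

    # Hypothetical fallback: prefer simplejson, fall back to the stdlib json module.
    try:
        import simplejson as json
    except ImportError:
        import json   # in the standard library since Python 2.6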
See my original article for a screenshot:

http://www.catonmat.net/blog/follow-reddit-from-the-console/
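To give a rough idea of the ncurses display described above, here is a
minimal, hypothetical sketch of rendering a list of stories with Python's
curses module. It is an illustration only, not the actual reddit_top.py code,
and it assumes the module from this commit is importable as `reddit`
(a placeholder name):

    # A minimal curses rendering sketch (illustration only, not reddit_top.py).
    import curses
    import reddit   # placeholder name for the module shown earlier in this commit

    def draw(screen, stories):
        screen.erase()
        height, width = screen.getmaxyx()
        for i, story in enumerate(stories[:height - 1]):
            line = "%3d. [%4d] %s" % (story.position, story.score,
                                      story.title.encode('utf-8'))
            screen.addstr(i, 0, line[:width - 1])
        screen.refresh()

    def main(screen):
        curses.curs_set(0)                         # hide the cursor
        draw(screen, reddit.get_stories('programming'))
        screen.getch()                             # wait for a key press, then exit

    curses.wrapper(main)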
[2]-Reddit-Top-usage----------------------------------------------------------

Usage: ./reddit_top.py [-h|--help] - displays help message

Usage: ./reddit_top.py [-s|--subreddit subreddit]
                       [-i|--interval interval]
                       [-u|--utf8 <on|off>]
                       [-n|--new]

-s or --subreddit specifies which subreddit to monitor.
   The default is Reddit's front page - http://www.reddit.com
   Some examples are 'programming', 'science', 'wtf', 'linux' and others.
   See http://www.reddit.com/reddits/ for all the possible subreddits!

-i or --interval specifies the refresh interval.
   The default refresh interval is 1 minute. Here are a few
   examples: 10s (10 seconds), 12m (12 minutes), 2h (2 hours).

-u or --utf8 turns on utf8 output mode.
   Default: off. Use this if you know for sure that your
   terminal supports it, otherwise your terminal might turn into garbage.

-n or --new follows only the newest (just submitted) reddit stories.
   Default: follow front page stories.
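For example, a few hypothetical invocations built from the options above:

    ./reddit_top.py                          # front page, refreshed every minute
    ./reddit_top.py -s programming -i 30s    # /r/programming, refreshed every 30 seconds
    ./reddit_top.py -s linux -n -u on        # new /r/linux stories, with utf8 output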
[3]-Keyboard-shortcuts--------------------------------------------------------

q - quits the program.
u - forces an update of the stories.
m - changes the display mode.
up/down arrows (or j/k) - scrolls the news list up or down.


[4]-Future-TODO-improvements--------------------------------------------------

* Add a feature to open a story in a web browser. (Someone suggested using the
  webbrowser module.)

* Fix it to work on Windows. (Perhaps try the Console module.)

* Merge it with the "Hacker Top" program (see below) and create a "Social Top"
  program. Then write plugins for Digg and other websites.

  Hacker Top is here:
  http://www.catonmat.net/blog/follow-hacker-news-from-the-console/

* Add the ability to log in and vote for favorite stories.


------------------------------------------------------------------------------


Have fun using it!


Sincerely,
Peteris Krumins
http://www.catonmat.net