lots of logic changes, bug fixes

commit 8d4d1fd2f031afb706af83fe9664dd101f59eed0 1 parent d97d89d
@sparr authored
Showing with 155 additions and 46 deletions.
  1. +1 −0  README
  2. +82 −0 exception_handler.py
  3. +72 −46 reddit-link-unscripter.py
1  README
@@ -1,4 +1,5 @@
All of the code contained in this repository is licensed under the GNU GPL v3
+unless otherwise stated in the source file.
reddit-link-unscripter is a bot written in Python using the reddit_api module.
Its purpose is to find posts and comments that contain links to websites that
82 exception_handler.py
@@ -0,0 +1,82 @@
+#!/usr/bin/python2.4
+
+# An exception handling idiom using decorators.
+
+# taken from http://code.activestate.com/recipes/408937-basic-exception-handling-idiom-using-decorators/
+# licensed under the PSF license, per that URL
+
+__author__ = "Anand Pillai"
+
+def ExpHandler(*posargs):
+
+    def nestedhandler(func, exptuple, *pargs, **kwargs):
+        """ Function that creates a nested exception handler from
+        the passed exception tuple """
+
+        exp, handler = exptuple[0]
+        try:
+            if len(exptuple) == 1:
+                func(*pargs, **kwargs)
+            else:
+                nestedhandler(func, exptuple[1:], *pargs, **kwargs)
+        except exp, e:
+            if handler:
+                handler(e)
+            else:
+                print e.__class__.__name__, ':', e
+
+    def wrapper(f):
+        def newfunc(*pargs, **kwargs):
+            if len(posargs) < 2:
+                # keep only exception classes; fall back to Exception if none qualify
+                t = tuple(item for item in posargs[0] if issubclass(item, Exception)) or (Exception,)
+                try:
+                    f(*pargs, **kwargs)
+                except t, e:
+                    print e.__class__.__name__, ':', e
+            else:
+                t1, t2 = posargs[0], posargs[1]
+                l = []
+                for x in xrange(len(t1)):
+                    try:
+                        l.append((t1[x], t2[x]))
+                    except:
+                        l.append((t1[x], None))
+
+                # Reverse list so that exceptions will
+                # be caught in order.
+                l.reverse()
+                t = tuple(l)
+                nestedhandler(f, t, *pargs, **kwargs)
+
+        return newfunc
+
+    return wrapper
+
+def ExpHandlerDefault(e):
+    print 'Caught exception!', e
+
+# Examples
+# Specify exceptions in order: the first one listed is handled first,
+# the last one last.
+@ExpHandler((ZeroDivisionError,ValueError), (None,ExpHandlerDefault))
+def ExpHandler_f1():
+    1/0
+
+@ExpHandler((TypeError, ValueError, StandardError), (ExpHandlerDefault,)*3)
+def ExpHandler_f2(*pargs, **kwargs):
+    print pargs
+    x = pargs[0]
+    y = x[0]
+    y += x[1]
+
+@ExpHandler((ValueError, Exception))
+def ExpHandler_f3(*pargs):
+    l = pargs[0]
+    return l.index(10)
+
+if __name__=="__main__":
+    ExpHandler_f1()
+    # Calls exception handler
+    ExpHandler_f2('Python', 1)
+    # Calls exception handler
+    ExpHandler_f3(range(5),)
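As a quick sanity check of the decorator (not part of the commit), here is a minimal Python 2 sketch of how a caller would use ExpHandler, assuming the filter fix above; the lookup function and log_error handler are made up for illustration:

from exception_handler import ExpHandler

def log_error(e):
    # illustrative handler, not part of the module
    print 'handled:', e.__class__.__name__, ':', e

@ExpHandler((KeyError, IndexError), (log_error, log_error))
def lookup(mapping, key):
    return mapping[key]

lookup({}, 'missing')   # the KeyError is caught and passed to log_error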
118 reddit-link-unscripter.py
@@ -10,6 +10,10 @@
import time
import re
+from exception_handler import ExpHandler
+
+import pprint
+
r = reddit.Reddit(user_agent='reddit-link-unscripter')
print r
r.login() # credentials are in reddit_api.cfg
@@ -23,64 +27,86 @@ def process_submission(submission):
    True
    # reply with a modified URL if there's a match
-# fetch our last 100 comments, so we can try not to double-comment when starting up
+#TODO: handle network failures here
+def post_comment(parent,comment):
+    result = parent.add_comment(comment)
+    return result
+
+def persistent_post_comment(parent,comment,retries=3,debug=False):
+    result = None   # returned as-is if every retry fails
+    while retries >= 0:
+        try:
+            if debug:
+                print "debug: comment: " + comment
+            else:
+                print "comment: " + comment
+            result = post_comment(parent,comment)
+        except reddit.errors.RateLimitExceeded as e:
+            print "Rate limited for " + str(e.sleep_time) + " seconds, sleeping"
+            time.sleep(e.sleep_time)
+            continue
+        except Exception as e:
+            print e.__class__.__name__, ':', e
+            print "retrying " + str(retries) + " more times"
+            retries -= 1
+            continue
+        break
+    return result
+
+commented_count = 0
+post_time_mark = 0
+
+# fetch our last comment, which will dictate how far back we go on the first pass
+# TODO: get the creation time of the post, not my reply
my_comments = None
try:
-    my_comments = r.get_content("http://www.reddit.com/user/" + r.config.user + "/comments.json",100)
+    my_comments = list(r.user.get_comments(limit=1))
except:
-    pass
-my_comment_parents = set(comment.parent_id for comment in my_comments)
-
-place_holder = None
-commented_count = 0
+    #TODO: handle some exceptions here!
+    raise
+
+if len(my_comments) > 0:
+    if my_comments[0].created_utc > post_time_mark:
+        post_time_mark = my_comments[0].created_utc
# fetch new submissions, check them, reply
while True:
+
    submissions = None
    # right now we are just pulling twitter link posts
-    try:
-        if place_holder is None:
-            submissions = r.get_content("http://www.reddit.com/domain/twitter.com/new.json",10,{'sort':'new'})
-        else:
-            submissions = r.get_content("http://www.reddit.com/domain/twitter.com/new.json",100,{'sort':'new'},place_holder)
+    submissions = r.get_content("http://www.reddit.com/domain/twitter.com/new.json",100,{'sort':'new'})
-        retrieved_count = 0
-
-        for s in submissions:
-            retrieved_count += 1
-            # stop if we get to the first post from the previous list
-            # TODO: keep going in case we missed a post in a previous pass
-            if s.name == place_holder:
-                break
-            if retrieved_count == 1:
-                place_holder = s.name
-            # stop if we already replied to this post
-            if place_holder == None and s.name in my_comment_parents:
-                #FIXME: some posts are still getting duplicate comments!
-                print "Already commented on " + s.name
-                continue
+    retrieved_count = 0
+    new_time_mark = post_time_mark
+    print "Starting loop at " + time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
+    for s in submissions:
+        retrieved_count += 1
+        # skip posts older than our previous "newest post" timestamp
+        if s.created_utc > post_time_mark:
            new_url = re.sub('(https?://)(?:www.)?(twitter.com/)#!?/(.*)',r'\1m.\2\3',s.url)
            if new_url != s.url:
-                try:
-                    #print 'This [mobile twitter link](' + new_url + ') does not require javascript to view.'
-                    s.add_comment('This [mobile twitter link](' + new_url + ') does not require javascript to view.')
-                    #FIXME: some posts are still getting duplicate comments!
-                    my_comment_parents.add(s.name)
-                    print "commented on " + s.name
-                    print "original URL: " + s.url
-                    print "replaced URL: " + new_url
-                    commented_count += 1
-                except:
-                    pass
-
-        print str(retrieved_count) + " submissions considered at " + time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
-        print str(commented_count) + " submissions replied to so far this run"
+                comment = 'This [mobile twitter link](' + new_url + ") will work without requiring javascript.\n\nThis comment generated by an automated bot."
+                result = persistent_post_comment(s,comment)
+                #TODO: handle failures like being banned here
+                print "commented on " + s.name
+                pprint.pprint(vars(result))
+                print "original URL: " + s.url
+                print "replaced URL: " + new_url
+                commented_count += 1
+            if s.created_utc > new_time_mark:
+                print "New time mark, " + str(s.created_utc)
+                new_time_mark = s.created_utc
+        else:
+            #print "Nothing to do for " + s.name
+            pass
+        print "sleeping for 5 seconds, interrupt if you must"
+        time.sleep(5)
-    except:
-        # if the request failed, try again
-        print "Failed to retrieve post list"
-        pass
+    print str(retrieved_count) + " submissions considered at " + time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
+    print str(commented_count) + " submissions replied to so far this run"
+    post_time_mark = new_time_mark
+
    # sleep for 30 seconds between refreshes of the same page request, per the API rules
+    # 300 seconds instead, for now, in case the bot malfunctions: 1/10th as many duplicate posts
    print "sleeping"
-    time.sleep(30)
+    time.sleep(300)
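For reference (not part of the commit), the URL rewrite the loop performs can be checked in isolation. The sample URL below is made up, and the dots are escaped here, which the committed pattern leaves unescaped:

import re

url = 'http://twitter.com/#!/someuser/status/12345'
new_url = re.sub(r'(https?://)(?:www\.)?(twitter\.com/)#!?/(.*)', r'\1m.\2\3', url)
print new_url   # http://m.twitter.com/someuser/status/12345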