Finish greenlet implementation
hidde-jan committed Mar 19, 2018
1 parent 3d9edf8 commit 20c5351
Showing 1 changed file with 47 additions and 23 deletions.

--- a/snapshill.py
+++ b/snapshill.py
@@ -3,6 +3,8 @@
 from gevent.pool import Pool
 monkey.patch_all(thread=False, select=False)

+import timeit
+
 import logging
 import os
 import praw
@@ -102,6 +104,26 @@ def log_error(e):
 ################
 # IO Functions #
 ################
+def handle_post(post, snapshillbot, reddit_pool = None):
+    jobs = []
+
+    if reddit_pool is None:
+        reddit_pool = Pool(1)
+
+    for link in post.links:
+        if link.is_reddit():
+            jobs.append(reddit_pool.spawn(create_reddit_archives, link))
+        else:
+            for archive in link.archives:
+                jobs.append(create_archive(archive))
+
+    gevent.joinall(jobs)
+
+    comment = Notification(post, snapshillbot.get_header(post.submission.subreddit)).notify()
+
+    if comment:
+        store_notification(post.name, comment.name)
+
 def create_archive(archive):
     return gevent.spawn(archive.archive)

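Note on the pattern: handle_post collects one greenlet per archive job and blocks until all of them finish. Reddit links are funneled through the caller's Pool(1) so they run one at a time, while every other archive is spawned unpooled and runs concurrently. A minimal standalone sketch of that spawn/joinall pattern (fetch and the example URLs are hypothetical stand-ins, not part of snapshill.py):

    import gevent
    from gevent import monkey
    from gevent.pool import Pool

    monkey.patch_all(thread=False, select=False)

    def fetch(url):
        # Hypothetical stand-in for an archive job such as archive.archive().
        print("archiving", url)

    serial_pool = Pool(1)  # at most one job from this pool runs at a time
    jobs = []

    for url in ["https://example.com/a", "https://example.com/b"]:
        # With Pool(1), each spawn waits until the previous job has finished.
        jobs.append(serial_pool.spawn(fetch, url))

    # An unpooled greenlet runs concurrently with everything else.
    jobs.append(gevent.spawn(fetch, "https://example.com/c"))

    gevent.joinall(jobs)  # block until every job is done
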
@@ -175,7 +197,7 @@ def _archive(self):

         return found[0]

-    def error_link(self):
+    def resubmit_link(self):
         pairs = {"url": self.url, "run": 1}
         return "https://archive.is/?" + urlencode(pairs)

@@ -193,13 +215,13 @@ def _archive(self):
         try:
             s.get("https://web.archive.org/save/" + self.url)
         except RECOVERABLE_EXC as e:
             if isinstance(e, HTTPError) and e.status_code == 403:
                 return None
             return False

         date = time.strftime(ARCHIVE_ORG_FORMAT, time.gmtime())

         return "https://web.archive.org/" + date + "/" + self.url

-    def error_link(self):
+    def resubmit_link(self):
         return "https://web.archive.org/save/" + self.url


@@ -213,6 +235,11 @@ def _archive(self):
         discrepancy will give an error when trying to view it.
         :return: URL of the archive, or False if an error occurred.
         """
+
+        # Megalodon.jp sucks and errors out every single time. We'll just let
+        # users archive it themselves if they want to.
+        return False
+
         pairs = {"url": self.url}

         try:
@@ -225,15 +252,15 @@ def _archive(self):

         return res.url

-    def error_link(self):
+    def resubmit_link(self):
         return "http://megalodon.jp/pc/get_simple/decide?url={}".format(self.url)


 class GoldfishArchive(Archive):
-    site_name = "snew.github.io"
+    site_name = "removeddit"

     def _archive(self):
-        return re.sub(REDDIT_PATTERN, "https://snew.github.io", self.url)
+        return re.sub(REDDIT_PATTERN, "https://removeddit.com", self.url)

 class RemovedditArchive(NameMixin):
     site_name = "removeddit.com"
@@ -336,7 +363,7 @@ def build(self):
         return self._formatted

     def add_comment(self, *args, **kwargs):
-        self.submission.reply(*args, **kwargs)
+        return self.submission.reply(*args, **kwargs)


 class Notification:
@@ -444,33 +471,30 @@ def run(self):
         if not self._setup:
             raise Exception("Snapshiller not ready yet!")

+        start = timeit.default_timer()
+        count = 0
+
         submissions = r.front.new(limit=self.limit)
+        post_pool = Pool(4)
+        reddit_pool = Pool(1)

         for submission in submissions:
             post = Post(submission)

-            # We only want to create the archives from one reddit link at a time.
-            reddit_pool = Pool(1)
-
             log.debug("Found submission: {}".format(post.permalink))

             if not should_notify(post.name):
                 log.debug("Skipping.")
                 continue

-            for link in post.links:
-                if link.is_reddit():
-                    reddit_pool.spawn(create_reddit_archives, link)
-                else:
-                    for archive in link.archives:
-                        create_archive(archive)
+            count += 1
+            post_pool.spawn(handle_post, post, self, reddit_pool)

-            gevent.wait()
+        gevent.wait()

-            comment = Notification(post, self._get_header(submission.subreddit)).notify()
+        stop = timeit.default_timer()

-            if comment:
-                store_notification(post.name, comment.name)
+        log.debug("Handled {} submissions in {} seconds".format(count, stop - start))

     def setup(self):
         """
@@ -492,14 +516,14 @@ def refresh(self):

         for subreddit in r.user.subreddits():
             if subreddit.user_is_banned:
-                log.debug("Banned from {}: unsubscribing!", subreddit)
+                log.debug("Banned from {}: unsubscribing!".format(subreddit))
                 subreddit.unsubscribe()
                 continue

             name = subreddit.display_name.lower()
             self.headers[name] = Header(self.settings_wiki, name)

-    def _get_header(self, subreddit):
+    def get_header(self, subreddit):
         """
         Gets the correct Header object for this subreddit. If the one for 'all'
         is not "!ignore", then this one will always be returned.
