Skip to content

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
...
  • 3 commits
  • 3 files changed
  • 0 commit comments
  • 1 contributor
Showing with 34 additions and 1 deletion.
  1. +21 −0 app/appengine_config.py
  2. +1 −1 app/birdfeeder/TODO
  3. +12 −0 app/datasources/thumbnails.py
View
21 app/appengine_config.py
@@ -1,4 +1,25 @@
+import logging
+
+appstats_RECORD_FRACTION = .1
+
+class BlockingMiddleware(object):
+ def __init__(self, app):
+ self._wrapped_app = app
+
+ def __call__(self, environ, start_response):
+ user_agent = environ.get('HTTP_USER_AGENT', None)
+ # Scraper running on EC2 (IP addresses 50.18.2.106 and 50.18.73.38)
+ # that's requesting lots of Tweet Digest pages.
+ if user_agent == 'Python-urllib/2.7':
+ logging.info('Blocked request')
+ start_response('403 Forbidden', [('Content-type','text/plain')])
+ return ['']
+ return self._wrapped_app(environ, start_response)
+
def webapp_add_wsgi_middleware(app):
from google.appengine.ext.appstats import recording
app = recording.appstats_wsgi_middleware(app)
+
+ app = BlockingMiddleware(app)
+
return app
View
2 app/birdfeeder/TODO
@@ -10,7 +10,6 @@ todo:
- remove URLs (especially if at end?) from title snippet
thumbnail URLs to support:
-- twitpic.com
- youtube.com/youtu.be
- vimeo.com
- 4sq.com
@@ -41,3 +40,4 @@ done:
- i.imgur.com
- why does /tweet-digest/digest?usernames=pogue&output=atom take 65ms (only 8ms in RPCs)
- make twitter API cache interface more efficient (right now it does two RPCs for cache hits, one to get the timestamp, one for the data)
+- twitpic.com
View
12 app/datasources/thumbnails.py
@@ -12,6 +12,7 @@
_FLICKR_PHOTO_PAGE_PATH_RE = re.compile('/photos/[^/]+/(\d+).*')
_FLICKR_SHORT_ID_ALPHABET ='123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ'
_IMGUR_PATH_RE = re.compile('/(\\w+)(\\....).*')
+_TWITPIC_PATH_RE = re.compile('/(\\w+).*')
def _get_short_flickr_photo_id(photo_id):
result = ''
@@ -99,5 +100,16 @@ def get_thumb_url_for_short_photo_id(short_photo_id):
thumb_url = '%s://%s/%s%s%s' % (
parsed_url.scheme, hostname, match.group(1),
(need_small and 's' or 'l'), match.group(2))
+ elif hostname == 'twitpic.com':
+ # See http://dev.twitpic.com/docs/thumbnails/ (the 'large' size isn't
+ # documented there, but it is what twitter.com seems to use).
+ match = _TWITPIC_PATH_RE.match(path)
+ if match:
+ thumb_url = 'http://twitpic.com/show/%s/%s' % (
+ need_small and 'thumb' or 'large',
+ match.group(1))
+ if need_small:
+ thumb_width = 150
+ thumb_height = 150
return thumb_url, thumb_width, thumb_height

No commit comments for this range

Something went wrong with that request. Please try again.