Skip to content
This repository has been archived by the owner on Apr 19, 2019. It is now read-only.

Commit

Permalink
url: Remove machinery moved to Little Brother
Browse files Browse the repository at this point in the history
  • Loading branch information
kxz committed Aug 1, 2015
1 parent 8cc846a commit f2eda26
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 146 deletions.
79 changes: 2 additions & 77 deletions omnipresence/plugins/url/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,7 @@


import re
import sys
from urlparse import urlparse

import ipaddress
from twisted.internet import defer, reactor, protocol
from twisted.web.client import IAgent
from zope.interface import implements


#
# Utility methods
#

# Based on django.utils.html.urlize from the Django project.
TRAILING_PUNCTUATION = ['.', ',', ':', ';', '.)', '"', "'", '!']
Expand All @@ -40,74 +29,10 @@ def extract_urls(text):
middle = middle[len(opening):]
lead = lead + opening
# Keep parentheses at the end only if they're balanced.
if (middle.endswith(closing)
and middle.count(closing) == middle.count(opening) + 1):
if (middle.endswith(closing) and
middle.count(closing) == middle.count(opening) + 1):
middle = middle[:-len(closing)]
trail = closing + trail
# Yield the resulting URL.
if SIMPLE_URL_RE.match(middle):
yield middle


#
# Twisted HTTP machinery
#

class TruncatingReadBodyProtocol(protocol.Protocol):
    """Buffer incoming body data, keeping no more than *max_bytes* of it.

    *status* and *message* are stored on the instance unchanged.
    *finished* must expose ``called`` and ``callback`` (as a Twisted
    ``Deferred`` does); it is fired with the buffered data once the
    connection is lost.  A falsy *max_bytes* (``None`` or ``0``) is
    replaced by ``sys.maxsize``.
    """

    def __init__(self, status, message, finished, max_bytes=None):
        self.status = status
        self.message = message
        self.finished = finished
        self.data_buffer = []
        # Any falsy limit falls back to `sys.maxsize`.
        self.remaining = max_bytes if max_bytes else sys.maxsize

    def dataReceived(self, data):
        """Store as much of *data* as still fits, then hang up once
        the byte budget has been used up."""
        if self.remaining > 0:
            chunk = data[:self.remaining]
            self.data_buffer.append(chunk)
            self.remaining -= len(chunk)
        if self.remaining <= 0:
            self.transport.loseConnection()

    def connectionLost(self, reason):
        """Fire *finished* with the joined buffer unless it has
        already been called."""
        if self.finished.called:
            return
        self.finished.callback(''.join(self.data_buffer))


class BlacklistedHost(Exception):
    """Error signalling that a `BlacklistingAgent` refused to issue a
    request because the target host resolved to a blacklisted IP.

    The offending *hostname* and the *ip* it resolved to are kept as
    attributes of the same names.
    """

    def __init__(self, hostname, ip):
        self.hostname, self.ip = hostname, ip

    def __str__(self):
        template = 'host {} corresponds to blacklisted IP {}'
        return template.format(self.hostname, self.ip)


class BlacklistingAgent(object):
    """Wrap an `~twisted.web.client.Agent` and refuse any request whose
    host resolves to a private, loopback, or link-local IP address.

    *agent* is the wrapped agent that performs permitted requests.
    *resolve* is a callable taking a hostname and returning a Deferred
    that yields an IP address string; when omitted (or falsy),
    ``reactor.resolve`` is used.
    """
    implements(IAgent)

    def __init__(self, agent, resolve=None):
        self.agent = agent
        self.resolve = resolve if resolve else reactor.resolve

    @defer.inlineCallbacks
    def request(self, method, uri, headers=None, bodyProducer=None):
        """Resolve the host named in *uri* and, unless its address is
        blacklisted, forward the request to the wrapped agent.

        Raises `BlacklistedHost` when the resolved address is private,
        loopback, or link-local.
        """
        host = urlparse(uri).hostname
        resolved = yield self.resolve(host)
        # `ipaddress` wants a Unicode string; decoding with 'replace'
        # avoids having to handle `UnicodeDecodeError` separately.
        address = ipaddress.ip_address(resolved.decode('ascii', 'replace'))
        forbidden = (address.is_private or
                     address.is_loopback or
                     address.is_link_local)
        if forbidden:
            raise BlacklistedHost(host, address)
        result = yield self.agent.request(method, uri, headers, bodyProducer)
        defer.returnValue(result)
1 change: 1 addition & 0 deletions omnipresence/plugins/url/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
littlebrother>=0.1
70 changes: 1 addition & 69 deletions omnipresence/plugins/url/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,9 @@
# pylint: disable=missing-docstring,too-few-public-methods


from twisted.internet.defer import Deferred, succeed
from twisted.python.failure import Failure
from twisted.test.proto_helpers import StringTransport
from twisted.trial import unittest
from twisted.web.client import Response
from twisted.web.http_headers import Headers
from twisted.web.test.test_agent import (AgentTestsMixin,
FakeReactorAndConnectMixin)

from . import (extract_urls, TruncatingReadBodyProtocol,
BlacklistingAgent, BlacklistedHost)
from . import extract_urls


class ExtractURLsTestCase(unittest.TestCase):
Expand Down Expand Up @@ -72,63 +64,3 @@ def test_catastrophic_backtracking(self):
self.assert_urls(
'http://i.ebayimg.com/00/s/MTAwOFgxMDI0/$(KGrHqYOKo0E6fEy4,lqBOt,yzoor!~~60_12.JPG',
['http://i.ebayimg.com/00/s/MTAwOFgxMDI0/$(KGrHqYOKo0E6fEy4,lqBOt,yzoor!~~60_12.JPG'])


class TruncatingReadBodyProtocolTestCase(unittest.TestCase):
    """Check that `TruncatingReadBodyProtocol` delivers at most
    *max_bytes* (8 in these tests) of a response body."""

    def assert_delivery(self, data, expected):
        """Push *data* through a fake 200 response into the protocol
        and return a Deferred asserting the result equals *expected*."""
        done = Deferred()
        done.addCallback(self.assertEqual, expected)
        fake_response = Response(
            ('HTTP', 1, 1), 200, 'OK', Headers(), StringTransport())
        reader = TruncatingReadBodyProtocol(
            fake_response.code, fake_response.phrase, done, max_bytes=8)
        fake_response.deliverBody(reader)
        fake_response._bodyDataReceived(data)
        fake_response._bodyDataFinished()
        return done

    def test_complete(self):
        # A body shorter than the limit arrives intact.
        body = '#' * 4
        return self.assert_delivery(body, '#' * 4)

    def test_truncated(self):
        # A body longer than the limit is cut down to 8 bytes.
        body = '#' * 16
        return self.assert_delivery(body, '#' * 8)


class BlacklistingAgentTestCase(unittest.TestCase,
                                FakeReactorAndConnectMixin, AgentTestsMixin):
    """Exercise `BlacklistingAgent` against a stubbed resolver."""
    # <https://twistedmatrix.com/trac/ticket/4024>... one wishes.
    #
    # Based in part on `twisted.web.test.test_agent.RedirectAgentTests`.

    # Hosts that must all be rejected by the agent.
    sample_hosts = ('localhost', '0.0.0.0', '10.0.0.1', '127.0.0.1',
                    '169.254.0.1', '172.16.0.1', '192.168.0.1')

    @staticmethod
    def resolve(hostname):
        """Stub resolver: map the two known names to fixed addresses
        and echo anything else back unchanged."""
        known = {'localhost': '127.0.0.1', 'foo.test': '8.8.8.8'}
        return succeed(known.get(hostname, hostname))

    def makeAgent(self):
        wrapped = self.buildAgentForWrapperTest(self.reactor)
        return BlacklistingAgent(wrapped, resolve=self.resolve)

    def setUp(self):
        self.reactor = self.Reactor()
        self.agent = self.makeAgent()

    def test_no_blacklist(self):
        # A host resolving to a public address must not raise.
        self.agent.request('GET', 'http://foo.test/')

    def assert_blacklist(self, method, uri):
        """Assert that requesting *uri* fails with `BlacklistedHost`."""
        pending = self.agent.request(method, uri)
        self.failureResultOf(pending, BlacklistedHost)

    def test_blacklist(self):
        for scheme in ('http', 'https'):
            for host in self.sample_hosts:
                target = '{}://{}/'.format(scheme, host)
                for verb in ('GET', 'POST'):
                    self.assert_blacklist(verb, target)

0 comments on commit f2eda26

Please sign in to comment.