Skip to content

Commit

Permalink
Merge pull request #53 from arisudesu/urlopen_timeout
Browse files: browse the repository at this point in the history
Replaced socket.setdefaulttimeout with urlopen timeout
  • Loading branch information
tuxdna committed Oct 3, 2017
2 parents 93790e6 + 3f9f744 commit ab3694d
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions src/boilerpipe/extract/__init__.py
Expand Up @@ -3,11 +3,11 @@
from urllib.request import Request, urlopen
except ImportError:
from urllib2 import Request, urlopen
import socket
import chardet
import threading

socket.setdefaulttimeout(15)
DEFAULT_URLOPEN_TIMEOUT = 15

lock = threading.Lock()

InputSource = jpype.JClass('org.xml.sax.InputSource')
Expand All @@ -33,10 +33,10 @@ class Extractor(object):
data = None
headers = {'User-Agent': 'Mozilla/5.0'}

def __init__(self, extractor='DefaultExtractor', **kwargs):
def __init__(self, extractor='DefaultExtractor', timeout=DEFAULT_URLOPEN_TIMEOUT, **kwargs):
if 'url' in kwargs:
request = Request(kwargs['url'], headers=self.headers)
connection = urlopen(request)
connection = urlopen(request, timeout=timeout)
self.data = connection.read()
encoding = connection.headers['content-type'].lower().split('charset=')[-1]
if encoding.lower() == 'text/html':
Expand Down

0 comments on commit ab3694d

Please sign in to comment.