Skip to content

Commit

Permalink
Remove charade in favour of chardet, since the former is no longer maintained
Browse files: browse the repository at this point in the history
  • Loading branch information
aniav committed Oct 28, 2016
1 parent 0c447b0 commit 7ace593
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -40,7 +40,7 @@ def download_jars(datapath, version=boilerpipe_version):
},
install_requires=[
'JPype1',
'charade',
'chardet',
],
author='Misja Hoebe',
author_email='misja.hoebe@gmail.com',
Expand Down
8 changes: 4 additions & 4 deletions src/boilerpipe/extract/__init__.py
Expand Up @@ -4,7 +4,7 @@
except ImportError:
from urllib2 import Request, urlopen
import socket
import charade
import chardet
import threading

socket.setdefaulttimeout(15)
Expand Down Expand Up @@ -40,7 +40,7 @@ def __init__(self, extractor='DefaultExtractor', **kwargs):
self.data = connection.read()
encoding = connection.headers['content-type'].lower().split('charset=')[-1]
if encoding.lower() == 'text/html':
encoding = charade.detect(self.data)['encoding']
encoding = chardet.detect(self.data)['encoding']
try:
self.data = unicode(self.data, encoding)
except NameError:
Expand All @@ -49,10 +49,10 @@ def __init__(self, extractor='DefaultExtractor', **kwargs):
self.data = kwargs['html']
try:
if not isinstance(self.data, unicode):
self.data = unicode(self.data, charade.detect(self.data)['encoding'])
self.data = unicode(self.data, chardet.detect(self.data)['encoding'])
except NameError:
if not isinstance(self.data, str):
self.data = self.data.decode(charade.detect(self.data)['encoding'])
self.data = self.data.decode(chardet.detect(self.data)['encoding'])
else:
raise Exception('No text or url provided')

Expand Down

0 comments on commit 7ace593

Please sign in to comment.