Skip to content

Commit

Permalink
Set the default content_type
Browse files Browse the repository at this point in the history
  • Loading branch information
polyrabbit committed Oct 29, 2015
1 parent 5a2449d commit 342c8ce
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion page_content_extractor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ def legendary_parser_factory(url):
except Exception as e:
logger.info('%s is not an embeddable, try another(%s)', resp.url, e)

ct = resp.headers.get('content-type', '').lower()
# If no content-type is provided, treat the response as HTML (as Chrome does).
ct = resp.headers.get('content-type', 'text').lower()
if ct.startswith('text'):
logger.info('Get an %s to parse', ct)
return HtmlContentExtractor(resp.text, resp.url)
Expand Down

0 comments on commit 342c8ce

Please sign in to comment.