diff --git a/docs/topics/downloader-middleware.rst b/docs/topics/downloader-middleware.rst
index f2f3ef46657..fa65f66ed41 100644
--- a/docs/topics/downloader-middleware.rst
+++ b/docs/topics/downloader-middleware.rst
@@ -805,6 +805,7 @@ The :class:`MetaRefreshMiddleware` can be configured through the following
settings (see the settings documentation for more info):
* :setting:`METAREFRESH_ENABLED`
+* :setting:`METAREFRESH_IGNORE_TAGS`
* :setting:`METAREFRESH_MAXDELAY`
-This middleware obey :setting:`REDIRECT_MAX_TIMES` setting, :reqmeta:`dont_redirect`,
+This middleware obeys the :setting:`REDIRECT_MAX_TIMES` setting, :reqmeta:`dont_redirect`,
@@ -826,6 +827,15 @@ Default: ``True``
Whether the Meta Refresh middleware will be enabled.
+.. setting:: METAREFRESH_IGNORE_TAGS
+
+METAREFRESH_IGNORE_TAGS
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Default: ``['script', 'noscript']``
+
+Meta refresh tags found inside any of these tags are ignored and do not
+trigger a redirect.
+
.. setting:: METAREFRESH_MAXDELAY
METAREFRESH_MAXDELAY
diff --git a/scrapy/downloadermiddlewares/redirect.py b/scrapy/downloadermiddlewares/redirect.py
index cb59d3fd2bd..49468a2e486 100644
--- a/scrapy/downloadermiddlewares/redirect.py
+++ b/scrapy/downloadermiddlewares/redirect.py
@@ -88,6 +88,7 @@ class MetaRefreshMiddleware(BaseRedirectMiddleware):
def __init__(self, settings):
super(MetaRefreshMiddleware, self).__init__(settings)
+ self._ignore_tags = settings.getlist('METAREFRESH_IGNORE_TAGS')
self._maxdelay = settings.getint('REDIRECT_MAX_METAREFRESH_DELAY',
settings.getint('METAREFRESH_MAXDELAY'))
@@ -96,7 +97,8 @@ def process_response(self, request, response, spider):
not isinstance(response, HtmlResponse):
return response
- interval, url = get_meta_refresh(response)
+ interval, url = get_meta_refresh(response,
+ ignore_tags=self._ignore_tags)
if url and interval < self._maxdelay:
redirected = self._redirect_request_using_get(request, url)
return self._redirect(redirected, request, spider, 'meta refresh')
diff --git a/scrapy/settings/default_settings.py b/scrapy/settings/default_settings.py
index 9986827d82e..1ce1516e56b 100644
--- a/scrapy/settings/default_settings.py
+++ b/scrapy/settings/default_settings.py
@@ -221,6 +221,7 @@
MEMUSAGE_WARNING_MB = 0
METAREFRESH_ENABLED = True
+METAREFRESH_IGNORE_TAGS = ['script', 'noscript']
METAREFRESH_MAXDELAY = 100
NEWSPIDER_MODULE = ''
diff --git a/scrapy/utils/response.py b/scrapy/utils/response.py
index bf276b5caa9..122af28b001 100644
--- a/scrapy/utils/response.py
+++ b/scrapy/utils/response.py
@@ -31,12 +31,12 @@ def get_base_url(response):
_metaref_cache = weakref.WeakKeyDictionary()
-def get_meta_refresh(response):
-    """Parse the http-equiv refrsh parameter from the given response"""
-    if response not in _metaref_cache:
-        text = response.text[0:4096]
-        _metaref_cache[response] = html.get_meta_refresh(text, response.url,
-            response.encoding, ignore_tags=('script', 'noscript'))
-    return _metaref_cache[response]
+def get_meta_refresh(response, ignore_tags=('script', 'noscript')):
+    """Parse the http-equiv refresh parameter from the given response.
+
+    Only the first 4096 characters of ``response.text`` are scanned. The
+    parsed result is cached per response *and* per ``ignore_tags`` value, so
+    asking again for the same response with different ``ignore_tags`` does
+    not hand back a result computed for another tag list.
+
+    :param response: response whose text, url and encoding are passed to
+        ``html.get_meta_refresh``
+    :param ignore_tags: iterable of tag names; meta tags nested inside them
+        are skipped by the parser
+    :return: the value returned by ``html.get_meta_refresh``
+    """
+    # Settings hand over a list while the default is a tuple; normalise to a
+    # tuple so the value is hashable and equal tag lists share one entry.
+    cache_key = tuple(ignore_tags)
+    # One small dict per response keeps the weak-key behaviour of the
+    # module-level cache while separating results by ignore_tags.
+    per_response = _metaref_cache.setdefault(response, {})
+    if cache_key not in per_response:
+        text = response.text[0:4096]
+        per_response[cache_key] = html.get_meta_refresh(
+            text, response.url, response.encoding, ignore_tags=ignore_tags)
+    return per_response[cache_key]
diff --git a/tests/test_downloadermiddleware_redirect.py b/tests/test_downloadermiddleware_redirect.py
index 6c81c94ca31..0e841489d9a 100644
--- a/tests/test_downloadermiddleware_redirect.py
+++ b/tests/test_downloadermiddleware_redirect.py
@@ -279,5 +279,24 @@ def test_redirect_reasons(self):
self.assertEqual(req2.meta['redirect_reasons'], ['meta refresh'])
self.assertEqual(req3.meta['redirect_reasons'], ['meta refresh', 'meta refresh'])
+    def test_ignore_tags_default(self):
+        """A meta refresh nested in a default-ignored tag is not followed."""
+        req = Request(url='http://example.org')
+        # ``noscript`` is in the default METAREFRESH_IGNORE_TAGS, so the
+        # refresh target inside it must be left alone.
+        # NOTE(review): assumes self.mw was built with default settings.
+        body = ('''<noscript><meta http-equiv="refresh" '''
+                '''content="0;URL='http://example.org/newpage'"></noscript>''')
+        rsp = HtmlResponse(req.url, body=body.encode())
+        response = self.mw.process_response(req, rsp, self.spider)
+        assert isinstance(response, Response)
+
+    def test_ignore_tags_empty_list(self):
+        """An empty ignore list lets a ``noscript`` meta refresh redirect."""
+        crawler = get_crawler(Spider, {'METAREFRESH_IGNORE_TAGS': []})
+        mw = MetaRefreshMiddleware.from_crawler(crawler)
+        req = Request(url='http://example.org')
+        # Same markup as the default-settings test: with nothing ignored, the
+        # refresh inside ``noscript`` must now produce a redirect request.
+        body = ('''<noscript><meta http-equiv="refresh" '''
+                '''content="0;URL='http://example.org/newpage'"></noscript>''')
+        rsp = HtmlResponse(req.url, body=body.encode())
+        req2 = mw.process_response(req, rsp, self.spider)
+        assert isinstance(req2, Request)
+        self.assertEqual(req2.url, 'http://example.org/newpage')
+
if __name__ == "__main__":
unittest.main()