Skip to content

Commit

Permalink
Don't generate http requests with two Host: headers
Browse files Browse the repository at this point in the history
This would cause the server to respond with HTTP 400 (Bad Request) errors, unsurprisingly.
  • Loading branch information
mhagander committed Feb 7, 2012
1 parent 03794e3 commit f9486b5
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions tools/search/crawler/lib/basecrawler.py
Expand Up @@ -164,14 +164,14 @@ def fetch_page(self, url):
try: try:
# Unfortunately, persistent connections seem quite unreliable, # Unfortunately, persistent connections seem quite unreliable,
# so create a new one for each page. # so create a new one for each page.
h = httplib.HTTPConnection(host=self.serverip and self.serverip or self.hostname,
port=80,
strict=True,
timeout=10)
h.putrequest("GET", url)
h.putheader("User-agent","pgsearch/0.2")
if self.serverip: if self.serverip:
h = httplib.HTTPConnection(host=self.serverip, port=80, strict=True, timeout=10)
h.putrequest("GET", url, skip_host=1)
h.putheader("Host", self.hostname) h.putheader("Host", self.hostname)
else:
h = httplib.HTTPConnection(host=self.hostname, port=80, strict=True, timeout=10)
h.putrequest("GET", url)
h.putheader("User-agent","pgsearch/0.2")
h.putheader("Connection","close") h.putheader("Connection","close")
if self.scantimes.has_key(url): if self.scantimes.has_key(url):
h.putheader("If-Modified-Since", formatdate(time.mktime(self.scantimes[url].timetuple()))) h.putheader("If-Modified-Since", formatdate(time.mktime(self.scantimes[url].timetuple())))
Expand Down

0 comments on commit f9486b5

Please sign in to comment.