Skip to content

Commit

Permalink
bug 931803 - ftpscraper to ignore 404 errors, r=selenamarie
Browse files Browse the repository at this point in the history
  • Loading branch information
peterbe committed Oct 28, 2013
1 parent 7490cd5 commit 9e8bf28
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 4 deletions.
14 changes: 11 additions & 3 deletions socorro/cron/jobs/ftpscraper.py
Expand Up @@ -52,6 +52,8 @@ def patient_urlopen(url, max_attempts=4, sleep_time=20):
attempts += 1
page = urllib2.urlopen(url)
except urllib2.HTTPError, err:
if err.code == 404:
return
if err.code < 500:
raise
time.sleep(sleep_time)
Expand All @@ -68,6 +70,8 @@ def getLinks(url, startswith=None, endswith=None):
html = ''
results = []
content = patient_urlopen(url, sleep_time=30)
if not content:
return []
html = lxml.html.document_fromstring(content)

for element, attribute, link, pos in html.iterlinks():
Expand All @@ -82,9 +86,11 @@ def getLinks(url, startswith=None, endswith=None):

def parseInfoFile(url, nightly=False):
content = patient_urlopen(url)
contents = content.splitlines()
results = {}
bad_lines = []
if not content:
return results, bad_lines
contents = content.splitlines()
if nightly:
results = {'buildID': contents[0], 'rev': contents[1]}
if len(contents) > 2:
Expand All @@ -111,15 +117,17 @@ def parseB2GFile(url, nightly=False, logger=None):
TODO handle exception if file does not exist
"""
content = patient_urlopen(url)
if not content:
return
results = json.loads(content)

# bug 869564: Return None if update_channel is 'default'
if results['update_channel'] == 'default':
if results['update_channel'] == 'default' and logger:
logger.warning(
"Found default update_channel for buildid: %s. Skipping.",
results['buildid']
)
return None
return

# Default 'null' channels to nightly
results['build_type'] = results['update_channel'] or 'nightly'
Expand Down
50 changes: 49 additions & 1 deletion socorro/unittest/cron/jobs/test_ftpscraper.py
Expand Up @@ -152,7 +152,7 @@ def mocked_urlopener(url):
mock_calls.append(url)
if len(mock_calls) == 1:
raise urllib2.HTTPError(url, 500, "Server Error", {}, None)
raise urllib2.HTTPError(url, 404, "Page Not Found", {}, None)
raise urllib2.HTTPError(url, 400, "Bad Request", {}, None)

self.urllib2.side_effect = mocked_urlopener
# very impatient version
Expand All @@ -162,6 +162,19 @@ def mocked_urlopener(url):
'http://doesntmatt.er',
)

def test_patient_urlopen_pass_404_errors(self):
    """A 404 response makes patient_urlopen() return None, not raise."""
    requested_urls = []

    @stringioify
    def mocked_urlopener(url):
        requested_urls.append(url)
        raise urllib2.HTTPError(url, 404, "Not Found", {}, None)
    self.urllib2.side_effect = mocked_urlopener

    response = ftpscraper.patient_urlopen('http://doesntmatt.er')
    self.assertEqual(response, None)
    # a 404 is final: exactly one fetch, no retries
    assert len(requested_urls) == 1, requested_urls

@mock.patch('socorro.cron.jobs.ftpscraper.time')
def test_patient_urlopen_eventual_retriederror(self, mocked_time):

Expand Down Expand Up @@ -219,6 +232,17 @@ def mocked_urlopener(url):
[]
)

def test_getLinks_with_page_not_found(self):
    """getLinks() on a URL that 404s yields an empty list of links."""
    @stringioify
    def mocked_urlopener(url):
        raise urllib2.HTTPError(url, 404, "Not Found", {}, None)
    self.urllib2.side_effect = mocked_urlopener

    links = ftpscraper.getLinks('ONE')
    self.assertEqual(links, [])

def test_parseInfoFile(self):
@stringioify
def mocked_urlopener(url):
Expand Down Expand Up @@ -286,6 +310,19 @@ def mocked_urlopener(url):
({'BUILDID': '123'}, ['buildID'])
)

def test_parseInfoFile_with_page_not_found(self):
    """parseInfoFile() on a URL that 404s returns empty results and no bad lines."""
    @stringioify
    def mocked_urlopener(url):
        raise urllib2.HTTPError(url, 404, "Not Found", {}, None)
    self.urllib2.side_effect = mocked_urlopener

    results, bad_lines = ftpscraper.parseInfoFile('ONE')
    self.assertEqual(results, {})
    self.assertEqual(bad_lines, [])

def test_getRelease(self):
@stringioify
def mocked_urlopener(url):
Expand Down Expand Up @@ -318,6 +355,17 @@ def mocked_urlopener(url):
[('linux', 'ONE', 'build-11', {'BUILDID': '123'}, [])]
)

def test_parseB2GFile_with_page_not_found(self):
    """parseB2GFile() on a URL that 404s returns None."""
    @stringioify
    def mocked_urlopener(url):
        raise urllib2.HTTPError(url, 404, "Not Found", {}, None)
    self.urllib2.side_effect = mocked_urlopener

    result = ftpscraper.parseB2GFile('FIVE', nightly=True)
    self.assertEqual(result, None)

def test_getNightly(self):
@stringioify
def mocked_urlopener(url):
Expand Down

0 comments on commit 9e8bf28

Please sign in to comment.