/
utils.py
124 lines (101 loc) · 3.94 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import re
import urllib
from django.conf import settings
# Search terms that will match too many bug summaries and so not result in
# useful suggestions. Built once at import time as a frozenset so that the
# membership test below does not rebuild and scan a list on every call.
_UNHELPFUL_SEARCH_TERMS = frozenset([
    'automation.py',
    'remoteautomation.py',
    'Shutdown',
    'undefined',
    'Main app process exited normally',
    'Traceback (most recent call last):',
    'Return code: 0',
    'Return code: 1',
    'Return code: 2',
    'Return code: 9',
    'Return code: 10',
    'Exiting 1',
    'Exiting 9',
    'CrashingThread(void *)',
    'libSystem.B.dylib + 0xd7a',
    'linux-gate.so + 0x424',
    'TypeError: content is null',
    'leakcheck',
])


def is_helpful_search_term(search_term):
    """
    Return True if search_term is specific enough to be worth searching for.

    Surrounding whitespace is ignored. A term is helpful when the stripped
    text is longer than 4 characters and is not one of the known
    too-generic terms listed in _UNHELPFUL_SEARCH_TERMS (exact,
    case-sensitive comparison).
    """
    search_term = search_term.strip()
    return len(search_term) > 4 and search_term not in _UNHELPFUL_SEARCH_TERMS
# Matches a leak report message of the form
# "<N> bytes leaked (Object-1, Object-2, ...)" and captures the
# parenthesised object list. Raw strings keep the regex escapes (\d, \(, \[)
# from being treated as invalid string escape sequences.
LEAK_RE = re.compile(r'\d+ bytes leaked \((.+)\)$')
# Matches a line ending in "application crashed [@ <signature>]" (with some
# text before it) and captures the signature.
CRASH_RE = re.compile(r'.+ application crashed \[@ (.+)\]$')
def get_error_search_term(error_line):
    """
    Derive the term used to search for bug suggestions from a failure line.

    For lines in the "FAILURE-TYPE | testNameOrFilePath | message" format the
    candidate is either the object list captured by LEAK_RE (when the message
    is a leak report) or the last path component of the test name. When there
    is no candidate, or is_helpful_search_term rejects it, the whole line is
    used instead, provided the line itself is helpful.

    Returns the chosen term truncated to 100 characters, or None when
    error_line is empty or nothing suitable was found.
    """
    if not error_line:
        return None

    # The approach is strongly inspired by TBPL's summary generator:
    # https://hg.mozilla.org/webtools/tbpl/file/tip/php/inc/AnnotatedSummaryGenerator.php#l73
    candidate = None
    fields = error_line.split(" | ")
    if len(fields) >= 3:
        leak = LEAK_RE.match(fields[2])
        if leak:
            # Leak messages: search on the list of leaked objects.
            candidate = leak.group(1)
        else:
            # If this is a path we only want its last part: keep what
            # follows the final "/" and then the final "\".
            candidate = fields[1].rpartition("/")[2].rpartition("\\")[2]

    # The line was not pipe-delimited, or the candidate would return too many
    # (or irrelevant) results (eg: too short or on the blacklist): fall back
    # to the entire failure line if that is suitable.
    if not candidate or not is_helpful_search_term(candidate):
        if not is_helpful_search_term(error_line):
            return None
        candidate = error_line

    # Searching for extremely long search terms is undesirable, since:
    # a) Bugzilla's max summary length is 256 characters, and once
    #    "Intermittent " and platform/suite information is prefixed, there
    #    are even fewer characters left for the failure string to match.
    # b) For long search terms, the additional length does little to prevent
    #    false positives, but makes false negatives more likely due to
    #    run-to-run variances in the error messages (eg paths, process IDs).
    return candidate[:100]
def get_crash_signature(error_line):
    """
    Detect whether error_line contains a crash signature and return it.

    The signature is the group captured by CRASH_RE. Returns None when
    error_line is empty or None, when CRASH_RE does not match, or when
    is_helpful_search_term rejects the signature.
    """
    # Same guard as get_error_search_term: a missing line means "no
    # signature" rather than a TypeError from the regex engine.
    if not error_line:
        return None

    match = CRASH_RE.match(error_line)
    if not match:
        return None

    signature = match.group(1)
    return signature if is_helpful_search_term(signature) else None
def get_bugs_for_search_term(search, base_uri):
    """
    Fetch the base_uri endpoint, filtering on the given search term.

    The term is URL-encoded and sent as the ``search`` query parameter (no
    other filter is added). Returns whatever get_remote_content returns for
    the resulting URL.
    """
    # Imported at call time, as in the original code
    # (presumably to avoid a circular import -- TODO confirm).
    from treeherder.etl.common import get_remote_content

    # urlencode lives in urllib.parse on Python 3 and in urllib on Python 2;
    # urllib.urlencode raises AttributeError on Python 3.
    try:
        from urllib.parse import urlencode
    except ImportError:
        from urllib import urlencode

    query_string = urlencode({'search': search})
    url = '{0}?{1}'.format(base_uri, query_string)
    return get_remote_content(url)
# Matches a leading log prefix of the form "HH:MM:SS   LEVEL - " (any number
# of digits per time field, one or more spaces before the level) plus at most
# one further space. Written as a raw string so that \d is a regex escape
# rather than an invalid string escape sequence.
mozharness_pattern = re.compile(
    r'^\d+:\d+:\d+[ ]+(?:DEBUG|INFO|WARNING|ERROR|CRITICAL|FATAL) - [ ]?'
)
def get_mozharness_substring(line):
    """
    Return line with the prefix matched by mozharness_pattern removed and
    surrounding whitespace stripped.
    """
    without_prefix = mozharness_pattern.sub('', line)
    return without_prefix.strip()