Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/marco-c/bugbug into cleanup
Browse files: browse the repository at this point in the history
  • Loading branch information
ayush1999 committed Dec 13, 2018
2 parents 5555686 + 289ff7b commit 1470840
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 2 deletions.
2 changes: 1 addition & 1 deletion bugbug/bug_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def fit(self, x, y=None):
return self

def cleanup(self, text):
return re.sub(r"http\S+", "URL", text)
return re.sub(r'http\S+', 'URL', text)

def transform(self, bugs):
results = []
Expand Down
19 changes: 19 additions & 0 deletions bugbug/labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,25 @@ def get_bugbug_labels(kind='bug', augmentation=False):
return {bug_id: label for bug_id, label in classes.items() if bug_id in bug_ids}


def get_uplift_labels():
    """Build the uplift labels from the attachment flags of every bug.

    Iterates over the bugs yielded by ``bugzilla.get_bugs()`` and inspects
    the flags of each attachment. Only flags whose name starts with
    ``approval-mozilla-`` are considered: a ``+`` status labels the bug
    ``True``, a ``-`` status labels it ``False``, and any other status is
    ignored.

    Returns:
        A dict mapping integer bug IDs to a boolean label. Bugs without a
        matching ``+``/``-`` flag are absent from the dict. When a bug has
        several matching flags, the last one encountered wins.
    """
    labels_by_bug = {}

    for bug in bugzilla.get_bugs():
        key = int(bug['id'])

        for patch in bug['attachments']:
            for flag in patch['flags']:
                # Only approval flags carry an uplift decision.
                if not flag['name'].startswith('approval-mozilla-'):
                    continue

                status = flag['status']
                if status == '+':
                    labels_by_bug[key] = True
                elif status == '-':
                    labels_by_bug[key] = False
                # Any other status leaves the current label untouched.

    return labels_by_bug


def get_all_bug_ids():
bug_ids = set()

Expand Down
62 changes: 62 additions & 0 deletions bugbug/models/uplift.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import xgboost
from sklearn.feature_extraction import DictVectorizer
from sklearn.pipeline import FeatureUnion
from sklearn.pipeline import Pipeline

from bugbug import bug_features
from bugbug import labels
from bugbug.model import Model
from bugbug.utils import DictSelector


class UpliftModel(Model):
    """Model whose classes are the labels from ``labels.get_uplift_labels()``.

    The extraction pipeline first runs a ``BugExtractor`` over a fixed list
    of bug feature extractors, then joins three branches in a
    ``FeatureUnion``: the ``data`` entry through a ``DictVectorizer``, and
    the ``title`` and ``comments`` entries each through the text vectorizer
    provided by the base class. The classifier is an ``XGBClassifier``.
    """

    def __init__(self, lemmatization=False):
        """Set up the labels, the feature extraction pipeline and the classifier.

        Args:
            lemmatization: Forwarded unchanged to ``Model.__init__``.
        """
        Model.__init__(self, lemmatization)

        self.classes = labels.get_uplift_labels()

        extractors = [
            bug_features.has_str(),
            bug_features.has_regression_range(),
            bug_features.severity(),
            bug_features.keywords(),
            bug_features.is_coverity_issue(),
            bug_features.has_crash_signature(),
            bug_features.has_url(),
            bug_features.has_w3c_url(),
            bug_features.has_github_url(),
            bug_features.whiteboard(),
            bug_features.patches(),
            bug_features.landings(),
            bug_features.title(),
            bug_features.comments(),
        ]

        # First stage: run the feature extractors over each bug.
        bug_extractor = bug_features.BugExtractor(extractors)

        # One branch per entry selected from the extractor output.
        data_branch = Pipeline([
            ('selector', DictSelector(key='data')),
            ('vect', DictVectorizer()),
        ])
        title_branch = Pipeline([
            ('selector', DictSelector(key='title')),
            ('tfidf', self.text_vectorizer(stop_words='english')),
        ])
        comments_branch = Pipeline([
            ('selector', DictSelector(key='comments')),
            ('tfidf', self.text_vectorizer(stop_words='english')),
        ])

        combined = FeatureUnion(
            transformer_list=[
                ('data', data_branch),
                ('title', title_branch),
                ('comments', comments_branch),
            ],
        )

        self.extraction_pipeline = Pipeline([
            ('bug_extractor', bug_extractor),
            ('union', combined),
        ])

        self.clf = xgboost.XGBClassifier(n_jobs=16)
5 changes: 4 additions & 1 deletion run.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
parser.add_argument('--lemmatization', help='Perform lemmatization (using spaCy)', action='store_true')
parser.add_argument('--download', help='Download data required for training', action='store_true')
parser.add_argument('--train', help='Perform training', action='store_true')
parser.add_argument('--goal', help='Goal of the classifier', choices=['bug', 'regression', 'tracking', 'qaneeded'], default='bug')
parser.add_argument('--goal', help='Goal of the classifier', choices=['bug', 'regression', 'tracking', 'qaneeded', 'uplift'], default='bug')
args = parser.parse_args()

if args.download:
Expand All @@ -34,6 +34,9 @@
elif args.goal == 'qaneeded':
from bugbug.models.qaneeded import QANeededModel
model_class = QANeededModel
elif args.goal == 'uplift':
from bugbug.models.uplift import UpliftModel
model_class = UpliftModel

if args.train:
model = model_class(args.lemmatization)
Expand Down

0 comments on commit 1470840

Please sign in to comment.