Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bugbug/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
"uplift": "bugbug.models.uplift.UpliftModel",
"worksforme": "bugbug.models.worksforme.WorksForMeModel",
"fenixcomponent": "bugbug.models.fenixcomponent.FenixComponentModel",
"componentspecific": "bugbug.models.component_specific.ComponentSpecificModel",
}


Expand Down
136 changes: 136 additions & 0 deletions bugbug/models/component_specific.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import logging
from datetime import datetime, timezone

import dateutil.parser
import xgboost
from dateutil.relativedelta import relativedelta
from sklearn.compose import ColumnTransformer
from sklearn.feature_extraction import DictVectorizer
from sklearn.pipeline import Pipeline

from bugbug import bug_features, bugzilla, feature_cleanup, utils
from bugbug.model import BugModel

# Configure the root logger at INFO level as an import-time side effect, then
# create the module-level logger used by the model below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class ComponentSpecificModel(BugModel):
    """Binary bug classifier tied to a single ``product::component`` pair.

    Training labels are derived from each bug's history: a bug that was moved
    out of the configured product and component is labeled ``1``, and a bug
    that was moved into them is labeled ``0``.
    """

    def __init__(self, lemmatization=False, product="Firefox", component="General"):
        """Build the feature-extraction and classification pipelines.

        Args:
            lemmatization: Forwarded unchanged to ``BugModel.__init__``.
            product: Product name matched against "product" history changes.
            component: Component name matched against "component" history
                changes.
        """
        BugModel.__init__(self, lemmatization)

        self.product = product
        self.component = component

        feature_extractors = [
            bug_features.HasSTR(),
            bug_features.Severity(),
            bug_features.Keywords(),
            bug_features.HasCrashSignature(),
            bug_features.HasURL(),
            bug_features.HasW3CURL(),
            bug_features.HasGithubURL(),
            bug_features.Whiteboard(),
            bug_features.Patches(),
            bug_features.Landings(),
        ]

        cleanup_functions = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]

        self.extraction_pipeline = Pipeline(
            [
                (
                    "bug_extractor",
                    bug_features.BugExtractor(
                        feature_extractors, cleanup_functions, rollback=True
                    ),
                ),
            ]
        )

        # "data" is vectorized as a dict of features; "title" and "comments"
        # go through the text vectorizer (presumably inherited from BugModel).
        self.clf = Pipeline(
            [
                (
                    "union",
                    ColumnTransformer(
                        [
                            ("data", DictVectorizer(), "data"),
                            ("title", self.text_vectorizer(min_df=0.0001), "title"),
                            (
                                "comments",
                                self.text_vectorizer(min_df=0.0001),
                                "comments",
                            ),
                        ]
                    ),
                ),
                (
                    "estimator",
                    xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()),
                ),
            ]
        )

    def _label_for_history_entry(self, changes):
        """Classify one history entry from its list of field changes.

        Args:
            changes: Iterable of dicts with "field_name", "added" and
                "removed" keys.

        Returns:
            ``1`` if the entry moved the bug out of both ``self.product`` and
            ``self.component``, ``0`` if it moved the bug into both, and
            ``None`` otherwise. "Moved out" takes precedence when both hold.
        """
        moved_to_product = False
        moved_to_component = False
        moved_from_product = False
        moved_from_component = False

        for change in changes:
            field_name = change["field_name"]

            if field_name == "product":
                if change["added"] == self.product:
                    moved_to_product = True
                elif change["removed"] == self.product:
                    moved_from_product = True
            elif field_name == "component":
                if change["added"] == self.component:
                    moved_to_component = True
                elif change["removed"] == self.component:
                    moved_from_component = True

        # Product and component must both change in the same history entry;
        # an entry that changes only one of them yields no label.
        if moved_from_product and moved_from_component:
            return 1
        if moved_to_product and moved_to_component:
            return 0
        return None

    def get_labels(self):
        """Collect training labels from the history of recent bugs.

        Bugs created more than three years ago are skipped. When several
        history entries of the same bug produce a label, the last one wins.

        Returns:
            A ``(classes, [0, 1])`` tuple where ``classes`` maps bug IDs to
            ``1`` (moved out of the configured product::component) or ``0``
            (moved into it).
        """
        # Computed once so every bug is compared against the same cutoff
        # instead of re-evaluating "now" on each iteration.
        oldest_creation_time = datetime.now(timezone.utc) - relativedelta(years=3)

        classes = {}

        for bug_data in bugzilla.get_bugs():
            creation_time = dateutil.parser.parse(bug_data["creation_time"])
            if creation_time < oldest_creation_time:
                continue

            # Only keep bugs that were moved out of the configured
            # product::component, or moved into it.
            for history in bug_data["history"]:
                label = self._label_for_history_entry(history["changes"])
                if label is not None:
                    classes[bug_data["id"]] = label

        logger.info(
            "%d bugs were moved out of %s::%s",
            sum(label == 1 for label in classes.values()),
            self.product,
            self.component,
        )
        logger.info(
            "%d bugs were moved in %s::%s",
            sum(label == 0 for label in classes.values()),
            self.product,
            self.component,
        )

        return classes, [0, 1]

    def get_feature_names(self):
        """Return the output feature names of the "union" transformer step."""
        return self.clf.named_steps["union"].get_feature_names_out()
1 change: 1 addition & 0 deletions http_service/bugbug_http/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
MODELS_NAMES = [
"defectenhancementtask",
"component",
"componentspecific",
"invalidcompatibilityreport",
"needsdiagnosis",
"regression",
Expand Down
37 changes: 37 additions & 0 deletions infra/data-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,41 @@ tasks:
owner: bugbug-team@mozilla.com
source: ${repository}/raw/master/data-pipeline.yml

# Task that trains the "componentspecific" model; it depends on bugs-retrieval.
- ID: train-componentspecific
created: { $fromNow: "" }
deadline: { $fromNow: "3 days" }
expires: { $fromNow: "1 year" }
provisionerId: proj-bugbug
workerType: compute-small
dependencies:
- bugs-retrieval
payload:
# NOTE(review): presumably expressed in seconds (25200 s = 7 h) — confirm
# against the worker payload schema.
maxRunTime: 25200
image: mozilla/bugbug-base:${version}
# Runs `bugbug-train componentspecific` inside the image above.
command:
- bugbug-train
- componentspecific

# Files exposed by the task: the compressed model archive and metrics.json.
artifacts:
public/componentspecificmodel.tar.zst:
path: /componentspecificmodel.tar.zst
type: file
public/metrics.json:
path: /metrics.json
type: file

# Failure notification e-mail plus index routes (per version, per date, latest).
routes:
- notify.email.bugbug-team@mozilla.com.on-failed
- index.project.bugbug.train_componentspecific.${version}
- index.project.bugbug.train_componentspecific.per_version.${version}.${year}.${month}.${day}.${hour}.${minute}.${second}
- index.project.bugbug.train_componentspecific.per_date.${year}.${month}.${day}.${hour}.${minute}.${second}.${version}
- index.project.bugbug.train_componentspecific.latest
metadata:
name: bugbug train componentspecific model
description: bugbug train componentspecific model
owner: bugbug-team@mozilla.com
source: ${repository}/raw/master/data-pipeline.yml

- ID: train-defectenhancementtask
created: { $fromNow: "" }
deadline: { $fromNow: "3 days" }
Expand Down Expand Up @@ -1374,6 +1409,7 @@ tasks:
workerType: batch
dependencies:
- train-component
- train-componentspecific
- train-defectenhancementtask
- train-regression
- train-regressor
Expand Down Expand Up @@ -1416,6 +1452,7 @@ tasks:
dependencies:
- train-defectenhancementtask
- train-component
- train-componentspecific
- train-regression
- train-stepstoreproduce
- train-spambug
Expand Down