This repository has been archived by the owner. It is now read-only.
Permalink
Cannot retrieve contributors at this time
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
786 lines (660 sloc)
20.9 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Identify issues that need triage.""" | |
import datetime
import json
import logging
import os
import pprint

import fire
import numpy as np
import retrying
from dateutil import parser as dateutil_parser

from code_intelligence import graphql
from code_intelligence import util
PROJECT_CARD_ID = os.getenv('INPUT_NEEDS_TRIAGE_PROJECT_CARD_ID', "MDEzOlByb2plY3RDb2x1bW41OTM0MzEz") | |
# TODO(jlewi): If we make this an app maybe we should read this from a .github | |
# file | |
ALLOWED_PRIORITY = ["priority/p0", "priority/p1", "priority/p2", | |
"priority/p3"] | |
REQUIRES_PROJECT = ["priority/p0", "priority/p1"] | |
TRIAGE_PROJECT = "Needs Triage" | |
class TriageInfo: | |
"""Class describing whether an issue needs triage""" | |
def __init__(self): | |
self.issue = None | |
self.triage_project_card = None | |
# The times of various events | |
self.kind_time = None | |
self.priority_time = None | |
self.project_time = None | |
self.area_time = None | |
self.closed_at = None | |
self.requires_project = False | |
@classmethod | |
def from_issue(cls, issue): | |
"""Construct TriageInfo from the supplied issue""" | |
info = TriageInfo() | |
info.issue = issue | |
labels = graphql.unpack_and_split_nodes(issue, ["labels", "edges"]) | |
project_cards = graphql.unpack_and_split_nodes(issue, | |
["projectCards", "edges"]) | |
events = graphql.unpack_and_split_nodes(issue, | |
["timelineItems", "edges"]) | |
for l in labels: | |
name = l["name"] | |
if name in ALLOWED_PRIORITY: | |
info.requires_project = name in REQUIRES_PROJECT | |
for c in project_cards: | |
if c.get("project").get("name") == TRIAGE_PROJECT: | |
info.triage_project_card = c | |
break | |
# TODO(jlewi): Could we potentially miss some events since we aren't | |
# paginating through all events for an issue? This should no longer | |
# be an issue because _process_issue will call _get_issue and paginate | |
# through all results. | |
for e in events: | |
if not "createdAt" in e: | |
continue | |
t = dateutil_parser.parse(e.get("createdAt")) | |
if e.get("__typename") == "LabeledEvent": | |
name = e.get("label").get("name") | |
if name.startswith("kind"): | |
if info.kind_time: | |
continue | |
info.kind_time = t | |
if name.startswith("area") or name.startswith("platform"): | |
if info.area_time: | |
continue | |
info.area_time = t | |
if name in ALLOWED_PRIORITY: | |
if info.priority_time: | |
continue | |
info.priority_time = t | |
if e.get("__typename") == "AddedToProjectEvent": | |
if info.project_time: | |
continue | |
info.project_time = t | |
if issue.get("closedAt"): | |
info.closed_at = dateutil_parser.parse(issue.get("closedAt")) | |
return info | |
def __eq__(self, other): | |
for f in ["kind_time", "priority_time", "project_time", "area_time", | |
"closed_at", "in_triage_project", "requires_project"]: | |
if getattr(self, f) != getattr(other, f): | |
return False | |
if self.in_triage_project: | |
if self.triage_project_card["id"] != other.triage_project_card["id"]: | |
return False | |
return True | |
@property | |
def needs_triage(self): | |
"""Return true if the issue needs triage""" | |
# closed issues don't need triage | |
if self.issue["state"].lower() == "closed": | |
return False | |
# If any events are missing then we need triage | |
for f in ["kind_time", "priority_time", "area_time"]: | |
if not getattr(self, f): | |
return True | |
if self.requires_project and not self.project_time: | |
return True | |
return False | |
def __repr__(self): | |
pieces = ["needs_triage={0}".format(self.needs_triage)] | |
for f in ["kind_time", "priority_time", "project_time", "area_time", | |
"closed_at", "in_triage_project"]: | |
v = getattr(self, f) | |
if not v: | |
continue | |
if isinstance(v, datetime.datetime): | |
v = v.isoformat() | |
pieces.append("{0}={1}".format(f, v)) | |
return ";".join(pieces) | |
def message(self): | |
"""Return a human readable message.""" | |
if not self.needs_triage: | |
return "Issue doesn't need attention." | |
lines = [] | |
if self.needs_triage: | |
lines.append("Issue needs triage:") | |
if not self.kind_time: | |
lines.append("\t Issue needs a kind label") | |
if not self.priority_time: | |
lines.append("\t Issue needs one of the priorities {0}".format(ALLOWED_PRIORITY)) | |
if not self.area_time: | |
lines.append("\t Issue needs an area label") | |
if self.requires_project and not self.project_time: | |
lines.append("\t Issues with priority in {0} need to be assigned to a project".format(REQUIRES_PROJECT)) | |
return "\n".join(lines) | |
@property | |
def triaged_at(self): | |
"""Returns a datetime representing the time it was triage or None.""" | |
if self.needs_triage: | |
return None | |
# Determine whether issue was triaged by being closed or not | |
events = [self.kind_time, | |
self.priority_time, | |
self.area_time] | |
if self.requires_project: | |
events.append(self.project_time) | |
has_all_events = True | |
for e in events: | |
if not e: | |
has_all_events = False | |
if has_all_events: | |
events = sorted(events) | |
return events[-1] | |
else: | |
return self.closed_at | |
@property | |
def in_triage_project(self): | |
return self.triage_project_card is not None | |
class IssueTriage(object): | |
def __init__(self): | |
self._client = None | |
@property | |
def client(self): | |
if not self._client: | |
self._client = graphql.GraphQLClient() | |
return self._client | |
def _iter_issues(self, org, repo, issue_filter=None, output=None): | |
"""Iterate over issues in batches for a repository | |
Args: | |
org: The org that owns the repository | |
repo: The directory for the repository | |
output: The directory to write the results; if not specified results | |
are not downloaded | |
issue_filter: Used to filter issues to consider based on when they were | |
last updated | |
Writes the issues along with the first comments to a file in output | |
directory. | |
""" | |
client = graphql.GraphQLClient() | |
num_issues_per_page = 100 | |
if not issue_filter: | |
today = datetime.datetime.now() | |
today = datetime.datetime(year=today.year, month=today.month, day=today.day) | |
start_time = today - datetime.timedelta(days=60) | |
# Labels and projects are available via timeline events. | |
# However, in timeline events project info (e.g. actual project name) | |
# is only in developer preview. | |
# The advantage of using labels and projectCards (as opposed to timeline | |
# events) is that its much easier to bound the number of items we need | |
# to fetch in order to return all labels and projects | |
# for timeline items its much more likely the labels and projects we care | |
# about will require pagination. | |
# | |
# TODO(jlewi): We should add a method to fetch all issue timeline items | |
# via pagination in the case the number of items exceeds the page size. | |
# | |
# TODO(jlewi): We need to consider closed issues if we want to compute | |
# stats. | |
# | |
# TODO(jlewi): We should support fetching only OPEN issues; if we are | |
# deciding which issues need triage or have been triaged we really only | |
# need to look at open isues. Closed Issues will automatically move to | |
# the appropriate card in the Kanban board. | |
query = """query getIssues($org: String!, $repo: String!, $pageSize: Int, $issueCursor: String, $filter: IssueFilters) { | |
repository(owner: $org, name: $repo) { | |
issues(first: $pageSize, filterBy: $filter, after: $issueCursor) { | |
totalCount | |
pageInfo { | |
endCursor | |
hasNextPage | |
} | |
edges { | |
node { | |
author { | |
__typename | |
... on User { | |
login | |
} | |
... on Bot { | |
login | |
} | |
} | |
id | |
title | |
body | |
url | |
state | |
createdAt | |
closedAt | |
labels(first: 30) { | |
totalCount | |
edges { | |
node { | |
name | |
} | |
} | |
} | |
projectCards(first: 30) { | |
totalCount | |
pageInfo { | |
endCursor | |
hasNextPage | |
} | |
edges { | |
node { | |
id | |
project { | |
name | |
number | |
} | |
} | |
} | |
} | |
timelineItems(first: 30) { | |
totalCount | |
pageInfo { | |
endCursor | |
hasNextPage | |
} | |
edges { | |
node { | |
__typename | |
... on AddedToProjectEvent { | |
createdAt | |
} | |
... on LabeledEvent { | |
createdAt | |
label { | |
name | |
} | |
} | |
... on ClosedEvent { | |
createdAt | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
""" | |
shard = 0 | |
num_pages = None | |
if output and not os.path.exists(output): | |
os.makedirs(output) | |
total_issues = None | |
has_next_issues_page = True | |
# TODO(jlewi): We should persist the cursors to disk so we can resume | |
# after errors | |
issues_cursor = None | |
shard_writer = None | |
if not issue_filter: | |
start_time = datetime.datetime.now() - datetime.timedelta(weeks=24) | |
issue_filter = { | |
"since": start_time.isoformat(), | |
} | |
while has_next_issues_page: | |
variables = { | |
"org": org, | |
"repo": repo, | |
"pageSize": num_issues_per_page, | |
"issueCursor": issues_cursor, | |
"filter": issue_filter, | |
} | |
results = client.run_query(query, variables=variables) | |
if results.get("errors"): | |
message = json.dumps(results.get("errors")) | |
logging.error(f"There was a problem issuing the query; errors:\n{message}\n") | |
return | |
if not total_issues: | |
total_issues = results["data"]["repository"]["issues"]["totalCount"] | |
num_pages = int(np.ceil(total_issues/float(num_issues_per_page))) | |
logging.info("%s/%s has a total of %s issues", org, repo, total_issues) | |
if output and not shard_writer: | |
logging.info("initializing the shard writer") | |
shard_writer = graphql.ShardWriter(num_pages, output, | |
prefix="issues-{0}-{1}".format(org, repo)) | |
issues = graphql.unpack_and_split_nodes( | |
results, ["data", "repository", "issues", "edges"]) | |
yield issues | |
if shard_writer: | |
shard_writer.write_shard(issues) | |
page_info = results["data"]["repository"]["issues"]["pageInfo"] | |
issues_cursor = page_info["endCursor"] | |
has_next_issues_page = page_info["hasNextPage"] | |
def download_issues(self, repo, output, issue_filter=None): | |
"""Download the issues to the specified directory | |
Args: | |
repo: Repository in the form {org}/{repo} | |
""" | |
org, repo_name = repo.split("/") | |
for shard_index, shard in enumerate(self._iter_issues(org, repo_name, | |
output=output, | |
issue_filter=None)): | |
logging.info("Wrote shard %s", shard_index) | |
def _build_dataframes(self, issues_dir): | |
"""Build dataframes containing triage info. | |
Args: | |
issues_dir: The directory containing issues | |
Returns: | |
data: | |
""" | |
def update_kanban_board(self): | |
"""Checks if any issues in the needs triage board can be removed. | |
""" | |
query = """query getIssues($issueCursor: String) { | |
search(type: ISSUE, query: "is:open is:issue org:kubeflow project:kubeflow/26", first: 100, after: $issueCursor) { | |
issueCount | |
pageInfo { | |
endCursor | |
hasNextPage | |
} | |
edges { | |
node { | |
__typename | |
... on Issue { | |
author { | |
__typename | |
... on User { | |
login | |
} | |
... on Bot { | |
login | |
} | |
} | |
id | |
title | |
body | |
url | |
state | |
createdAt | |
closedAt | |
labels(first: 30) { | |
totalCount | |
edges { | |
node { | |
name | |
} | |
} | |
} | |
projectCards(first: 30) { | |
totalCount | |
pageInfo { | |
endCursor | |
hasNextPage | |
} | |
edges { | |
node { | |
id | |
project { | |
name | |
number | |
} | |
} | |
} | |
} | |
timelineItems(first: 30) { | |
totalCount | |
pageInfo { | |
endCursor | |
hasNextPage | |
} | |
edges { | |
node { | |
__typename | |
... on AddedToProjectEvent { | |
createdAt | |
} | |
... on LabeledEvent { | |
createdAt | |
label { | |
name | |
} | |
} | |
... on ClosedEvent { | |
createdAt | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
""" | |
issues_cursor = None | |
has_next_issues_page = True | |
while has_next_issues_page: | |
variables = { | |
"issueCursor": issues_cursor, | |
} | |
results = self.client.run_query(query, variables=variables) | |
if results.get("errors"): | |
message = json.dumps(results.get("errors")) | |
logging.error(f"There was a problem issuing the query; errors:\n{message}\n") | |
return | |
issues = graphql.unpack_and_split_nodes( | |
results, ["data", "search", "edges"]) | |
for i in issues: | |
self._process_issue(i) | |
page_info = results["data"]["search"]["pageInfo"] | |
issues_cursor = page_info["endCursor"] | |
has_next_issues_page = page_info["hasNextPage"] | |
def triage(self, repo, output=None, **kwargs): | |
"""Triage issues in the specified repository. | |
Args: | |
repo: Repository in the form {org}/{repo} | |
output: (Optional) directory to write issues | |
""" | |
org, repo_name = repo.split("/") | |
for shard_index, shard in enumerate(self._iter_issues(org, repo_name, | |
output=output, | |
**kwargs)): | |
logging.info("Processing shard %s", shard_index) | |
for i in shard: | |
self._process_issue(i) | |
def _get_issue(self, url): | |
"""Gets the complete issue. | |
This function does pagination to fetch all timeline items. | |
""" | |
# TODO(jlewi): We should impelement pagination for labels as well | |
query = """query getIssue($url: URI!, $timelineCursor: String) { | |
resource(url: $url) { | |
__typename | |
... on Issue { | |
author { | |
__typename | |
... on User { | |
login | |
} | |
... on Bot { | |
login | |
} | |
} | |
id | |
title | |
body | |
url | |
state | |
labels(first: 30) { | |
totalCount | |
edges { | |
node { | |
name | |
} | |
} | |
} | |
projectCards(first:30, ){ | |
totalCount | |
edges { | |
node { | |
id | |
project { | |
name | |
number | |
} | |
} | |
} | |
} | |
timelineItems(first: 30, after: $timelineCursor) { | |
totalCount | |
pageInfo { | |
endCursor | |
hasNextPage | |
} | |
edges { | |
node { | |
__typename | |
... on AddedToProjectEvent { | |
createdAt | |
} | |
... on LabeledEvent { | |
createdAt | |
label { | |
name | |
} | |
} | |
... on ClosedEvent { | |
createdAt | |
} | |
} | |
} | |
} | |
} | |
} | |
}""" | |
variables = { | |
"url": url, | |
"timelineCursor": None, | |
} | |
results = self.client.run_query(query, variables=variables) | |
if results.get("errors"): | |
message = json.dumps(results.get("errors")) | |
logging.error(f"There was a problem issuing the query; errors:\n{message}\n") | |
return | |
issue = results["data"]["resource"] | |
has_next_page = issue["timelineItems"]["pageInfo"]["hasNextPage"] | |
while has_next_page: | |
variables["timelineCursor"] = issue["timelineItems"]["pageInfo"]["endCursor"] | |
results = self.client.run_query(query, variables=variables) | |
edges = (issue["timelineItems"]["edges"] + | |
results["data"]["resource"]["timelineItems"]["edges"]) | |
issue["timelineItems"]["edges"] = edges | |
issue["timelineItems"]["pageInfo"] = ( | |
results["data"]["resource"]["timelineItems"]["pageInfo"]) | |
has_next_page = (results["data"]["resource"]["timelineItems"]["pageInfo"] | |
["hasNextPage"]) | |
return issue | |
def triage_issue(self, url, project=None, add_comment=False): | |
"""Triage a single issue. | |
Args: | |
url: The url of the issue e.g. | |
https://github.com/kubeflow/community/issues/280 | |
project: (Optional) If supplied the URL of the project to add issues | |
needing triage to. | |
add_comment: Set to true to comment on the issue with why | |
the issue needs triage | |
""" | |
issue = self._get_issue(url) | |
return self._process_issue(issue) | |
def _process_issue(self, issue, add_comment=False): | |
"""Process a single issue. | |
Args: | |
issue: Issue to process. | |
""" | |
if issue["timelineItems"]["pageInfo"]["hasNextPage"]: | |
# Since not all timelineItems were fetched; we need to refetch | |
# the issue and this time paginate to get all items. | |
logging.info("Issue: %s; fetching all timeline items", issue["url"]) | |
issue = self._get_issue(issue["url"]) | |
info = TriageInfo.from_issue(issue) | |
url = info.issue["url"] | |
logging.info(f"Issue {url}:\nstate:{info.message()}\n") | |
if not info.needs_triage: | |
self._remove_triage_project(info) | |
return | |
# TODO(jlewi): We should check if there is already a triage message | |
if add_comment: | |
mutation = """ | |
mutation AddIssueComment($input: AddCommentInput!){ | |
addComment(input:$input){ | |
subject { | |
id | |
} | |
} | |
} | |
""" | |
mutation_variables = { | |
"input": { | |
"subjectId": issue["id"], | |
"body": info.message(), | |
} | |
} | |
results = client.run_query(mutation, variables=mutation_variables) | |
if results.get("errors"): | |
message = json.dumps(results.get("errors")) | |
logging.error(f"There was a problem commenting on the issue; errors:\n{message}\n") | |
return | |
# add project | |
self._add_triage_project(info) | |
return info | |
def _remove_triage_project(self, issue_info): | |
"""Remove the issue from the triage project. | |
Args: | |
issue_info: TriageInfo | |
""" | |
if not issue_info.in_triage_project: | |
return | |
add_card = """ | |
mutation DeleteFromTriageProject($input: DeleteProjectCardInput!){ | |
deleteProjectCard(input:$input) { | |
clientMutationId | |
} | |
} | |
""" | |
variables = { | |
"input": { | |
"cardId": issue_info.triage_project_card["id"], | |
} | |
} | |
logging.info("Issue %s remove from triage project", issue_info.issue["url"]) | |
results = self.client.run_query(add_card, variables=variables) | |
if results.get("errors"): | |
message = json.dumps(results.get("errors")) | |
logging.error(f"There was a problem removing the issue from the triage project; errors:\n{message}\n") | |
return | |
def _add_triage_project(self, issue_info): | |
"""Add the issue to the triage project if needed | |
Args: | |
issue_info: IssueInfo | |
""" | |
if issue_info.in_triage_project: | |
logging.info("Issue %s already in triage project", | |
issue_info.issue["url"]) | |
return | |
add_card = """ | |
mutation AddProjectIssueCard($input: AddProjectCardInput!){ | |
addProjectCard(input:$input) { | |
clientMutationId | |
} | |
} | |
""" | |
add_variables = { | |
"input": { | |
"contentId": issue_info.issue["id"], | |
"projectColumnId": PROJECT_CARD_ID, | |
} | |
} | |
results = self.client.run_query(add_card, variables=add_variables) | |
if results.get("errors"): | |
# Check if the error was because the issue was already added | |
ALREADY_ADDED = "Project already has the associated issue" | |
if not (len(results["errors"]) == 1 and | |
results["errors"][0]["message"] == ALREADY_ADDED): | |
message = json.dumps(results.get("errors")) | |
logging.error(f"There was a problem adding the issue to the project; errors:\n{message}\n") | |
return | |
if __name__ == "__main__": | |
logging.basicConfig(level=logging.INFO, | |
format=('%(levelname)s|%(asctime)s' | |
'|%(message)s|%(pathname)s|%(lineno)d|'), | |
datefmt='%Y-%m-%dT%H:%M:%S', | |
) | |
fire.Fire(IssueTriage) |