Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import logging
import re
import google.appengine.ext.ndb as ndb
import models
# Matches links to a Gubernator build page on either hostname and captures the
# part of the URL after "/build": a slash, a run of characters that excludes
# "]", ")" and whitespace, then a slash followed by digits (presumably the
# build number -- confirm against the URL scheme).
XREF_RE = re.compile(r'(?:k8s-gubernator\.appspot\.com|gubernator\.k8s\.io)/build(/[^])\s]+/\d+)')
# Matches the '<!-- META={"approvers":[...]} -->' marker (the quotes around
# "approvers" are optional) and captures the raw text between the brackets.
APPROVERS_RE = re.compile(r'<!-- META={"?approvers"?:\[([^]]*)\]} -->')
def classify_issue(repo, number):
    """
    Classify one issue or pull request using the webhook events stored in
    Datastore for it.

    Args:
        repo: string
        number: int
    Returns:
        A list of five items:
        is_pr: bool
        is_open: bool
        involved: list of strings representing usernames involved
        payload: a dict, as produced by classify_from_iterator.
        last_event_timestamp: the newest timestamp seen among the events
            and any statuses fetched for the head SHA.
    """
    resource_key = models.GithubResource.make_key(repo, number)
    logging.info('finding webhooks for %s %s', repo, number)

    query = models.GithubWebhookRaw.query(ancestor=resource_key)
    query = query.order(models.GithubWebhookRaw.timestamp)
    event_keys = list(query.fetch(keys_only=True))

    logging.info('classifying %s %s (%d events)', repo, number, len(event_keys))

    # A one-element list lets the nested helpers below update the value in place.
    newest = [datetime.datetime(2000, 1, 1)]
    batch_size = 100

    def events_iterator():
        # Load the events in fixed-size batches instead of all at once.
        for offset in xrange(0, len(event_keys), batch_size):
            batch = ndb.get_multi(event_keys[offset:offset + batch_size])
            for entity in batch:
                newest[0] = max(newest[0], entity.timestamp)
            yield [entity.to_tuple() for entity in batch]

    def get_status_for(sha):
        # Map each status context to [state, target_url, description].
        by_context = {}
        for status in models.GHStatus.query_for_sha(repo, sha):
            newest[0] = max(newest[0], status.updated_at)
            by_context[status.context] = [
                status.state, status.target_url, status.description]
        return by_context

    result = classify_from_iterator(events_iterator(), status_fetcher=get_status_for)
    return list(result) + newest
def get_merged(events, merged=None):
    """
    Fold a series of events into a single, most recent view of the issue.

    Each event only carries part of the picture (comment events lack head
    SHA information, pull request events lack labels, and so on), so the
    'issue' and 'pull_request' sections of every event body are layered on
    top of each other in order.

    Args:
        events: a list of (event_type str, event_body dict, timestamp).
        merged: the result of a previous invocation; a non-empty dict is
            updated in place and returned.
    Returns:
        body: a dict representing the issue's latest state.
    """
    if not merged:
        merged = {}
    for _kind, payload, _when in events:
        # 'issue' is applied before 'pull_request', so the latter wins on overlap.
        for section in ('issue', 'pull_request'):
            if section in payload:
                merged.update(payload[section])
    return merged
def get_labels(events, labels=None):
    """
    Work out which labels are currently applied to an issue.

    Args:
        events: a list of (event_type str, event_body dict, timestamp).
        labels: the result of a previous invocation, if any.
    Returns:
        labels: the currently applied labels as {label_name: label_color}
    """
    if not labels:
        labels = {}
    for kind, payload, _when in events:
        if 'issue' in payload:
            # Issue payloads carry the complete label set, so start over from it.
            labels = dict(
                (entry['name'], entry['color'])
                for entry in payload['issue']['labels'])
        action = payload.get('action')
        if kind != 'pull_request':
            continue
        # Pull request events only describe the single label that changed,
        # so apply each change on top of what is already known.
        try:
            is_label_change = action in ('labeled', 'unlabeled')
            if is_label_change and 'label' not in payload:
                logging.warning('label event with no labels (multiple changes?)')
            elif action == 'labeled':
                added = payload['label']
                # An already-known label keeps its existing color.
                if added['name'] not in labels:
                    labels[added['name']] = added['color']
            elif action == 'unlabeled':
                labels.pop(payload['label']['name'], None)
        except:
            # Log the offending payload, then let the error propagate.
            logging.exception('??? %r', payload)
            raise
    return labels
def get_skip_comments(events, skip_users=None):
    """
    Collect the ids of comments that should be ignored: those that were
    deleted, and those whose event sender is one of skip_users.

    Args:
        events: a list of (event_type str, event_body dict, timestamp).
        skip_users: logins whose comments should be ignored (optional).
    Returns:
        comment_ids: a set of comment ids that were deleted or made by
            users that should be skipped.
    """
    if not skip_users:
        skip_users = []
    comment_kinds = ('issue_comment', 'pull_request_review_comment')
    ignored = set()
    for kind, payload, _when in events:
        if kind not in comment_kinds:
            continue
        comment_id = payload['comment']['id']
        was_deleted = payload.get('action') == 'deleted'
        if was_deleted or payload['sender']['login'] in skip_users:
            ignored.add(comment_id)
    return ignored
def classify(events, status_fetcher=None):
    """
    Process the complete event stream for one issue and work out its
    current state and who needs to act on it.

    Args:
        events: a list of (event_type str, event_body dict, timestamp).
        status_fetcher: a function that returns statuses for the given SHA.
    Returns:
        is_pr: bool
        is_open: bool
        involved: list of strings representing usernames involved
        payload: a dictionary of additional information, including:
            {
                'author': str author_name,
                'assignees': list of assignee and reviewer logins,
                'title': str issue title,
                'labels': {label_name: label_color},
                'attn': {user_name: reason},
                'xrefs': list of builds referenced (by GCS path),
            }
            plus, where applicable, 'needs_rebase', 'additions',
            'deletions', 'head', 'approvers', 'status' and 'milestone'.
    """
    # Each helper makes its own pass over the same event list.
    return _classify_internal(
        get_merged(events),
        get_labels(events),
        get_comments(events),
        get_reviewers(events),
        distill_events(events),
        status_fetcher)
def classify_from_iterator(events_iterator, status_fetcher=None):
    """
    Like classify(), but consume the events in batches.

    Args:
        events_iterator: an iterable yielding successive lists of
            (event_type str, event_body dict, timestamp).
        status_fetcher: a function that returns statuses for the given SHA.
    Returns:
        The same (is_pr, is_open, involved, payload) tuple as classify().
    """
    merged = labels = comments = reviewers = distilled_events = None
    for batch in events_iterator:
        # Feed each helper its own previous result so state carries across batches.
        merged = get_merged(batch, merged)
        labels = get_labels(batch, labels)
        comments = get_comments(batch, comments)
        reviewers = get_reviewers(batch, reviewers)
        distilled_events = distill_events(batch, distilled_events)
    return _classify_internal(
        merged, labels, comments, reviewers, distilled_events, status_fetcher)
def _classify_internal(merged, labels, comments, reviewers, distilled_events, status_fetcher):
    """
    Build the classification result from the already-aggregated event data.

    Args:
        merged: dict with the issue's latest state (see get_merged).
        labels: {label_name: label_color}.
        comments: list of comment dicts (see get_comments).
        reviewers: set of reviewer logins (see get_reviewers).
        distilled_events: list of (action, user, timestamp) tuples.
        status_fetcher: optional function returning statuses for a SHA.
    Returns:
        (is_pr, is_open, involved, payload)
    """
    approvers = get_approvers(comments)

    is_pr = any(marker in merged for marker in ('head', 'pull_request'))
    is_open = merged['state'] != 'closed'
    author = merged['user']['login']

    assigned_logins = set(person['login'] for person in merged['assignees'])
    assignees = sorted(assigned_logins | reviewers)

    # Everyone with a stake in the issue, lower-cased and de-duplicated.
    everyone = set([author])
    everyone.update(assignees)
    everyone.update(approvers)
    involved = sorted(name.lower() for name in everyone)

    payload = {
        'author': author,
        'assignees': assignees,
        'title': merged['title'],
        'labels': labels,
        'xrefs': get_xrefs(comments, merged),
    }

    if is_pr:
        if is_open:
            wants_rebase = 'needs-rebase' in labels or merged.get('mergeable') == 'false'
            payload['needs_rebase'] = wants_rebase
        payload['additions'] = merged.get('additions', 0)
        payload['deletions'] = merged.get('deletions', 0)
        if 'head' in merged:
            payload['head'] = merged['head']['sha']

    if approvers:
        payload['approvers'] = approvers

    # Statuses can only be looked up once the head SHA is known.
    if status_fetcher and 'head' in payload:
        payload['status'] = status_fetcher(payload['head'])

    if merged.get('milestone'):
        payload['milestone'] = merged['milestone']['title']

    # Computed last because it reads the fields filled in above.
    payload['attn'] = calculate_attention(distilled_events, payload)

    return is_pr, is_open, involved, payload
def get_xrefs(comments, merged):
    """
    Collect the build references matched by XREF_RE in the issue body and
    in every comment.

    Args:
        comments: list of comment dicts, each with a 'comment' text field.
        merged: dict with the issue's latest state; 'body' may be missing or None.
    Returns:
        A sorted list of the unique matches.
    """
    texts = [merged.get('body') or '']
    texts.extend(entry['comment'] for entry in comments)
    found = set()
    for text in texts:
        found.update(XREF_RE.findall(text))
    return sorted(found)
def get_comments(events, comments=None):
    """
    Pick comments and pull-request review comments out of a list of events.

    Args:
        events: a list of (event_type str, event_body dict, timestamp).
        comments: the previous output of this function, if any. Its entries
            are carried over, so the events can be processed in batches.
    Returns:
        comments: a list of dict(author=..., comment=..., timestamp=..., id=...),
                  ordered with the earliest comment first.
    """
    # Index the earlier results by comment id so that later events can
    # replace or delete them. (Previously this index was reset to an empty
    # dict right after being built, which dropped all earlier comments.)
    by_id = {c['id']: c for c in comments or []}
    for event, body, _timestamp in events:
        if event not in ('issue_comment', 'pull_request_review_comment'):
            continue
        comment = body['comment']
        comment_id = comment['id']
        if body.get('action') == 'deleted':
            by_id.pop(comment_id, None)
        else:
            # Any other action stores (or overwrites) the comment's content.
            by_id[comment_id] = {
                'author': comment['user']['login'],
                'comment': comment['body'],
                'timestamp': comment['created_at'],
                'id': comment_id,
            }
    return sorted(by_id.values(), key=lambda c: c['timestamp'])
def get_reviewers(events, reviewers=None):
    """
    Return the set of users that have a code review requested or completed.

    Args:
        events: a list of (event_type str, event_body dict, timestamp).
        reviewers: the result of a previous invocation, if any; a non-empty
            set is updated in place and returned.
    Returns:
        reviewers: a set of user logins.
    """
    reviewers = reviewers or set()
    for event, body, _timestamp in events:
        action = body.get('action')
        if event == 'pull_request':
            if action == 'review_requested':
                if 'requested_reviewer' not in body:
                    logging.warning('no reviewer present -- self-review?')
                    continue
                reviewers.add(body['requested_reviewer']['login'])
            elif action == 'review_request_removed':
                # Apply the same guard as for requests: a payload without an
                # individual reviewer used to raise KeyError here.
                # NOTE(review): presumably this happens for team review
                # requests -- confirm against the GitHub webhook docs.
                if 'requested_reviewer' not in body:
                    logging.warning('review request removed with no reviewer present')
                    continue
                reviewers.discard(body['requested_reviewer']['login'])
        elif event == 'pull_request_review':
            if action == 'submitted':
                # Someone who submitted a review counts as a reviewer too.
                reviewers.add(body['sender']['login'])
    return reviewers
def get_approvers(comments):
    """
    Return approvers requested in comments.

    Only comments authored by 'k8s-merge-robot' are considered; when several
    of them carry an approvers marker, the last one wins.

    This MUST be kept in sync with mungegithub's getGubernatorMetadata().

    Args:
        comments: list of comment dicts with 'author' and 'comment' fields.
    Returns:
        A list of approver names; empty if no marker was found or the
        marker lists nobody.
    """
    approvers = []
    for comment in comments:
        if comment['author'] != 'k8s-merge-robot':
            continue
        m = APPROVERS_RE.search(comment['comment'])
        if m:
            names = m.group(1).replace('"', '').split(',')
            # ''.split(',') yields [''], so an empty approvers list would
            # otherwise produce a bogus empty-string approver.
            approvers = [name.strip() for name in names if name.strip()]
    return approvers
def distill_events(events, distilled_events=None):
    """
    Reduce raw events to the (action, user, timestamp) tuples that matter
    for working out each user's state.

    Args:
        events: a list of (event_type str, event_body dict, timestamp).
        distilled_events: the result of a previous invocation, if any; a
            non-empty list is appended to in place.
    Returns:
        A list of tuples whose action is 'comment', 'push' or
        'label <lower-cased label name>'.
    """
    bots = [
        'google-oss-robot',
        'istio-testing',
        'k8s-bot',
        'k8s-ci-robot',
        'k8s-merge-robot',
        'k8s-oncall',
        'k8s-reviewable',
    ]
    # Comments that were deleted or sent by one of the bots are ignored.
    skip_comments = get_skip_comments(events, bots)

    output = distilled_events if distilled_events else []
    for kind, payload, when in events:
        action = payload.get('action')
        user = payload.get('sender', {}).get('login')
        if kind in ('issue_comment', 'pull_request_review_comment'):
            if payload['comment']['id'] in skip_comments:
                continue
            if action == 'created':
                output.append(('comment', user, when))
        elif kind == 'pull_request_review':
            # A submitted review is treated the same as a comment.
            if action == 'submitted':
                output.append(('comment', user, when))
        elif kind == 'pull_request':
            if action in ('opened', 'reopened', 'synchronize'):
                output.append(('push', user, when))
            elif action == 'labeled' and 'label' in payload:
                label_name = payload['label']['name'].lower()
                output.append(('label ' + label_name, user, when))
    return output
def evaluate_fsm(events, start, transitions):
    """
    Run a small state machine over a series of (action, user, timestamp)
    event tuples.

    transitions is a list of tuples
        (state_before str, state_after str, condition str or callable)
    A state_before of None matches any current state. The condition holds
    when it equals the action (as a str), or when condition(action, user)
    is true. For each event only the first matching transition is applied.

    Returns:
        (state, state_start, state_last): the final state, the timestamp at
        which that state was entered from a different state, and the
        timestamp of the most recent transition into it (self-transitions
        included). Both timestamps are 0 if no transition ever fired.
    """
    def holds(condition, action, user):
        if condition == action:
            return True
        return callable(condition) and condition(action, user)

    current = start
    entered_at = 0  # when the current state was entered from another state
    last_hit_at = 0  # when any transition into the current state last fired
    for action, user, timestamp in events:
        for before, after, condition in transitions:
            if before is not None and before != current:
                continue
            if not holds(condition, action, user):
                continue
            if after != current:
                entered_at = timestamp
            current = after
            last_hit_at = timestamp
            break
    return current, entered_at, last_hit_at
def get_author_state(author, distilled_events):
    """
    Work out what the author should be doing, given the distilled events.

    The author starts out 'waiting', moves to 'address comments' whenever
    someone else comments, and returns to 'waiting' after a push or a
    comment of their own.

    Returns:
        The (state, state_start, state_last) tuple from evaluate_fsm.
    """
    def comment_from_other(action, user):
        return action == 'comment' and user != author

    def comment_from_author(action, user):
        return action == 'comment' and user == author

    transitions = [
        # before, after, condition
        (None, 'address comments', comment_from_other),
        ('address comments', 'waiting', 'push'),
        ('address comments', 'waiting', comment_from_author),
    ]
    return evaluate_fsm(distilled_events, start='waiting', transitions=transitions)
def get_assignee_state(assignee, author, distilled_events):
    """
    Work out what an assignee should be doing, given the distilled events.

    The assignee starts in 'needs review', moves to 'waiting' once they
    comment or apply the lgtm label, and goes back to 'needs review' on
    every push or comment by the author.

    Returns:
        The (state, state_start, state_last) tuple from evaluate_fsm.
    """
    def assignee_responded(action, user):
        return user == assignee and action in ('comment', 'label lgtm')

    def comment_from_author(action, user):
        return action == 'comment' and user == author

    transitions = [
        # before, after, condition
        ('needs review', 'waiting', assignee_responded),
        (None, 'needs review', 'push'),
        (None, 'needs review', comment_from_author),
    ]
    return evaluate_fsm(distilled_events, start='needs review', transitions=transitions)
def calculate_attention(distilled_events, payload):
    """
    Decide who should look at an issue, and why.

    Reasons derived from a state machine carry the start and last-update
    time of the state -- "address comments#123#456" means that something has
    been in 'address comments' since 123, and that another event put it in
    'address comments' again at 456.

    Args:
        distilled_events: list of (action, user, timestamp) tuples.
        payload: dict with 'author', 'assignees' and 'labels', and
            optionally 'status', 'approvers' and 'needs_rebase'.
    Returns:
        attn: {user_name: reason}. Each user has a single reason; checks
        that run later overwrite earlier ones.
    """
    author = payload['author']
    assignees = payload['assignees']
    attn = {}

    statuses = payload.get('status', {}).values()
    if any(state == 'failure' for state, _url, _desc in statuses):
        attn[author] = 'fix tests'

    for approver in payload.get('approvers', []):
        attn[approver] = 'needs approval'

    for assignee in assignees:
        state, first, last = get_assignee_state(assignee, author, distilled_events)
        if state != 'waiting':
            attn[assignee] = '%s#%s#%s' % (state, first, last)

    state, first, last = get_author_state(author, distilled_events)
    if state != 'waiting':
        attn[author] = '%s#%s#%s' % (state, first, last)

    # These two come last, so they take precedence for the author.
    if payload.get('needs_rebase'):
        attn[author] = 'needs rebase'
    if 'do-not-merge/release-note-label-needed' in payload['labels']:
        attn[author] = 'needs release-note label'

    return attn