Skip to content

Commit

Permalink
Bug 1596439 - Cache calls to github's branch_commits
Browse files Browse the repository at this point in the history
  • Loading branch information
JohanLorenzo committed Nov 19, 2019
1 parent 6c06ff3 commit 8535016
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 43 deletions.
1 change: 1 addition & 0 deletions requirements/base.in
@@ -1,4 +1,5 @@
aiohttp
aiomemoizettl
arrow
cryptography>=2.6.1
dictdiffer
Expand Down
19 changes: 11 additions & 8 deletions requirements/base.txt
@@ -1,4 +1,4 @@
# SHA1:877ece2910765a8459ad805bf6d3866660799867
# SHA1:e4fedf520da8ea79f254dd568a06441e6d3a665d
#
# This file is autogenerated by pip-compile-multi
# To update, run:
Expand All @@ -18,6 +18,9 @@ aiohttp==3.6.2 \
--hash=sha256:65f31b622af739a802ca6fd1a3076fd0ae523f8485c52924a89561ba10c49b48 \
--hash=sha256:ae55bac364c405caa23a4f2d6cfecc6a0daada500274ffca4a9230e7129eac59 \
--hash=sha256:b778ce0c909a2653741cb4b1ac7015b5c130ab9c897611df43ae6a58523cb965
aiomemoizettl==0.0.3 \
--hash=sha256:07a6becac60f6cd2604b9f2b73bcd9a50079a0b7b55e2a4e45b1eec5a3ea9659 \
--hash=sha256:0a80d2dc765e545263f515363b6700ec8cf86fa3968b529f56390b28e34f743d
arrow==0.15.4 \
--hash=sha256:01a16d8a93eddf86a29237f32ae36b29c27f047e79312eb4df5d55fd5a2b3183 \
--hash=sha256:e1a318a4c0b787833ae46302c02488b6eeef413c6a13324b3261ad320f21ec1e
Expand Down Expand Up @@ -199,18 +202,18 @@ taskcluster-urls==11.0.0 \
--hash=sha256:2aceab7cf5b1948bc197f2e5e50c371aa48181ccd490b8bada00f1e3baf0c5cc \
--hash=sha256:74bd2110b5daaebcec5e1d287bf137b61cb8cf6b2d8f5f2b74183e32bc4e7c87 \
# via taskcluster
taskcluster==22.1.0 \
--hash=sha256:0c3c657e5b9d4ab9028446df9cdeac8e16584574d35a94a589517cea1fd0d292 \
--hash=sha256:d4c500b4fd9b030ef0898d81c9c6551f2beeebbdf687705817725b6095f60189 \
--hash=sha256:da9857bdcb518a74ecfe0cbf34c7f4a047c13bfc8ee57b3a97e7d4edb6580667
taskcluster==22.1.1 \
--hash=sha256:07ef947d9b4fed32e4bc3e04b528f66a572944c6e2f907b8067c46b622067796 \
--hash=sha256:0b87096327374303fa8ab1a8985917841bc9e704adeade6214f21ded7065dc9e \
--hash=sha256:9221e19739f02bac4385fa3b1858028d92244b7f87d3b8e6dffe85e8fa118ff5
uritemplate==3.0.0 \
--hash=sha256:01c69f4fe8ed503b2951bef85d996a9d22434d2431584b5b107b2981ff416fbd \
--hash=sha256:1b9c467a940ce9fb9f50df819e8ddd14696f89b9a8cc87ac77952ba416e0a8fd \
--hash=sha256:c02643cebe23fc8adb5e6becffe201185bf06c40bda5c0b4028a93f1527d011d \
# via github3.py
urllib3==1.25.6 \
--hash=sha256:3de946ffbed6e6746608990594d08faac602528ac7015ac28d33cee6a45b7398 \
--hash=sha256:9a107b99a5393caf59c7aa3c1249c16e6879447533d0887f4336dde834c7be86 \
urllib3==1.25.7 \
--hash=sha256:a8a318824cc77d1fd4b2bec2ded92646630d7fe8619497b142c84a9e6f5a7293 \
--hash=sha256:f3c5fd51747d450d4dcf6f923c81f78f811aab8205fda64b0aba34a4e48b0745 \
# via requests
wheel==0.33.6 \
--hash=sha256:10c9da68765315ed98850f8e048347c3eb06dd81822dc2ab1d4fde9dc9702646 \
Expand Down
18 changes: 9 additions & 9 deletions requirements/test.txt
Expand Up @@ -95,9 +95,9 @@ pyflakes==2.1.1 \
--hash=sha256:17dbeb2e3f4d772725c777fabc446d5634d1038f234e77343108ce445ea69ce0 \
--hash=sha256:d976835886f8c5b31d47970ed689944a0262b5f3afa00a5a7b4dc81e5449f8a2 \
# via flake8
pyparsing==2.4.4 \
--hash=sha256:4acadc9a2b96c19fe00932a38ca63e601180c39a189a696abce1eaab641447e1 \
--hash=sha256:61b5ed888beab19ddccab3478910e2076a6b5a0295dffc43021890e136edf764 \
pyparsing==2.4.5 \
--hash=sha256:20f995ecd72f2a1f4bf6b072b63b22e2eb457836601e76d6e5dfcd75436acc1f \
--hash=sha256:4ca62001be367f01bd3e92ecbb79070272a9d4964dce6a48a82ff0b8bc7e683a \
# via packaging
pytest-asyncio==0.10.0 \
--hash=sha256:9fac5100fd716cbecf6ef89233e8590a4ad61d729d1732e0a96b84182df1daaf \
Expand All @@ -111,9 +111,9 @@ pytest-mock==1.11.2 \
pytest-random-order==1.0.4 \
--hash=sha256:6b2159342a4c8c10855bc4fc6d65ee890fc614cb2b4ff688979b008a82a0ff52 \
--hash=sha256:72279a7f823969e18b10e438950f58330d17e0fcffb57cbd7929770cd687ecb2
pytest==5.2.2 \
--hash=sha256:27abc3fef618a01bebb1f0d6d303d2816a99aa87a5968ebc32fe971be91eb1e6 \
--hash=sha256:58cee9e09242937e136dbb3dab466116ba20d6b7828c7620f23947f37eb4dae4
pytest==5.2.3 \
--hash=sha256:15837d2880cb94821087bc07476892ea740696b20e90288fd6c19e44b435abdb \
--hash=sha256:b6cf7ad9064049ee486586b3a0ddd70dc5136c40e1147e7d286efd77ba66c5eb
snowballstemmer==2.0.0 \
--hash=sha256:209f257d7533fdb3cb73bdbd24f436239ca3b2fa67d56f6ff88e86be08cc5ef0 \
--hash=sha256:df3bac3df4c2c01363f3dd2cfa78cce2840a79b9f1c2d2de9ce8d31683992f52 \
Expand All @@ -122,9 +122,9 @@ toml==0.10.0 \
--hash=sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c \
--hash=sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e \
# via tox
tox==3.14.0 \
--hash=sha256:0bc216b6a2e6afe764476b4a07edf2c1dab99ed82bb146a1130b2e828f5bff5e \
--hash=sha256:c4f6b319c20ba4913dbfe71ebfd14ff95d1853c4231493608182f66e566ecfe1
tox==3.14.1 \
--hash=sha256:1d1368ac86e8332f79e2bcef9fefe2b077469f08449eadf0183759b34f3b2070 \
--hash=sha256:bcfa3e40abc1e9b70607b56adfd976fe7dc8286ad56aab44e3151daca7d2d0d0
virtualenv==16.7.7 \
--hash=sha256:11cb4608930d5fd3afb545ecf8db83fa50e1f96fc4fca80c94b07d2c83146589 \
--hash=sha256:d257bb3773e48cac60e475a19b608996c73f4d333b3ba2e4e57d5ac6134e0136
Expand Down
45 changes: 30 additions & 15 deletions scriptworker/github.py
Expand Up @@ -3,6 +3,7 @@
import logging
import re

from aiomemoizettl import memoize_ttl
from github3 import GitHub
from github3.exceptions import GitHubException

Expand All @@ -15,7 +16,6 @@
)

_GIT_FULL_HASH_PATTERN = re.compile(r'^[0-9a-f]{40}$')
_branch_commits_cache = {}

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -117,11 +117,6 @@ async def has_commit_landed_on_repository(self, context, revision):
bool: True if the commit is present in one of the branches of the main repository
"""
# Revision may be a tag name. `branch_commits` doesn't work on tags
if not _is_git_full_hash(revision):
revision = await self.get_tag_hash(tag_name=revision)

repo = self._github_repository.html_url
if any(
vcs_rule.get('require_secret')
for vcs_rule in context.config['trusted_vcs_rules']
Expand All @@ -132,21 +127,41 @@ async def has_commit_landed_on_repository(self, context, revision):
"repositories, assume True")
return True

url = '/'.join([repo.rstrip('/'), 'branch_commits', revision])
from scriptworker.task import get_decision_task_id
cache_key = '{}-{}'.format(get_decision_task_id(context.task), url)
if cache_key in _branch_commits_cache:
html_text = _branch_commits_cache[cache_key]
else:
html_data = await retry_request(context, url)
html_text = html_data.strip()
_branch_commits_cache[cache_key] = html_text
# Revision may be a tag name. `branch_commits` doesn't work on tags
if not _is_git_full_hash(revision):
revision = await self.get_tag_hash(tag_name=revision)

html_text = await _fetch_github_branch_commits_data(
context, self._github_repository.html_url, revision
)

# https://github.com/{repo_owner}/{repo_name}/branch_commits/{revision} just returns some \n
# when the commit hasn't landed on the origin repo. Otherwise, some HTML data is returned - it
# represents the branches on which the given revision is present.
return html_text != ''


# TODO Use memoize_ttl() as decorator once https://github.com/michalc/aiomemoizettl/issues/2 is done
async def _fetch_github_branch_commits_data_helper(context, repo_html_url, revision):
url = '/'.join((repo_html_url.rstrip('/'), 'branch_commits', revision))
log.info('Cache does not exist for URL "{}" (in this context), fetching it...'.format(url))
html_text = await retry_request(context, url)
return html_text.strip()

# XXX memoize_ttl() uses all function parameters to create a key that stores its cache.
# This means new contexts cannot use the memoized value, even though they're calling the same
# repo and revision. jlorenzo tried to take the context out of the memoize_ttl() call, but
# whenever the cache is invalidated, request() doesn't work anymore because the session carried
# by the context has been long closed.
# Therefore, the defined TTL has 2 purposes:
# a. it memoizes calls for the time of a single cot_verify() run
# b. it clears the cache automatically, so we don't have to manually invalidate it.
_BRANCH_COMMITS_CACHE_TTL_IN_SECONDS = 10*60 # 10 minutes
_fetch_github_branch_commits_data = memoize_ttl(
_fetch_github_branch_commits_data_helper, get_ttl=lambda _: _BRANCH_COMMITS_CACHE_TTL_IN_SECONDS
)


def is_github_url(url):
"""Tell if a given URL matches a Github one.
Expand Down
39 changes: 28 additions & 11 deletions scriptworker/test/test_github.py
@@ -1,7 +1,9 @@
from types import SimpleNamespace

import asyncio
import pytest

from copy import copy
from unittest.mock import MagicMock, patch

from . import mobile_rw_context, mpd_rw_context
Expand Down Expand Up @@ -203,19 +205,34 @@ async def retry_request(_, url):


@pytest.mark.asyncio
async def test_has_commit_landed_on_repository_cache(context, github_repository):
is_cached = False
async def retry_request(_, _url):
if is_cached:
assert False, "retry_request should not have been called."
return '\n'
async def test_has_commit_landed_on_repository_cache(context, mpd_context, github_repository):
global retry_request_call_count
retry_request_call_count = 0
async def _counter(*args, **kwargs):
global retry_request_call_count
retry_request_call_count += 1
return ''

with patch('scriptworker.github.retry_request', _counter):
await asyncio.gather(*[
github_repository.has_commit_landed_on_repository(context, "0129abcdef012345643456789abcdef012345678"),
github_repository.has_commit_landed_on_repository(context, "0129abcdef012345643456789abcdef012345678"),
github_repository.has_commit_landed_on_repository(context, "0129abcdef012345643456789abcdef012345678"),
github_repository.has_commit_landed_on_repository(context, "0129abcdef012345643456789abcdef012345678"),
])
# Even though all calls were fired at once, just a single call was made
assert retry_request_call_count == 1


with patch('scriptworker.github.retry_request', retry_request):
await github_repository.has_commit_landed_on_repository(context, "0129abcdef012345643456789abcdef012345678")
is_cached = True
await github_repository.has_commit_landed_on_repository(context, "0129abcdef012345643456789abcdef012345678")
is_cached = False
await github_repository.has_commit_landed_on_repository(context, "456789abcdef0123456780129abcdef012345643")
# New commit means new request
assert retry_request_call_count == 2

different_context = copy(context)
different_context.task = {'taskGroupId': 'someOtherTaskId'}
await github_repository.has_commit_landed_on_repository(different_context, "456789abcdef0123456780129abcdef012345643")
# New context means new request too
assert retry_request_call_count == 3


@pytest.mark.parametrize('url, expected', ((
Expand Down
1 change: 1 addition & 0 deletions setup.py
Expand Up @@ -18,6 +18,7 @@

reqs = [
"aiohttp>=3",
"aiomemoizettl",
"arrow",
"cryptography>=2.6.1",
"dictdiffer",
Expand Down

0 comments on commit 8535016

Please sign in to comment.