Skip to content

Commit

Permalink
added support for commit log parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
jayfk committed Apr 25, 2017
1 parent 4ee7e30 commit 2b1f13d
Show file tree
Hide file tree
Showing 8 changed files with 154 additions and 11 deletions.
2 changes: 1 addition & 1 deletion changelogs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import re
from requests import Session
from .changelogs import get
from .changelogs import get, get_commit_log

"""
if os.environ.get("DEBUG", "") in ("TRUE", "True", "true"):
Expand Down
42 changes: 41 additions & 1 deletion changelogs/changelogs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
import os
import subprocess
from tempfile import mkdtemp
import imp
import requests
import os
Expand Down Expand Up @@ -189,6 +190,29 @@ def get(name, vendor="pypi", functions={}, _depth=0):
return {}


def get_commit_log(name, vendor='pypi', functions=None, _depth=0):
    """
    Tries to parse a changelog from the raw commit log.

    Delegates to `get`, filling in commit-log specific defaults for any of the
    "find_changelogs", "get_content" and "parse" entries the caller did not
    supply.
    :param name: str, package name
    :param vendor: str, vendor
    :param functions: dict, custom functions (optional; the passed dict is not modified)
    :param _depth: int, accepted for signature parity with `get`; not forwarded
    :return: tuple, (dict -> commit log, str -> raw git log)
    """
    # NOTE(review): `get` has at least one `return {}` path, in which case the
    # result is not a 2-tuple -- callers that unpack should confirm.

    # Work on a copy so that neither a shared default nor the caller's own
    # dict gets the commit-log defaults written into it.
    functions = dict(functions) if functions else {}
    if "find_changelogs" not in functions:
        from .finder import find_git_repo
        functions["find_changelogs"] = find_git_repo
    if "get_content" not in functions:
        functions["get_content"] = clone_repo
    if "parse" not in functions:
        from .parser import parse_commit_log
        functions["parse"] = parse_commit_log
    return get(
        name=name,
        vendor=vendor,
        functions=functions
    )


def get_content(session, urls):
"""
Loads the content from URLs, ignoring connection errors.
Expand Down Expand Up @@ -220,3 +244,19 @@ def get_content(session, urls):
except requests.ConnectionError:
pass
return content


def clone_repo(session, urls):
    """
    Clones the given repos in temp directories.

    A repo whose `git clone` exits with a non-zero status is skipped and its
    temp directory is removed, so only usable checkouts are returned.
    :param session: requests Session instance (unused; same signature as get_content)
    :param urls: list, str URLs
    :return: list of tuples, (str -> directory, str -> URL)
    """
    # Local imports: only needed for handling failed clones.
    import logging
    import shutil

    repos = []
    for url in urls:
        # Named `target` rather than `dir` to avoid shadowing the builtin.
        target = mkdtemp()
        returncode = subprocess.call(["git", "clone", url, target])
        if returncode != 0:
            logging.getLogger(__name__).warning(
                "git clone of {} failed with exit code {}".format(url, returncode)
            )
            # Don't leak the temp directory of a clone that never happened.
            shutil.rmtree(target, ignore_errors=True)
            continue
        repos.append((target, url))
    return repos
11 changes: 10 additions & 1 deletion changelogs/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,25 @@ def main():
parser.add_argument("vendor", help="vendor (pypi, npm, gem)", default="pypi", nargs='?')
parser.add_argument("-v", "--verbose", help="increase output verbosity",
action="store_true")
parser.add_argument("-c", "--commits", help="",
action="store_true")

args = parser.parse_args()
if args.verbose:
logging.basicConfig(level=logging.DEBUG)

data = changelogs.get(args.package, vendor=args.vendor)
if args.commits:
data, raw_log = changelogs.get_commit_log(args.package, vendor=args.vendor)
else:
data = changelogs.get(args.package, vendor=args.vendor)

for release in sorted(data.keys(), key=lambda v: parse(v), reverse=True):
print(release)
print(data[release])

if not data and args.commits:
print(raw_log)


if __name__ == "__main__":
main()
58 changes: 53 additions & 5 deletions changelogs/finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,19 +176,32 @@ def find_release_page(session, repo_url):
logger.debug("Unable to construct releases url for {}".format(repo_url))


def find_changelogs(session, name, candidates):
def filter_repo_urls(candidates):
    """
    Filters down a list of URL candidates
    :param candidates: list, URL candidates
    :return: set, Repo URLs
    """
    # Step 1: run every candidate through validate_url; falsy results are dropped.
    checked_urls = [validate_url(candidate) for candidate in candidates]
    valid_urls = {checked for checked in checked_urls if checked}
    logger.info("Got repo candidates {}".format(valid_urls))

    # Step 2: of the valid URLs, keep whatever validate_repo_url returns as truthy.
    checked_repos = [validate_repo_url(valid_url) for valid_url in valid_urls]
    repo_urls = {checked for checked in checked_repos if checked}
    logger.info("Filtered initial candidates down to {}".format(repo_urls))

    return repo_urls


def find_changelogs(session, name, candidates):
"""
Tries to find changelogs on the given URL candidates
:param session: requests Session instance
:param name: str, project name
:param candidates: list, URL candidates
:return: tuple, (set(changelog URLs), set(repo URLs))
"""

repos = filter_repo_urls(candidates=candidates)

# if we are unlucky and none of our URL candidates is a valid repo URL, we need to go deeper
# and check whether the candidate pages contain a link to a repo
if not repos:
Expand All @@ -208,3 +221,38 @@ def find_changelogs(session, name, candidates):
for url in find_release_page(session, repo):
urls.append(url)
return set(urls), repos


def find_git_repo(session, name, candidates):
    """
    Tries to find git repos on the given URL candidates
    :param session: requests Session instance
    :param name: str, project name
    :param candidates: list, URL candidates
    :return: tuple, (set(git URLs), set(repo URLs))
    """

    repos = filter_repo_urls(candidates=candidates)

    # None of the candidates is a repo URL itself, so go deeper and check
    # whether the candidate pages contain a link to a repo.
    if not repos:
        logger.info("No repo found, trying to find one on related sites {}".format(candidates))
        repos = set(find_repo_urls(session, name, candidates))

    urls = set()
    for repo in repos:
        # Assumes URLs shaped like "scheme://host/<username>/<reponame>[/...]",
        # so the owner and repo name are path segments 3 and 4.
        segments = repo.split("/")[3:5]
        if len(segments) != 2 or not all(segments):
            # Too short to contain both an owner and a repo name: skip it
            # instead of failing on the tuple unpacking below.
            logger.debug("Unable to construct git url for {}".format(repo))
            continue
        username, reponame = segments
        if "github.com" in repo:
            # Avoid producing "<reponame>.git.git" when the suffix is already there.
            if reponame.endswith(".git"):
                reponame = reponame[:-len(".git")]
            urls.add(
                "https://github.com/{username}/{reponame}.git".format(
                    username=username, reponame=reponame
                )
            )
        elif "bitbucket.org" in repo:
            urls.add(
                "https://bitbucket.org/{username}/{reponame}".format(
                    username=username, reponame=reponame
                )
            )
    return urls, repos
24 changes: 24 additions & 0 deletions changelogs/parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# -*- coding: utf-8 -*-
import re
from packaging.version import Version, InvalidVersion
from gitchangelog.gitchangelog import changelog, GitRepos
import subprocess
import shutil

INVALID_LINE_START = frozenset(["-", "*", " ", "\t", "<!--"])
INVALID_LINE_ENDS = frozenset(["."])
Expand Down Expand Up @@ -117,3 +120,24 @@ def get_head(name, line, releases):
except InvalidVersion as e:
pass
return False


def parse_commit_log(name, content, releases, get_head_fn):
    """
    Parses the given commit log

    For every cloned repo a changelog is generated from its commits and the
    output of `git log --decorate` is collected. Each clone directory is
    deleted once it has been processed, even if processing fails.
    :param name: str, package name
    :param content: list, (directory path, URL) tuples
    :param releases: list, releases
    :param get_head_fn: function
    :return: tuple, (changelog as returned by `parse`, str -> raw git log)
    """
    log_parts = []
    raw_log_parts = []
    for path, _ in content:
        try:
            log_parts.append(
                "\n".join(changelog(repository=GitRepos(path), tag_filter_regexp=r"v?\d+\.\d+(\.\d+)?"))
            )
            # Run git inside the clone (`-C path`); the builtin `dir` must not
            # be passed here.
            raw_log_parts.append("\n" + subprocess.check_output(
                ["git", "-C", path, "--no-pager", "log", "--decorate"]).decode("utf-8"))
        finally:
            # The clone lives in a temp directory that nobody else cleans up.
            shutil.rmtree(path)
    log = parse(name, "".join(log_parts), releases, get_head_fn)

    return log, "".join(raw_log_parts)
5 changes: 2 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,11 @@
'requests',
'validators',
'packaging',
'lxml'
# TODO: put package requirements here
'lxml',
'gitchangelog'
]

test_requirements = [
# TODO: put package test requirements here
'pytest',
'pytest-cov',
'betamax',
Expand Down
22 changes: 22 additions & 0 deletions tests/test_commit_logs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, unicode_literals
import pytest

from changelogs import get_commit_log

@pytest.fixture(autouse=True)
def record(monkeypatch, betamax_session):
    """
    Replaces `changelogs.changelogs.Session` with a factory that returns the
    betamax session, for every test in this module (autouse).
    """
    # `betamax_session` is requested as an argument above; a `usefixtures`
    # mark on a fixture function has no effect, so none is applied.
    def session():
        return betamax_session
    monkeypatch.setattr("changelogs.changelogs.Session", session)


def test_changelogs():
    """Checks the parsed and the raw commit log of the "changelogs" package."""
    parsed, raw = get_commit_log("changelogs")

    # (release, text expected in that release's parsed entry)
    expected_entries = (
        ('0.9.0', 'Correct time traveling changelog. [Alexandru Chirila]'),
        ('0.3.0', 'Bump version: 0.2.0 → 0.3.0'),
    )
    for release, snippet in expected_entries:
        assert snippet in parsed[release]

    # Text expected somewhere in the raw `git log` output
    expected_raw = (
        'fcafefa4380a03135745f5e306577ff2446130bb',
        'added test dependencies',
        'initial release from pyup.io',
    )
    for snippet in expected_raw:
        assert snippet in raw
1 change: 1 addition & 0 deletions vcr/cassettes/tests.test_commit_logs.test_changelogs.json

Large diffs are not rendered by default.

0 comments on commit 2b1f13d

Please sign in to comment.