Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for removing comments #7

Merged
merged 10 commits into from Jul 17, 2019
2 changes: 1 addition & 1 deletion .isort.cfg
@@ -1,2 +1,2 @@
[settings]
known_third_party = hglib,pygit2,requests,setuptools,tqdm
known_third_party = hglib,pygit2,pytest,requests,setuptools,tqdm
29 changes: 29 additions & 0 deletions .taskcluster.yml
Expand Up @@ -95,12 +95,41 @@ tasks:
owner: ${user}@users.noreply.github.com
source: ${repository}/raw/${head_rev}/.taskcluster.yml

- taskId: {$eval: as_slugid("tests_task")}
created: {$fromNow: ''}
deadline: {$fromNow: '1 hour'}
provisionerId: aws-provisioner-v1
workerType: github-worker
payload:
maxRunTime: 3600
image: python
command:
- "/bin/bash"
- "-lcx"
- "apt-get -qq update &&
apt-get -qq install -y python-pip &&
python2 -m pip install --quiet mercurial==4.8 &&
curl -L https://github.com/mozilla/rust-code-analysis/releases/download/v0.0.1/rust-code-analysis-linux-x86_64.tar.bz2 | tar -C /usr/bin -xjv &&
chmod +x /usr/bin/rust-code-analysis &&
git clone --quiet ${repository} &&
cd microannotate &&
git -c advice.detachedHead=false checkout ${head_rev} &&
pip install --quiet -r requirements.txt &&
pip install --quiet -r test-requirements.txt &&
python -m pytest tests/test_*.py"
metadata:
name: microannotate tests
description: microannotate tests
owner: ${user}@users.noreply.github.com
source: ${repository}/raw/${head_rev}/.taskcluster.yml

- $if: 'tasks_for == "github-push" && head_branch[:10] == "refs/tags/"'
then:
dependencies:
- {$eval: as_slugid("lint_task")}
- {$eval: as_slugid("packaging_test_task")}
- {$eval: as_slugid("version_check_task")}
- {$eval: as_slugid("tests_task")}
scopes:
- secrets:get:project/relman/microannotate/deploy
created: {$fromNow: ''}
Expand Down
11 changes: 10 additions & 1 deletion bin/microannotate-generate.py
Expand Up @@ -29,8 +29,17 @@
action="store",
default="tip",
)
parser.add_argument("--tokenize", action="store_true", default=True)
parser.add_argument("--remove-comments", action="store_true", default=False)
args = parser.parse_args()

repo_out_dir = os.path.realpath(args.repository_out_dir)

generator.generate(args.repository_dir, repo_out_dir, args.rev_start, args.rev_end)
generator.generate(
args.repository_dir,
repo_out_dir,
args.rev_start,
args.rev_end,
args.tokenize,
args.remove_comments,
)
83 changes: 75 additions & 8 deletions microannotate/generator.py
Expand Up @@ -7,13 +7,20 @@
import itertools
import os
import re
import subprocess
import time
from logging import INFO, basicConfig, getLogger

import hglib
import pygit2
import requests
from tqdm import tqdm

from microannotate import utils

basicConfig(level=INFO)
logger = getLogger(__name__)


class Commit:
def __init__(self, node, parents, desc):
Expand Down Expand Up @@ -59,7 +66,7 @@ def set_modified_files(commit):
SPLIT_WORD_REGEX = re.compile(rb"(\w+|{|}|\[|\]|\"|'|\(|\)|\\\\|\*|#|/)")


def convert(repo, commit):
def convert(repo, commit, tokenize, remove_comments, code_analysis_port):
set_modified_files(commit)

copy_target_paths = set(commit.file_copies.values())
Expand All @@ -81,7 +88,7 @@ def convert(repo, commit):
_, ext = os.path.splitext(after_path)

try:
after = HG.cat([after_path], rev=commit.node)
content = HG.cat([after_path], rev=commit.node)
except hglib.error.CommandError as e:
if b"no such file in rev" in e.err:
# The file was removed.
Expand All @@ -96,10 +103,27 @@ def convert(repo, commit):
os.path.dirname(os.path.join(repo.workdir, after_path)), exist_ok=True
)

if remove_comments:
try:
r = requests.post(
f"http://localhost:{code_analysis_port}/comment?file_name={after_path}",
headers={"Content-Type": "text/plain"},
data=content,
)
if r.ok:
content = r.text.encode("utf-8")
except requests.exceptions.ConnectionError as e:
# The code analysis server currently doesn't respond when we pass an unsupported language.
logger.info(f"Error connecting to code analysis server: {e}")
marco-c marked this conversation as resolved.
Show resolved Hide resolved
pass

with open(os.path.join(repo.workdir, after_path), "wb") as f:
f.writelines(
word.group(0) + b"\n" for word in SPLIT_WORD_REGEX.finditer(after)
)
if tokenize:
f.writelines(
word.group(0) + b"\n" for word in SPLIT_WORD_REGEX.finditer(content)
)
else:
f.write(content)

index.add(after_path)

Expand Down Expand Up @@ -165,7 +189,46 @@ def get_revs(hg, rev_start=0, rev_end="tip"):
return x.splitlines()


def generate(repo_dir, repo_out_dir, rev_start=0, rev_end="tip", limit=None):
def generate(
repo_dir,
repo_out_dir,
rev_start=0,
rev_end="tip",
limit=None,
tokenize=True,
remove_comments=False,
):
proc = None
code_analysis_port = None
if remove_comments:
ready = False

for _ in range(7):
try:
code_analysis_port = utils.get_free_tcp_port()
proc = subprocess.Popen(
["rust-code-analysis", "--serve", "--port", str(code_analysis_port)]
)
except FileNotFoundError:
raise Exception("rust-code-analysis is required for comment removal")

for _ in range(7):
try:
r = requests.get(f"http://localhost:{code_analysis_port}/ping")
r.raise_for_status()
ready = True
break
except Exception:
if proc.poll() is not None:
break

time.sleep(1)

if ready:
break

assert ready, "rust-code-analysis should be able to start"

if os.path.exists(repo_out_dir):
repo = pygit2.Repository(repo_out_dir)
try:
Expand Down Expand Up @@ -213,10 +276,14 @@ def generate(repo_dir, repo_out_dir, rev_start=0, rev_end="tip", limit=None):
_init(repo_dir)
for commit in tqdm(commits):
try:
convert(repo, commit)
except Exception:
convert(repo, commit, tokenize, remove_comments, code_analysis_port)
except Exception as e:
print(e)
f.write(f"{commit.node} - {commit.parents}\n")

os.chdir(cwd)

if proc is not None:
proc.terminate()

return all_commits_done
9 changes: 9 additions & 0 deletions microannotate/utils.py
Expand Up @@ -4,6 +4,7 @@
# You can obtain one at http://mozilla.org/MPL/2.0/.

import re
import socket

import pygit2

Expand All @@ -17,3 +18,11 @@ def get_original_hash(repo, rev):
commit = repo[rev]

return ORIGINAL_COMMIT_REGEX.search(commit.message).group(1)


def get_free_tcp_port():
    """Return a TCP port number that was free at the time of the call.

    A temporary IPv4 stream socket is bound to port 0 so that the operating
    system picks an unused port; the chosen port is read back and the socket
    is closed before returning.

    Note that the port is not reserved: since the socket is closed before
    this function returns, another process may grab the port before the
    caller binds to it. Callers should be ready to retry with a new port.

    Returns:
        int: the port number assigned by the operating system.

    Raises:
        OSError: if the socket cannot be created or bound.
    """
    # The context manager guarantees the socket is closed even when bind()
    # or getsockname() raises, which the manual close() did not.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp:
        tcp.bind(("", 0))
        # getsockname() returns (address, port) for AF_INET sockets; only
        # the port is of interest here.
        return tcp.getsockname()[1]
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,3 +1,4 @@
python-hglib==2.6.1
pygit2==0.28.2
tqdm==4.32.2
requests==2.22.0
1 change: 1 addition & 0 deletions test-requirements.txt
@@ -1 +1,2 @@
pre-commit==1.17.0
pytest==5.0.0