Add quality report on the random noisy dataset
Signed-off-by: Waren Long <waren@sourced.tech>
Waren Long committed Dec 3, 2018
1 parent 34f0595 commit 286b551
Showing 4 changed files with 108 additions and 110 deletions.
49 changes: 27 additions & 22 deletions lookout/style/format/cmdline_tools.py
@@ -10,7 +10,7 @@
from lookout.style.format.benchmarks.evaluate_smoke import evaluate_smoke_entry
from lookout.style.format.benchmarks.generate_smoke import generate_smoke_entry
from lookout.style.format.quality_report import quality_report
from lookout.style.format.robustness import plot_pr_curve, style_robustness_report
from lookout.style.format.quality_report_noisy import quality_report_noisy
from lookout.style.format.rule_stat import print_rules_report


@@ -67,6 +67,18 @@ def add_true_noisy_repos_args(my_parser: ArgumentParser):
"modified by adding artificial style mistakes.")


def add_rules_thresholds(my_parser: ArgumentParser):
"""
Add threshold arguments to filter rules.
:param my_parser: Parser to add the arguments to.
"""
my_parser.add_argument("--confidence-threshold", type=float, default=0.95,
help="Confidence threshold to filter relevant rules.")
my_parser.add_argument("--support-threshold", type=int, default=80,
help="Support threshold to filter relevant rules.")


def create_parser() -> ArgumentParser:
"""
Create a parser for the lookout.style.format utility.
@@ -101,27 +113,20 @@ def add_parser(name, help):
add_bblfsh_arg(rule_parser)
add_model_args(rule_parser)

# Style robustness quality report, includes precision, recall and F1-score
robust_parser = add_parser("robust-eval", "Quality report made by analyzing how well the "
"model is able to fix random style mistakes among a "
"repository: includes precision, recall and "
"F1-score.")
robust_parser.set_defaults(handler=style_robustness_report)
add_true_noisy_repos_args(robust_parser)
add_bblfsh_arg(robust_parser)
add_model_args(robust_parser)

# Plot Precision and Recall curves
pr_curve_parser = add_parser("pr-curve", "Plot Precision/Recall curves with different rules "
"selected based on their confidence.")
pr_curve_parser.set_defaults(handler=plot_pr_curve)
add_true_noisy_repos_args(pr_curve_parser)
add_bblfsh_arg(pr_curve_parser)
add_model_args(pr_curve_parser)
pr_curve_parser.add_argument("--support-threshold", type=int, default=0,
help="Support threshold to filter relevant rules.")
pr_curve_parser.add_argument("-o", "--output", required=True, type=str,
help="Path to the output figure. Could be a png or svg file.")
# Generate the quality report based on the artificial noisy dataset
quality_report_noisy_parser = add_parser("quality-report-noisy", "Quality report on the "
"artificial noisy dataset")
quality_report_noisy_parser.set_defaults(handler=quality_report_noisy)
add_true_noisy_repos_args(quality_report_noisy_parser)
add_bblfsh_arg(quality_report_noisy_parser)
add_model_args(quality_report_noisy_parser)
add_rules_thresholds(quality_report_noisy_parser)
quality_report_noisy_parser.add_argument("--precision-threshold", type=float, default=0.95,
help="Precision threshold tolerated for the model.")
quality_report_noisy_parser.add_argument("--dir-output-figure", required=True, type=str,
help="Path to the output figure in png format.")
quality_report_noisy_parser.add_argument("--dir-output-report", required=True, type=str,
help="Path to the output report.")

# Generate dataset of different styles in code for smoke testing.
gen_smoke_parser = add_parser("gen-smoke-dataset",
@@ -3,10 +3,12 @@
from difflib import SequenceMatcher
import glob
import logging
import os
import sys
from typing import Iterable, List, Mapping, NamedTuple, Set, Tuple

from bblfsh import BblfshClient
import jinja2
import numpy

from lookout.style.format.feature_extractor import FeatureExtractor
@@ -204,85 +206,17 @@ def compute_metrics(changes_count: int, predictions_count: int, true_positive: int
return precision, recall, f1_score
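A toy walk-through of these metrics, assuming the conventional definitions the signature above suggests (precision = TP / predictions, recall = TP / changes, F1 = their harmonic mean):

# Illustrative values only; not taken from the commit.
changes_count, predictions_count, true_positive = 100, 80, 60
precision = true_positive / predictions_count             # 0.75
recall = true_positive / changes_count                    # 0.6
f1_score = 2 * precision * recall / (precision + recall)  # ~0.667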


def style_robustness_report(true_repo: str, noisy_repo: str, bblfsh: str, language: str,
model_path: str) -> None:
"""
Print the quality report of a model tested on a given repository.
The tests consists in adding random style mistakes in the given repo and looking how well
the model is able to fix them according to the style of the original repository.
:param true_repo: Path to the original repository we want to test the model on.
:param noisy_repo: Path to the noisy version of the repository where 1 style mistake is \
randomly added in every file.
:param bblfsh: Babelfish client. Babelfish server should be started accordingly.
:param language: Language to consider, others will be discarded.
:param model_path: Path to the model to test. It should be previously trained on the original \
repository located in ':param true_repo:'.
"""
log = logging.getLogger("style_robustness_report")

true_content = get_content_from_repo(true_repo)
noisy_content = get_content_from_repo(noisy_repo)
true_files, noisy_files, lines_changed = get_difflib_changes(true_content, noisy_content)
log.info("Number of files modified by adding style noise: %d / %d", len(true_files),
len(true_content))
del true_content, noisy_content

client = BblfshClient(bblfsh)
analyzer = FormatModel().load(model_path)
rules = analyzer[language]
feature_extractor = FeatureExtractor(language=language,
**rules.origin_config["feature_extractor"])
vnodes_y_true = files2vnodes(true_files, feature_extractor, client)
mispreds_noise = files2mispreds(noisy_files, feature_extractor, rules, client, log)
diff_mispreds = get_diff_mispreds(mispreds_noise, lines_changed)
changes_count = len(lines_changed)
log.info("Number of artificial mistakes potentially fixed by the model "
"(diff of mispredictions): %d / %d", len(diff_mispreds), changes_count)
style_fixes = get_style_fixes(diff_mispreds, vnodes_y_true, true_files, noisy_files,
feature_extractor)
log.info("style-analyzer fixes in the noisy repos: %d / %d -> %.1f %%",
len(style_fixes), changes_count, 100 * len(style_fixes) / changes_count)

precision, recall, f1_score = compute_metrics(changes_count=changes_count,
predictions_count=len(diff_mispreds),
true_positive=len(style_fixes))
print("precision:", round(precision, 3))
print("recall:", round(recall, 3))
print("F1 score:", round(f1_score, 3))

print()
print("list of files where the style-analyzer succeeds in fixing the random noise:")
for mispred in style_fixes:
print(mispred.node.path)


def filter_relevant_rules(rules: Iterable[Rules], support_threshold: int, log: logging.Logger
) -> Iterable[Tuple[int, float]]:
"""
Filter relevant rules that have a support higher than `support_threshold`.
:param rules: List of `Rules` from the model.
:param support_threshold: Support threshold to filter relevant rules.
:param log: Logger.
:return: List of `Rules` index and confidence we filter according to `support_threshold`.
"""
log.info("Filtering rules with support higher than %d", support_threshold)
rules_id = [(i, r.stats.conf, r.stats.support) for i, r in enumerate(rules)
if r.stats.support > support_threshold]
rules_selection = sorted(rules_id, key=lambda k: k[1], reverse=True)
log.info("Number of rules decreased from %d to %d", len(rules), len(rules_selection))
return rules_selection


def plot_curve(x: numpy.ndarray, y: numpy.ndarray, output: str) -> None:
def plot_curve(repo: str, x: numpy.ndarray, y: numpy.ndarray, precision_threshold: float,
path_to_figure: str) -> None:
"""
Plot y versus x as lines and markers using matplotlib.
:param repo: Name of the repository we plot the precision-recall curve of.
:param x: 1-D numpy array containing the x coordinates.
:param y: 1-D numpy array containing the y coordinates.
:param output: Path to the output figure, could be either a png or svg file.
:param precision_threshold: Precision threshold tolerated for the model; it is drawn \
as a red horizontal line on the figure.
:param path_to_figure: Path to the output figure, in png format.
"""
try:
import matplotlib
@@ -293,17 +227,22 @@ def plot_curve(x: numpy.ndarray, y: numpy.ndarray, output: str) -> None:
plt.figure(figsize=(15, 10))
ax = plt.subplot(111)
ax.plot(x, y, marker="x", linestyle="--")
handle = plt.axhline(precision_threshold, color="r")
ax.legend([handle], ["precision threshold"], fontsize=17)
ax.set_title("Precision-recall curve on the %s repository" % repo, fontsize=17)
ax.set_ylabel("Precision", fontsize=17, labelpad=15)
ax.set_xlabel("Recall", fontsize=17, labelpad=15)
ax.spines["right"].set_visible(False)
ax.spines["top"].set_visible(False)
plt.savefig(output)
plt.savefig(path_to_figure)
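A hedged usage sketch for the updated plot_curve() signature; the arrays are toy values and the output path is a placeholder:

import numpy

toy_recalls = numpy.asarray([0.1, 0.4, 0.6])        # x axis: recall
toy_precisions = numpy.asarray([0.99, 0.97, 0.93])  # y axis: precision
plot_curve("jquery", toy_recalls, toy_precisions, precision_threshold=0.95,
           path_to_figure="/tmp/pr_curve_jquery.png")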


def plot_pr_curve(true_repo: str, noisy_repo: str, bblfsh: str, language: str,
model_path: str, support_threshold: int, output: str) -> None:
def quality_report_noisy(true_repo: str, noisy_repo: str, bblfsh: str, language: str,
model_path: str, confidence_threshold: float, support_threshold: int,
precision_threshold: float, dir_output_figure: str,
dir_output_report: str) -> None:
"""
Plot a precision/recall curve with rules having higher support than `support_threshold`.
Generate a quality report on the artificial noisy dataset including a precision-recall curve.
:param true_repo: Path to the original repository we want to test the model on.
:param noisy_repo: Path to the noisy version of the repository where 1 style mistake is \
@@ -312,10 +251,14 @@ def plot_pr_curve(true_repo: str, noisy_repo: str, bblfsh: str, language: str,
:param language: Language to consider, others will be discarded.
:param model_path: Path to the model to test. It should be previously trained on the original \
repository located in ':param true_repo:'.
:param confidence_threshold: Confidence threshold to filter relevant rules.
:param support_threshold: Support threshold to filter relevant rules.
:param output: Path to the output figure. Could be either a png or svg file.
:param precision_threshold: Precision threshold tolerated for the model; it is drawn \
as a red horizontal line on the figure.
:param dir_output_figure: Path to the directory where the precision-recall curve is \
saved as a png figure.
:param dir_output_report: Path to the directory where the markdown report, rendered \
from a jinja2 template, is saved.
"""
log = logging.getLogger("plot_pr_curve")
log = logging.getLogger("quality_report_noisy")

true_content = get_content_from_repo(true_repo)
noisy_content = get_content_from_repo(noisy_repo)
@@ -335,10 +278,11 @@ def plot_pr_curve(true_repo: str, noisy_repo: str, bblfsh: str, language: str,
changes_count = len(lines_changed)

precisions, recalls = [], []
rules_selection = filter_relevant_rules(rules.rules, support_threshold, log)
for i in range(len(rules_selection)):
n_rules = len(rules.rules)
rules = rules.filter_by_confidence(confidence_threshold).filter_by_support(support_threshold)
for i in range(len(rules.rules)):
filtered_mispreds = {k: m for k, m in diff_mispreds.items()
if any(r[0] == m.rule for r in rules_selection[:i + 1])}
if any(j == m.rule for j, r in enumerate(rules.rules[:i + 1]))}
style_fixes = get_style_fixes(filtered_mispreds, vnodes_y_true,
true_files, noisy_files, feature_extractor)
precision, recall, f1_score = compute_metrics(changes_count=changes_count,
@@ -347,6 +291,41 @@ def plot_pr_curve(true_repo: str, noisy_repo: str, bblfsh: str, language: str,
precisions.append(round(precision, 3))
recalls.append(round(recall, 3))

print("recall x:", recalls)
print("precision y:", precisions)
plot_curve(numpy.asarray(recalls), numpy.asarray(precisions), output)
# Compute some stats and quality metrics for the model's evaluation
n_mistakes = len(true_files)
prec_max_rec = precisions[-1]
max_rec = max(recalls)
min_prec = min(precisions)
n_rules_filtered = len(rules.rules)
# Compute the recall at the given precision threshold: keep the recall of the
# last point whose precision still stays at or above the threshold.
rec_threshold_prec = 0.0
for prec, rec in zip(precisions, recalls):
    if prec < precision_threshold:
        break
    rec_threshold_prec = rec
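# Toy walk-through of the loop above: with precision_threshold = 0.95,
# precisions = [0.99, 0.96, 0.93] and recalls = [0.2, 0.35, 0.5],
# rec_threshold_prec ends up as 0.35 -- the recall of the last point
# whose precision is still >= the threshold.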

# Plot the precision-recall curve
path_to_figure = os.path.join(dir_output_figure, "pr_curve_jquery.png")
plot_curve("jquery", numpy.asarray(recalls), numpy.asarray(precisions), precision_threshold,
path_to_figure)

# Render the markdown report from its jinja2 template
loader = jinja2.FileSystemLoader(("/", os.path.dirname(__file__), os.getcwd()),
followlinks=True)
env = jinja2.Environment(
trim_blocks=True,
lstrip_blocks=True,
keep_trailing_newline=True,
)
env.globals.update(range=range)
template = loader.load(env, "templates/noisy_quality_report.md.jinja2")
report = template.render(n_mistakes=n_mistakes, rec_threshold_prec=rec_threshold_prec,
prec_max_rec=prec_max_rec, max_rec=max_rec, min_prec=min_prec,
confidence_threshold=confidence_threshold,
support_threshold=support_threshold,
n_rules=n_rules, n_rules_filtered=n_rules_filtered,
path_to_figure=path_to_figure)

# Write the quality report
path_to_report = os.path.join(dir_output_report, "report2.md")
with open(path_to_report, "w", encoding="utf-8") as f:
f.write(report)
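For orientation, a hedged example call of the new entry point; every path, the language, and the bblfsh address below are placeholders rather than values from this commit:

quality_report_noisy(true_repo="./jquery", noisy_repo="./jquery-noisy",
                     bblfsh="0.0.0.0:9432", language="javascript",
                     model_path="./jquery.asdf",
                     confidence_threshold=0.95, support_threshold=80,
                     precision_threshold=0.95,
                     dir_output_figure="./figures",
                     dir_output_report="./reports")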
15 changes: 15 additions & 0 deletions lookout/style/format/templates/noisy_quality_report.md.jinja2
@@ -0,0 +1,15 @@

# Quality report on the artificial noisy dataset

### Rules filtering thresholds

* `{{ "Confidence: %s" % (confidence_threshold) }}` {{ "\n" }}
* `{{ "Support: %s" % (support_threshold) }}`

### Metrics table for the *jquery/jquery* repository

| Number of mistakes | Recall at precision threshold | Precision at max recall | Max recall | Min precision | Number of rules |
|:------------------:|:-----------------------------:|:-----------------------:|:----------:|:-------------:|:----------------------------------------------:|
| {{n_mistakes}} | {{rec_threshold_prec}} | {{prec_max_rec}} | {{max_rec}} | {{min_prec}} | `{{ "%s / %s" % (n_rules_filtered, n_rules)}}` |

![Precision-Recall curve]({{path_to_figure}})
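A minimal sketch of rendering this template on its own, with illustrative values, assuming the same jinja2 Environment options used in quality_report_noisy:

import jinja2

loader = jinja2.FileSystemLoader("lookout/style/format/templates")
env = jinja2.Environment(loader=loader, trim_blocks=True, lstrip_blocks=True,
                         keep_trailing_newline=True)
template = env.get_template("noisy_quality_report.md.jinja2")
print(template.render(n_mistakes=37, rec_threshold_prec=0.35, prec_max_rec=0.93,
                      max_rec=0.5, min_prec=0.9, confidence_threshold=0.95,
                      support_threshold=80, n_rules=500, n_rules_filtered=42,
                      path_to_figure="pr_curve_jquery.png"))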
3 changes: 1 addition & 2 deletions lookout/style/format/tests/test_main.py
@@ -10,8 +10,7 @@ def test_handlers(self):
action2handler = {
"eval": "quality_report",
"rule": "print_rules_report",
"robust-eval": "style_robustness_report",
"pr-curve": "plot_pr_curve",
"quality-report-noisy": "quality_report_noisy",
"gen-smoke-dataset": "generate_smoke_entry",
"eval-smoke-dataset": "evaluate_smoke_entry",
}
