Merge pull request #6 from pltrdy/multi_sentences
Major update for multi-sentence sequences ROUGE-L
pltrdy authored Feb 16, 2018
2 parents d8f1f0c + 1700917 commit 4bf5c05
Showing 6 changed files with 394 additions and 511 deletions.
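In short, per the rouge/rouge.py diff below, ROUGE-L is now computed at the summary level rather than the sentence level: every hypothesis and reference string is first split on "." into a list of whitespace-normalized sentences and then passed to rouge_l_summary_level. A minimal sketch of that splitting step (the sample text is invented):

hyp = "the cat sat. the dog barked."
sents = [" ".join(s.split()) for s in hyp.split(".") if len(s) > 0]
# -> ["the cat sat", "the dog barked"]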
56 changes: 29 additions & 27 deletions bin/rouge_cmd.py
@@ -4,33 +4,35 @@
import os
from rouge import Rouge, FilesRouge


def main():
    import argparse
    parser = argparse.ArgumentParser(description='Rouge Metric Calculator')
    parser.add_argument('-f', '--file', help="File mode", action='store_true')
    parser.add_argument('-a', '--avg', help="Average mode", action='store_true')
    parser.add_argument('hypothesis', type=str, help='Text of file path')
    parser.add_argument('reference', type=str, help='Text or file path')

    args = parser.parse_args()
    if args.file:
        hyp, ref = args.hypothesis, args.reference
        assert(os.path.isfile(hyp))
        assert(os.path.isfile(ref))

        files_rouge = FilesRouge(hyp, ref)
        scores = files_rouge.get_scores(avg=args.avg)

        print(json.dumps(scores, indent=2))
    else:
        hyp, ref = args.hypothesis, args.reference
        assert(type(hyp) == str)
        assert(type(ref) == str)

        rouge = Rouge()
        scores = rouge.get_scores(hyp, ref, avg=args.avg)

        print(json.dumps(scores, indent=2))
    parser = argparse.ArgumentParser(description='Rouge Metric Calculator')
    parser.add_argument('-f', '--file', help="File mode", action='store_true')
    parser.add_argument('-a', '--avg', help="Average mode",
                        action='store_true')
    parser.add_argument('hypothesis', type=str, help='Text of file path')
    parser.add_argument('reference', type=str, help='Text or file path')

    args = parser.parse_args()
    if args.file:
        hyp, ref = args.hypothesis, args.reference
        assert(os.path.isfile(hyp))
        assert(os.path.isfile(ref))

        files_rouge = FilesRouge(hyp, ref)
        scores = files_rouge.get_scores(avg=args.avg)

        print(json.dumps(scores, indent=2))
    else:
        hyp, ref = args.hypothesis, args.reference
        assert(type(hyp) == str)
        assert(type(ref) == str)

        rouge = Rouge()
        scores = rouge.get_scores(hyp, ref, avg=args.avg)

        print(json.dumps(scores, indent=2))


if __name__ == "__main__":
    main()
    main()
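Based on the argument parser above, the command-line entry point can presumably be invoked along these lines (the texts and file paths are hypothetical, not taken from the repository):

# score two raw strings
python bin/rouge_cmd.py "the cat sat on the mat." "a cat was on the mat."
# score two line-aligned files and average the results over all lines
python bin/rouge_cmd.py -f -a hyp.txt ref.txt

Both forms print the resulting scores as indented JSON.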
1 change: 1 addition & 0 deletions rouge/__init__.py
@@ -2,3 +2,4 @@
from rouge.rouge import FilesRouge, Rouge

__version__ = "0.2.1"
__all__ = ["FilesRouge", "Rouge"]
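As a side note, defining __all__ makes the package's public surface explicit, so a wildcard import now exposes only the two classes re-exported above:

from rouge import *  # brings in FilesRouge and Rouge only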
228 changes: 102 additions & 126 deletions rouge/rouge.py
@@ -2,135 +2,111 @@
from __future__ import absolute_import
import rouge.rouge_score as rouge_score
import os
import numpy as np


class FilesRouge:
    def __init__(self, hyp_path, ref_path, metrics=None, stats=None, batch_lines=None):
        assert(os.path.isfile(hyp_path))
        assert(os.path.isfile(ref_path))

        self.rouge = Rouge(metrics=metrics, stats=stats)

        def line_count(path):
            count = 0
            for line in open(path):
                count += 1
            return count

        hyp_lc = line_count(hyp_path)
        ref_lc = line_count(ref_path)
        assert(hyp_lc == ref_lc)

        assert(batch_lines is None or type(batch_lines) == int)

        self.hyp_path = hyp_path
        self.ref_path = ref_path
        self.batch_lines = batch_lines

    def get_scores(self, avg=False):
        """Calculate ROUGE scores between each pair of
        lines (hyp_file[i], ref_file[i]).
        Args:
          * hyp_path: hypothesis file path
          * ref_path: references file path
          * avg (False): whether to get an average scores or a list
          * batch_line(None): set it to an integer value to work with
            subsets of `batch_line` lines (uses less memory)
        """
        batch_lines = self.batch_lines
        hyp_path, ref_path = self.hyp_path, self.ref_path

        if batch_lines is None:
            hyps = [line[:-1] for line in open(hyp_path).readlines()]
            refs = [line[:-1] for line in open(ref_path).readlines()]


            return self.rouge.get_scores(hyps, refs, avg=avg)

        else:
            if batch_lines > hyp_lc:
                batch_lines = hyp_lc

            if avg:
                sc = [0, 0, 0]
                update_scores = lambda s, h, r: [sum(x) for x in zip(s, self.rouge.get_scores(h, r, avg=True))]
            else:
                sc = []
                update_scores = lambda s, h, r: s + self.rouge.get_scores(batch_hyp, batch_ref)

            hyp_file = open(hyp_path)
            ref_file = open(ref_path)

            batch_hyp = []
            batch_ref = []

            for count in range(hyp_lc):
                batch_hyp.append(hyp_file.readline()[:-1])
                batch_ref.append(ref_file.readline()[:-1])

                count += 1
                if count == batch_lines:
                    sc = update_scores(sc, batch_hyp, batch_ref)
                    count = 0
                    batch_hyp = []
                    batch_ref = []

            if avg:
                return [s/hyp_lc for s in sc]
            return sc
    def __init__(self, hyp_path, ref_path, metrics=None, stats=None,
                 batch_lines=None):
        assert(os.path.isfile(hyp_path))
        assert(os.path.isfile(ref_path))

        self.rouge = Rouge(metrics=metrics, stats=stats)

class Rouge:
    DEFAULT_METRICS = ["rouge-1", "rouge-2", "rouge-l"]
    AVAILABLE_METRICS = {"rouge-1": lambda hyp, ref: rouge_score.rouge_n([hyp], [ref], 1),
                         "rouge-2": lambda hyp, ref: rouge_score.rouge_n([hyp], [ref], 2),
                         "rouge-l": lambda hyp, ref: rouge_score.rouge_l_sentence_level([hyp], [ref]),
                         }

    DEFAULT_STATS = ["f", "p", "r"]
    AVAILABLE_STATS = {"f": 0, "p": 1, "r": 2
                       }
    def __init__(self, metrics=None, stats=None):
        self.metrics = metrics if metrics is not None else Rouge.DEFAULT_METRICS
        self.stats = stats if stats is not None else Rouge.DEFAULT_STATS

        for m in self.metrics:
            if m not in Rouge.AVAILABLE_METRICS:
                raise ValueError("Unknown metric '%s'" % m)

        for s in self.stats:
            if s not in Rouge.AVAILABLE_STATS:
                raise ValueError("Unknown stat '%s'" % s)

    def get_scores(self, hyps, refs, avg=False):
        if type(hyps) == str:
            hyps, refs = [hyps], [refs]

        assert(type(hyps) == type(refs))
        assert(len(hyps) == len(refs))

        if not avg:
            return self._get_scores(hyps, refs)
        return self._get_avg_scores(hyps, refs)

    def _get_scores(self, hyps, refs):
        scores = []
        for hyp, ref in zip(hyps, refs):
            sen_score = {}
            for m in self.metrics:
                fn = Rouge.AVAILABLE_METRICS[m]
                sc = fn(hyp, ref)
                sen_score[m] = {s: sc[Rouge.AVAILABLE_STATS[s]] for s in self.stats}
            scores.append(sen_score)
        return scores

    def _get_avg_scores(self, hyps, refs):
        scores = {}
        for m in self.metrics:
            fn = Rouge.AVAILABLE_METRICS[m]
            sc = [fn(hyp, ref) for hyp, ref in zip(hyps, refs)]
            sc = [[sen_sc[Rouge.AVAILABLE_STATS[s]] for s in self.stats] for sen_sc in sc]
            scores[m] = {s: st for s, st in zip(self.stats, tuple(map(np.mean, zip(*sc))))}
        return scores
        def line_count(path):
            count = 0
            for line in open(path):
                count += 1
            return count

        hyp_lc = line_count(hyp_path)
        ref_lc = line_count(ref_path)
        assert(hyp_lc == ref_lc)

        assert(batch_lines is None or type(batch_lines) == int)

        self.hyp_path = hyp_path
        self.ref_path = ref_path
        self.batch_lines = batch_lines

    def get_scores(self, avg=False):
        """Calculate ROUGE scores between each pair of
        lines (hyp_file[i], ref_file[i]).
        Args:
          * hyp_path: hypothesis file path
          * ref_path: references file path
          * avg (False): whether to get an average scores or a list
        """
        hyp_path, ref_path = self.hyp_path, self.ref_path

        hyps = [line[:-1] for line in open(hyp_path).readlines()]
        refs = [line[:-1] for line in open(ref_path).readlines()]

        return self.rouge.get_scores(hyps, refs, avg=avg)


class Rouge:
    DEFAULT_METRICS = ["rouge-1", "rouge-2", "rouge-l"]
    AVAILABLE_METRICS = {
        "rouge-1": lambda hyp, ref: rouge_score.rouge_n(hyp, ref, 1),
        "rouge-2": lambda hyp, ref: rouge_score.rouge_n(hyp, ref, 2),
        "rouge-l": lambda hyp, ref:
            rouge_score.rouge_l_summary_level(hyp, ref),
    }
    DEFAULT_STATS = ["f", "p", "r"]
    AVAILABLE_STATS = ["f", "p", "r"]

    def __init__(self, metrics=None, stats=None):
        self.metrics = metrics if metrics is not None \
            else Rouge.DEFAULT_METRICS
        self.stats = stats if stats is not None \
            else Rouge.DEFAULT_STATS

        for m in self.metrics:
            if m not in Rouge.AVAILABLE_METRICS:
                raise ValueError("Unknown metric '%s'" % m)

        for s in self.stats:
            if s not in Rouge.AVAILABLE_STATS:
                raise ValueError("Unknown stat '%s'" % s)

    def get_scores(self, hyps, refs, avg=False):
        if type(hyps) == str:
            hyps, refs = [hyps], [refs]

        assert(type(hyps) == type(refs))
        assert(len(hyps) == len(refs))

        if not avg:
            return self._get_scores(hyps, refs)
        return self._get_avg_scores(hyps, refs)

    def _get_scores(self, hyps, refs):
        scores = []
        for hyp, ref in zip(hyps, refs):
            sen_score = {}
            hyp = [" ".join(_.split()) for _ in hyp.split(".") if len(_) > 0]
            ref = [" ".join(_.split()) for _ in ref.split(".") if len(_) > 0]

            for m in self.metrics:
                fn = Rouge.AVAILABLE_METRICS[m]
                sc = fn(hyp, ref)
                sen_score[m] = {s: sc[s] for s in self.stats}
            scores.append(sen_score)
        return scores

    def _get_avg_scores(self, hyps, refs):
        scores = {m: {s: 0 for s in self.stats} for m in self.metrics}

        count = 0
        for (hyp, ref) in zip(hyps, refs):
            hyp = [" ".join(_.split()) for _ in hyp.split(".") if len(_) > 0]
            ref = [" ".join(_.split()) for _ in ref.split(".") if len(_) > 0]

            for m in self.metrics:
                fn = Rouge.AVAILABLE_METRICS[m]
                sc = fn(hyp, ref)
                scores[m] = {s: scores[m][s] + sc[s] for s in sc}
            count += 1
        scores = {m: {s: scores[m][s] / count for s in scores[m]}
                  for m in scores}
        return scores
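Putting the new class together: get_scores accepts either a single string or a list of strings, splits every hypothesis and reference on "." into sentence lists as shown above, and reports the stats "f", "p" and "r" for each requested metric; FilesRouge applies the same scorer to two line-aligned files. A minimal usage sketch under those assumptions (the sample sentences are invented):

import json
from rouge import Rouge

# Invented multi-sentence hypothesis/reference pair.
hyp = "the cat sat on the mat. it looked out of the window."
ref = "a cat was sitting on the mat. it stared through the window."

rouge = Rouge()  # defaults: rouge-1, rouge-2 and rouge-l with stats f/p/r
scores = rouge.get_scores(hyp, ref)  # a list with one dict per hyp/ref pair
print(scores[0]["rouge-l"]["f"])     # ROUGE-L F-score of the first pair

# Averaged scores over a (toy) corpus given as two equal-length lists:
avg = rouge.get_scores([hyp, hyp], [ref, ref], avg=True)
print(json.dumps(avg, indent=2))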
