# ROUGE

### Package installation

In [None]:
# !pip install rouge-metric

### Imports

In [22]:
# Execute the TextRank notebook inside this kernel's namespace.
# NOTE(review): `sum_textrank` (used further down) is not defined in this notebook —
# presumably it is created by this %run; confirm.
%run ../textrank/textrank.ipynb

In [23]:
# Execute the LexRank notebook inside this kernel's namespace.
# NOTE(review): `sum_lexrank` (used further down) is not defined in this notebook —
# presumably it is created by this %run; confirm.
%run ../lexrank/lexrank.ipynb

In [24]:
# Execute the preprocessing notebook; needed for the `read_txt` function that
# join_text_annotation calls below.
# NOTE(review): `sentence_collection`, `os`, `re` and `glob` are also used in this
# notebook without a local import/definition — presumably they leak in from one
# of the %run notebooks; confirm.
%run ../rfej_preprocessing/find_shortenings.ipynb

### ROUGE algo

In [25]:
# Path to the directory with the annotation files.
# Built with os.path.join instead of hard-coded '\\' separators so that it also
# resolves on POSIX systems. The trailing '' component makes the result end with
# the platform separator, which the `PATH + "*"` glob pattern relies on.
PATH = os.path.join(os.path.abspath(os.path.join('..', 'rfej_parser', 'articles')), '')

In [26]:
from rouge_metric import PyRouge

In [27]:
# sort file names
def atoi(text):
    """
    Convert a chunk made only of decimal digits to int; return any other chunk unchanged.

    `str.isdecimal()` is used rather than `str.isdigit()`: isdigit() is also true
    for characters such as superscript digits ('²'), which int() rejects with
    ValueError. isdecimal() accepts exactly the characters int() can parse.
    The empty string is returned unchanged.
    """
    return int(text) if text.isdecimal() else text

def natural_keys(text):
    """
    Build a sort key in which embedded numbers compare by value, not by character.

    The text is split around runs of digits (the capture group keeps the digit
    runs in the result) and every piece is passed through atoi.
    """
    pieces = re.split(r'(\d+)', text)
    return list(map(atoi, pieces))

In [28]:
import logging

def join_text_annotation() -> list:
    """
    Load every annotation file found under PATH as a ROUGE reference.

    Files whose names end in '<digits>.txt' are processed in natural (numeric)
    order. Each file is passed through read_txt and sentence_collection, and the
    resulting sentences are joined with newlines.

    Returns:
        A list with one entry per file; each entry is a single-element list
        holding that file's joined text. Empty when no file matches.
    """
    # The dot is escaped so that only a real '.txt' extension matches; an
    # unescaped '.' would accept any character in that position.
    name_pattern = re.compile(r".*\d+\.txt$")
    files = [fname for fname in glob.glob(PATH + "*") if name_pattern.match(fname)]
    files.sort(key=natural_keys)

    # Suppress 'WARNING:root:Something went wrong while tokenizing' while reading.
    # The previous level is remembered and restored in `finally`, so a failure
    # while reading a file cannot leave the root logger stuck at ERROR, and a
    # level configured by the caller is not overwritten with WARNING.
    # setLevel() is the documented way to change a logger's level.
    previous_level = logging.root.level
    logging.root.setLevel(logging.ERROR)
    try:
        annot = [
            ['\n'.join(sentence_collection(read_txt(f)))]
            for f in files
        ]
    finally:
        logging.root.setLevel(previous_level)
    return annot

In [29]:
def join_text_summary(summary: dict, type_sum: str) -> dict:
    """
    Flatten every summary into one lower-cased, newline-separated string.

    summary: mapping from a document key to its sequence of summary sentences.
    type_sum: 't' (TextRank) joins the sentences as they are; any other value
        (e.g. 'l' for LexRank) converts each sentence with str() first.

    Returns a new dict, ordered by ascending key, that maps each key to the
    joined summary text.
    """
    keep_as_is = type_sum == 't'
    joined = {}
    for key, sentences in sorted(summary.items()):
        lines = sentences if keep_as_is else [str(item) for item in sentences]
        joined[key] = '\n'.join(lines).lower()
    return joined

In [30]:
# ROUGE METRIC CALCULATION
def rouge_alg(summary: dict, references: list) -> dict:
    """
    Score a set of generated summaries against reference texts with PyRouge.

    summary: mapping whose values are the summary texts; they are taken in the
        dict's iteration order.
    references: reference texts handed to PyRouge.evaluate unchanged.
        NOTE(review): presumably references[i] holds the reference(s) for the
        i-th summary — confirm the ordering of both inputs matches.

    Returns whatever PyRouge.evaluate returns for ROUGE-1, ROUGE-2 and ROUGE-L.
    """
    # Candidate texts, in the order the dict yields them
    hypotheses = list(summary.values())
    # Same scorer configuration as before: n-grams 1 and 2, ROUGE-L, mode='average'
    scorer = PyRouge(rouge_n=(1, 2), rouge_l=True, mode='average')
    return scorer.evaluate(hypotheses, references)

In [31]:
# Reference annotations: one single-element list per annotation file
annotations = join_text_annotation()

In [32]:
# TextRank summaries flattened to one lower-cased string per document ('t' = TextRank)
hypotheses_textrank = join_text_summary(sum_textrank, 't')

In [33]:
# LexRank summaries flattened to one lower-cased string per document ('l' = LexRank)
hypotheses_lexrank = join_text_summary(sum_lexrank, 'l')

In [34]:
# ROUGE (rouge-1, rouge-2, rouge-l) for the TextRank summaries against the annotations
metrics_textrank = rouge_alg(hypotheses_textrank, annotations)

In [35]:
# ROUGE (rouge-1, rouge-2, rouge-l) for the LexRank summaries against the annotations
metrics_lexrank = rouge_alg(hypotheses_lexrank, annotations)

### Result metrics

In [36]:
# By the rouge-1 and rouge-l metrics TextRank does better (8 sentences per summary)
metrics_textrank

{'rouge-1': {'f': 0.14106136813929335,
  'p': 0.1232744008653751,
  'r': 0.1648467030676705},
 'rouge-2': {'f': 0.02208817713994442,
  'p': 0.018807195458214607,
  'r': 0.026755825390976602},
 'rouge-l': {'f': 0.13044844876831158,
  'p': 0.11427454976768742,
  'r': 0.15195554396767527}}

In [37]:
# LexRank: higher recall on all three metrics and a better rouge-2 F-score,
# but lower rouge-1 / rouge-l precision and F-score than TextRank
metrics_lexrank

{'rouge-1': {'f': 0.13181783467116304,
  'p': 0.08159462030314618,
  'r': 0.3428480760925167},
 'rouge-2': {'f': 0.032980495952016624,
  'p': 0.02013056627145094,
  'r': 0.09118928697231742},
 'rouge-l': {'f': 0.11904164997049979,
  'p': 0.07374374521991428,
  'r': 0.3086067052737675}}