#  Running our models on the How2/WikiHow/CNN data. 

Following are the high level steps we are following in this notebook:
* **Load Test Data :** Summary provided with the article.  
* **Use PreProcessed2 data  :**  Pre-Processed 2 data has following details:
 * Remove Special Characters from Text
 * Remove double punctuations and cr-lf
 * Remove greeting words like 'hi', 'hello', ..
* **Execute following Models  :**  We are executing multiple models including:
 * Extractive Summary Model (BERT)
 * Abstractive Summary Model (BERT2BERT for CNN/Dailymail)
 * Abstractive T5 Model (pre-trained model that was trained on our data). 


In [1]:
%%capture
##############
## INSTALLS ##
##############

#!pip install bert-extractive-summarizer
#!pip install transformers
#!pip install neuralcoref
#!pip install datasets==1.0.2
#!pip install git-python==1.0.3
#!pip install sacrebleu==1.4.12
#!pip install rouge_score
#!pip install rouge-metric

#!pip install rouge
#!pip install py-rouge
#!pip install pyrouge
#!pip install torch
#!pip install sentencepiece
#!pip install nlp

#!python -m nltk.downloader all
#!python -m spacy download en_core_web_md
#!python -m spacy download en
#!python -m spacy download en_core_web_sm



In [2]:
###########
# IMPORTS #
###########

import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk
from nltk.chunk import conlltags2tree, tree2conlltags
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx
from pprint import pprint
import spacy
from spacy import displacy
from collections import Counter
import en_core_web_md
from bs4 import BeautifulSoup
import requests
import re
import string
import pandas as pd
import csv
import rouge
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
#from rouge_score import rouge_scorer
import transformers
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM  
from transformers import BertTokenizer, EncoderDecoderModel
from tqdm import tqdm_pandas
from tqdm import tqdm
from summarizer import Summarizer
from simplet5 import SimpleT5
from datetime import datetime

nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

Global seed set to 42
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/sunitc/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /home/sunitc/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /home/sunitc/nltk_data...
[nltk_data]   Package words is already up-to-date!


True

In [3]:
%%capture
###############
# GLOBAL VARS #
###############
start_time = datetime.now()
aggregator='Avg'
apply_avg = aggregator == 'Avg'
apply_best = aggregator == 'Best'
vectorizer = TfidfVectorizer()
tokenizer = AutoTokenizer.from_pretrained("patrickvonplaten/bert2bert_cnn_daily_mail")  
abstractive_summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("patrickvonplaten/bert2bert_cnn_daily_mail")
extractive_summarizer_model = Summarizer()
modelt5 = SimpleT5()
modelt5.from_pretrained(model_type="t5", model_name="t5-base")
nltk.download("stopwords")
stop_words = stopwords.words('english')

Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[nltk_data] Downloading package stopwords to /home/sunitc/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [4]:
nlp = en_core_web_md.load()

In [5]:
########
# DATA #
########


# setting number of rows to low number so notebook runs in minutes and not hours. 
num_rows_each_df = 10

cnn_dailymail_df = pd.read_csv(os.getcwd() + "/data/cnn_dm_df.csv",encoding = "utf-8")
wikihow_df = pd.read_csv(os.getcwd() + "/data/wikihow_df.csv",encoding = "utf-8")
how2_df = pd.read_csv(os.getcwd() + "/data/how2_df.csv",encoding = "utf-8")

wikihow_df = wikihow_df[(wikihow_df.article_pp1.str.len() < 3700) & (wikihow_df.summary.str.len() > 100)]
how2_df = how2_df[(how2_df.article_pp1.str.len() < 3700) & (how2_df.summary.str.len() > 100)]
cnn_dailymail_df = cnn_dailymail_df[(cnn_dailymail_df.article_pp1.str.len() > 250) & (cnn_dailymail_df.summary.str.len() > 100)]

if len(wikihow_df) > num_rows_each_df:
    wikihow_df = wikihow_df.head(num_rows_each_df)
    
if len(how2_df) > num_rows_each_df:
    how2_df = how2_df.head(num_rows_each_df)
    
if len(cnn_dailymail_df) > num_rows_each_df:
    cnn_dailymail_df = cnn_dailymail_df.head(num_rows_each_df)
    
merged_df = pd.concat([how2_df, wikihow_df,cnn_dailymail_df], axis=0)
#merged_df = pd.concat([how2_df, wikihow_df], axis=0)
merged_df = merged_df[merged_df.article_pp1.str.len() > 250]
#merged_df = merged_df.head(5000)
#how2_df = how2_df.head(10)
#how2_df = how2_df[(how2_df['num_words'] > 200)] # & (how2_df['num_words'] < 400 )]
#wikihow_df = wikihow_df[(wikihow_df['num_words'] > 200)] # & (how2_df['num_words'] < 400 )]
#cnn_dailymail_df = cnn_dailymail_df[(cnn_dailymail_df['num_words'] > 200)]# & (how2_df['num_words'] < 400 )]

merged_df.head(5)

Unnamed: 0.1,Unnamed: 0,summary,article,data_source,article_pp1,article_pp2,article_pp3,num_words_article,num_sentences_article,num_words_summary,num_sentences_summary,num_words_article_pp1,num_sentences_article_pp1,num_words_article_pp2,num_sentences_article_pp2,num_words_article_pp3,num_sentences_article_pp3
2,2,learn about how hand washing can help prevent ...,hi ! this is david jackel on behalf of expert ...,How2,cold come direct contact somebody else virus o...,most colds come from direct conotact that you ...,cold come direct contact somebody else virus o...,359,14,20,2,123,1,284,11,116,1
3,3,how to julienne cucumbers to make kimchi for k...,the other way we can do cucumbers which is als...,How2,way cucumber also nice pickling cucumber find ...,the other way we can do cucumbers which is als...,way cucumber also nice cucumber find work best...,171,6,26,2,62,1,169,6,56,1
4,4,in order to put photographic emulsion on water...,my name is anthony maddaloni and i 'm going to...,How2,photograph emulsion heat emulsion light tight ...,now photographs have an emulsion on them .and ...,photograph emulsion heat emulsion light tight ...,149,9,54,3,56,1,124,8,48,1
5,5,combining bleeding and cupping methods in acup...,"in this episode , we 're actually going to use...",How2,episode actually going use interesting techniq...,"in this episode , we 're actually going to use...",episode actually going use interesting techniq...,360,16,31,3,127,1,346,18,121,1
6,6,what terms are necessary for an umpire to know...,"alright , some of the terminology is balls and...",How2,alright terminology ball strike call two ball ...,"alright , some of the terminology is balls and...",alright terminology ball strike call two ball ...,177,12,27,2,78,1,166,12,68,1


In [6]:
print(len(merged_df))

29


In [7]:
####################
# HELPER FUNCTIONS #
####################


def prepare_results(p, r, f):
    return '\t{}:\t{}: {:5.2f}\t{}: {:5.2f}\t{}: {:5.2f}'.format(metric, 'P', 100.0 * p, 'R', 100.0 * r, 'F1', 100.0 * f)


def RemoveIntroFromText(script):
    sentences = [x for x in script.sents]
    i=0
    new_text=""
    print("Original text: \n")
    displacy.render(script, jupyter=True, style='ent')
    print("Some preprocessing details: \n************\n")
    is_intro=False
    
    for sent in sentences:
        at_least_one_person=0
        print("Sentence ", i, ": ", sentences[i])
        d= dict([(str(x), x.label_) for x in nlp(str(sent)).ents])
        print(d)
        if len(d)>0:
            print(d)
            for key in d:
                #print("key:",key, "; value=", d[key])
                #print(sent)
                if (d[key]=="PERSON"):
                    at_least_one_person+=1
        if "expertvillage" in str(sent).lower() or "expert village" in str(sent).lower():
            is_intro=True
        if (at_least_one_person>0):
            print("the sentence has at least one person:")
            print("Sentence ", i, ": ", sentences[i])    
        if (i<4 and (at_least_one_person>0  or is_intro)):
            print("the sentence is likely an introduction")
            new_text=''
        else:
            new_text+=str(sent)
            if not (str(sent).strip()[-1] in string.punctuation): 
                print ("Missing punctuation at the end", sent, "; last char is ", str(sent).strip()[-1])
                new_text+=". "
        i+=1
    print("\n*************\nNew text, hopefully without person introduction:\n**********\n", new_text)
    return new_text


def RemoveIntroFromTextMiddle(text):
    script = nlp(text)
    sentences = [x for x in script.sents]
    #print("sentences.....")
    #print(sentences)
    i=0
    new_text=""
    print("Original text: \n")
    displacy.render(script, jupyter=True, style='ent')
    print("Some preprocessing details: \n************\n")
    is_intro=False
    
    for sent in sentences:
        at_least_one_person=0
        print("Sentence ", i, ": ", sentences[i])
        d= dict([(str(x), x.label_) for x in nlp(str(sent)).ents])
        print(d)
        if len(d)>0:
            print(d)
            for key in d:
                #print("key:",key, "; value=", d[key])
                #print(sent)
                if (d[key]=="PERSON"):
                    at_least_one_person+=1
        if "expertvillage" in str(sent).lower() or "expert village" in str(sent).lower():
            is_intro=True
        if (at_least_one_person>0):
            print("the sentence has at least one person:")
            print("Sentence ", i, ": ", sentences[i])    
        if (i<4 and (at_least_one_person>0  or is_intro)):
            print("skipping the sentence as it is likely an introduction")
            #new_text=''
        else:
            new_text+=str(sent)
            if not (str(sent).strip()[-1] in string.punctuation): 
                print ("Missing punctuation at the end", sent, "; last char is ", str(sent).strip()[-1])
                new_text+=". "
        i+=1
    print("\n*************\nNew text, hopefully without person introduction:\n**********\n", new_text)
    return new_text

def RemoveIntroFromTextNonVerbose(script):
    sentences = [x for x in script.sents]
    i=0
    new_text="" 
    #displacy.render(script, jupyter=True, style='ent') 
    is_intro=False
    
    for sent in sentences:
        at_least_one_person=0
        d= dict([(str(x), x.label_) for x in nlp(str(sent)).ents])
        if len(d)>0:
            for key in d:
                if (d[key]=="PERSON"):
                    at_least_one_person+=1
        if "expertvillage" in str(sent).lower() or "expert village" in str(sent).lower():
            is_intro=True
        if (i<4 and (at_least_one_person>0  or is_intro)):
             new_text=''
        else:
            new_text+=str(sent)
            if not (str(sent).strip()[-1] in string.punctuation): 
                 new_text+=". "
        i+=1
    return new_text

#Raw Text Summarization
def generate_abstractive_summary(raw_string, model = abstractive_summarizer_model, max_length=512):
    # Tokenizer will automatically set [BOS] <text> [EOS]
    # cut off at BERT max length 512
    inputs = tokenizer(raw_string, padding="max_length", truncation=True, max_length=max_length, return_tensors="pt")
    input_ids = inputs.input_ids
    attention_mask = inputs.attention_mask
    outputs = model.generate(input_ids, attention_mask=attention_mask)
    # all special tokens including will be removed
    output_str = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return output_str[0]

#This function produces an extractive summary for a given article
def generate_extractive_summary(raw_string, model = extractive_summarizer_model, min_summary_length = 50):
    output_str = model(raw_string, min_length = min_summary_length)
    return output_str


def process_article(text):
    article = text.split(".")
    sentences = []
    for sentence in article:
        sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))
    sentences.pop() 
    return sentences

def sentence_similarity(sent1, sent2, stopwords=None):
    if stopwords is None:
        stopwords = []
 
    sent1 = [w.lower() for w in sent1]
    sent2 = [w.lower() for w in sent2]
    all_words = list(set(sent1 + sent2))
    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)
    # build the vector for the first sentence
    for w in sent1:
        if w in stopwords:
            continue
        vector1[all_words.index(w)] += 1
 
    # build the vector for the second sentence
    for w in sent2:
        if w in stopwords:
            continue
        vector2[all_words.index(w)] += 1
 
    return 1 - cosine_distance(vector1, vector2)
 
def build_similarity_matrix(sentences, stop_words):
    # Create an empty similarity matrix
    similarity_matrix = np.zeros((len(sentences), len(sentences)))
 
    for idx1 in range(len(sentences)):
        for idx2 in range(len(sentences)):
            if idx1 == idx2: #ignore if both are same sentences
                continue 
            similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2], stop_words)

    return similarity_matrix


def generate_summary(in_text, top_n=5):
    summarize_text = []
    try:
        # Step 1 - Read text anc split it
        sentences =  process_article(in_text)
        # Step 2 - Generate Similary Martix across sentences
        sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)
        # Step 3 - Rank sentences in similarity martix
        sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
        scores = nx.pagerank(sentence_similarity_graph)
        # Step 4 - Sort the rank and pick top sentences
        ranked_sentence = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)    
        #print("Indexes of top ranked_sentence order are ", ranked_sentence)    
        for i in range(top_n):
            summarize_text.append(" ".join(ranked_sentence[i][1]))
        # Step 5 - Offcourse, output the summarize text
        #print("Summarize Text: \n", ". ".join(summarize_text))
    except:
        return ""
    finally:
        return ". ".join(summarize_text)

def generate_abstractive_summary_T5(raw_string):
    # using epoch 6
    modelt5.load_model("t5","outputs/simplet5-epoch-6-train-loss-1.5226", use_gpu=False)
    return modelt5.predict(raw_string)[0]

def print_rogue_scores(hypo, refe):
    scores = evaluator.get_scores(hypo, refe)
    #scores = evaluator.get_scores(all_hypothesis, all_references)
    for metric, results in sorted(scores.items(), key=lambda x: x[0]):
        if not apply_avg and not apply_best: # value is a type of list as we evaluate each summary vs each reference
            for hypothesis_id, results_per_ref in enumerate(results):
                nb_references = len(results_per_ref['p'])
                for reference_id in range(nb_references):
                    print('\tHypothesis #{} & Reference #{}: '.format(hypothesis_id, reference_id))
                    print('\t' + '\t{}:\t{}: {:5.2f}\t{}: {:5.2f}\t{}: {:5.2f}'.format(metric, 'P', 100.0 * results_per_ref['p'][reference_id], 'R', 100.0 * results_per_ref['r'][reference_id], 'F1', 100.0 * results_per_ref['f'][reference_id]))
                    #print('\t' + prepare_results(results_per_ref['p'][reference_id], results_per_ref['r'][reference_id], results_per_ref['f'][reference_id]))
            print()
        else:
            print('\t' + '\t{}:\t{}: {:5.2f}\t{}: {:5.2f}\t{}: {:5.2f}'.format(metric, 'P', 100.0 * results['p'], 'R', 100.0 * results['r'], 'F1', 100.0 * results['f']))
            #print("x") #prepare_results(results['p'], results['r'], results['f']))

In [8]:
####################
# Rouge Evaluator  #
####################

evaluator = rouge.Rouge(metrics=['rouge-n', 'rouge-l', 'rouge-w'],
                           max_n=4,
                           limit_length=True,
                           length_limit=100,
                           length_limit_type='words',
                           apply_avg=apply_avg,
                           apply_best=apply_best,
                           alpha=0.5, # Default F1_score
                           weight_factor=1.2,
                           stemming=True)



In [9]:
# TEST Loop for Abstractive and Extractive summarization

icount = 0

for article in merged_df['article_pp2']:
    if len(article) > 200:
        print("Article Len=", len(article))
        print(article)
        e_summary = generate_extractive_summary(article, min_summary_length=50)
        e2_summary = generate_summary(article, 3)
        if len(e2_summary) > 5:
            a_e_summary = generate_abstractive_summary(e2_summary, model = abstractive_summarizer_model) 
        elif len(e_summary) > 5:
            a_e_summary = generate_abstractive_summary(e_summary, model = abstractive_summarizer_model) 
        else:
            a_e_summary = generate_abstractive_summary(article, model = abstractive_summarizer_model) 

        a_summary = generate_abstractive_summary(article, model = abstractive_summarizer_model)
        t5_summary = generate_abstractive_summary_T5(article)
        all_summary = e_summary + "." + e2_summary + "." + a_e_summary + "." + a_summary + "." + t5_summary + "."
        s_s_summary = generate_abstractive_summary_T5(all_summary)
        
        print("----------------")    
        print("e-summary=",e_summary)
        print("----------------")    
        print("e2-summary=",e2_summary)
        print("----------------")    
        print("a-e-summary=",a_e_summary)
        print("----------------")    
        print("a-summary=",a_summary)
        print("----------------")    
        print("t5-summary=",t5_summary)
        print("----------------")    
        print("ss-summary=",s_s_summary)
        print("-------------------------------------------------------------------------------------------------") 
        icount +=1
    
    if icount > 3:
        break 
    


Article Len= 1437
most colds come from direct conotact that you 're having with somebody else who has the virus .often times that 's like shaking hands with somebody , being in close quarters , hugging , touching , anything when you 're close with somebody else , sharing things , touching the same glass , touching the same silverware , touching the same food , stuff like that .so what you should do is constanotly be washing your hands , especially if you 're traveling or if you 're in close quarters with people or you 're exposed to someone who might be sick .always be washing your hands and do it with warm water and soap , wash vigorously for at least 20 seconds to make sure you loosen up all the germs .now , you wo n't always have access to warm water and soap , so what you should do is carry hand sanitizer .i always keep some hand sanitizer with me , in my car , in a bag with me if i 'm traveling and this is something you can take out whenever you need to and just put a little bit o

----------------
e-summary= and we 'll go ahead and just clean that up and a little bit of bruising that 's left over from that .and that 's bleeding , cupping.
----------------
e2-summary= but in this case , for the magic of tv , we 'll go ahead and leave it there not quite so long , and what you 'll see is when i release the pressure from this cup , the blood actually kinda sprays up on the cup , cups . and actually , in order to be therapeutic , we oftenotimes , we 'll leave this on for five minutes and get about a teaspoon or so of blood out of it .  and we 'll go ahead and just clean that up and a little bit of bruising that 's left over from that 
----------------
a-e-summary= we'll go ahead and clean that cup, cups, cups and soak up the blood. we can't wait for the magic of tv, but we can get about a teaspoon of blood out of it. we'd like to leave it on for five minutes and clean it up. we ’ ll get about five minutes of the tea spoon.
----------------
a-summary= in this episode 

In [10]:
# Generate summaries for the articles using the different approaches
import warnings
warnings.filterwarnings('ignore')

e_list = []
a_list = []
t5_list = []
sum_sum_list = []

iCount = 0

for article in merged_df['article_pp2']:
    #print(article)
    print(iCount, end=",")
    iCount =  iCount + 1
    
    e_summary = generate_extractive_summary(article, min_summary_length=100)
    a_summary = generate_abstractive_summary(article, model = abstractive_summarizer_model)
    t5_summary = generate_abstractive_summary_T5(article)
    all_summary = e_summary + "." + a_summary + "." + t5_summary + "."
    s_s_summary = generate_abstractive_summary(all_summary, model = abstractive_summarizer_model)
    
    e_list.append(e_summary)
    a_list.append(a_summary)
    t5_list.append(t5_summary)
    sum_sum_list.append(s_s_summary)


print("Dataframe len=", len(how2_df))
print("e_summ len=", len(e_list))
print("a_summ len=", len(a_list))
print("t5_summ len=", len(t5_list))
print("ss_summ len=", len(sum_sum_list))

merged_df['e_summarization'] = e_list
merged_df['a_summarization'] = a_list
merged_df['t5_summarization'] = t5_list
merged_df['ss_summarization'] = sum_sum_list

merged_df.to_csv(os.getcwd() + "/data/merged_df_with_Summarization.csv")


Token indices sequence length is longer than the specified maximum sequence length for this model (882 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (902 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (605 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1187 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (612 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for th

Dataframe len= 10
e_summ len= 29
a_summ len= 29
t5_summ len= 29
ss_summ len= 29


In [11]:
# Calculate Rogue scores for the summarization that was just created. 

hypo=merged_df['summary'].tolist()
refe1=merged_df['e_summarization'].tolist() #[reference]
refe2=merged_df['a_summarization'].tolist() #[reference]
refe3=merged_df['t5_summarization'].tolist() #[reference]
refe4=merged_df['ss_summarization'].tolist() #[reference]

print("Rogue for Extractive Summarization")
print_rogue_scores(hypo,refe1)    
print("Rogue for Abstractive Summarization")
print_rogue_scores(hypo,refe2)  
print("Rogue for T5 Summarization")
print_rogue_scores(hypo,refe3)        
print("Rogue for SS Summarization")
print_rogue_scores(hypo,refe4) 


Rogue for Extractive Summarization
		rouge-1:	P: 30.99	R: 19.04	F1: 22.28
		rouge-2:	P:  6.76	R:  3.53	F1:  4.42
		rouge-3:	P:  2.55	R:  1.19	F1:  1.57
		rouge-4:	P:  1.34	R:  0.58	F1:  0.80
		rouge-l:	P: 25.10	R: 15.85	F1: 18.69
		rouge-w:	P: 14.69	R:  3.52	F1:  5.41
Rogue for Abstractive Summarization
		rouge-1:	P: 28.89	R: 23.81	F1: 24.65
		rouge-2:	P:  5.76	R:  4.66	F1:  4.89
		rouge-3:	P:  1.85	R:  1.37	F1:  1.52
		rouge-4:	P:  0.73	R:  0.47	F1:  0.57
		rouge-l:	P: 24.68	R: 20.42	F1: 21.44
		rouge-w:	P: 14.74	R:  5.23	F1:  7.36
Rogue for T5 Summarization
		rouge-1:	P: 30.20	R: 31.76	F1: 28.18
		rouge-2:	P:  5.99	R:  6.78	F1:  5.85
		rouge-3:	P:  1.87	R:  2.01	F1:  1.83
		rouge-4:	P:  0.80	R:  0.79	F1:  0.79
		rouge-l:	P: 27.06	R: 27.53	F1: 25.56
		rouge-w:	P: 16.43	R:  8.23	F1:  9.84
Rogue for SS Summarization
		rouge-1:	P: 28.20	R: 24.60	F1: 24.93
		rouge-2:	P:  5.81	R:  5.26	F1:  5.35
		rouge-3:	P:  2.22	R:  1.96	F1:  2.05
		rouge-4:	P:  0.99	R:  0.88	F1:  0.93
		rouge-l:	P: 24.

In [12]:
end_time = datetime.now()

In [13]:
print('Duration: {}'.format(end_time - start_time))

Duration: 0:11:04.749577
