In [24]:
import pandas as pd
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from heapq import nlargest
import evaluate

In [4]:
def generate_summary(text, n=5):
    """Build an extractive summary of ``text`` out of its own sentences.

    The text is split into sentences with NLTK's ``sent_tokenize``; each
    sentence is turned into a TF-IDF vector (English stop words removed), and
    every sentence except the last one is scored by its cosine similarity to
    the final sentence. The ``n`` highest-scoring sentences are returned in
    their original order, joined by single spaces.

    NOTE(review): because the final sentence acts as the query, it is never
    part of the summary of a multi-sentence text. Confirm this heuristic is
    intended before relying on it for evaluation.

    Args:
        text: Document to summarise. Anything that is not a non-blank string
            (e.g. ``None`` or a pandas ``NaN``) yields an empty summary.
        n: Maximum number of sentences to keep. Values <= 0 yield ``''``.

    Returns:
        The selected sentences as one space-joined string; ``''`` when there
        is nothing to summarise.
    """
    # Guard first so DataFrame.apply over a column with missing or blank
    # articles produces empty summaries instead of raising.
    if n <= 0 or not isinstance(text, str) or not text.strip():
        return ''

    sentences = sent_tokenize(text)

    # With fewer than two sentences there is nothing to rank against the last
    # one (matrix[:-1] would be empty and cosine_similarity would raise).
    if len(sentences) < 2:
        return ' '.join(sentences)

    try:
        matrix = TfidfVectorizer(stop_words='english').fit_transform(sentences)
    except ValueError:
        # TfidfVectorizer raises when the vocabulary ends up empty (for
        # example, every token is a stop word). Fall back to the leading
        # sentences rather than failing the whole run.
        return ' '.join(sentences[:n])

    # Similarity of each earlier sentence (columns) to the final sentence.
    scores = cosine_similarity(matrix[-1], matrix[:-1])[0]
    best = nlargest(n, range(len(scores)), key=scores.__getitem__)

    # Restore document order so the summary reads naturally.
    return ' '.join(sentences[i] for i in sorted(best))

In [5]:
# Sample passage about weather, used to try generate_summary on a short
# multi-sentence input before running it on the dataset.
text = '''
Weather is the day-to-day or hour-to-hour change in the atmosphere. 
Weather includes wind, lightning, storms, hurricanes, tornadoes (also known as twisters), rain, hail, snow, and lots more. 
Energy from the Sun affects the weather too. 
Climate tells us what kinds of weather usually happen in an area at different times of the year. 
Changes in weather can affect our mood and life. We wear different clothes and do different things in different weather conditions. 
We choose different foods in different seasons.
Weather stations around the world measure different parts of weather. 
Ways to measure weather are wind speed, wind direction, temperature and humidity. 
People try to use these measurements to make weather forecasts for the future. 
These people are scientists that are called meteorologists. 
They use computers to build large mathematical models to follow weather trends.'''

# Keep at most 5 sentences of the passage.
summary = generate_summary(text, 5)

print(summary)

Energy from the Sun affects the weather too. Changes in weather can affect our mood and life. We wear different clothes and do different things in different weather conditions. Weather stations around the world measure different parts of weather. People try to use these measurements to make weather forecasts for the future.


In [6]:
# Load the test CSV; the path is relative to the notebook's working directory.
df = pd.read_csv('dataset/cnn_dailymail/test.csv')

In [7]:
# Preview the first rows to see which columns were loaded.
df.head()

Unnamed: 0,id,article,highlights
0,92c514c913c0bdfe25341af9fd72b29db544099b,Ever noticed how plane seats appear to be gett...,Experts question if packed out planes are put...
1,2003841c7dc0e7c5b1a248f9cd536d727f27a45a,A drunk teenage boy had to be rescued by secur...,Drunk teenage boy climbed into lion enclosure ...
2,91b7d2311527f5c2b63a65ca98d21d9c92485149,Dougie Freedman is on the verge of agreeing a ...,Nottingham Forest are close to extending Dougi...
3,caabf9cbdf96eb1410295a673e953d304391bfbb,Liverpool target Neto is also wanted by PSG an...,Fiorentina goalkeeper Neto has been linked wit...
4,3da746a7d9afcaa659088c8366ef6347fe6b53ea,Bruce Jenner will break his silence in a two-h...,"Tell-all interview with the reality TV star, 6..."


In [8]:
# The `id` column is not used in this notebook, so remove it. Rebinding with
# errors='ignore' keeps the cell idempotent: re-running it after the column
# is already gone no longer raises KeyError.
df = df.drop(columns='id', errors='ignore')

In [9]:
# Check the frame again now that `id` has been dropped.
df.head()

Unnamed: 0,article,highlights
0,Ever noticed how plane seats appear to be gett...,Experts question if packed out planes are put...
1,A drunk teenage boy had to be rescued by secur...,Drunk teenage boy climbed into lion enclosure ...
2,Dougie Freedman is on the verge of agreeing a ...,Nottingham Forest are close to extending Dougi...
3,Liverpool target Neto is also wanted by PSG an...,Fiorentina goalkeeper Neto has been linked wit...
4,Bruce Jenner will break his silence in a two-h...,"Tell-all interview with the reality TV star, 6..."


In [10]:
# (number of rows, number of columns)
df.shape

(11490, 2)

In [11]:
# Spot check: show the article text stored under row label 26.
df['article'][26]

"A bomb technician has used his expertise to create an Easter egg hunt for blind children - using beeping eggs. David Hyche, now a special agent at the Bureau of Alcohol, Tobacco and Firearms (ATF), developed the technology nine years ago when his four-month-old daughter turned blind. A transmitter is placed inside plastic eggs that let off a high-pitched beeping sound. Now Hyche has passed on the idea to the Alabama Institute for Deaf and Blind in Birmingham. Festive invention: Blind children in Birmingham, Alabama, have celebrated Easter early by hunting for plastic eggs that let of a beeping sound. They trade the eggs in for candy . The device: Bomb expert David Hyche put a transmitter in a plastic egg so his daughter could join the fun . 'With my daughter, one of her first phrases was, 'I do it myself.' She wants to do it by herself and most of these kids want to do that too,' he told WBRC. 'It's logical for people who are explosives investigators. It's just a battery, beeper and a

In [12]:
# Highlights stored for the same row, for comparison with the article.
df['highlights'][26]

'Bomb expert David Hyche created the eggs when his daughter turned blind .\nThe children follow sound of a beeper in a plastic egg, trade it for candy .\nHe has now passed on the idea to Alabama Institute for Deaf and Blind .'

In [13]:
# Summarise that one article, keeping at most 5 sentences.
generate_summary(df['article'][26], 5)

"A bomb technician has used his expertise to create an Easter egg hunt for blind children - using beeping eggs. Now Hyche has passed on the idea to the Alabama Institute for Deaf and Blind in Birmingham. Festive invention: Blind children in Birmingham, Alabama, have celebrated Easter early by hunting for plastic eggs that let of a beeping sound. 'We love to give the opportunity for our children with disabilities and their families to come out and have the same opportunity as everybody else does,' said Tamara Harrison with the Alabama Institute. Hyche (pictured) came up with the device when his daughter turned blind nine years ago ."

In [14]:
# Add a `summary` column by applying generate_summary to every article
# (uses the function's default n=5).
df['summary'] = df['article'].apply(generate_summary)

In [15]:
# Preview the frame with the new `summary` column alongside the highlights.
df.head()

Unnamed: 0,article,highlights,summary
0,Ever noticed how plane seats appear to be gett...,Experts question if packed out planes are put...,Tests conducted by the FAA use planes with a 3...
1,A drunk teenage boy had to be rescued by secur...,Drunk teenage boy climbed into lion enclosure ...,A drunk teenage boy had to be rescued by secur...
2,Dougie Freedman is on the verge of agreeing a ...,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of agreeing a ...
3,Liverpool target Neto is also wanted by PSG an...,Fiorentina goalkeeper Neto has been linked wit...,Liverpool target Neto is also wanted by PSG an...
4,Bruce Jenner will break his silence in a two-h...,"Tell-all interview with the reality TV star, 6...",And while the Kardashian women are known for e...


In [16]:
_ROUGE_METRIC = None  # loaded on first use, then reused by later calls


def _get_rouge_metric():
    """Return the ROUGE metric object, calling evaluate.load only once."""
    global _ROUGE_METRIC
    if _ROUGE_METRIC is None:
        _ROUGE_METRIC = evaluate.load("rouge")
    return _ROUGE_METRIC


def compute_rouge_score(generated, reference):
    """Compute ROUGE scores for generated summaries against references.

    Args:
        generated: Iterable of generated summary strings (the predictions).
        reference: Iterable of reference summary strings, aligned
            one-to-one with ``generated``.

    Returns:
        Whatever the metric's ``compute`` returns for these inputs with
        ``use_stemmer=True``.

    Raises:
        ValueError: If the two inputs do not contain the same number of items.
    """
    generated = list(generated)
    reference = list(reference)

    # Fail fast, before tokenising or loading the metric.
    if len(generated) != len(reference):
        raise ValueError(
            f"Mismatch in the number of predictions ({len(generated)}) "
            f"and references ({len(reference)})"
        )

    # Each text is re-split into sentences and joined with '\n' before being
    # sent to ROUGE, so the metric sees one sentence per line.
    generated_ = ["\n".join(sent_tokenize(s.strip())) for s in generated]
    reference_ = ["\n".join(sent_tokenize(s.strip())) for s in reference]

    return _get_rouge_metric().compute(
        predictions=generated_,
        references=reference_,
        use_stemmer=True,
    )

In [20]:
# Corpus-level ROUGE. Argument order follows the function signature:
# generated summaries first, reference highlights second.
print(compute_rouge_score(df['summary'], df['highlights']))

{'rouge1': np.float64(0.312049844034635),
 'rouge2': np.float64(0.11843451553002296),
 'rougeL': np.float64(0.19533262045750047),
 'rougeLsum': np.float64(0.2871502398185847)}

In [28]:
# ROUGE for row 26 only: generated summary first, reference highlights second.
compute_rouge_score([df['summary'][26]], [df['highlights'][26]])

{'rouge1': np.float64(0.44897959183673464),
 'rouge2': np.float64(0.20689655172413796),
 'rougeL': np.float64(0.24489795918367346),
 'rougeLsum': np.float64(0.40816326530612246)}

In [27]:
# Print the highlights and the generated summary for row 26 so the score
# above can be read next to the actual texts.
print([df['highlights'][26]], [df['summary'][26]])

['Bomb expert David Hyche created the eggs when his daughter turned blind .\nThe children follow sound of a beeper in a plastic egg, trade it for candy .\nHe has now passed on the idea to Alabama Institute for Deaf and Blind .'] ["A bomb technician has used his expertise to create an Easter egg hunt for blind children - using beeping eggs. Now Hyche has passed on the idea to the Alabama Institute for Deaf and Blind in Birmingham. Festive invention: Blind children in Birmingham, Alabama, have celebrated Easter early by hunting for plastic eggs that let of a beeping sound. 'We love to give the opportunity for our children with disabilities and their families to come out and have the same opportunity as everybody else does,' said Tamara Harrison with the Alabama Institute. Hyche (pictured) came up with the device when his daughter turned blind nine years ago ."]
