In [None]:
#| default_exp summarizer

# SUMMARIZER

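> Extractive text summarization with spaCy: score each sentence by the normalized frequency of its keywords and keep the highest-scoring sentences.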

In [None]:
#|hide
from nbdev.showdoc import *

In [None]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from collections import Counter
from heapq import nlargest

You need to download the spaCy language model before running this notebook, e.g. `python -m spacy download en_core_web_sm`.

In [None]:
# Load the spaCy English pipeline used by every cell below.
# The small model ('en_core_web_sm') is active; the disabled line is the
# transformer-based alternative ('en_core_web_trf'), which must be downloaded
# separately before it can be loaded.
#nlp = spacy.load('en_core_web_trf')
nlp = spacy.load('en_core_web_sm')

Load a sample document.

In [None]:
# Alternative input (disabled): parse a document read from a local file.
#with open('rofi-twitch.rst', 'r') as f:
#    doc = nlp(f.read())

# First sample text: a short, informal comment spanning three paragraphs.
text = """
I would guess not.

to me, the "libertarian hacker" de facto standard peaked in the mid 00s, when everyone was enthusiastic about the Internet and Ron Paul and Larry Lessig and Information Wanting To Be Free and 09F9 and BitTorrent. then social media happened and now the Pauls are evil Republicans just like the rest of em, free speech is dangerous and should be restricted and limited on all platforms at all costs, and identity politics (of all flavors) are more important than everyone being a pseudonymous handle on the Internet, treated equally by default. there used to be this feeling that the future of the Internet was just going to be fuckin awesome because it would connect people in this crazy libertarianesque online network of computers, and what could come of that but societal advancement, increased creative output, and a general world-improving fair and equal exchange of ideas?

I rarely, if ever, see any of that kind of idealism anymore, both in theory and in practice, and I miss it dearly. 
"""

# Parsed into `doc2`. NOTE(review): the cells visible in this part of the
# notebook analyze `doc`, not `doc2` -- confirm whether `doc2` is still needed.
doc2 = nlp(text)

# Second sample text: two paragraphs about a securities transaction tax.
# It contains bracketed citation markers such as "[1]" and "[2]".
text2 = """On the day that the tax was announced, share prices fell by 2.2%. But there was leakage of information prior to the announcement, which might explain the 5.35% price decline in the 30 days prior to the announcement. When the tax was doubled, prices again fell by another 1%. These declines were in line with the capitalized value of future tax payments resulting from expected trades. It was further felt that the taxes on fixed-income securities only served to increase the cost of government borrowing, providing another argument against the tax.[1]

Even though the tax on fixed-income securities was much lower than that on equities, the impact on market trading was much more dramatic. During the first week of the tax, the volume of bond trading fell by 85%, even though the tax rate on five-year bonds was only 0.003%. The volume of futures trading fell by 98% and the options trading market disappeared.[1] 60% of the trading volume of the eleven most actively traded Swedish share classes moved to the UK after the announcement in 1986 that the tax rate would double. 30% of all Swedish equity trading moved offshore. By 1990, more than 50% of all Swedish trading had moved to London. Foreign investors reacted to the tax by moving their trading offshore while domestic investors reacted by reducing the number of their equity trades.[2] According to Anders Borg who served as finance minister in the Swedish government from 2006 to 2014, "between 90%-99% of traders in bonds, equities and derivatives moved out of Stockholm to London."""
# `doc` is the parsed document that the following cells tokenize, score and summarize.
doc = nlp(text2)

Check the number of sentences.

In [None]:
# Count the sentences by iterating the sentence generator once,
# without materializing an intermediate list.
sum(1 for _ in doc.sents)

11

Filter tokens: drop stop words and punctuation, and keep only proper nouns, adjectives, nouns, and verbs.

In [None]:
# Candidate keywords: the surface text of every token that is neither a stop
# word nor punctuation and whose coarse part-of-speech tag is one of `pos_tag`.
stopwords = list(STOP_WORDS)
pos_tag = ['PROPN', 'ADJ', 'NOUN', 'VERB']
keyword = [
    tok.text
    for tok in doc
    # The stop-word test is case-sensitive. `punctuation` is a plain string,
    # so the second test is a substring check against it.
    if tok.text not in stopwords
    and tok.text not in punctuation
    and tok.pos_ in pos_tag
]

Calculate the frequency of each keyword.

In [None]:
# Tally how many times each keyword occurs, then show the five most frequent.
freq_word = Counter()
freq_word.update(keyword)
freq_word.most_common(5)  # top 5

[('tax', 8), ('trading', 8), ('fell', 4), ('Swedish', 4), ('moved', 4)]

Normalize the frequencies by dividing each count by the highest count, so the most frequent keyword scores 1.0.

In [None]:
# Normalize keyword frequencies: divide every raw count by the highest raw
# count, so the most frequent keyword gets a weight of 1.0.
#
# The raw counts are rebuilt from `keyword` and a fresh Counter is bound to
# `freq_word`, rather than dividing the existing `freq_word` values in place.
# Dividing in place made this cell non-idempotent: running it a second time
# divided the already-normalized weights by the maximum count again.
raw_counts = Counter(keyword)
# `default=1` keeps the cell from raising when no keyword was extracted.
max_freq = max(raw_counts.values(), default=1)
# A Counter (not a plain dict) is kept so that `.most_common` stays available.
freq_word = Counter({word: count / max_freq for word, count in raw_counts.items()})
freq_word.most_common(5)

[('tax', 1.0),
 ('trading', 1.0),
 ('fell', 0.5),
 ('Swedish', 0.5),
 ('moved', 0.5)]

Score each sentence by summing the normalized frequencies of the keywords it contains.

In [None]:
# Map each sentence to the sum of the weights of its tokens that appear in
# `freq_word`. Sentences containing no weighted token get no entry at all.
sent_strength = {}
for sentence in doc.sents:
    weights = [freq_word[tok.text] for tok in sentence if tok.text in freq_word]
    if weights:
        sent_strength[sentence] = sum(weights)
sent_strength

{On the day that the tax was announced, share prices fell by 2.2%.: 2.25,
 But there was leakage of information prior to the announcement, which might explain the 5.35% price decline in the 30 days prior to the announcement.: 1.5,
 When the tax was doubled, prices again fell by another 1%.: 1.875,
 These declines were in line with the capitalized value of future tax payments resulting from expected trades.: 2.125,
 It was further felt that the taxes on fixed-income securities only served to increase the cost of government borrowing, providing another argument against the tax.[1]
 
 Even though the tax on fixed-income securities was much lower than that on equities, the impact on market trading was much more dramatic.: 5.875,
 During the first week of the tax, the volume of bond trading fell by 85%, even though the tax rate on five-year bonds was only 0.003%.: 4.75,
 The volume of futures trading fell by 98% and the options trading market disappeared.[1] 60% of the trading volume of the

Summarize by selecting the three highest-scoring sentences with `heapq.nlargest`.

In [None]:
# Rank the (sentence, score) pairs by score and keep the three best,
# ordered from highest to lowest score.
top_scored = nlargest(3, sent_strength.items(), key=lambda pair: pair[1])
summarized_sentences = [sentence for sentence, _score in top_scored]
summarized_sentences

[The volume of futures trading fell by 98% and the options trading market disappeared.[1] 60% of the trading volume of the eleven most actively traded Swedish share classes moved to the UK after the announcement in 1986 that the tax rate would double.,
 It was further felt that the taxes on fixed-income securities only served to increase the cost of government borrowing, providing another argument against the tax.[1]
 
 Even though the tax on fixed-income securities was much lower than that on equities, the impact on market trading was much more dramatic.,
 During the first week of the tax, the volume of bond trading fell by 85%, even though the tax rate on five-year bonds was only 0.003%.]

In [None]:
# Collect the text of each selected sentence and join the pieces with a
# single space to form the summary string.
final_sentences = []
for sentence in summarized_sentences:
    final_sentences.append(sentence.text)
summary = ' '.join(final_sentences)
summary

'The volume of futures trading fell by 98% and the options trading market disappeared.[1] 60% of the trading volume of the eleven most actively traded Swedish share classes moved to the UK after the announcement in 1986 that the tax rate would double. It was further felt that the taxes on fixed-income securities only served to increase the cost of government borrowing, providing another argument against the tax.[1]\n\nEven though the tax on fixed-income securities was much lower than that on equities, the impact on market trading was much more dramatic. During the first week of the tax, the volume of bond trading fell by 85%, even though the tax rate on five-year bonds was only 0.003%.'