In [1]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from collections import Counter
from heapq import nlargest

Init Plugin
Init Graph Optimizer
Init Kernel


In [2]:
# Load the 'en_core_web_lg' spaCy pipeline by name (the model package must
# already be installed); `nlp` is applied to the input text in a later cell.
nlp = spacy.load('en_core_web_lg')

In [3]:
# Sample input: three paragraphs about Johannes Gutenberg. The literal starts
# and ends with a newline; it feeds both the spaCy pipeline and the BART cell.
text = '''
Johannes Gutenberg (1398 – 1468) was a German goldsmith and publisher who introduced printing to Europe. His introduction of mechanical movable type printing to Europe started the Printing Revolution and is widely regarded as the most important event of the modern period. It played a key role in the scientific revolution and laid the basis for the modern knowledge-based economy and the spread of learning to the masses.

Gutenberg's many contributions to printing are: the invention of a process for mass-producing movable type, the use of oil-based ink for printing books, adjustable molds, and the use of a wooden printing press. His truly epochal invention was the combination of these elements into a practical system that allowed the mass production of printed books and was economically viable for printers and readers alike.

In Renaissance Europe, the arrival of mechanical movable type printing introduced the era of mass communication which permanently altered the structure of society. The relatively unrestricted circulation of information—including revolutionary ideas—transcended borders, and captured the masses in the Reformation. The sharp increase in literacy broke the monopoly of the literate elite on education and learning and bolstered the emerging middle class.
'''

In [4]:
# Run the loaded pipeline over the raw text. Later cells iterate `doc` for
# tokens (reading `.text` / `.pos_`) and `doc.sents` for sentences.
doc = nlp(text)

In [5]:
# Collect candidate keywords: every token that is not a stop word or a
# punctuation string and whose coarse part-of-speech tag is listed in `pos_tag`.
keywords = []
# Keep the stop words as a set so each membership test is O(1) rather than a
# linear scan over a list.
stopwords = set(STOP_WORDS)
pos_tag = ['ADJ','PROPN','NOUN','VERB']
for token in doc:
    # The stop-word list holds lowercase forms, so compare the lowercased token
    # text; otherwise capitalised stop words (e.g. at the start of a sentence)
    # would slip through the filter.
    if token.text.lower() in stopwords or token.text in punctuation:
        continue
    if token.pos_ in pos_tag:
        # Store the original casing: later cells look scores up by `token.text`.
        keywords.append(token.text)

In [6]:
# Count how often each keyword string occurs (case-sensitive, since the
# keywords keep their original casing).
freq_word = Counter(keywords)
# Trailing expression: displays the five most frequent keywords with counts.
freq_word.most_common(5)

[('printing', 6), ('Europe', 3), ('movable', 3), ('type', 3), ('Gutenberg', 2)]

In [7]:
# Normalise keyword counts by the highest count, so the most frequent keyword
# scores 1.0 and every other keyword a fraction of that.
#
# The scores are rebuilt from `keywords` on every run instead of dividing
# `freq_word` in place: an in-place division would shrink the values again each
# time this cell is re-executed, making the cell non-idempotent.
raw_counts = Counter(keywords)
# default=0 covers an empty keyword list; the comprehension below is then
# empty too, so no division by zero can occur.
max_freq = max(raw_counts.values(), default=0)
freq_word = Counter({word: count / max_freq for word, count in raw_counts.items()})
# Trailing expression: displays the five highest normalised scores.
freq_word.most_common(5)

[('printing', 1.0),
 ('Europe', 0.5),
 ('movable', 0.5),
 ('type', 0.5),
 ('Gutenberg', 0.3333333333333333)]

In [8]:
# Score each sentence as the sum of the normalised frequencies of the keyword
# tokens it contains. A sentence with no keyword token never gets an entry.
sent_strength = {}
for sent in doc.sents:
    for word in sent:
        weight = freq_word.get(word.text)
        if weight is None:
            # Not a keyword: contributes nothing to this sentence's score.
            continue
        sent_strength[sent] = sent_strength.get(sent, 0) + weight
print(sent_strength)

{
Johannes Gutenberg (1398 – 1468) was a German goldsmith and publisher who introduced printing to Europe.: 2.833333333333333, His introduction of mechanical movable type printing to Europe started the Printing Revolution and is widely regarded as the most important event of the modern period.: 4.499999999999999, It played a key role in the scientific revolution and laid the basis for the modern knowledge-based economy and the spread of learning to the masses.

: 3.0, Gutenberg's many contributions to printing are: the invention of a process for mass-producing movable type, the use of oil-based ink for printing books, adjustable molds, and the use of a wooden printing press.: 7.833333333333334, His truly epochal invention was the combination of these elements into a practical system that allowed the mass production of printed books and was economically viable for printers and readers alike.

: 2.833333333333333, In Renaissance Europe, the arrival of mechanical movable type printing int

In [9]:
# Extractive summary: take the three highest-scoring sentences (nlargest returns
# them in descending score order) and join them with single spaces.
summarized = nlargest(3, sent_strength, key=sent_strength.get)
# A sentence span can end with the whitespace token of a paragraph break (see
# the blank lines inside the printed `sent_strength` keys), so strip each
# sentence to keep stray newlines out of the joined summary.
print(' '.join(sent.text.strip() for sent in summarized))

Gutenberg's many contributions to printing are: the invention of a process for mass-producing movable type, the use of oil-based ink for printing books, adjustable molds, and the use of a wooden printing press. In Renaissance Europe, the arrival of mechanical movable type printing introduced the era of mass communication which permanently altered the structure of society. His introduction of mechanical movable type printing to Europe started the Printing Revolution and is widely regarded as the most important event of the modern period.


In [10]:
# Abstractive summary of the same text with the pretrained
# 'facebook/bart-large-cnn' checkpoint, for comparison with the extractive one.
from transformers import BartTokenizer, BartForConditionalGeneration
model_name = 'facebook/bart-large-cnn'
input_text = text

# from_pretrained loads the tokenizer files and model weights for the
# checkpoint (fetching them if they are not cached locally).
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)
# Call the tokenizer directly (the current entry point, replacing the legacy
# batch_encode_plus); inputs longer than 1024 tokens are truncated.
inputs = tokenizer(
    [input_text],
    return_tensors='pt',
    max_length=1024,
    truncation=True
)
# Forward the attention mask explicitly so generation does not have to infer
# which positions are real tokens. Beam search with 4 beams, summary capped at
# 100 tokens.
summary_ids = model.generate(
    inputs['input_ids'],
    attention_mask=inputs['attention_mask'],
    num_beams=4,
    max_length=100,
    early_stopping=True
)
# Index the single batch entry instead of squeeze(): squeeze() would also
# collapse the sequence dimension if the output were only one token long.
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print("Summary:", summary)


Summary: Johannes Gutenberg (1398 – 1468) was a German goldsmith and publisher. His introduction of mechanical movable type printing to Europe started the Printing Revolution and is widely regarded as the most important event of the modern period. It played a key role in the scientific revolution and laid the basis for the modern knowledge-based economy.
