# 0. Install dependencies

In [None]:
# Install PyTorch
!pip install torch==1.8.2+cu111 torchvision==0.9.2+cu111 torchaudio===0.8.2 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html

In [None]:
# Install transformers
!pip install transformers

# 1. Import and Load Model

In [38]:
# Importing dependencies from transformers
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

In [39]:
# Fetch the tokenizer that belongs to the "google/pegasus-xsum" checkpoint
tokenizer = PegasusTokenizer.from_pretrained(
    "google/pegasus-xsum"
)

In [40]:
# Fetch the pretrained Pegasus sequence-to-sequence weights from the same checkpoint
model = PegasusForConditionalGeneration.from_pretrained(
    "google/pegasus-xsum"
)

# 2. Perform Abstractive Summarization

In [41]:
# Input passage to summarize: a multi-paragraph description of Python.
# The bracketed citation markers (e.g. [30]) are part of the string and are
# passed to the tokenizer unchanged.
text = """
Python is an interpreted high-level general-purpose programming language. Its design philosophy emphasizes code readability with its use of significant indentation. Its language constructs as well as its object-oriented approach aim to help programmers write clear, logical code for small and large-scale projects.[30]

Python is dynamically-typed and garbage-collected. It supports multiple programming paradigms, including structured (particularly, procedural), object-oriented and functional programming. It is often described as a "batteries included" language due to its comprehensive standard library.[31]

Guido van Rossum began working on Python in the late 1980s, as a successor to the ABC programming language, and first released it in 1991 as Python 0.9.0.[32] Python 2.0 was released in 2000 and introduced new features, such as list comprehensions and a garbage collection system using reference counting. Python 3.0 was released in 2008 and was a major revision of the language that is not completely backward-compatible. Python 2 was discontinued with version 2.7.18 in 2020.[33]

Python consistently ranks as one of the most popular programming languages.[34][35][36][37]"""

In [42]:
# Encode the passage into numeric token ids, returned as PyTorch tensors.
# Over-long input is truncated; padding goes up to the longest sequence in the batch.
tokens = tokenizer(
    text,
    truncation=True,
    padding="longest",
    return_tensors="pt",
)

In [44]:
# Inspect the tokenizer output (shown because it is the last expression in the cell)
tokens

{'input_ids': tensor([[11994,   117,   142, 15186,   281,   121,  3393,   956,   121, 14621,
          3661,  1261,   107,  3096,   354,  4679, 18390,   929, 39331,   122,
           203,   207,   113,  1225, 58339,   107,  3096,  1261, 38059,   130,
           210,   130,   203,  2951,   121,  8321,  1014,  2560,   112,   225,
         19003,  1094,   786,   108,  8789,   929,   118,   360,   111,   423,
           121,  5129,   844,   107,  4101,  4311,  1100, 11994,   117, 22717,
           121,  7155,   252,   111,  9041,   121, 83800,   107,   168,  3000,
          1079,  3661, 50877,   108,   330,  7314,   143, 24899,   108, 22000,
           312,  2951,   121,  8321,   111,  3819,  3661,   107,   168,   117,
           432,  2540,   130,   114,   198, 18077,   144, 32300,   953,   194,
          1261,   640,   112,   203,  2250,   971,  2400,   107,  4101, 10822,
          1100, 58937,  4406,  7366,  3707,  1219,   375,   124, 11994,   115,
           109,  1095,  5940,   116,  

In [45]:
# Generate the summary token ids, passing the encoded inputs to the model by name
summary = model.generate(
    input_ids=tokens["input_ids"],
    attention_mask=tokens["attention_mask"],
)

In [47]:
# Inspect the generated token ids of the first sequence in the output batch
summary[0]

tensor([    0, 11994,   117,   114,  3661,  1261,  1184,   141, 58937,  4406,
         7366,  3707,   107,     1])

In [48]:
# Decode the generated ids back into text. skip_special_tokens=True drops
# special-token markers (the leading 0 and trailing 1 seen in the generated ids)
# so they can never leak into the summary string, regardless of which
# transformers version is installed.
tokenizer.decode(summary[0], skip_special_tokens=True)

'Python is a programming language developed by Guido van Rossum.'