# 0. Install dependencies

In [1]:
# Install PyTorch
!pip3 install torch==1.12.1 torchvision==0.13.1 torchaudio===0.12.1 --extra-index-url https://download.pytorch.org/whl/lts/1.8/cpu

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/, https://download.pytorch.org/whl/lts/1.8/cpu


In [2]:
# Install transformers
!pip install transformers


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# 1. Import and Load Model

In [4]:
# Importing dependencies from transformers
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

In [5]:
# Fetch the tokenizer published with the "google/pegasus-xsum" checkpoint
# (downloaded on first use, as the progress bars below show).
# NOTE(review): pegasus-xsum looks like an English summarization checkpoint, while the
# article used later in this notebook is Indonesian - confirm this is the intended model.
tokenizer = PegasusTokenizer.from_pretrained(
    pretrained_model_name_or_path="google/pegasus-xsum",
)

Downloading:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/87.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

In [6]:
# Fetch the pretrained Pegasus weights for the same "google/pegasus-xsum" checkpoint
# as the tokenizer (a multi-gigabyte download on first use, per the progress bar below).
model = PegasusForConditionalGeneration.from_pretrained(
    pretrained_model_name_or_path="google/pegasus-xsum",
)

Downloading:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

# 2. Perform Abstractive Summarization

In [13]:
# Input article to summarize: an Indonesian-language news piece, pasted verbatim.
# NOTE(review): the last two paragraphs ("Baca artikel detiknews ..." and
# "Download Apps Detikcom ...") are site boilerplate with URLs, not article content.
# The decoded summary later in this notebook echoes the hyphenated URL slug, so
# consider stripping those lines before tokenizing.
text = """
Jakarta - Sejak kasus COVID-19 melandai, beberapa negara kini telah membuka border untuk para turis. Hal ini tentunya menjadi kabar baik buat para pelancong Indonesia yang rindu bepergian ke luar negeri.
Buat kamu yang sudah tak sabar melancong ke negeri impian, kamu bisa lho liburan namun tetap hemat. Salah satunya dengan memanfaatkan promo menarik di travel fair. Pasalnya, travel fair biasanya menghadirkan banyak destinasi yang sedang promo hingga travel agent yang menawarkan harga dan paket menarik.

Bukan hanya itu, ada juga potongan harga yang bisa berupa diskon, voucher, atau bahkan cashback. Dengan begitu, kamu bisa mendapatkan tiket pesawat dan paket wisata dengan harga yang lebih terjangkau.

Baca artikel detiknews, "Pergi ke Luar Negeri Makin Hemat Pakai Promo Travel Fair, Ada Cicilan 0%!" selengkapnya https://news.detik.com/adv-nhl-detikcom/d-6302792/pergi-ke-luar-negeri-makin-hemat-pakai-promo-travel-fair-ada-cicilan-0.

Download Apps Detikcom Sekarang https://apps.detik.com/detik/"""

In [14]:
# Encode the article into model inputs (integer token ids).
#   truncation=True      - allow the tokenizer to cut the text if it is too long
#   padding="longest"    - pad to the longest sequence in the batch (one text here)
#   return_tensors="pt"  - return PyTorch tensors
encode_options = {"truncation": True, "padding": "longest", "return_tensors": "pt"}
tokens = tokenizer(text, **encode_options)

In [15]:
# Input tokens
# Display the tokenizer output. `input_ids` holds the integer ids produced for
# `text`, as a tensor because return_tensors="pt" was requested above.
tokens

{'input_ids': tensor([[24367,   233,  8653, 40419,  9994,   116,  1579,  4585, 44078, 11545,
           213,  2567,  8025,   108,   129,  6311, 20612, 57905,  7816,   110,
         58367,   110,   144, 76467,   213, 28929,  3934,  3660, 39418,  8357,
         31010,  1659,   107, 11816,   115,   457,  7276, 65313,  1024,  6186,
          5106,  9994,  4839,   110, 76542, 24181,  2130,  8357, 41576,  1321,
         65524,  7226, 21423, 37861,  1858,   129,  3752, 40689, 13793, 23853,
          2915, 57905, 17206,   107, 14751,  2130, 95741, 21423,   110,   116,
         66165, 39569, 10446,  4839,   213,  6965, 65524, 13793, 57905, 17206,
         18791,  3262,   108, 95741, 76218,  3834,  7462, 19651, 45355,  7936,
         21418, 12544, 24454,   178, 11771,   107, 41069,  4199, 65313, 45665,
           213,  1121, 10368,  2130,  9504,  6720,  1024,   304, 20716,  4218,
           905,  2335,   107, 22694,  1114, 19228,   108,   905,  2335,  9983,
         28717,  1024, 34665, 20728,  

In [16]:
# Summarize
# Generate summary token ids from the encoded input. `tokens` is unpacked so each
# field of the tokenizer output is passed to generate() as a keyword argument.
# No generation settings are passed here, so the model's defaults apply.
summary = model.generate(**tokens)



In [17]:
# Output summary tokens
# Display the token ids of the first generated sequence (only one text was
# encoded above, so index 0 is the summary of `text`).
summary[0]

tensor([    0,  8732,  2915, 31126, 17206,   285, 11771,  7552, 18320,   121,
        88033,   121, 18134,   121, 17086,   121,  9094,   121, 32047, 71520,
          110,   144, 76467,   213, 28929,  3934,  3660,  1321,  8357, 31010,
         1659,   107,     1])

In [18]:
# Decode the summary ids back into text.
# The generated sequence starts with id 0 and ends with id 1 (see the tensor
# above); these are special padding / end-of-sequence markers, not words.
# skip_special_tokens=True keeps them out of the decoded string regardless of
# how the installed tokenizer version renders special tokens.
tokenizer.decode(summary[0], skip_special_tokens=True)

'Luar Negeri Hemat Pakai-promo-travel-fair-ada-cicilan telah membuka borderan para turis.'

# 3. Save Summary and Model

In [20]:
import joblib

# Persist the generated summary token ids (`summary`) to summary.pkl
# in the current working directory.
with open('summary.pkl', 'wb') as summary_file:
    joblib.dump(summary, summary_file)

In [25]:
# Persist the model object to model.pkl.
# Dump `model` (not `summary`) so the file actually contains the model rather
# than a second copy of the generated token ids.
# NOTE(review): for Hugging Face models, model.save_pretrained(<directory>) is the
# documented way to persist weights; joblib is kept here only to preserve the
# existing model.pkl output file.
with open('model.pkl', 'wb') as model_file:
    joblib.dump(model, model_file)