In [2]:
%%capture
!pip install datasets rouge-score nltk transformers sentencepiece

In [50]:
#GPU check
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Tue Mar 29 01:50:54 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   43C    P0    33W / 250W |   3471MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [49]:
torch.cuda.empty_cache()

In [3]:
# Make sure your version of Transformers is at 4.11.0
# to run the following code correctly:
import datasets
import transformers
import torch

In [4]:
torch_device ='cuda'
#torch_device = 'cpu' (run this line if no gpu attached)

In [None]:
#load original model
from transformers import EncoderDecoderModel
model = EncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-uncased", "bert-base-uncased").to(torch_device)

In [6]:
from transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

In [7]:
#prefix = "summarize: "
prefix = ""

##XSUM

In [8]:
#load test dataset
test_dataset = datasets.load_dataset('xsum', split='test')

Using custom data configuration default
Reusing dataset xsum (/root/.cache/huggingface/datasets/xsum/default/1.2.0/32c23220eadddb1149b16ed2e9430a05293768cfffbdfd151058697d4c11f934)


In [9]:
small_test = test_dataset.select(list(range(2000)))
small_test

Dataset({
    features: ['document', 'summary', 'id'],
    num_rows: 2000
})

In [10]:
# set special tokens
model.config.decoder_start_token_id = tokenizer.cls_token_id
model.config.eos_token_id = tokenizer.sep_token_id
model.config.pad_token_id = tokenizer.pad_token_id

In [11]:
# generate summary
batch_size = 32
max_input_length=512
max_target_length=64

def generate_summary(batch):
    sentences = [prefix + text for text in batch['document']]
    inputs = tokenizer(sentences, padding="max_length", truncation=True, max_length=max_input_length, return_tensors="pt")
    input_ids = inputs.input_ids.to(torch_device)
    attention_mask = inputs.attention_mask.to(torch_device)

    outputs = model.generate(input_ids, attention_mask=attention_mask)
    # all special tokens including will be removed
    output_str = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    batch["pred"] = output_str
    return batch

In [12]:
results = small_test.map(generate_summary, batched=True, batch_size=batch_size, remove_columns=["document"])

  0%|          | 0/63 [00:00<?, ?ba/s]

In [13]:
results

Dataset({
    features: ['summary', 'id', 'pred'],
    num_rows: 2000
})

In [14]:
metric = datasets.load_metric("rouge")
rouge_score = metric.compute(predictions=results["pred"], references=results["summary"], use_stemmer=True)
# Extract a few results
rouge_mean = {key: value.mid.fmeasure * 100 for key, value in rouge_score.items()}

In [15]:
rouge_mean

{'rouge1': 0.22296665889473274,
 'rouge2': 0.0,
 'rougeL': 0.22488414634782963,
 'rougeLsum': 0.2250258805325749}

In [16]:
#load fine-tuned model
!cp '/content/drive/My Drive/weights/bert2bert-xsum.zip' '/content'

In [19]:
!unzip bert2bert-xsum.zip

Archive:  bert2bert-xsum.zip
   creating: checkpoint-25000/
  inflating: checkpoint-25000/special_tokens_map.json  
  inflating: checkpoint-25000/vocab.txt  
  inflating: checkpoint-25000/training_args.bin  
  inflating: checkpoint-25000/optimizer.pt  
  inflating: checkpoint-25000/config.json  
  inflating: checkpoint-25000/tokenizer.json  
  inflating: checkpoint-25000/tokenizer_config.json  
  inflating: checkpoint-25000/scheduler.pt  
  inflating: checkpoint-25000/trainer_state.json  
  inflating: checkpoint-25000/pytorch_model.bin  
  inflating: checkpoint-25000/rng_state.pth  
  inflating: checkpoint-25000/scaler.pt  


In [17]:
finetuned_model = EncoderDecoderModel.from_pretrained('checkpoint-25000').to(torch_device)

In [18]:
# generate summary
batch_size = 16
def finetuned_summary(batch):
    sentences = [prefix + text for text in batch['document']]
    inputs = tokenizer(sentences, padding="max_length", truncation=True, max_length=max_input_length, return_tensors="pt")
    input_ids = inputs.input_ids.to(torch_device)
    attention_mask = inputs.attention_mask.to(torch_device)

    outputs = finetuned_model.generate(input_ids, attention_mask=attention_mask)
    # all special tokens including will be removed
    output_str = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    batch["pred"] = output_str
    return batch

In [19]:
finetuned_results = small_test.map(finetuned_summary, batched=True, batch_size=batch_size, remove_columns=["document"])

  0%|          | 0/125 [00:00<?, ?ba/s]

In [20]:
finetuned_score = metric.compute(predictions=finetuned_results["pred"], references=finetuned_results["summary"], use_stemmer=True)
# Extract a few results
finetuned_rouge_mean = {key: value.mid.fmeasure * 100 for key, value in finetuned_score.items()}

In [21]:
rouge_mean

{'rouge1': 0.22296665889473274,
 'rouge2': 0.0,
 'rougeL': 0.22488414634782963,
 'rougeLsum': 0.2250258805325749}

In [22]:
finetuned_rouge_mean

{'rouge1': 27.98110118553714,
 'rouge2': 9.202841131004089,
 'rougeL': 20.73698015393977,
 'rougeLsum': 20.731187037716563}

In [23]:
for i in range(20, 30):
    print("Original Text: %s" % small_test[i]['document'])
    print("\nActual Summary: %s" % results[i]['summary'])
    print("\nOriginal Prediction : %s" % results[i]['pred'])
    print("\nfinetuned Prediction : %s" % finetuned_results[i]["pred"])
    print("=====================================================================\n")

Original Text: Pakistan's telecoms regulator said the ban was no longer necessary because Google, which owns YouTube, had now launched a Pakistan-specific version.
YouTube has denied claims that the authorities can filter content.
Many young Pakistanis have welcomed the lifting of the ban but some activists want details of the deal with Google.
They say there should be greater transparency of the terms agreed between Google and the government.
A Pakistan Telecommunication Authority (PTA) official confirmed to the BBC that all internet service providers had been directed to open access to YouTube.
The Pakistan Telecommunication Company Ltd posted on its Facebook page on Monday: "Welcome Back YouTube".
Pakistan's ministry of information technology said: "Google has provided an online web process through which requests for blocking access of offending material can be made by the PTA to Google directly.
"Google/YouTube will accordingly restrict access to the said offending material for use

##CNN

In [27]:
#load test dataset
test_cnn = datasets.load_dataset('cnn_dailymail', '3.0.0', split='test')

Reusing dataset cnn_dailymail (/root/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/3cb851bf7cf5826e45d49db2863f627cba583cbc32342df7349dfe6c38060234)


In [28]:
small_test = test_cnn.select(list(range(2000)))
small_test

Dataset({
    features: ['article', 'highlights', 'id'],
    num_rows: 2000
})

In [29]:
# generate summary
batch_size = 32
max_input_length=512
max_target_length=128

def generate_summary(batch):
    sentences = [prefix + text for text in batch['article']]
    inputs = tokenizer(sentences, padding="max_length", truncation=True, max_length=max_input_length, return_tensors="pt")
    input_ids = inputs.input_ids.to(torch_device)
    attention_mask = inputs.attention_mask.to(torch_device)

    outputs = model.generate(input_ids, attention_mask=attention_mask)
    # all special tokens including will be removed
    output_str = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    batch["pred"] = output_str
    return batch

In [30]:
results = small_test.map(generate_summary, batched=True, batch_size=batch_size, remove_columns=["article"])

  0%|          | 0/63 [00:00<?, ?ba/s]

In [31]:
results

Dataset({
    features: ['highlights', 'id', 'pred'],
    num_rows: 2000
})

In [32]:
rouge_score = metric.compute(predictions=results["pred"], references=results["highlights"], use_stemmer=True)
# Extract a few results
rouge_mean = {key: value.mid.fmeasure * 100 for key, value in rouge_score.items()}

In [33]:
rouge_mean

{'rouge1': 0.16997281521712082,
 'rouge2': 0.0,
 'rougeL': 0.16660270667901517,
 'rougeLsum': 0.1691854999415212}

In [38]:
#load fine-tuned model
!cp '/content/drive/My Drive/weights/bert2bert-cnn.zip' '/content'

In [39]:
!unzip bert2bert-cnn.zip

Archive:  bert2bert-cnn.zip
   creating: bert2bert-finetuned-cnn/checkpoint-35890/
  inflating: bert2bert-finetuned-cnn/checkpoint-35890/tokenizer_config.json  
  inflating: bert2bert-finetuned-cnn/checkpoint-35890/special_tokens_map.json  
  inflating: bert2bert-finetuned-cnn/checkpoint-35890/rng_state.pth  
  inflating: bert2bert-finetuned-cnn/checkpoint-35890/pytorch_model.bin  
  inflating: bert2bert-finetuned-cnn/checkpoint-35890/trainer_state.json  
  inflating: bert2bert-finetuned-cnn/checkpoint-35890/config.json  
  inflating: bert2bert-finetuned-cnn/checkpoint-35890/scaler.pt  
  inflating: bert2bert-finetuned-cnn/checkpoint-35890/vocab.txt  
  inflating: bert2bert-finetuned-cnn/checkpoint-35890/optimizer.pt  
  inflating: bert2bert-finetuned-cnn/checkpoint-35890/tokenizer.json  
  inflating: bert2bert-finetuned-cnn/checkpoint-35890/scheduler.pt  
  inflating: bert2bert-finetuned-cnn/checkpoint-35890/training_args.bin  


In [34]:
finetuned_model = EncoderDecoderModel.from_pretrained('bert2bert-finetuned-cnn/checkpoint-35890').to(torch_device)

In [35]:
# generate summary
batch_size = 8
def finetuned_summary(batch):
    sentences = [prefix + text for text in batch['article']]
    inputs = tokenizer(sentences, padding="max_length", truncation=True, max_length=max_input_length, return_tensors="pt")
    input_ids = inputs.input_ids.to(torch_device)
    attention_mask = inputs.attention_mask.to(torch_device)

    outputs = finetuned_model.generate(input_ids, attention_mask=attention_mask)
    # all special tokens including will be removed
    output_str = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    batch["pred"] = output_str
    return batch

In [36]:
finetuned_results = small_test.map(finetuned_summary, batched=True, batch_size=batch_size, remove_columns=["article"])

  0%|          | 0/250 [00:00<?, ?ba/s]

In [37]:
finetuned_score = metric.compute(predictions=finetuned_results["pred"], references=finetuned_results["highlights"], use_stemmer=True)
# Extract a few results
finetuned_rouge_mean = {key: value.mid.fmeasure * 100 for key, value in finetuned_score.items()}

In [38]:
rouge_mean

{'rouge1': 0.16997281521712082,
 'rouge2': 0.0,
 'rougeL': 0.16660270667901517,
 'rougeLsum': 0.1691854999415212}

In [39]:
finetuned_rouge_mean

{'rouge1': 35.92138610985789,
 'rouge2': 14.887069295211388,
 'rougeL': 24.396204081686655,
 'rougeLsum': 30.04363921901078}

In [40]:
for i in range(0, 15):
    print("Original Text: %s" % small_test[i]["article"])
    print("\nActual Summary: %s" % results[i]["highlights"])
    print("\nOriginal Prediction : %s" % results[i]['pred'])
    print("\nfinetuned Prediction : %s" % finetuned_results[i]["pred"])
    print("=====================================================================\n")

Original Text: (CNN)James Best, best known for his portrayal of bumbling sheriff Rosco P. Coltrane on TV's "The Dukes of Hazzard," died Monday after a brief illness. He was 88. Best died in hospice in Hickory, North Carolina, of complications from pneumonia, said Steve Latshaw, a longtime friend and Hollywood colleague. Although he'd been a busy actor for decades in theater and in Hollywood, Best didn't become famous until 1979, when "The Dukes of Hazzard's" cornpone charms began beaming into millions of American homes almost every Friday night. For seven seasons, Best's Rosco P. Coltrane chased the moonshine-running Duke boys back and forth across the back roads of fictitious Hazzard County, Georgia, although his "hot pursuit" usually ended with him crashing his patrol car. Although Rosco was slow-witted and corrupt, Best gave him a childlike enthusiasm that got laughs and made him endearing. His character became known for his distinctive "kew-kew-kew" chuckle and for goofy catchphras

##Wikihow

In [41]:
#load test dataset
test_wiki = datasets.load_dataset("wikihow", "all", "/content/drive/MyDrive/dataset", split='test')

Downloading builder script:   0%|          | 0.00/2.97k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

Using custom data configuration all-6f5101161f12f62f


Downloading and preparing dataset wikihow/all (download: 5.21 MiB, generated: 524.29 MiB, post-processed: Unknown size, total: 529.50 MiB) to /root/.cache/huggingface/datasets/wikihow/all-6f5101161f12f62f/1.2.0/5343fc81d685acaa086c9cc19eb8706206cd1f8b315792b04c1d7b92091c305e...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/1.27M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/75.2k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/75.2k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/157252 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/5599 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5577 [00:00<?, ? examples/s]

Dataset wikihow downloaded and prepared to /root/.cache/huggingface/datasets/wikihow/all-6f5101161f12f62f/1.2.0/5343fc81d685acaa086c9cc19eb8706206cd1f8b315792b04c1d7b92091c305e. Subsequent calls will reuse this data.


In [42]:
small_test = test_wiki.select(list(range(2000)))
small_test

Dataset({
    features: ['text', 'headline', 'title'],
    num_rows: 2000
})

In [43]:
# generate summary
batch_size = 32
max_input_length=512
max_target_length=128

def generate_summary(batch):
    sentences = [prefix + text for text in batch['text']]
    inputs = tokenizer(sentences, padding="max_length", truncation=True, max_length=max_input_length, return_tensors="pt")
    input_ids = inputs.input_ids.to(torch_device)
    attention_mask = inputs.attention_mask.to(torch_device)

    outputs = model.generate(input_ids, attention_mask=attention_mask)
    # all special tokens including will be removed
    output_str = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    batch["pred"] = output_str
    return batch

In [44]:
results = small_test.map(generate_summary, batched=True, batch_size=batch_size, remove_columns=["text"])

  0%|          | 0/63 [00:00<?, ?ba/s]

In [45]:
results

Dataset({
    features: ['headline', 'title', 'pred'],
    num_rows: 2000
})

In [46]:
rouge_score = metric.compute(predictions=results["pred"], references=results["headline"], use_stemmer=True)
# Extract a few results
rouge_mean = {key: value.mid.fmeasure * 100 for key, value in rouge_score.items()}

In [47]:
rouge_mean

{'rouge1': 0.009998420990017628,
 'rouge2': 0.0,
 'rougeL': 0.008009476575042336,
 'rougeLsum': 0.009464239749954037}

In [51]:
#load fine-tuned model
!cp '/content/drive/My Drive/weights/bert2bert-wikihow.zip' '/content'

In [52]:
!unzip bert2bert-wikihow.zip

Archive:  bert2bert-wikihow.zip
   creating: bert2bert-finetuned-wiki/checkpoint-19657/
  inflating: bert2bert-finetuned-wiki/checkpoint-19657/tokenizer_config.json  
  inflating: bert2bert-finetuned-wiki/checkpoint-19657/special_tokens_map.json  
  inflating: bert2bert-finetuned-wiki/checkpoint-19657/rng_state.pth  
  inflating: bert2bert-finetuned-wiki/checkpoint-19657/pytorch_model.bin  
  inflating: bert2bert-finetuned-wiki/checkpoint-19657/trainer_state.json  
  inflating: bert2bert-finetuned-wiki/checkpoint-19657/config.json  
  inflating: bert2bert-finetuned-wiki/checkpoint-19657/scaler.pt  
  inflating: bert2bert-finetuned-wiki/checkpoint-19657/vocab.txt  
  inflating: bert2bert-finetuned-wiki/checkpoint-19657/optimizer.pt  
  inflating: bert2bert-finetuned-wiki/checkpoint-19657/tokenizer.json  
  inflating: bert2bert-finetuned-wiki/checkpoint-19657/scheduler.pt  
  inflating: bert2bert-finetuned-wiki/checkpoint-19657/training_args.bin  


In [53]:
finetuned_model = EncoderDecoderModel.from_pretrained('bert2bert-finetuned-wiki/checkpoint-19657/').to(torch_device)

In [54]:
# generate summary
batch_size = 8
def finetuned_summary(batch):
    sentences = [prefix + text for text in batch['text']]
    inputs = tokenizer(sentences, padding="max_length", truncation=True, max_length=max_input_length, return_tensors="pt")
    input_ids = inputs.input_ids.to(torch_device)
    attention_mask = inputs.attention_mask.to(torch_device)

    outputs = finetuned_model.generate(input_ids, attention_mask=attention_mask)
    # all special tokens including will be removed
    output_str = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    batch["pred"] = output_str
    return batch

In [55]:
finetuned_results = small_test.map(finetuned_summary, batched=True, batch_size=batch_size, remove_columns=["text"])

  0%|          | 0/250 [00:00<?, ?ba/s]

In [56]:
finetuned_score = metric.compute(predictions=finetuned_results["pred"], references=finetuned_results["headline"], use_stemmer=True)
# Extract a few results
finetuned_rouge_mean = {key: value.mid.fmeasure * 100 for key, value in finetuned_score.items()}

In [57]:
rouge_mean

{'rouge1': 0.009998420990017628,
 'rouge2': 0.0,
 'rougeL': 0.008009476575042336,
 'rougeLsum': 0.009464239749954037}

In [58]:
finetuned_rouge_mean

{'rouge1': 29.70853728033289,
 'rouge2': 9.226757974679831,
 'rougeL': 21.0225797056748,
 'rougeLsum': 26.815993171309337}

In [59]:
for i in range(20, 30):
    print("Original Text: %s" % small_test[i]['text'])
    print("\nActual Summary: %s" % results[i]['headline'])
    print("\nOriginal Prediction : %s" % results[i]['pred'])
    print("\nfinetuned Prediction : %s" % finetuned_results[i]["pred"])
    print("=====================================================================\n")

Original Text: In general, dwarf hamsters have thickset bodies, large cheek pouches, and short tails. Make sure it does not have any nasal or eye discharge, nor any other signs of illness. Russian dwarf hamsters are particularly prone to diabetes. A hamster suffering from this condition will drink a lot of water and urinate more frequently than other dwarf hamsters.Be sure to consult your veterinarian if you suspect that your hamster has diabetes.
 A recently weaned or an extremely stressed out dwarf hamster may come down with a disease called “wet tail.” Your hamster experiences diarrhea — the excessive moisture from this causes its tail to become literally wet. Consult a veterinarian for a proper diagnosis and treatment.


Tyzzer’s disease causes diarrhea in young or stressed hamsters. This is a disease that needs veterinarian treatment. Certain antibiotics can cause and exacerbate this condition, so don’t treat your hamster on your own., Like dogs and cats, dwarf hamsters can suffer