In [1]:
%%capture
!pip install datasets rouge-score nltk transformers

In [2]:
# Make sure your version of Transformers is at 4.11.0
# to run the following code correctly:
import datasets
import transformers
import torch

In [6]:
torch_device ='cuda'
#torch_device = 'cpu' (run this line if no gpu attached)

In [4]:
#load original model
from transformers import BartTokenizer, BartForConditionalGeneration

In [5]:
model = BartForConditionalGeneration.from_pretrained('facebook/bart-base').to(torch_device)
tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')

Downloading:   0%|          | 0.00/1.68k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/532M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

##XSUM

In [3]:
#load test dataset
test_dataset = datasets.load_dataset('xsum', split='test')

Downloading:   0%|          | 0.00/2.05k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/954 [00:00<?, ?B/s]

Using custom data configuration default


Downloading and preparing dataset xsum/default (download: 245.38 MiB, generated: 507.60 MiB, post-processed: Unknown size, total: 752.98 MiB) to /root/.cache/huggingface/datasets/xsum/default/1.2.0/32c23220eadddb1149b16ed2e9430a05293768cfffbdfd151058697d4c11f934...


  0%|          | 0/2 [00:00<?, ?it/s]

Downloading:   0%|          | 0.00/255M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.00M [00:00<?, ?B/s]

0 examples [00:00, ? examples/s]

0 examples [00:00, ? examples/s]

0 examples [00:00, ? examples/s]

Dataset xsum downloaded and prepared to /root/.cache/huggingface/datasets/xsum/default/1.2.0/32c23220eadddb1149b16ed2e9430a05293768cfffbdfd151058697d4c11f934. Subsequent calls will reuse this data.


Dataset({
    features: ['document', 'summary', 'id'],
    num_rows: 11334
})

In [7]:
small_test = test_dataset.select(list(range(2000)))
small_test

Dataset({
    features: ['document', 'summary', 'id'],
    num_rows: 2000
})

In [9]:
# generate summary
batch_size = 64
max_input_length=1024
max_target_length=128

def generate_summary(batch):
    inputs = tokenizer(batch["document"], padding="max_length", truncation=True, max_length=max_input_length, return_tensors="pt")
    input_ids = inputs.input_ids.to(torch_device)
    attention_mask = inputs.attention_mask.to(torch_device)

    outputs = model.generate(input_ids, attention_mask=attention_mask)
    # all special tokens including will be removed
    output_str = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    batch["pred"] = output_str
    return batch

In [10]:
results = small_test.map(generate_summary, batched=True, batch_size=batch_size, remove_columns=["document"])

  0%|          | 0/32 [00:00<?, ?ba/s]

In [11]:
results

Dataset({
    features: ['summary', 'id', 'pred'],
    num_rows: 2000
})

In [13]:
metric = datasets.load_metric("rouge")
rouge_score = metric.compute(predictions=results["pred"], references=results["summary"], use_stemmer=True)
# Extract a few results
rouge_mean = {key: value.mid.fmeasure * 100 for key, value in rouge_score.items()}

In [14]:
rouge_mean

{'rouge1': 14.62414328638449,
 'rouge2': 1.4515544752033445,
 'rougeL': 11.44741810587041,
 'rougeLsum': 11.449200779698582}

In [21]:
#load fine-tuned model
!cp '/content/drive/My Drive/weights/bart-base-finetuned-xsum.zip' '/content'

In [22]:
!unzip bart-base-finetuned-xsum.zip

Archive:  bart-base-finetuned-xsum.zip
   creating: content/bart-base-finetuned-xsum/checkpoint-50000/
  inflating: content/bart-base-finetuned-xsum/checkpoint-50000/training_args.bin  
  inflating: content/bart-base-finetuned-xsum/checkpoint-50000/pytorch_model.bin  
  inflating: content/bart-base-finetuned-xsum/checkpoint-50000/tokenizer.json  
  inflating: content/bart-base-finetuned-xsum/checkpoint-50000/tokenizer_config.json  
  inflating: content/bart-base-finetuned-xsum/checkpoint-50000/rng_state.pth  
  inflating: content/bart-base-finetuned-xsum/checkpoint-50000/special_tokens_map.json  
  inflating: content/bart-base-finetuned-xsum/checkpoint-50000/optimizer.pt  
  inflating: content/bart-base-finetuned-xsum/checkpoint-50000/scaler.pt  
  inflating: content/bart-base-finetuned-xsum/checkpoint-50000/scheduler.pt  
  inflating: content/bart-base-finetuned-xsum/checkpoint-50000/config.json  
  inflating: content/bart-base-finetuned-xsum/checkpoint-50000/vocab.json  
  inflating:

In [23]:
!ls content/bart-base-finetuned-xsum/

checkpoint-50000


In [50]:
finetuned_model = BartForConditionalGeneration.from_pretrained('content/bart-base-finetuned-xsum/checkpoint-50000').to(torch_device)

In [48]:
# generate summary

def finetuned_summary(batch):
    inputs = tokenizer(batch["document"], padding="max_length", truncation=True, max_length=max_input_length, return_tensors="pt")
    input_ids = inputs.input_ids.to(torch_device)
    attention_mask = inputs.attention_mask.to(torch_device)

    outputs = finetuned_model.generate(input_ids, attention_mask=attention_mask)
    # all special tokens including will be removed
    output_str = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    batch["pred"] = output_str
    return batch

In [51]:
finetuned_results = small_test.map(finetuned_summary, batched=True, batch_size=batch_size, remove_columns=["document"])

  0%|          | 0/32 [00:00<?, ?ba/s]

In [53]:
finetuned_score = metric.compute(predictions=finetuned_results["pred"], references=finetuned_results["summary"], use_stemmer=True)
# Extract a few results
finetuned_rouge_mean = {key: value.mid.fmeasure * 100 for key, value in finetuned_score.items()}

In [56]:
rouge_mean

{'rouge1': 14.62414328638449,
 'rouge2': 1.4515544752033445,
 'rougeL': 11.44741810587041,
 'rougeLsum': 11.449200779698582}

In [55]:
finetuned_rouge_mean

{'rouge1': 37.087308264359066,
 'rouge2': 15.985963309791218,
 'rougeL': 30.603897089190102,
 'rougeLsum': 30.587630970580626}

In [61]:
for i in range(20, 30):
    print("Original Text: %s" % small_test[i]['document'])
    print("\nActual Summary: %s" % results[i]['summary'])
    print("\nOriginal Prediction : %s" % results[i]['pred'])
    print("\nfinetuned Prediction : %s" % finetuned_results[i]["pred"])
    print("=====================================================================\n")

Original Text: Pakistan's telecoms regulator said the ban was no longer necessary because Google, which owns YouTube, had now launched a Pakistan-specific version.
YouTube has denied claims that the authorities can filter content.
Many young Pakistanis have welcomed the lifting of the ban but some activists want details of the deal with Google.
They say there should be greater transparency of the terms agreed between Google and the government.
A Pakistan Telecommunication Authority (PTA) official confirmed to the BBC that all internet service providers had been directed to open access to YouTube.
The Pakistan Telecommunication Company Ltd posted on its Facebook page on Monday: "Welcome Back YouTube".
Pakistan's ministry of information technology said: "Google has provided an online web process through which requests for blocking access of offending material can be made by the PTA to Google directly.
"Google/YouTube will accordingly restrict access to the said offending material for use

##CNN

In [59]:
#load test dataset
test_cnn = datasets.load_dataset('cnn_dailymail', '3.0.0', split='test')

Downloading:   0%|          | 0.00/3.51k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.61k [00:00<?, ?B/s]

Downloading and preparing dataset cnn_dailymail/3.0.0 (download: 558.32 MiB, generated: 1.28 GiB, post-processed: Unknown size, total: 1.82 GiB) to /root/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/3cb851bf7cf5826e45d49db2863f627cba583cbc32342df7349dfe6c38060234...


  0%|          | 0/5 [00:00<?, ?it/s]

Downloading:   0%|          | 0.00/159M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/376M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/572k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/12.3M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/661k [00:00<?, ?B/s]

  0%|          | 0/5 [00:00<?, ?it/s]

0 examples [00:00, ? examples/s]

0 examples [00:00, ? examples/s]

0 examples [00:00, ? examples/s]

Dataset cnn_dailymail downloaded and prepared to /root/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/3cb851bf7cf5826e45d49db2863f627cba583cbc32342df7349dfe6c38060234. Subsequent calls will reuse this data.


In [62]:
small_test = test_cnn.select(list(range(2000)))
small_test

Dataset({
    features: ['article', 'highlights', 'id'],
    num_rows: 2000
})

In [63]:
# generate summary
batch_size = 64
max_input_length=1024
max_target_length=128

def generate_summary(batch):
    inputs = tokenizer(batch["article"], padding="max_length", truncation=True, max_length=max_input_length, return_tensors="pt")
    input_ids = inputs.input_ids.to(torch_device)
    attention_mask = inputs.attention_mask.to(torch_device)

    outputs = model.generate(input_ids, attention_mask=attention_mask)
    # all special tokens including will be removed
    output_str = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    batch["pred"] = output_str
    return batch

In [64]:
results = small_test.map(generate_summary, batched=True, batch_size=batch_size, remove_columns=["article"])

  0%|          | 0/32 [00:00<?, ?ba/s]

In [65]:
results

Dataset({
    features: ['highlights', 'id', 'pred'],
    num_rows: 2000
})

In [66]:
rouge_score = metric.compute(predictions=results["pred"], references=results["highlights"], use_stemmer=True)
# Extract a few results
rouge_mean = {key: value.mid.fmeasure * 100 for key, value in rouge_score.items()}

In [67]:
rouge_mean

{'rouge1': 18.79545887515891,
 'rouge2': 5.993962777104293,
 'rougeL': 14.59186726686926,
 'rougeLsum': 16.829598898040754}

In [None]:
#load fine-tuned model
!cp '/content/drive/My Drive/weights/bart-base-finetuned-cnn.zip' '/content'

In [68]:
!ls content/bart-base-finetuned-cnn/

checkpoint-70000


In [69]:
finetuned_model = BartForConditionalGeneration.from_pretrained('content/bart-base-finetuned-cnn/checkpoint-70000').to(torch_device)

In [71]:
# generate summary

def finetuned_summary(batch):
    inputs = tokenizer(batch["article"], padding="max_length", truncation=True, max_length=max_input_length, return_tensors="pt")
    input_ids = inputs.input_ids.to(torch_device)
    attention_mask = inputs.attention_mask.to(torch_device)

    outputs = finetuned_model.generate(input_ids, attention_mask=attention_mask)
    # all special tokens including will be removed
    output_str = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    batch["pred"] = output_str
    return batch

In [72]:
finetuned_results = small_test.map(finetuned_summary, batched=True, batch_size=batch_size, remove_columns=["article"])

  0%|          | 0/32 [00:00<?, ?ba/s]

In [73]:
finetuned_score = metric.compute(predictions=finetuned_results["pred"], references=finetuned_results["highlights"], use_stemmer=True)
# Extract a few results
finetuned_rouge_mean = {key: value.mid.fmeasure * 100 for key, value in finetuned_score.items()}

In [74]:
rouge_mean

{'rouge1': 18.79545887515891,
 'rouge2': 5.993962777104293,
 'rougeL': 14.59186726686926,
 'rougeLsum': 16.829598898040754}

In [75]:
finetuned_rouge_mean

{'rouge1': 25.18524875514095,
 'rouge2': 11.749750296991015,
 'rougeL': 20.917009625620153,
 'rougeLsum': 23.44655136901535}

In [78]:
for i in range(0, 15):
    print("Original Text: %s" % small_test[i]["article"])
    print("\nActual Summary: %s" % results[i]["highlights"])
    print("\nOriginal Prediction : %s" % results[i]['pred'])
    print("\nfinetuned Prediction : %s" % finetuned_results[i]["pred"])
    print("=====================================================================\n")

Original Text: (CNN)James Best, best known for his portrayal of bumbling sheriff Rosco P. Coltrane on TV's "The Dukes of Hazzard," died Monday after a brief illness. He was 88. Best died in hospice in Hickory, North Carolina, of complications from pneumonia, said Steve Latshaw, a longtime friend and Hollywood colleague. Although he'd been a busy actor for decades in theater and in Hollywood, Best didn't become famous until 1979, when "The Dukes of Hazzard's" cornpone charms began beaming into millions of American homes almost every Friday night. For seven seasons, Best's Rosco P. Coltrane chased the moonshine-running Duke boys back and forth across the back roads of fictitious Hazzard County, Georgia, although his "hot pursuit" usually ended with him crashing his patrol car. Although Rosco was slow-witted and corrupt, Best gave him a childlike enthusiasm that got laughs and made him endearing. His character became known for his distinctive "kew-kew-kew" chuckle and for goofy catchphras

##Wikihow

In [79]:
#load test dataset
test_wiki = datasets.load_dataset("wikihow", "all", "/content/drive/MyDrive/dataset", split='test')

Downloading:   0%|          | 0.00/2.97k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

Using custom data configuration all-6f5101161f12f62f


Downloading and preparing dataset wikihow/all (download: 5.21 MiB, generated: 524.29 MiB, post-processed: Unknown size, total: 529.50 MiB) to /root/.cache/huggingface/datasets/wikihow/all-6f5101161f12f62f/1.2.0/5343fc81d685acaa086c9cc19eb8706206cd1f8b315792b04c1d7b92091c305e...


  0%|          | 0/3 [00:00<?, ?it/s]

Downloading:   0%|          | 0.00/1.27M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/75.2k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/75.2k [00:00<?, ?B/s]

  0%|          | 0/3 [00:00<?, ?it/s]

0 examples [00:00, ? examples/s]

0 examples [00:00, ? examples/s]

0 examples [00:00, ? examples/s]

Dataset wikihow downloaded and prepared to /root/.cache/huggingface/datasets/wikihow/all-6f5101161f12f62f/1.2.0/5343fc81d685acaa086c9cc19eb8706206cd1f8b315792b04c1d7b92091c305e. Subsequent calls will reuse this data.


In [81]:
small_test = test_wiki.select(list(range(2000)))
small_test

Dataset({
    features: ['text', 'headline', 'title'],
    num_rows: 2000
})

In [82]:
# generate summary
batch_size = 64
max_input_length=1024
max_target_length=128

def generate_summary(batch):
    inputs = tokenizer(batch["text"], padding="max_length", truncation=True, max_length=max_input_length, return_tensors="pt")
    input_ids = inputs.input_ids.to(torch_device)
    attention_mask = inputs.attention_mask.to(torch_device)

    outputs = model.generate(input_ids, attention_mask=attention_mask)
    # all special tokens including will be removed
    output_str = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    batch["pred"] = output_str
    return batch

In [83]:
results = small_test.map(generate_summary, batched=True, batch_size=batch_size, remove_columns=["text"])

  0%|          | 0/32 [00:00<?, ?ba/s]

In [84]:
results

Dataset({
    features: ['headline', 'title', 'pred'],
    num_rows: 2000
})

In [85]:
rouge_score = metric.compute(predictions=results["pred"], references=results["headline"], use_stemmer=True)
# Extract a few results
rouge_mean = {key: value.mid.fmeasure * 100 for key, value in rouge_score.items()}

In [86]:
rouge_mean

{'rouge1': 15.751977306061498,
 'rouge2': 3.1952431535948698,
 'rougeL': 12.0711709329583,
 'rougeLsum': 14.391696891844092}

In [87]:
#load fine-tuned model
!cp '/content/drive/My Drive/weights/bart-base-finetuned-wiki.zip' '/content'

In [88]:
!unzip bart-base-finetuned-wiki.zip

Archive:  bart-base-finetuned-wiki.zip
   creating: content/bart-base-finetuned-xsum/checkpoint-39000/
  inflating: content/bart-base-finetuned-xsum/checkpoint-39000/special_tokens_map.json  
  inflating: content/bart-base-finetuned-xsum/checkpoint-39000/vocab.json  
  inflating: content/bart-base-finetuned-xsum/checkpoint-39000/training_args.bin  
  inflating: content/bart-base-finetuned-xsum/checkpoint-39000/optimizer.pt  
  inflating: content/bart-base-finetuned-xsum/checkpoint-39000/config.json  
  inflating: content/bart-base-finetuned-xsum/checkpoint-39000/tokenizer.json  
  inflating: content/bart-base-finetuned-xsum/checkpoint-39000/tokenizer_config.json  
  inflating: content/bart-base-finetuned-xsum/checkpoint-39000/scheduler.pt  
  inflating: content/bart-base-finetuned-xsum/checkpoint-39000/trainer_state.json  
  inflating: content/bart-base-finetuned-xsum/checkpoint-39000/pytorch_model.bin  
  inflating: content/bart-base-finetuned-xsum/checkpoint-39000/rng_state.pth  
  i

In [91]:
!ls content/bart-base-finetuned-xsum/

checkpoint-39000  checkpoint-50000


In [92]:
finetuned_model = BartForConditionalGeneration.from_pretrained('content/bart-base-finetuned-xsum/checkpoint-39000').to(torch_device)

In [93]:
# generate summary

def finetuned_summary(batch):
    inputs = tokenizer(batch["text"], padding="max_length", truncation=True, max_length=max_input_length, return_tensors="pt")
    input_ids = inputs.input_ids.to(torch_device)
    attention_mask = inputs.attention_mask.to(torch_device)

    outputs = finetuned_model.generate(input_ids, attention_mask=attention_mask)
    # all special tokens including will be removed
    output_str = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    batch["pred"] = output_str
    return batch

In [94]:
finetuned_results = small_test.map(finetuned_summary, batched=True, batch_size=batch_size, remove_columns=["text"])

  0%|          | 0/32 [00:00<?, ?ba/s]

In [95]:
finetuned_score = metric.compute(predictions=finetuned_results["pred"], references=finetuned_results["headline"], use_stemmer=True)
# Extract a few results
finetuned_rouge_mean = {key: value.mid.fmeasure * 100 for key, value in finetuned_score.items()}

In [96]:
rouge_mean

{'rouge1': 15.751977306061498,
 'rouge2': 3.1952431535948698,
 'rougeL': 12.0711709329583,
 'rougeLsum': 14.391696891844092}

In [97]:
finetuned_rouge_mean

{'rouge1': 25.60128280408085,
 'rouge2': 10.591335967201598,
 'rougeL': 21.726969404451882,
 'rougeLsum': 25.02461838299319}

In [98]:
for i in range(20, 30):
    print("Original Text: %s" % small_test[i]['text'])
    print("\nActual Summary: %s" % results[i]['headline'])
    print("\nOriginal Prediction : %s" % results[i]['pred'])
    print("\nfinetuned Prediction : %s" % finetuned_results[i]["pred"])
    print("=====================================================================\n")

Original Text: In general, dwarf hamsters have thickset bodies, large cheek pouches, and short tails. Make sure it does not have any nasal or eye discharge, nor any other signs of illness. Russian dwarf hamsters are particularly prone to diabetes. A hamster suffering from this condition will drink a lot of water and urinate more frequently than other dwarf hamsters.Be sure to consult your veterinarian if you suspect that your hamster has diabetes.
 A recently weaned or an extremely stressed out dwarf hamster may come down with a disease called “wet tail.” Your hamster experiences diarrhea — the excessive moisture from this causes its tail to become literally wet. Consult a veterinarian for a proper diagnosis and treatment.


Tyzzer’s disease causes diarrhea in young or stressed hamsters. This is a disease that needs veterinarian treatment. Certain antibiotics can cause and exacerbate this condition, so don’t treat your hamster on your own., Like dogs and cats, dwarf hamsters can suffer