## Smart Summarizer Using an Abstractive Method: BART

### Install Libraries

In [2]:
!pip install tensorflow==2.11
!pip install rouge
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow==2.11
  Downloading tensorflow-2.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (588.3 MB)
[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━[0m [32m535.8/588.3 MB[0m [31m103.5 MB/s[0m eta [36m0:00:01[0m
[?25h[31mERROR: Operation cancelled by user[0m[31m
[0mLooking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting contractions
  Downloading contractions-0.1.73-py2.py3-none-any.whl (8.7 kB)
Collecting textsearch>=0.0.21
  Downloading textsearch-0.0.24-py2.py3-none-any.whl (7.6 kB)
Collecting anyascii
  Downloading anyascii-0.3.2-py3-none

In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from rouge import Rouge
rouge = Rouge()
import transformers   
from transformers import BartTokenizer, BartForConditionalGeneration

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


### Data Reading

In [5]:
# Read the test split, capped at the first 100,000 rows.
# NOTE(review): the path presumably relies on Google Drive being mounted at
# /content/drive — the mount step is not visible here; confirm it runs first.
test = pd.read_csv(
    "/content/drive/MyDrive/data/test.csv",
    nrows=100000,
)

### BART Implementation

In [6]:
# Fetch the pre-trained tokenizer and the matching conditional-generation
# model from the same 'facebook/bart-large-cnn' checkpoint.
tokenizer = BartTokenizer.from_pretrained(
    'facebook/bart-large-cnn'
)
bart_model = BartForConditionalGeneration.from_pretrained(
    'facebook/bart-large-cnn'
)

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

In [23]:
def generate_summary(article, max_input_tokens=1024):
    """Produce an abstractive summary of ``article`` with the notebook's BART model.

    Uses the module-level ``tokenizer`` and ``bart_model``.

    Args:
        article: The text to summarize.
        max_input_tokens: Maximum number of tokens passed to the model. Longer
            articles are truncated to this length. The default of 1024 matches
            the input-position limit of the 'facebook/bart-large-cnn' checkpoint.

    Returns:
        The decoded summary string with special tokens removed.
    """
    # Truncate explicitly: without it, an article that tokenizes to more
    # positions than the model supports makes generate() fail with an
    # out-of-range position index.
    input_ids = tokenizer.encode(
        article,
        return_tensors='pt',
        truncation=True,
        max_length=max_input_tokens,
    )
    output = bart_model.generate(
        input_ids,
        max_length=100,          # cap on generated summary length, in tokens
        num_beams=4,             # beam search width
        no_repeat_ngram_size=3,  # forbid repeating any 3-gram in the output
        early_stopping=True
    )
    # generate() returns a batch; only one article was passed, so take row 0.
    bart_summary = tokenizer.decode(output[0], skip_special_tokens=True)
    return bart_summary

# Summarize the first three test articles and print each one's ROUGE scores
# against its reference highlights.
rouge = Rouge()
for i in range(3):
    article = test.loc[i, 'article']
    reference_summary = test.loc[i, 'highlights']
    generated_summary = generate_summary(article)
    # get_scores is called with the generated text first, then the reference.
    scores = rouge.get_scores(generated_summary, reference_summary)
    print('Article', i + 1, 'ROUGE Scores:')
    # The extra newline leaves a blank line between consecutive articles.
    print(scores, end='\n\n')

Article 1 ROUGE Scores:
[{'rouge-1': {'r': 0.36363636363636365, 'p': 0.25, 'f': 0.2962962914677641}, 'rouge-2': {'r': 0.15151515151515152, 'p': 0.09090909090909091, 'f': 0.11363635894886383}, 'rouge-l': {'r': 0.36363636363636365, 'p': 0.25, 'f': 0.2962962914677641}}]

Article 2 ROUGE Scores:
[{'rouge-1': {'r': 0.5588235294117647, 'p': 0.4418604651162791, 'f': 0.4935064885748019}, 'rouge-2': {'r': 0.3142857142857143, 'p': 0.2391304347826087, 'f': 0.2716049333638166}, 'rouge-l': {'r': 0.5294117647058824, 'p': 0.4186046511627907, 'f': 0.46753246260077597}}]

Article 3 ROUGE Scores:
[{'rouge-1': {'r': 0.42857142857142855, 'p': 0.32432432432432434, 'f': 0.3692307643266272}, 'rouge-2': {'r': 0.16129032258064516, 'p': 0.11904761904761904, 'f': 0.13698629648339292}, 'rouge-l': {'r': 0.39285714285714285, 'p': 0.2972972972972973, 'f': 0.3384615335573965}}]

