# **Summarization: Summarizing news articles with the help of transformers summarization**

In [1]:
!pip install transformers datasets sacrebleu rouge_score py7zr -q
!pip install nltk rouge_score



### Import all necessary libraries

In [12]:
import gradio as gr
from transformers import pipeline

import matplotlib.pyplot as plt

from datasets import load_metric

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

import nltk
from nltk.tokenize import sent_tokenize

import pandas as pd



from rouge_score import rouge_scorer


### Reading the CSV file with the news articles needed for this demonstration

In [24]:
#Read comment in next column!!!

df= pd.read_csv("Summarization_Training.csv", sep=";")
df_test= pd.read_csv("Summarization_Test.csv", sep=";")



### Randomly selecting 30 articles and adding them to a list for later selection

In [25]:
#Randomly select 15 articles from the CSV-file
random_rows = df.sample(n=30, random_state=42)

#Create a list of the 15 articles
highlights = random_rows['highlights'].to_list()
print(highlights)

['Seven demonstrators were arrested for trying to enter the parade path on Sixth Avenue .\nThe hashtag #StopTheParade was used\xa0on Twitter Wednesday night to organize the protest .\nAbout 50 demonstrators met outside New York Public Library on Fifth Avenue before the parade .\nThousands of people spent the night blocking traffic around the city and chanting slogans .\nLaw enforcement sources told The Post a lack of arrests during other protests this week inspired the movement .', 'A monkey and gorilla walk into Dublin zoo - and become the best of mates .\nThe red-capped African mangabey only recently arrived at the zoo .\nBut before long, the pair appeared to be hugging and having a chit chat .', 'Clashes continue in Venezuela .\nPresident Nicolas Maduro says he has a peace plan .\nThree people have been killed in Caracas violence .', 'Time is 5:20 at St Michael and All Angels Church in West Yorkshire .\nHealth and safety ban has been imposed on winding Victorian clock .\nRetired pol

### Initializing the summarization pipeline

In [15]:
#Initialize the summarization pipeline
sum_bart = pipeline("summarization", model="facebook/bart-large-cnn", truncation=True)
sum_pegasus =  pipeline("summarization", model="google/pegasus-cnn_dailymail",truncation=True)
sum_distilbart =  pipeline("summarization", model="sshleifer/distilbart-cnn-12-6",truncation=True )

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Calculating Rouge Scores

In [16]:
text = df["article"][0]
df["highlights"][0]

'John and .\nAudrey Cook were discovered alongside their daughter, Maureen .\nThey were found at Tremarle Home Park in Cornwall .\nInvestigators say the three died of carbon monoxide .\npoisoning .'

In [17]:
pipe_out = sum_distilbart(text)
pipe_out[0]["summary_text"]

" Bodies of John and Audrey Cook were discovered alongside their daughter, Maureen . Investigators say the three died along with the family's pet dog, of carbon monoxide poisoning from a cooker . The inquests have now opened into the deaths last Saturday in Camborne, Cornwall . It is also believed there was no working carbon . monoxide detector in the static caravan ."

In [19]:
    reference_summary = df["highlights"][0]  # Gerçek bir referans özeti ekleyin
    generated_summary = pipe_out[0]["summary_text"]
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = scorer.score(reference_summary, generated_summary)
    print("ROUGE-1 F1 Score:", scores['rouge1'].fmeasure)
    print("ROUGE-2 F1 Score:", scores['rouge2'].fmeasure)
    print("ROUGE-L F1 Score:", scores['rougeL'].fmeasure)

ROUGE-1 F1 Score: 0.4827586206896552
ROUGE-2 F1 Score: 0.3764705882352941
ROUGE-L F1 Score: 0.43678160919540227


### Defining the news_summarizer function which is the core component of this code

In [38]:
#Define the summarization function
def news_summarizer(article, target_len,summarizer):
    #Dictionary with the possible summarization lengths
    size_mapper= {'Short' :50, "Medium" :100, "Long" :200}
    summarizer_mapper={"Model-Bart": sum_bart,"Model-Pegasus": sum_pegasus,"Model-distilbart":sum_distilbart}
    target = target_len
    #Find the index of the article in the dataframe
    index = df[df['highlights']== article].index[0]
    #Summarize the article at the given index with the desired length
    article_at_i = df["article"][index]
    result = summarizer_mapper[summarizer](article_at_i, max_length=size_mapper[target_len])
    
    result[0]["summary_text"] = result[0]["summary_text"].replace(" .<n>",".")
    
    reference_summary = df["highlights"][index]  # Gerçek bir referans özeti ekleyin
    generated_summary = result[0]["summary_text"]
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = scorer.score(reference_summary, generated_summary)
    #Return the summary
    return  df["highlights"][index],result[0]['summary_text'],"ROUGE-1 F1 Score: {}\n ROUGE-2 F1 Score: {}\n ROUGE-L F1 Score: {}\n ".format (scores["rouge1"].fmeasure, scores["rouge2"].fmeasure,scores["rougeL"].fmeasure)

### Finally defining the Gradio UI to use our code, and launching it

In [39]:
#Create the interface
radio = gr.Radio(['Short', 'Medium', 'Long'], label='Choose your desired summarization length')
dropdown = gr.Dropdown(highlights, label='Choose an article you would like to have summarized')
radio_summarizer =gr.Radio(["Model-Bart","Model-Pegasus","Model-distilbart"] ,label="Choose your desired model")


demo = gr.Interface(
    fn=news_summarizer,
    inputs=[ dropdown, radio,radio_summarizer],
    outputs=["text","text","text",],
    title="News article summarizer"
    )

#Launch the interface
demo.launch()

Running on local URL:  http://127.0.0.1:7869

To create a public link, set `share=True` in `launch()`.




Your min_length=56 must be inferior than your max_length=50.
Your min_length=56 must be inferior than your max_length=50.


## **The entire code in one piece:**

In [None]:
!pip install transformers datasets sacrebleu rouge_score py7zr -q
!pip install nltk rouge_score
import gradio as gr
from transformers import pipeline
import matplotlib.pyplot as plt
from datasets import load_metric
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import nltk
from nltk.tokenize import sent_tokenize
import pandas as pd
from rouge_score import rouge_scorer

#Read comment in next column!!!
df= pd.read_csv("Summarization_Training.csv", sep=";")

#Randomly select 15 articles from the CSV-file
random_rows = df.sample(n=15, random_state=42)

#Create a list of the 15 articles
highlights = random_rows['highlights'].to_list()



#Initialize the summarization pipeline
sum_bart = pipeline("summarization", model="facebook/bart-large-cnn", truncation=True)
sum_pegasus =  pipeline("summarization", model="google/pegasus-cnn_dailymail",truncation=True)
sum_distilbart =  pipeline("summarization", model="sshleifer/distilbart-cnn-12-6",truncation=True )

#Define the summarization function
def news_summarizer(article, target_len,summarizer):
    #Dictionary with the possible summarization lengths
    size_mapper= {'Short' :50, "Medium" :100, "Long" :200}
    #Dictionary with the possible Models
    summarizer_mapper={"Model-Bart": sum_bart,"Model-Pegasus": sum_pegasus,"Model-distilbart":sum_distilbart}
    target = target_len
    #Find the index of the article in the dataframe
    index = df[df['highlights']== article].index[0]
    #Summarize the article at the given index with the desired length
    article_at_i = df["article"][index]

    result = summarizer_mapper[summarizer](article_at_i, max_length=size_mapper[target_len])
    
    result[0]["summary_text"] = result[0]["summary_text"].replace(" .<n>",".")
    #calculating rouge score
    reference_summary = df["highlights"][index]  
    generated_summary = result[0]["summary_text"]
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = scorer.score(reference_summary, generated_summary)
    #Return the summary and rouge metrics
    return result[0]['summary_text'],"ROUGE-1 F1 Score: {}\n ROUGE-2 F1 Score: {}\n ROUGE-L F1 Score: {}\n ".format (scores["rouge1"].fmeasure, scores["rouge2"].fmeasure,scores["rougeL"].fmeasure)




#Create the interface
radio = gr.Radio(['Short', 'Medium', 'Long'], label='Choose your desired summarization length')
dropdown = gr.Dropdown(highlights, label='Choose an article you would like to have summarized')
radio_summarizer =gr.Radio(["Model-Bart","Model-Pegasus","Model-distilbart"] ,label="Choose your desired model")


demo = gr.Interface(
    fn=news_summarizer,
    inputs=[ dropdown, radio,radio_summarizer],
    outputs=["text","text"],
    title="News article summarizer"
    )

#Launch the interface
demo.launch()